2 # Copyright 2020-2023 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 #========================================================================
11 # Written by Xiaokang Qian <xiaokang.qian@arm.com> for the OpenSSL project,
12 # derived from https://github.com/ARM-software/AArch64cryptolib, original
13 # author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual
14 # licensed under OpenSSL and SPDX BSD-3-Clause licenses depending on where you
16 #========================================================================
# Approach - We want to reload constants, as we have plenty of spare ASIMD slots around the crypto units for loading.
# The main loop is unrolled x8: each iteration acts on 8 16B blocks and then does the modulo reduction of the
# accumulated intermediate hashes from the 8 blocks.
22 # ____________________________________________________
25 # |____________________________________________________|
27 # | CTR block 8k+13| AES block 8k+8 | GHASH block 8k+0 |
28 # |________________|________________|__________________|
30 # | CTR block 8k+14| AES block 8k+9 | GHASH block 8k+1 |
31 # |________________|________________|__________________|
33 # | CTR block 8k+15| AES block 8k+10| GHASH block 8k+2 |
34 # |________________|________________|__________________|
36 # | CTR block 8k+16| AES block 8k+11| GHASH block 8k+3 |
37 # |________________|________________|__________________|
39 # | CTR block 8k+17| AES block 8k+12| GHASH block 8k+4 |
40 # |________________|________________|__________________|
42 # | CTR block 8k+18| AES block 8k+13| GHASH block 8k+5 |
43 # |________________|________________|__________________|
45 # | CTR block 8k+19| AES block 8k+14| GHASH block 8k+6 |
46 # |________________|________________|__________________|
48 # | CTR block 8k+20| AES block 8k+15| GHASH block 8k+7 |
49 # |________________|____(mostly)____|__________________|
52 # |____________________________________________________|
# Ensure previous generated intermediate hash is aligned and merged with result for GHASH 8k+0
# EXT low_acc, low_acc, low_acc, #8
# EOR res_curr (8k+0), res_curr (8k+0), low_acc
60 # Increment and byte reverse counter in scalar registers and transfer to SIMD registers
61 # REV ctr32, rev_ctr32
62 # ORR ctr64, constctr96_top32, ctr32, LSL #32
63 # INS ctr_next.d[0], constctr96_bottom64 // Keeping this in scalar registers to free up space in SIMD RF
64 # INS ctr_next.d[1], ctr64X
# Do AES encryption/decryption on CTR block X and EOR it with input block X. Take a 256-bit key below as an example.
# We use a small trick here: load the input into scalar registers, EOR it with the last round key, and then transfer
# it to the SIMD registers. Given we are very constrained in our ASIMD registers, this is quite important.
73 # LDR input_low, [ input_ptr ], #8
74 # LDR input_high, [ input_ptr ], #8
75 # EOR input_low, k14_low
76 # EOR input_high, k14_high
77 # INS res_curr.d[0], input_low
78 # INS res_curr.d[1], input_high
79 # AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr
80 # AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr
81 # AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr
82 # AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr
83 # AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr
84 # AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr
85 # AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr
86 # AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr
87 # AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr
88 # AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr
89 # AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr
90 # AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr
91 # AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr
93 # EOR res_curr, res_curr, ctr_curr
94 # ST1 { res_curr.16b }, [ output_ptr ], #16
97 # AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr
98 # AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr
99 # AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr
100 # AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr
101 # AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr
102 # AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr
103 # AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr
104 # AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr
105 # AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr
106 # AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr
107 # AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr
108 # AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr
109 # AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr
111 # LDR res_curr, [ input_ptr ], #16
112 # EOR res_curr, res_curr, ctr_curr
113 # MOV output_low, res_curr.d[0]
114 # MOV output_high, res_curr.d[1]
115 # EOR output_low, k14_low
116 # EOR output_high, k14_high
117 # STP output_low, output_high, [ output_ptr ], #16
120 # Do 128b karatsuba polynomial multiplication on block
121 # We only have 64b->128b polynomial multipliers, naively that means we need to do 4 64b multiplies to generate a 128b
124 # Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64
126 # The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies:
127 # Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64
129 # There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are
130 # multiplying with "twisted" powers of H
132 # Note: We can PMULL directly into the acc_x in first GHASH of the loop
133 # Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical
134 # path latency dominates the performance
136 # This has a knock on effect on register pressure, so we have to be a bit more clever with our temporary registers
137 # than indicated here
138 # REV64 res_curr, res_curr
139 # INS t_m.d[0], res_curr.d[1]
140 # EOR t_m.8B, t_m.8B, res_curr.8B
141 # PMULL2 t_h, res_curr, HX
142 # PMULL t_l, res_curr, HX
143 # PMULL t_m, t_m, HX_k
144 # EOR acc_h, acc_h, t_h
145 # EOR acc_l, acc_l, t_l
146 # EOR acc_m, acc_m, t_m
148 # MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them
149 # There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo
150 # with a reversed constant
151 # EOR3 acc_m, acc_m, acc_l, acc_h // Finish off karatsuba processing
152 # PMULL t_mod, acc_h, mod_constant
153 # EXT acc_h, acc_h, acc_h, #8
154 # EOR3 acc_m, acc_m, t_mod, acc_h
155 # PMULL acc_h, acc_m, mod_constant
156 # EXT acc_m, acc_m, acc_m, #8
157 # EOR3 acc_l, acc_l, acc_m, acc_h
# Command line: [flavour] [output-file].
# The last argument is taken as the output file when it ends in a ".ext"
# suffix; the first argument is taken as the flavour when it contains no dot.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Look for arm-xlate.pl next to this script first, then under ../../perlasm/
# relative to the directory this script was invoked from.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

# This generator only emits AArch64 code; an absent flavour is rejected too.
die "only for 64 bit" if $flavour !~ /64/;

# Pipe the generated code through the translator. $output is quoted so that
# a path containing spaces reaches the translator as a single argument, and
# a failure to start the pipe is fatal instead of being silently ignored.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
173 #include "arm_arch.h"
175 #if __ARM_MAX_ARCH__>=8
# Assembler preamble: enable the crypto extension and switch to .text.
$code .= ".arch armv8-a+crypto\n.text\n";

# General-purpose registers with fixed roles.
#   x0  - argument block (input pointer)
#   x15 - scratch used to build constants (e.g. the counter increment)
#   x10 - address of the GHASH modulo constant kept on the stack
($input_ptr, $constant_temp, $modulo_constant) = ("x0", "x15", "x10");

my ($end_input_ptr, $main_end_input_ptr, $temp0_x, $temp1_x) =
    map { sprintf "x%d", $_ } 4 .. 7;
my ($temp2_x, $temp3_x) = map { sprintf "x%d", $_ } 13 .. 14;

# v0-v7 hold the counter blocks, v8-v15 the result blocks. Each group is
# named four ways (.16b view, bare vN, dN, qN) for use in different operands.
my ($ctr0b, $ctr1b, $ctr2b, $ctr3b, $ctr4b, $ctr5b, $ctr6b, $ctr7b,
    $res0b, $res1b, $res2b, $res3b, $res4b, $res5b, $res6b, $res7b) =
    map { sprintf "v%d.16b", $_ } 0 .. 15;
my ($ctr0, $ctr1, $ctr2, $ctr3, $ctr4, $ctr5, $ctr6, $ctr7,
    $res0, $res1, $res2, $res3, $res4, $res5, $res6, $res7) =
    map { sprintf "v%d", $_ } 0 .. 15;
my ($ctr0d, $ctr1d, $ctr2d, $ctr3d, $ctr4d, $ctr5d, $ctr6d, $ctr7d) =
    map { sprintf "d%d", $_ } 0 .. 7;
my ($ctr0q, $ctr1q, $ctr2q, $ctr3q, $ctr4q, $ctr5q, $ctr6q, $ctr7q) =
    map { sprintf "q%d", $_ } 0 .. 7;
my ($res0q, $res1q, $res2q, $res3q, $res4q, $res5q, $res6q, $res7q) =
    map { sprintf "q%d", $_ } 8 .. 15;

# The temporaries used for freshly loaded input share v8-v15 with the result
# registers above; the two sets of names refer to the same hardware registers.
my ($ctr_t0, $ctr_t1, $ctr_t2, $ctr_t3, $ctr_t4, $ctr_t5, $ctr_t6, $ctr_t7) =
    map { sprintf "v%d", $_ } 8 .. 15;
my ($ctr_t0b, $ctr_t1b, $ctr_t2b, $ctr_t3b,
    $ctr_t4b, $ctr_t5b, $ctr_t6b, $ctr_t7b) =
    map { sprintf "v%d.16b", $_ } 8 .. 15;
my ($ctr_t0q, $ctr_t1q, $ctr_t2q, $ctr_t3q,
    $ctr_t4q, $ctr_t5q, $ctr_t6q, $ctr_t7q) =
    map { sprintf "q%d", $_ } 8 .. 15;

# GHASH accumulators (high / mid / low) live in v17-v19.
my ($acc_hb, $acc_mb, $acc_lb) = map { sprintf "v%d.16b", $_ } 17 .. 19;
my ($acc_h, $acc_m, $acc_l)    = map { sprintf "v%d", $_ } 17 .. 19;

# Hash-key powers: H1..H4 and H5..H8 (with their "k" companions) are mapped
# onto the same six registers v20-v25, so only one half is resident at a time
# and the code reloads them from memory as needed.
my @hkey_v = map { sprintf "v%d", $_ } 20 .. 25;
my @hkey_q = map { sprintf "q%d", $_ } 20 .. 25;
my ($h1, $h12k, $h2, $h3, $h34k, $h4)       = @hkey_v;
my ($h5, $h56k, $h6, $h7, $h78k, $h8)       = @hkey_v;
my ($h1q, $h12kq, $h2q, $h3q, $h34kq, $h4q) = @hkey_q;
my ($h5q, $h56kq, $h6q, $h7q, $h78kq, $h8q) = @hkey_q;

# The modulo constant is loaded into the t0 temporary (declared elsewhere).
my $mod_constantd = $t0d;
my $mod_constant  = $t0;

# Round keys rk0..rk14 cycle through just three registers, v26-v28:
# rkN lives in v(26 + N % 3). The code reloads keys as the rounds progress.
my @rkey_b = map { sprintf "v%d.16b", $_ } 26 .. 28;
my @rkey_q = map { sprintf "q%d", $_ } 26 .. 28;
my ($rk0,  $rk1,  $rk2)  = @rkey_b;
my ($rk3,  $rk4,  $rk5)  = @rkey_b;
my ($rk6,  $rk7,  $rk8)  = @rkey_b;
my ($rk9,  $rk10, $rk11) = @rkey_b;
my ($rk12, $rk13, $rk14) = @rkey_b;
my ($rk0q,  $rk1q,  $rk2q)  = @rkey_q;
my ($rk3q,  $rk4q,  $rk5q)  = @rkey_q;
my ($rk6q,  $rk7q,  $rk8q)  = @rkey_q;
my ($rk9q,  $rk10q, $rk11q) = @rkey_q;
my ($rk12q, $rk13q, $rk14q) = @rkey_q;
251 #########################################################################################
252 # size_t unroll8_eor3_aes_gcm_enc_128_kernel(const uint8_t * plaintext,
253 # uint64_t plaintext_length,
254 # uint8_t * ciphertext,
256 # unsigned char ivec[16],
260 .global unroll8_eor3_aes_gcm_enc_128_kernel
261 .type unroll8_eor3_aes_gcm_enc_128_kernel,%function
263 unroll8_eor3_aes_gcm_enc_128_kernel:
264 AARCH64_VALID_CALL_TARGET
265 cbz x1, .L128_enc_ret
266 stp d8, d9, [sp, #-80]!
267 lsr $byte_length, $bit_length, #3
270 stp d10, d11, [sp, #16]
271 stp d12, d13, [sp, #32]
272 stp d14, d15, [sp, #48]
273 mov x5, #0xc200000000000000
274 stp x5, xzr, [sp, #64]
275 add $modulo_constant, sp, #64
277 mov $constant_temp, #0x100000000 @ set up counter increment
278 movi $rctr_inc.16b, #0x0
279 mov $rctr_inc.d[1], $constant_temp
280 mov $main_end_input_ptr, $byte_length
281 ld1 { $ctr0b}, [$counter] @ CTR block 0
283 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
285 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
287 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
289 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
291 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
292 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
294 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
295 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
297 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
298 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
300 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
301 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
303 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
304 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
305 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
307 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
308 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
310 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
311 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
313 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
314 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
315 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
317 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
318 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
319 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
321 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
322 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
323 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
325 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
327 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
328 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
329 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
331 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
332 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
333 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
335 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
336 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
337 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
339 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
340 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
341 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
343 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
344 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
345 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
347 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
349 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
350 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
351 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
353 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
354 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
355 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
357 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
359 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
360 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
361 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
363 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
364 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
365 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
367 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
368 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
369 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
371 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
372 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
373 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
375 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
376 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
377 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
379 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
380 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
381 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
383 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
384 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
385 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
387 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
388 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
389 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
391 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
392 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
393 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
395 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
397 ld1 { $acc_lb}, [$current_tag]
398 ext $acc_lb, $acc_lb, $acc_lb, #8
399 rev64 $acc_lb, $acc_lb
401 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
403 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
404 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
405 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
407 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
408 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
409 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
411 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
412 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
413 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
415 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
416 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
417 ldr $rk10q, [$cc, #160] @ load rk10
419 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9
420 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
421 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9
423 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
424 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
425 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9
427 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9
428 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
429 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9
431 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9
432 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9
433 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9
435 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
436 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
437 b.ge .L128_enc_tail @ handle tail
439 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
441 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
443 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
445 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
446 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
448 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 0 - result
449 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
450 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
452 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 1 - result
453 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
455 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
456 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result
457 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
459 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 2 - result
460 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result
461 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result
463 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
464 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
466 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 3 - result
467 eor3 $res7b, $ctr_t7b, $ctr7b,$rk10 @ AES block 7 - result
468 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
470 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
471 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
472 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
474 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
476 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
477 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
478 b.ge .L128_enc_prepretail @ do prepretail
480 .L128_enc_main_loop: @ main loop start
481 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
482 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
483 ext $h5.16b, $h5.16b, $h5.16b, #8
484 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
485 ext $h6.16b, $h6.16b, $h6.16b, #8
486 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
488 rev64 $res1b, $res1b @ GHASH block 8k+1
489 rev64 $res0b, $res0b @ GHASH block 8k
490 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
491 ext $h7.16b, $h7.16b, $h7.16b, #8
492 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
493 ext $h8.16b, $h8.16b, $h8.16b, #8
495 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
496 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
497 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
499 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
500 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
501 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free)
502 rev64 $res3b, $res3b @ GHASH block 8k+3
504 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
505 eor $res0b, $res0b, $acc_lb @ PRE 1
506 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
508 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free)
510 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
511 rev64 $res2b, $res2b @ GHASH block 8k+2
512 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
514 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
515 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
516 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
518 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
519 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
520 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
522 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
523 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
524 ext $h3.16b, $h3.16b, $h3.16b, #8
525 ldr $h4q, [$current_tag, #112] @ load h3l | h3h
526 ext $h4.16b, $h4.16b, $h4.16b, #8
527 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
529 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
530 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
531 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
533 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
534 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
535 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
537 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
538 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
539 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
541 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
542 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
543 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
545 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
546 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
547 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
549 eor3 $acc_hb, $acc_hb, $t1.16b,$t2.16b @ GHASH block 8k+2, 8k+3 - high
550 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
551 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
553 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
554 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
555 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
557 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
558 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
559 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
561 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
562 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
563 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
565 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free)
566 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
568 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
569 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
570 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
572 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
573 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
574 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
576 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
577 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
578 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
580 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
581 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
582 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
584 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
585 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
586 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
587 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free)
589 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
590 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
591 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
593 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
594 ext $h1.16b, $h1.16b, $h1.16b, #8
595 ldr $h2q, [$current_tag, #64] @ load h1l | h1h
596 ext $h2.16b, $h2.16b, $h2.16b, #8
597 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
598 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
600 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
601 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
603 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
604 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
606 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
607 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
609 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
610 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
612 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
613 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
614 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
616 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
617 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
618 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
620 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
621 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
622 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
624 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
625 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
626 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
628 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
629 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
630 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
632 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
633 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
634 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
636 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
637 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
638 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
640 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
641 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
643 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
644 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
645 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
647 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
648 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
649 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
651 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
652 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
653 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
655 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
656 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
658 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
659 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
660 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
662 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
663 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
664 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
666 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
667 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16
668 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
670 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
671 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
672 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
674 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
675 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
676 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
678 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
679 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
680 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
682 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
683 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
684 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
686 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
687 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
688 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
690 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17
691 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
693 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
694 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext
695 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
697 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
698 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
699 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
701 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
702 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
703 ldr $rk10q, [$cc, #160] @ load rk10
705 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
706 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18
707 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
708 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
710 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
711 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
712 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
714 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9
715 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9
716 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9
718 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext
719 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19
720 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
722 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
723 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result
724 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9
726 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9
727 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9
729 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 8k+10 - result
731 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
732 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9
734 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
735 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
737 eor3 $res7b, $ctr_t7b, $ctr7b, $rk10 @ AES block 7 - result
738 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9
739 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
741 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 8k+9 - result
742 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 8k+11 - result
743 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
745 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
746 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result
747 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
749 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 8k+8 - result
750 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
751 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
753 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
754 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result
756 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
757 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
759 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
760 b.lt .L128_enc_main_loop
762 .L128_enc_prepretail: @ PREPRETAIL - GHASH blocks 8k..8k+7 and run AES rounds 0-9 on blocks 8k+8..8k+15 (no plaintext loads or ciphertext stores here); falls through to TAIL
763 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
764 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
765 ext $h7.16b, $h7.16b, $h7.16b, #8 @ swap 64-bit halves of h7
766 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
767 ext $h8.16b, $h8.16b, $h8.16b, #8 @ swap 64-bit halves of h8
768 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 - swap 64-bit halves of the running tag
770 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
771 ext $h5.16b, $h5.16b, $h5.16b, #8 @ swap 64-bit halves of h5
772 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
773 ext $h6.16b, $h6.16b, $h6.16b, #8 @ swap 64-bit halves of h6
774 rev64 $res0b, $res0b @ GHASH block 8k
775 rev64 $res1b, $res1b @ GHASH block 8k+1
777 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
778 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
779 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
780 rev64 $res3b, $res3b @ GHASH block 8k+3
782 rev64 $res2b, $res2b @ GHASH block 8k+2
783 eor $res0b, $res0b, $acc_lb @ PRE 1 - fold the running tag into GHASH block 8k
785 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
787 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
788 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
789 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
791 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free)
792 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
794 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
795 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
796 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
798 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
799 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
801 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
802 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
804 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
805 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
807 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free)
808 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free)
810 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
812 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
814 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free)
816 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
818 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
819 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
821 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
822 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
824 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
825 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
827 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
828 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
829 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
831 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
832 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
834 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
835 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
836 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
838 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
839 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
841 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
842 ext $h3.16b, $h3.16b, $h3.16b, #8 @ swap 64-bit halves of h3
843 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
844 ext $h4.16b, $h4.16b, $h4.16b, #8 @ swap 64-bit halves of h4
846 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
847 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
848 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
850 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
851 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
853 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
854 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
856 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
857 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
858 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
859 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
861 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
862 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
864 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
865 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
866 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
868 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
869 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
870 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
872 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
873 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
875 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
876 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
877 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
879 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
880 ext $h1.16b, $h1.16b, $h1.16b, #8 @ swap 64-bit halves of h1
881 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
882 ext $h2.16b, $h2.16b, $h2.16b, #8 @ swap 64-bit halves of h2
883 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
884 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
886 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
887 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
888 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
890 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
891 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
892 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
894 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
895 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
897 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
898 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
899 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
901 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
902 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
903 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
905 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
906 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
907 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
909 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
910 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
911 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
913 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
914 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
915 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
917 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
918 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
919 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
921 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
922 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
924 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
925 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
926 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
928 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
929 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
930 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
932 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
933 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
934 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
936 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
937 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
938 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
940 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
941 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
943 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
944 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
945 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
947 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
948 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
950 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
951 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
952 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
954 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
955 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
956 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
958 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
959 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
960 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
962 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
963 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
964 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
966 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
967 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
968 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
969 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
971 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
972 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
973 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
975 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
976 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
978 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
979 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
981 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
982 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
983 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
984 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
986 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
987 eor3 $acc_lb, $acc_lb, $acc_hb, $acc_mb @ MODULO - fold into low
988 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
990 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
991 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
992 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
994 ldr $rk10q, [$cc, #160] @ load rk10
995 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9
996 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9
998 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9
999 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9
1001 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9
1002 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9
1004 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9
1005 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9
1006 .L128_enc_tail: @ TAIL - form the first remaining result block, reload h5..h8, then dispatch on the number of bytes left
1008 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
1009 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
1012 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h and h6k | h5k
1013 ext $h5.16b, $h5.16b, $h5.16b, #8 @ swap 64-bit halves of h5
1015 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result
1016 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
1017 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h and h7l | h7h
1018 ext $h6.16b, $h6.16b, $h6.16b, #8 @ swap 64-bit halves of h6
1019 ext $h7.16b, $h7.16b, $h7.16b, #8 @ swap 64-bit halves of h7
1021 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k and h8l | h8h
1022 ext $h8.16b, $h8.16b, $h8.16b, #8 @ swap 64-bit halves of h8
1023 cmp $main_end_input_ptr, #112 @ more than 7 blocks (112 bytes) left?
1024 b.gt .L128_enc_blocks_more_than_7
1030 cmp $main_end_input_ptr, #96 @ more than 6 blocks (96 bytes) left?
1031 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1040 b.gt .L128_enc_blocks_more_than_6
1043 cmp $main_end_input_ptr, #80 @ more than 5 blocks (80 bytes) left?
1045 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1051 b.gt .L128_enc_blocks_more_than_5
1053 cmp $main_end_input_ptr, #64 @ more than 4 blocks (64 bytes) left?
1054 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1061 b.gt .L128_enc_blocks_more_than_4
1064 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1068 cmp $main_end_input_ptr, #48 @ more than 3 blocks (48 bytes) left?
1069 b.gt .L128_enc_blocks_more_than_3
1071 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1075 cmp $main_end_input_ptr, #32 @ more than 2 blocks (32 bytes) left?
1076 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
1077 b.gt .L128_enc_blocks_more_than_2
1079 cmp $main_end_input_ptr, #16 @ more than 1 block (16 bytes) left?
1081 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1083 b.gt .L128_enc_blocks_more_than_1
1085 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
1086 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ step rtmp_ctr back by rctr_inc
1087 b .L128_enc_blocks_less_than_1
1088 .L128_enc_blocks_more_than_7: @ blocks left > 7 - store final-7 result, start the GHASH accumulators from it (h8, high half of h78k), form final-6 result
1089 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
1091 rev64 $res0b, $res1b @ GHASH final-7 block
1092 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
1094 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1096 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid
1098 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high (written directly, not accumulated)
1100 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid
1102 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid
1103 movi $t0.8b, #0 @ suppress further partial tag feed in
1105 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result
1107 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid (written directly, not accumulated)
1108 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low (written directly, not accumulated)
1109 .L128_enc_blocks_more_than_6: @ blocks left > 6 - store final-6 result, accumulate its GHASH (h7, low half of h78k), form final-5 result
1111 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
1113 rev64 $res0b, $res1b @ GHASH final-6 block
1114 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
1116 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1118 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid
1120 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result
1121 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
1123 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid
1124 movi $t0.8b, #0 @ suppress further partial tag feed in
1126 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid
1127 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
1129 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
1131 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
1132 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
1133 .L128_enc_blocks_more_than_5: @ blocks left > 5 - store final-5 result, accumulate its GHASH (h6, high half of h56k), form final-4 result
1135 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
1137 rev64 $res0b, $res1b @ GHASH final-5 block
1139 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1141 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid
1142 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
1143 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
1145 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
1147 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid
1149 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid (copy to the upper lane for pmull2)
1151 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result
1152 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
1153 movi $t0.8b, #0 @ suppress further partial tag feed in
1155 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid
1156 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
1158 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
1159 .L128_enc_blocks_more_than_4: @ blocks left > 4 - store final-4 result, accumulate its GHASH (h5, low half of h56k), form final-3 result
1161 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
1163 rev64 $res0b, $res1b @ GHASH final-4 block
1165 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
1167 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1169 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
1170 movi $t0.8b, #0 @ suppress further partial tag feed in
1171 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
1173 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
1175 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
1177 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
1178 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
1180 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
1182 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result
1183 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
1184 .L128_enc_blocks_more_than_3: @ blocks left > 3 - store final-3 result, accumulate its GHASH (h4, high half of h34k), form final-2 result
1186 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
1188 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
1189 ext $h4.16b, $h4.16b, $h4.16b, #8 @ swap 64-bit halves of h4
1191 rev64 $res0b, $res1b @ GHASH final-3 block
1193 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1194 movi $t0.8b, #0 @ suppress further partial tag feed in
1196 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
1197 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
1198 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
1200 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
1202 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
1204 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid (copy to the upper lane for pmull2)
1205 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
1207 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result
1209 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
1210 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
1212 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
1213 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
1214 .L128_enc_blocks_more_than_2: @ blocks left > 2 - store final-2 result, accumulate its GHASH (h3, low half of h34k), form final-1 result
1216 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
1218 rev64 $res0b, $res1b @ GHASH final-2 block
1220 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1222 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
1224 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
1225 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
1226 ext $h3.16b, $h3.16b, $h3.16b, #8 @ swap 64-bit halves of h3
1227 movi $t0.8b, #0 @ suppress further partial tag feed in
1229 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
1230 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result
1232 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
1234 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
1235 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
1237 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
1239 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
1240 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
1241 .L128_enc_blocks_more_than_1: @ blocks left > 1 - store final-1 result, accumulate its GHASH (h2, high half of h12k), form final result
1243 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
1245 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
1246 ext $h2.16b, $h2.16b, $h2.16b, #8 @ swap 64-bit halves of h2
1247 rev64 $res0b, $res1b @ GHASH final-1 block
1248 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
1250 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1252 movi $t0.8b, #0 @ suppress further partial tag feed in
1253 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
1254 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result
1256 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
1258 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
1260 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
1262 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid (copy to the upper lane for pmull2)
1264 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
1265 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
1267 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
1269 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
1270 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
1271 .L128_enc_blocks_less_than_1: @ blocks left <= 1 - mask and store the last (possibly partial) block, finish GHASH, reduce, store the tag and restore d8-d15
1273 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b @ byte-reverse each 32-bit word of the counter before storing
1274 str $rtmp_ctrq, [$counter] @ store the updated counter
1275 and $bit_length, $bit_length, #127 @ bit_length %= 128
1277 sub $bit_length, $bit_length, #128 @ bit_length -= 128
1279 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
1281 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
1282 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
1283 and $bit_length, $bit_length, #127 @ bit_length %= 128
1285 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
1286 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
1287 cmp $bit_length, #64
1289 csel $temp2_x, $temp1_x, $temp0_x, lt @ low 64b mask: all ones if bit_length < 64, else temp0_x
1290 csel $temp3_x, $temp0_x, xzr, lt @ high 64b mask: temp0_x if bit_length < 64, else zero
1292 mov $ctr0.d[1], $temp3_x
1293 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
1295 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
1297 rev64 $res0b, $res1b @ GHASH final block
1299 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
1300 st1 { $res1b}, [$output_ptr] @ store all 16B
1302 eor $res0b, $res0b, $t0.16b @ feed in partial tag
1304 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
1306 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
1307 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
1308 ext $h1.16b, $h1.16b, $h1.16b, #8 @ swap 64-bit halves of h1
1310 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
1312 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
1313 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
1314 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
1316 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
1318 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
1320 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
1322 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
1323 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
1325 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
1327 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
1329 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
1330 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
1332 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
1333 ext $acc_lb, $acc_lb, $acc_lb, #8 @ swap 64-bit halves of the tag
1334 rev64 $acc_lb, $acc_lb @ byte-reverse each 64-bit half of the tag
1335 st1 { $acc_l.16b }, [$current_tag] @ store the updated tag
1336 mov x0, $byte_length @ x0 = byte_length (return value)
1338 ldp d10, d11, [sp, #16] @ restore d10, d11
1339 ldp d12, d13, [sp, #32] @ restore d12, d13
1340 ldp d14, d15, [sp, #48] @ restore d14, d15
1341 ldp d8, d9, [sp], #80 @ restore d8, d9 and release the 80-byte stack frame
1347 .size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel
1350 #########################################################################################
1351 # size_t unroll8_eor3_aes_gcm_dec_128_kernel(const uint8_t * ciphertext,
1352 # uint64_t plaintext_length,
1353 # uint8_t * plaintext,
1355 # unsigned char ivec[16],
1359 .global unroll8_eor3_aes_gcm_dec_128_kernel
1360 .type unroll8_eor3_aes_gcm_dec_128_kernel,%function
1362 unroll8_eor3_aes_gcm_dec_128_kernel:
1363 AARCH64_VALID_CALL_TARGET
1364 cbz x1, .L128_dec_ret
1365 stp d8, d9, [sp, #-80]!
1366 lsr $byte_length, $bit_length, #3
1369 stp d10, d11, [sp, #16]
1370 stp d12, d13, [sp, #32]
1371 stp d14, d15, [sp, #48]
1372 mov x5, #0xc200000000000000
1373 stp x5, xzr, [sp, #64]
1374 add $modulo_constant, sp, #64
1376 mov $main_end_input_ptr, $byte_length
1377 ld1 { $ctr0b}, [$counter] @ CTR block 0
1379 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1380 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
1382 mov $constant_temp, #0x100000000 @ set up counter increment
1383 movi $rctr_inc.16b, #0x0
1384 mov $rctr_inc.d[1], $constant_temp
1385 ld1 { $acc_lb}, [$current_tag]
1386 ext $acc_lb, $acc_lb, $acc_lb, #8
1387 rev64 $acc_lb, $acc_lb
1389 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
1391 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
1393 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
1395 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
1396 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
1398 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
1400 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
1401 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
1402 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
1404 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
1405 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
1407 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
1408 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
1410 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
1411 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
1413 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
1414 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
1416 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
1418 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
1419 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
1420 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
1422 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
1423 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
1425 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
1427 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
1428 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
1430 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
1432 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1434 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
1435 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
1437 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
1438 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
1440 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
1441 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
1442 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
1444 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
1445 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
1446 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
1448 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
1449 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
1450 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
1452 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
1453 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
1455 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1456 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
1458 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
1459 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
1461 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
1462 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
1464 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
1465 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
1466 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
1468 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
1469 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
1470 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
1472 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
1473 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
1474 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
1476 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
1477 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
1478 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
1480 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
1481 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
1483 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
1484 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
1486 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
1488 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
1489 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
1490 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
1492 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
1493 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
1494 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
1496 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
1497 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
1498 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
1500 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
1501 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
1502 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
1504 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
1505 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
1506 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
1508 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
1509 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
1510 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
1512 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
1513 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
1515 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8
1516 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8
1518 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8
1519 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8
1520 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8
1522 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8
1523 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8
1524 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8
1526 aese $ctr0b, $rk9 @ AES block 0 - round 9
1527 aese $ctr1b, $rk9 @ AES block 1 - round 9
1528 aese $ctr6b, $rk9 @ AES block 6 - round 9
1530 ldr $rk10q, [$cc, #160] @ load rk10
1531 aese $ctr4b, $rk9 @ AES block 4 - round 9
1532 aese $ctr3b, $rk9 @ AES block 3 - round 9
1534 aese $ctr2b, $rk9 @ AES block 2 - round 9
1535 aese $ctr5b, $rk9 @ AES block 5 - round 9
1536 aese $ctr7b, $rk9 @ AES block 7 - round 9
1538 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
1539 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
1540 b.ge .L128_dec_tail @ handle tail
1542 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
1544 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 0 - result
1545 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 1 - result
1546 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
1548 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
1549 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
1550 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
1552 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
1554 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
1555 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
1556 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
1558 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 3 - result
1559 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 2 - result
1560 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
1562 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
1563 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
1565 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 6 - result
1567 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
1568 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
1570 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 4 - result
1571 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 5 - result
1572 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
1574 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 7 - result
1575 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
1576 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
1578 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
1579 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
1580 b.ge .L128_dec_prepretail @ do prepretail
1582 .L128_dec_main_loop: @ main loop start
@ Steady-state decrypt loop. One pass hashes the eight ciphertext blocks
@ already held in res0-res7 (GHASH, including the modulo reduction into
@ acc_l) while the AES rounds for the next eight counter blocks in
@ ctr0-ctr7 are interleaved between the GHASH instructions.
@ Towards the end of the pass the next 128 bytes of ciphertext are loaded
@ into res0-res7, combined with the keystream and rk10 in a three-way xor,
@ and the resulting 128 bytes are stored through output_ptr.
@ The hash-key registers are reloaded on every pass: the low and mid
@ polynomial multiplies below write their products back into h1-h7 and
@ the h12k/h34k/h56k/h78k registers, destroying the key values.
@ Each key load is followed by an 8-byte rotate that swaps its two halves.
@ The loop repeats while input_ptr compares less than main_end_input_ptr.
1583 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
1584 ext $h7.16b, $h7.16b, $h7.16b, #8
1585 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
1586 ext $h8.16b, $h8.16b, $h8.16b, #8
1588 rev64 $res1b, $res1b @ GHASH block 8k+1
1589 rev64 $res0b, $res0b @ GHASH block 8k
1590 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
1592 rev64 $res6b, $res6b @ GHASH block 8k+6
1593 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
1594 ext $h5.16b, $h5.16b, $h5.16b, #8
1595 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
1596 ext $h6.16b, $h6.16b, $h6.16b, #8
@ The running tag from the previous pass is folded into the first block.
1598 eor $res0b, $res0b, $acc_lb @ PRE 1
1599 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
1600 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
1602 rev64 $res2b, $res2b @ GHASH block 8k+2
1603 rev64 $res4b, $res4b @ GHASH block 8k+4
1604 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1606 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
1607 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
1608 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
1609 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
1611 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
1612 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
1613 rev64 $res3b, $res3b @ GHASH block 8k+3
1615 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
1616 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
1617 rev64 $res5b, $res5b @ GHASH block 8k+5
1619 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
1620 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
1621 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
1623 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
1624 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
1625 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
1627 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
1628 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
1629 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
1631 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
1632 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
1633 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
1635 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
1636 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
1637 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
1639 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
1640 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
1641 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
1643 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1644 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
1645 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
1647 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
1648 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
1649 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
1651 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
1652 ext $h3.16b, $h3.16b, $h3.16b, #8
1653 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
1654 ext $h4.16b, $h4.16b, $h4.16b, #8
1655 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
1656 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
1658 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
1659 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
1660 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
1662 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
1663 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
1664 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
1666 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
1667 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
1668 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
1670 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
1671 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
1672 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
1673 ext $h1.16b, $h1.16b, $h1.16b, #8
1674 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
1675 ext $h2.16b, $h2.16b, $h2.16b, #8
1677 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
1678 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
1679 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
1681 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
1682 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
1683 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
1685 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
1686 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
1687 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
1689 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
1690 rev64 $res7b, $res7b @ GHASH block 8k+7
1691 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
1693 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1694 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
1695 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
1697 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
1698 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
1699 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
1700 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
1702 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
1703 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
1704 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
1706 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
1707 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
1708 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
1710 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
1711 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
1712 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
1714 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
1715 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
1716 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
1718 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
1719 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
1720 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
1722 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
1723 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
1724 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
1726 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
1727 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
1728 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
1730 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
1731 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
1732 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
1734 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
1735 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
1736 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
1738 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
1739 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
1740 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
1742 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
1743 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
1744 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
1746 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
1747 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
1748 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
1750 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
1751 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
1752 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
1754 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
1755 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
1756 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
1758 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
1759 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
1760 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
1762 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
1763 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
1764 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
1766 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
1767 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
1768 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
1770 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
1771 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
1772 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
@ From here on h1-h4 have delivered their last GHASH product, so they are
@ reused to stage counter blocks 8k+16 to 8k+19. ctr0-ctr3 cannot take
@ them yet because they still hold keystream that has not been stored.
1774 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16
1775 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
1776 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
1778 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
1779 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
1780 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
1782 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
1783 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
1784 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17
1786 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
1787 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
1788 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
1790 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
1791 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
1792 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
1794 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
1795 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
@ res0-res7 are free again (all their GHASH products have been taken), so
@ the ciphertext for the next pass is loaded into them here.
1796 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
1798 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
1799 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
1800 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18
1802 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
1803 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
1804 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
1806 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
1807 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
1808 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
1810 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
1811 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
1812 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
1814 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9
1815 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9
1816 ldr $rk10q, [$cc, #160] @ load rk10
1818 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9
1819 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
1820 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9
1822 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9
1823 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9
1824 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
1826 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19
1827 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
1829 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9
1830 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9
@ Final step of each block: ciphertext xor round-9 state xor rk10, done in
@ a single three-input xor. Each ctr register is refilled from its staged
@ counter only after the pair containing it has been stored.
1831 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 8k+9 - result
1833 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 8k+8 - result
1834 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 8k+15 - result
1835 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 8k+14 - result
1837 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 8k+10 - result
1838 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
1839 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
1841 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 8k+12 - result
1842 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
1843 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
1845 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 8k+11 - result
@ The flags set by this compare are consumed by the branch at the bottom
@ of the loop; none of the stores or vector operations in between write
@ the condition flags.
1846 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
1847 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
1849 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 8k+13 - result
1850 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
1852 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
1853 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
1854 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
1856 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
1857 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
1858 b.lt .L128_dec_main_loop
1860 .L128_dec_prepretail: @ PREPRETAIL
@ Entered when the main loop finishes, or directly from the code ahead of
@ the main loop. Hashes the eight ciphertext blocks still held in
@ res0-res7 (GHASH plus the modulo reduction into acc_l) and takes the
@ counter blocks in ctr0-ctr7 through AES rounds 0-9.
@ Unlike the main loop, this section loads no further ciphertext and
@ stores nothing: the final xor with rk10 is left to the tail code, which
@ execution falls through into after the last instruction here.
@ Hash keys are reloaded for the same reason as in the main loop: the low
@ and mid multiplies overwrite the key registers with their products.
1861 rev64 $res3b, $res3b @ GHASH block 8k+3
1862 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
1863 rev64 $res0b, $res0b @ GHASH block 8k
1865 rev64 $res2b, $res2b @ GHASH block 8k+2
1866 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
1867 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1869 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
1870 ext $h7.16b, $h7.16b, $h7.16b, #8
1871 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
1872 ext $h8.16b, $h8.16b, $h8.16b, #8
@ The running tag is folded into the first of the eight blocks.
1873 eor $res0b, $res0b, $acc_lb @ PRE 1
1874 rev64 $res1b, $res1b @ GHASH block 8k+1
1876 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
1877 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
1878 ext $h5.16b, $h5.16b, $h5.16b, #8
1879 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
1880 ext $h6.16b, $h6.16b, $h6.16b, #8
1881 rev64 $res5b, $res5b @ GHASH block 8k+5
1883 rev64 $res4b, $res4b @ GHASH block 8k+4
1885 rev64 $res6b, $res6b @ GHASH block 8k+6
1887 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
1888 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
1889 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
1890 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
1892 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
1893 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
1894 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
1896 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
1897 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
1898 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
1900 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
1901 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
1902 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
1904 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
1905 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
1906 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
1908 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
1909 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
1910 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
1912 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
1913 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
1914 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
1916 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
1917 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
1918 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
1920 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
1921 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
1922 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
1924 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
1925 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
1926 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
1928 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
1929 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
1930 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
1932 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
1933 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
1934 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
1936 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1937 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
1938 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
1940 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
1941 ext $h3.16b, $h3.16b, $h3.16b, #8
1942 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
1943 ext $h4.16b, $h4.16b, $h4.16b, #8
1944 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
1945 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
1947 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
1948 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
1949 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
1951 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
1952 ext $h1.16b, $h1.16b, $h1.16b, #8
1953 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
1954 ext $h2.16b, $h2.16b, $h2.16b, #8
1955 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
1957 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
1958 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
1959 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
1961 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
1962 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
1963 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
1965 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
1966 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
1967 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
1969 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
1970 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
1971 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
1973 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1974 rev64 $res7b, $res7b @ GHASH block 8k+7
1975 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
1977 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
1978 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
1979 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
1980 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
1982 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
1983 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
1984 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
1986 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
1987 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
1988 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
1990 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
1991 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
1992 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
1994 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
1995 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
1996 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
1998 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
1999 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
2000 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
2002 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
2003 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
2004 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
2006 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
2007 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
2008 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
2010 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
2011 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
2012 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
2014 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
2015 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
2016 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
2018 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2019 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
2020 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
2022 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
2023 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
2024 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
2026 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
2027 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
2028 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
2030 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
2031 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
2032 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
2034 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
2035 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
2036 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
2038 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
2039 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
2040 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
2042 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
2043 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
2044 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
2046 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
2047 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
2048 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
@ All eight blocks are now summed into acc_h, acc_m and acc_l; the MODULO
@ steps below reduce them to a single value left in acc_l.
2050 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
2051 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
2052 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2054 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
2055 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
2056 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
2058 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
2059 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
2060 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
2062 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
2063 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
2064 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
2066 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
2067 ldr $rk10q, [$cc, #160] @ load rk10
2069 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
2070 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
2072 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
2073 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
2074 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
2076 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
2077 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
2078 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
2080 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9
2081 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
2082 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
2084 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
2085 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
2086 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9
2088 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9
2089 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9
2090 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9
2092 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9
2093 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9
2094 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9
2096 .L128_dec_tail: @ TAIL
@ Tail dispatch. Works out how many input bytes are left and compares that
@ count against 112, 96, 80, 64, 48, 32 and 16 to select the matching
@ blocks_more_than_N entry point; the last case goes to blocks_less_than_1.
@ On the way down rtmp_ctr is reduced by rctr_inc once for every block
@ fewer than eight that remains (none when more than 112 bytes are left,
@ seven when 16 bytes or fewer are left).
@ Before dispatching, the first remaining ciphertext block is loaded and
@ combined with the ctr0 keystream, t0 receives the half-swapped running
@ tag, and the h5-h8, h56k and h78k hash keys are reloaded.
2099 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
@ The flags from this compare are consumed by the first conditional
@ branch below; the loads and vector operations in between leave the
@ condition flags untouched.
2101 cmp $main_end_input_ptr, #112
2103 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k
2104 ext $h8.16b, $h8.16b, $h8.16b, #8
2105 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
2107 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h
2108 ext $h5.16b, $h5.16b, $h5.16b, #8
2109 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
2111 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h
2112 ext $h6.16b, $h6.16b, $h6.16b, #8
2113 ext $h7.16b, $h7.16b, $h7.16b, #8
@ NOTE(review): t1 is the last operand of this xor and of every later tail
@ decrypt, where the code before the tail uses rk10. Nothing between the
@ tail label and this point writes t1, and the prepretail uses t1 as a
@ GHASH temporary - confirm t1 holds the last round key on every path in.
2115 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result
2116 b.gt .L128_dec_blocks_more_than_7
2118 cmp $main_end_input_ptr, #96
2131 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2132 b.gt .L128_dec_blocks_more_than_6
2134 cmp $main_end_input_ptr, #80
2135 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2143 b.gt .L128_dec_blocks_more_than_5
2145 cmp $main_end_input_ptr, #64
2152 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2153 b.gt .L128_dec_blocks_more_than_4
2155 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2160 cmp $main_end_input_ptr, #48
2161 b.gt .L128_dec_blocks_more_than_3
2163 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2165 cmp $main_end_input_ptr, #32
2167 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
2169 b.gt .L128_dec_blocks_more_than_2
2171 cmp $main_end_input_ptr, #16
2174 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
@ NOTE(review): this is the only branch target in this section written
@ without the leading dot. Confirm it matches the spelling of the label
@ definition; if the definition has the dot, this reference is a typo.
2175 b.gt L128_dec_blocks_more_than_1
2177 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
2178 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
2179 b .L128_dec_blocks_less_than_1
2180 .L128_dec_blocks_more_than_7: @ blocks left > 7
@ Reached when more than 112 bytes remain. Hashes the final-7 block: the
@ byte-reversed ciphertext is xored with the partial tag in t0 and
@ multiplied by h8. acc_l, acc_h and acc_m are assigned here rather than
@ accumulated, so this block starts the tail GHASH sums.
@ Also stores the plaintext already computed in res4, loads the next
@ ciphertext block and decrypts it with the ctr1 keystream. Execution
@ then falls through to the next label.
2181 rev64 $res0b, $res1b @ GHASH final-7 block
2183 eor $res0b, $res0b, $t0.16b @ feed in partial tag
@ acc_m temporarily carries the upper half of h78k so that it can act as
@ the multiplicand of the mid product at the end of this block.
2185 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid
2187 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low
2188 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid
2190 movi $t0.8b, #0 @ suppress further partial tag feed in
2191 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
@ rk4v now holds the xor of the two halves of the hashed block.
2193 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid
2195 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high
2196 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
2197 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result
2199 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid
2200 .L128_dec_blocks_more_than_6: @ blocks left > 6
@ Hashes the final-6 block with h7 and folds its low, high and mid
@ products into acc_l, acc_h and acc_m. The mid product multiplies the
@ xor of the two block halves by the lower half of h78k.
@ Also stores the pending plaintext from res4, loads the next ciphertext
@ block and decrypts it with the ctr2 keystream, then falls through.
@ rk2, rk3 and rk4v serve as scratch for the partial products here
@ (presumably the same registers that are written through the rk2q1 and
@ rk3q1 names - confirm against the register definitions).
2202 rev64 $res0b, $res1b @ GHASH final-6 block
@ t0 is zero if an earlier tail block already consumed the partial tag.
2204 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2206 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid
2208 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid
2210 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
2211 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
2212 movi $t0.8b, #0 @ suppress further partial tag feed in
2214 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid
2215 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
2216 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
2218 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
2219 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
2221 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
2222 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result
2223 .L128_dec_blocks_more_than_5: @ blocks left > 5
@ Hashes the final-5 block with h6 and folds its low, high and mid
@ products into acc_l, acc_h and acc_m.
@ Also stores the pending plaintext from res4, loads the next ciphertext
@ block and decrypts it with the ctr3 keystream, then falls through.
2225 rev64 $res0b, $res1b @ GHASH final-5 block
2227 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
2228 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
2230 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2232 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid
2234 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result
2236 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid
@ The mid term is copied into the upper lane because the multiply below
@ uses the upper halves of both operands, selecting the upper half of h56k.
2238 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid
2239 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
2240 movi $t0.8b, #0 @ suppress further partial tag feed in
2242 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid
2243 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
2244 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
2246 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
2247 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
2248 .L128_dec_blocks_more_than_4: @ blocks left > 4
@ Hashes the final-4 block with h5 and folds its low, high and mid
@ products into acc_l, acc_h and acc_m. The mid product multiplies the
@ xor of the two block halves by the lower half of h56k.
@ Also stores the pending plaintext from res4, loads the next ciphertext
@ block and decrypts it with the ctr4 keystream, then falls through.
2250 rev64 $res0b, $res1b @ GHASH final-4 block
2252 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2253 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
2255 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
2256 movi $t0.8b, #0 @ suppress further partial tag feed in
2257 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
2259 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
2261 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
@ res4 is written out before the xor further down reuses it for the next
@ block.
2263 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
2264 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
2266 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result
2267 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
2269 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
2271 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
2272 .L128_dec_blocks_more_than_3: @ blocks left > 3
2274 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
2275 rev64 $res0b, $res1b @ GHASH final-3 block
2277 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2279 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
2281 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
2282 ext $h4.16b, $h4.16b, $h4.16b, #8
2283 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
2285 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
2287 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
2289 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid
2290 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
2291 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
2293 movi $t0.8b, #0 @ suppress further partial tag feed in
2294 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result
2295 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
2297 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
2299 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
2300 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
2301 .L128_dec_blocks_more_than_2: @ blocks left > 2
2303 rev64 $res0b, $res1b @ GHASH final-2 block
2305 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
2307 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2308 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
2309 ext $h3.16b, $h3.16b, $h3.16b, #8
2310 movi $t0.8b, #0 @ suppress further partial tag feed in
2312 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
2314 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
2316 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
2318 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
2319 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
2320 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
2322 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
2324 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
2326 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result
2327 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
2328 .L128_dec_blocks_more_than_1: @ blocks left > 1
2330 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
2331 rev64 $res0b, $res1b @ GHASH final-1 block
2333 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
2334 ext $h2.16b, $h2.16b, $h2.16b, #8
2336 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2338 movi $t0.8b, #0 @ suppress further partial tag feed in
2340 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
2342 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext
2343 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
2345 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
2346 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
2347 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
2349 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid
2350 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result
2352 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
2354 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
2356 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
2358 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
2359 .L128_dec_blocks_less_than_1: @ blocks left <= 1
2361 and $bit_length, $bit_length, #127 @ bit_length %= 128
2363 sub $bit_length, $bit_length, #128 @ bit_length -= 128
2365 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
2367 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
2368 and $bit_length, $bit_length, #127 @ bit_length %= 128
2370 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
2371 cmp $bit_length, #64
2372 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
2374 csel $temp2_x, $temp1_x, $temp0_x, lt
2375 csel $temp3_x, $temp0_x, xzr, lt
2377 mov $ctr0.d[1], $temp3_x
2378 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
2380 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
2381 ext $h1.16b, $h1.16b, $h1.16b, #8
2382 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
2384 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
2386 rev64 $res0b, $res1b @ GHASH final block
2388 eor $res0b, $res0b, $t0.16b @ feed in partial tag
2390 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
2391 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
2393 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
2394 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
2396 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
2398 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
2399 st1 { $res4b}, [$output_ptr] @ store all 16B
2401 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
2403 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
2404 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
2406 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
2408 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
2410 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
2411 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
2413 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up
2415 eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid
2417 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
2418 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
2420 eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low
2421 ext $acc_lb, $acc_lb, $acc_lb, #8
2422 rev64 $acc_lb, $acc_lb
2423 st1 { $acc_l.16b }, [$current_tag]
2424 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b
2426 str $rtmp_ctrq, [$counter] @ store the updated counter
2428 mov x0, $byte_length
2430 ldp d10, d11, [sp, #16]
2431 ldp d12, d13, [sp, #32]
2432 ldp d14, d15, [sp, #48]
2433 ldp d8, d9, [sp], #80
2438 .size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel
# Register aliases for the 192-bit kernel that follows.  Each Perl scalar
# holds the textual name of an AArch64 register and is interpolated into
# the assembly heredoc.  Several groups below deliberately map different
# names onto the SAME physical registers; the kernel reloads the value it
# needs before each use, so the most recently loaded name is the only
# valid one at any point.

# General-purpose registers: end_input_ptr=x4, main_end_input_ptr=x5,
# temp0_x=x6, temp1_x=x7.  Note the prologue also uses x5 literally as a
# scratch register before main_end_input_ptr is assigned.
2443 my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
# temp2_x=x13, temp3_x=x14.
2444 my ($temp2_x,$temp3_x)=map("x$_",(13..14));
# Counter blocks ctr0..ctr7 live in v0..v7 and result blocks res0..res7 in
# v8..v15.  The suffix selects the register view used in the instruction:
# "b" = vN.16b, none = bare vN, "d" = dN, "q" = qN.
2445 my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
2446 my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
2447 my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
2448 my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
2449 my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));
# ctr_t0..ctr_t7 are v8..v15, i.e. the same registers as res0..res7: the
# input block is loaded under the ctr_t name and the eor3 that produces
# the result overwrites it in place under the res name.
2451 my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
2452 my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
2453 my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));
# GHASH accumulators: high=v17, mid=v18, low=v19.
2455 my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
2456 my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));
# Hash-key powers.  Both groups map onto v20..v25, so h1/h5 share v20,
# h12k/h56k share v21, h2/h6 share v22, h3/h7 share v23, h34k/h78k share
# v24 and h4/h8 share v25.  Only one group can be resident at a time; the
# kernel issues an ldr for each key right before the GHASH step using it.
2458 my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
2459 my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
2460 my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
2461 my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));
# q/d views of the reversed running counter (q30) and the counter
# increment (d31).
# NOTE(review): the bare-v names the kernel uses ($rtmp_ctr, $rctr_inc)
# are not declared in the lines visible here - confirm they are in scope.
2483 my $rtmp_ctrq="q30";
2485 my $rctr_incd="d31";
# The GHASH modulo constant shares a register with the temporary $t0, so
# it is reloaded from [$modulo_constant] before each reduction.
# NOTE(review): $t0 and $t0d are not declared in the lines visible here -
# confirm their definitions precede these two assignments.
2487 my $mod_constantd=$t0d;
2488 my $mod_constant=$t0;
# AES round keys.  All fifteen names rotate over just three registers:
#   v26: rk0 rk3 rk6 rk9  rk12
#   v27: rk1 rk4 rk7 rk10 rk13
#   v28: rk2 rk5 rk8 rk11 rk14
# Loading a later key therefore clobbers an earlier one (e.g. the ldp of
# rk2/rk3 overwrites rk0 in v26), which is why the kernel reloads round
# keys from the key schedule as it goes.
2490 my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
2491 my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
2492 my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
2493 my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
2494 my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
# q-register views of the same round-key slots, used by ldp/ldr.
2495 my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
2496 my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
2497 my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
2498 my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
2499 my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
2504 #########################################################################################
2505 # size_t unroll8_eor3_aes_gcm_enc_192_kernel(const uint8_t * plaintext,
2506 # uint64_t plaintext_length,
2507 # uint8_t * ciphertext,
2509 # unsigned char ivec[16],
2513 .global unroll8_eor3_aes_gcm_enc_192_kernel
2514 .type unroll8_eor3_aes_gcm_enc_192_kernel,%function
2516 unroll8_eor3_aes_gcm_enc_192_kernel:
2517 AARCH64_VALID_CALL_TARGET
2518 cbz x1, .L192_enc_ret
2519 stp d8, d9, [sp, #-80]!
2520 lsr $byte_length, $bit_length, #3
2523 stp d10, d11, [sp, #16]
2524 stp d12, d13, [sp, #32]
2525 stp d14, d15, [sp, #48]
2526 mov x5, #0xc200000000000000
2527 stp x5, xzr, [sp, #64]
2528 add $modulo_constant, sp, #64
2530 mov $main_end_input_ptr, $byte_length
2531 ld1 { $ctr0b}, [$counter] @ CTR block 0
2533 mov $constant_temp, #0x100000000 @ set up counter increment
2534 movi $rctr_inc.16b, #0x0
2535 mov $rctr_inc.d[1], $constant_temp
2537 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
2539 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
2541 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
2542 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
2544 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
2545 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
2547 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
2548 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
2550 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
2551 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
2552 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
2554 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
2556 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
2557 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
2558 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
2560 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
2562 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
2563 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
2565 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
2567 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
2568 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
2569 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
2571 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
2572 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
2573 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
2575 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
2576 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
2577 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
2579 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
2580 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
2582 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
2583 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
2584 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
2586 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
2587 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
2588 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
2590 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
2591 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
2592 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
2594 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
2595 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
2597 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
2598 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
2599 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
2601 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
2602 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
2604 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
2605 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
2606 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
2608 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
2610 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
2612 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
2614 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
2615 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
2616 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
2618 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
2619 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
2620 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
2622 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
2623 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
2624 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
2626 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
2627 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2628 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
2630 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
2631 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
2632 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
2634 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
2635 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
2636 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
2638 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
2640 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
2641 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
2642 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
2644 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
2645 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
2646 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
2648 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
2649 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
2650 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2652 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
2653 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
2655 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
2656 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
2658 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
2659 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
2661 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
2662 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
2664 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8
2665 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8
2667 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8
2668 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8
2669 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8
2671 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8
2672 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8
2673 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8
2675 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
2676 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
2677 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9
2679 ld1 { $acc_lb}, [$current_tag]
2680 ext $acc_lb, $acc_lb, $acc_lb, #8
2681 rev64 $acc_lb, $acc_lb
2682 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
2684 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9
2685 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9
2687 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9
2688 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9
2690 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9
2691 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9
2693 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 14 - round 10
2694 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9
2695 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 11 - round 10
2697 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 9 - round 10
2698 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 13 - round 10
2699 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 12 - round 10
2701 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8 - round 10
2702 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 10 - round 10
2703 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 15 - round 10
2705 aese $ctr6b, $rk11 @ AES block 14 - round 11
2706 aese $ctr3b, $rk11 @ AES block 11 - round 11
2708 aese $ctr4b, $rk11 @ AES block 12 - round 11
2709 aese $ctr7b, $rk11 @ AES block 15 - round 11
2710 ldr $rk12q, [$cc, #192] @ load rk12
2712 aese $ctr1b, $rk11 @ AES block 9 - round 11
2713 aese $ctr5b, $rk11 @ AES block 13 - round 11
2715 aese $ctr2b, $rk11 @ AES block 10 - round 11
2716 aese $ctr0b, $rk11 @ AES block 8 - round 11
2717 b.ge .L192_enc_tail @ handle tail
2719 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
2721 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
2723 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
2725 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
2727 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 0 - result
2728 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
2729 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
2731 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 3 - result
2732 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 1 - result
2734 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
2735 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
2736 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 4 - result
2738 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 5 - result
2739 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 7 - result
2740 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
2742 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 2 - result
2743 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
2744 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
2746 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
2747 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
2749 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
2750 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
2751 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 6 - result
2753 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
2755 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
2756 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
2757 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
2759 b.ge .L192_enc_prepretail @ do prepretail
2761 .L192_enc_main_loop: @ main loop start
2762 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free)
2763 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
2764 rev64 $res2b, $res2b @ GHASH block 8k+2
2766 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
2767 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
2768 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
2769 ext $h7.16b, $h7.16b, $h7.16b, #8
2770 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
2771 ext $h8.16b, $h8.16b, $h8.16b, #8
2773 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
2774 rev64 $res0b, $res0b @ GHASH block 8k
2775 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
2776 ext $h5.16b, $h5.16b, $h5.16b, #8
2777 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
2778 ext $h6.16b, $h6.16b, $h6.16b, #8
2780 rev64 $res1b, $res1b @ GHASH block 8k+1
2781 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
2782 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
2784 eor $res0b, $res0b, $acc_lb @ PRE 1
2785 rev64 $res3b, $res3b @ GHASH block 8k+3
2786 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free)
2788 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
2789 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
2790 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
2792 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
2793 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
2794 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
2796 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
2797 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
2798 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
2800 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
2801 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
2802 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
2804 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
2805 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
2806 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
2808 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
2809 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
2810 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
2811 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
2813 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
2814 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
2815 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
2817 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
2818 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
2819 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
2821 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
2822 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
2823 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
2825 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
2826 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
2827 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
2829 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
2830 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
2831 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
2833 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
2834 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
2835 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
2837 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
2838 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
2839 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
2841 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
2842 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
2843 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
2845 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
2846 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
2847 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
2849 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
2850 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
2851 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
2852 ext $h3.16b, $h3.16b, $h3.16b, #8
2853 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
2854 ext $h4.16b, $h4.16b, $h4.16b, #8
2856 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
2857 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
2858 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
2860 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
2861 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
2862 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
2864 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
2865 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
2866 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
2868 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
2869 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
2870 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
2872 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
2873 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
2874 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
2876 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
2877 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
2878 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
2880 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
2881 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
2882 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
2884 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
2885 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
2886 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
2887 ext $h1.16b, $h1.16b, $h1.16b, #8
2888 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
2889 ext $h2.16b, $h2.16b, $h2.16b, #8
2891 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2892 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
2893 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free)
2895 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free)
2896 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
2897 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
2899 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
2900 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
2902 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
2903 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
2904 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
2906 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
2907 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
2908 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
2910 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
2911 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
2912 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
2914 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
2915 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
2916 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
2918 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
2919 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
2920 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
2922 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
2923 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
2924 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
2926 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
2927 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
2929 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
2930 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
2931 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
2933 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
2934 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
2936 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
2937 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2938 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
2940 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
2941 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
2942 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
2944 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
2945 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
2946 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
2948 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
2949 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
2950 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
2952 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
2953 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
2954 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
2956 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
2957 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
2958 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
2960 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
2961 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
2962 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
2964 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
2965 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
2966 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
2968 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
2969 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
2970 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
2972 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
2973 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16
2974 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
2976 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
2977 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
2978 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
2980 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
2981 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
2982 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
2984 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
2985 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17
2986 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
2988 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
2989 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
2990 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
2992 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
2993 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
2994 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
2996 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
2997 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
2998 ldr $rk12q, [$cc, #192] @ load rk12
2999 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
3001 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
3002 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
3003 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
3005 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11
3006 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
3007 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext
3009 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext
3010 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11
3011 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
3013 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18
3014 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
3016 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
3017 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
3019 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
3020 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11
3021 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
3023 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11
3024 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11
3025 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 8k+12 - result
3027 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11
3028 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11
3029 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11
3031 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19
3032 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
3033 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 8k+15 - result
3035 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 8k+10 - result
3036 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 8k+8 - result
3037 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
3039 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 8k+9 - result
3040 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
3041 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
3042 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3044 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 8k+14 - result
3045 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
3046 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
3048 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
3049 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 8k+13 - result
3050 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
3052 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 8k+11 - result
3053 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
3055 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
3057 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
3059 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
3060 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
3061 b.lt .L192_enc_main_loop
3063 .L192_enc_prepretail: @ PREPRETAIL
3064 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
3065 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3066 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
3068 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
3069 ext $h7.16b, $h7.16b, $h7.16b, #8
3070 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
3071 ext $h8.16b, $h8.16b, $h8.16b, #8
3072 rev64 $res0b, $res0b @ GHASH block 8k
3073 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
3075 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
3076 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
3077 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
3078 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
3080 rev64 $res3b, $res3b @ GHASH block 8k+3
3081 rev64 $res2b, $res2b @ GHASH block 8k+2
3082 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
3083 ext $h5.16b, $h5.16b, $h5.16b, #8
3084 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
3085 ext $h6.16b, $h6.16b, $h6.16b, #8
3087 eor $res0b, $res0b, $acc_lb @ PRE 1
3088 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
3089 rev64 $res1b, $res1b @ GHASH block 8k+1
3091 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
3092 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
3093 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
3095 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
3096 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
3097 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
3099 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
3100 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
3101 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
3103 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
3104 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
3105 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
3107 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
3108 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
3109 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3111 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
3112 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
3113 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
3115 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
3116 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
3117 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
3119 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
3120 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
3121 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
3123 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
3124 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
3125 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
3127 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
3128 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
3129 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
3131 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
3132 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
3133 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
3135 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
3136 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
3137 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
3139 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
3140 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
3141 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
3143 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
3144 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free)
3145 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free)
3147 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
3148 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
3149 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
3151 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
3152 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
3153 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
3155 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
3156 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
3157 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
3159 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
3160 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
3161 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
3163 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
3164 ext $h3.16b, $h3.16b, $h3.16b, #8
3165 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
3166 ext $h4.16b, $h4.16b, $h4.16b, #8
3167 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
3168 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
3170 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
3171 ext $h1.16b, $h1.16b, $h1.16b, #8
3172 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
3173 ext $h2.16b, $h2.16b, $h2.16b, #8
3174 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
3175 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free)
3177 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
3178 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
3179 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
3181 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
3182 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
3183 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
3185 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
3186 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
3187 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
3189 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
3190 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
3191 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
3193 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
3194 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free)
3195 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
3196 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
3198 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
3199 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
3200 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
3202 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
3203 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
3204 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
3206 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
3207 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
3209 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
3210 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
3211 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
3213 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
3214 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
3215 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
3217 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
3218 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
3219 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
3221 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
3222 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
3223 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
3225 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
3226 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
3228 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
3229 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
3230 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
3232 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
3233 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
3234 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
3236 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
3237 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
3238 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
3240 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
3241 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
3242 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
3244 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
3245 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
3246 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
3248 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
3249 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
3250 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
3252 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
3253 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
3254 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
3256 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
3257 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
3259 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
3260 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
3262 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
3263 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
3264 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
3265 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
3267 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
3268 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
3270 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
3271 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
3272 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
3274 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
3275 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
3276 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
3278 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
3279 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
3280 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
3282 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
3283 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
3284 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
3286 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
3287 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
3289 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3290 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
3291 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
3292 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
3294 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
3295 ldr $rk12q, [$cc, #192] @ load rk12
3297 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
3298 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
3299 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
3301 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
3302 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
3303 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
3305 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11
3306 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11
3308 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
3309 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11
3311 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
3312 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
3314 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
3315 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11
3316 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11
3318 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11
3319 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11
3320 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11
3322 .L192_enc_tail: @ TAIL
3324 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h and h6k | h5k
3325 ext $h5.16b, $h5.16b, $h5.16b, #8
3326 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
3328 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
3330 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k and h8l | h8h
3331 ext $h8.16b, $h8.16b, $h8.16b, #8
3335 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h and h7l | h7h
3336 ext $h6.16b, $h6.16b, $h6.16b, #8
3337 ext $h7.16b, $h7.16b, $h7.16b, #8
3338 cmp $main_end_input_ptr, #112
3340 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result
3341 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
3342 b.gt .L192_enc_blocks_more_than_7
3344 cmp $main_end_input_ptr, #96
3350 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3358 b.gt .L192_enc_blocks_more_than_6
3361 cmp $main_end_input_ptr, #80
3368 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3369 b.gt .L192_enc_blocks_more_than_5
3371 cmp $main_end_input_ptr, #64
3372 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3379 b.gt .L192_enc_blocks_more_than_4
3385 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3386 cmp $main_end_input_ptr, #48
3387 b.gt .L192_enc_blocks_more_than_3
3391 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3393 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
3394 cmp $main_end_input_ptr, #32
3395 b.gt .L192_enc_blocks_more_than_2
3397 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3399 cmp $main_end_input_ptr, #16
3401 b.gt .L192_enc_blocks_more_than_1
3403 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
3404 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
3405 b .L192_enc_blocks_less_than_1
3406 .L192_enc_blocks_more_than_7: @ blocks left > 7
3407 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
3409 rev64 $res0b, $res1b @ GHASH final-7 block
3410 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid
3412 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3414 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid
3416 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
3418 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid
3419 movi $t0.8b, #0 @ suppress further partial tag feed in
3420 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low
3422 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high
3424 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid
3425 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result
3426 .L192_enc_blocks_more_than_6: @ blocks left > 6
3428 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
3430 rev64 $res0b, $res1b @ GHASH final-6 block
3432 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
3434 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3436 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid
3438 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
3439 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result
3441 movi $t0.8b, #0 @ suppress further partial tag feed in
3442 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
3443 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid
3445 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid
3447 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
3448 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
3450 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
3451 .L192_enc_blocks_more_than_5: @ blocks left > 5
3453 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
3455 rev64 $res0b, $res1b @ GHASH final-5 block
3457 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3459 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid
3461 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
3462 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
3464 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid
3465 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
3467 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid
3468 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
3470 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
3471 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid
3473 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result
3474 movi $t0.8b, #0 @ suppress further partial tag feed in
3476 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
3477 .L192_enc_blocks_more_than_4: @ blocks left > 4
3479 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
3481 rev64 $res0b, $res1b @ GHASH final-4 block
3483 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3485 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
3486 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
3487 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
3489 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
3490 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
3492 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
3494 movi $t0.8b, #0 @ suppress further partial tag feed in
3495 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
3497 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
3499 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
3500 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result
3501 .L192_enc_blocks_more_than_3: @ blocks left > 3
3503 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
3504 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
3506 rev64 $res0b, $res1b @ GHASH final-3 block
3508 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3509 movi $t0.8b, #0 @ suppress further partial tag feed in
3511 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
3512 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
3513 ext $h4.16b, $h4.16b, $h4.16b, #8
3515 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
3517 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result
3518 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
3520 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid
3521 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
3523 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
3524 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
3526 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
3528 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
3529 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
3530 .L192_enc_blocks_more_than_2: @ blocks left > 2
3532 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
3534 rev64 $res0b, $res1b @ GHASH final-2 block
3535 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
3536 ext $h3.16b, $h3.16b, $h3.16b, #8
3538 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3540 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
3541 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
3543 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
3545 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
3546 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
3547 movi $t0.8b, #0 @ suppress further partial tag feed in
3549 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
3551 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
3552 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
3554 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
3555 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result
3556 .L192_enc_blocks_more_than_1: @ blocks left > 1
3558 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
3559 ext $h2.16b, $h2.16b, $h2.16b, #8
3560 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
3562 rev64 $res0b, $res1b @ GHASH final-1 block
3564 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3566 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
3567 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
3569 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
3570 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
3571 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
3573 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
3574 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
3576 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid
3578 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result
3579 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
3581 movi $t0.8b, #0 @ suppress further partial tag feed in
3583 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
3584 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
3585 .L192_enc_blocks_less_than_1: @ blocks left <= 1
3587 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
3588 and $bit_length, $bit_length, #127 @ bit_length %= 128
3590 sub $bit_length, $bit_length, #128 @ bit_length -= 128
3592 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
3594 and $bit_length, $bit_length, #127 @ bit_length %= 128
3596 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
3597 cmp $bit_length, #64
3598 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
3600 csel $temp2_x, $temp1_x, $temp0_x, lt
3601 csel $temp3_x, $temp0_x, xzr, lt
3603 mov $ctr0.d[1], $temp3_x
3604 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
3605 ext $h1.16b, $h1.16b, $h1.16b, #8
3607 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
3608 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
3610 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
3612 rev64 $res0b, $res1b @ GHASH final block
3613 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
3615 st1 { $res1b}, [$output_ptr] @ store all 16B
3617 eor $res0b, $res0b, $t0.16b @ feed in partial tag
3619 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
3620 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
3622 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
3623 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
3625 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
3627 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
3629 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
3630 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
3632 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
3633 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
3635 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b
3637 str $rtmp_ctrq, [$counter] @ store the updated counter
3638 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
3640 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
3642 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
3644 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
3645 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3647 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
3648 ext $acc_lb, $acc_lb, $acc_lb, #8
3649 rev64 $acc_lb, $acc_lb
3650 st1 { $acc_l.16b }, [$current_tag]
3652 mov x0, $byte_length @ return sizes
3654 ldp d10, d11, [sp, #16]
3655 ldp d12, d13, [sp, #32]
3656 ldp d14, d15, [sp, #48]
3657 ldp d8, d9, [sp], #80
3663 .size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel
3666 #########################################################################################
3667 # size_t unroll8_eor3_aes_gcm_dec_192_kernel(const uint8_t * ciphertext,
3668 # uint64_t plaintext_length,
3669 # uint8_t * plaintext,
3671 # unsigned char ivec[16],
3675 .global unroll8_eor3_aes_gcm_dec_192_kernel
3676 .type unroll8_eor3_aes_gcm_dec_192_kernel,%function
3678 unroll8_eor3_aes_gcm_dec_192_kernel:
3679 AARCH64_VALID_CALL_TARGET
3680 cbz x1, .L192_dec_ret
3681 stp d8, d9, [sp, #-80]!
3682 lsr $byte_length, $bit_length, #3
3685 stp d10, d11, [sp, #16]
3686 stp d12, d13, [sp, #32]
3687 stp d14, d15, [sp, #48]
3688 mov x5, #0xc200000000000000
3689 stp x5, xzr, [sp, #64]
3690 add $modulo_constant, sp, #64
3692 mov $main_end_input_ptr, $byte_length
3693 ld1 { $ctr0b}, [$counter] @ CTR block 0
3694 ld1 { $acc_lb}, [$current_tag]
3696 mov $constant_temp, #0x100000000 @ set up counter increment
3697 movi $rctr_inc.16b, #0x0
3698 mov $rctr_inc.d[1], $constant_temp
3700 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
3702 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
3704 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
3705 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
3707 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
3708 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
3710 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
3711 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
3713 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
3714 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
3716 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
3717 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
3718 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3720 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
3721 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
3723 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
3725 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
3726 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
3727 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
3729 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
3730 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
3731 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
3733 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
3734 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
3735 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3737 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
3739 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
3741 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
3742 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
3743 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
3745 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
3746 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
3748 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
3749 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
3750 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
3752 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
3753 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
3754 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
3756 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
3757 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
3758 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
3760 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
3762 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
3763 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
3764 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
3766 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
3767 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
3769 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
3770 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
3771 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
3773 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
3774 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
3775 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
3777 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
3778 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
3779 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
3781 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
3782 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
3783 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
3785 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
3786 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
3788 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
3789 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
3790 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
3792 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
3793 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
3794 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
3796 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
3798 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
3799 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
3800 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
3802 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
3803 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
3804 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
3806 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
3807 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
3808 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
3810 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
3812 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
3813 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
3815 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
3816 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
3817 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
3819 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
3820 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
3821 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
3823 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8
3824 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8
3825 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
3827 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8
3828 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8
3829 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8
3831 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8
3832 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8
3833 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8
3835 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
3836 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9
3838 ld1 { $acc_lb}, [$current_tag]
3839 ext $acc_lb, $acc_lb, $acc_lb, #8
3840 rev64 $acc_lb, $acc_lb
3842 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
3844 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9
3845 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
3847 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9
3848 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9
3849 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9
3851 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
3852 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9
3854 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9
3855 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9
3857 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10
3858 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10
3859 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10
3861 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10
3862 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10
3863 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10
3865 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10
3866 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10
3867 ldr $rk12q, [$cc, #192] @ load rk12
3869 aese $ctr0b, $rk11 @ AES block 0 - round 11
3870 aese $ctr1b, $rk11 @ AES block 1 - round 11
3871 aese $ctr4b, $rk11 @ AES block 4 - round 11
3873 aese $ctr6b, $rk11 @ AES block 6 - round 11
3874 aese $ctr5b, $rk11 @ AES block 5 - round 11
3875 aese $ctr7b, $rk11 @ AES block 7 - round 11
3877 aese $ctr2b, $rk11 @ AES block 2 - round 11
3878 aese $ctr3b, $rk11 @ AES block 3 - round 11
3879 b.ge .L192_dec_tail @ handle tail
3881 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
3883 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
3885 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
3887 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 1 - result
3888 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 0 - result
3889 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
3891 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
3892 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
3894 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
3895 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
3896 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 3 - result
3898 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 2 - result
3899 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
3900 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
3902 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
3903 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
3905 eor3 $ctr4b, $res4b, $ctr4b, $rk12 @ AES block 4 - result
3907 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
3908 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
3910 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 5 - result
3911 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
3912 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
3914 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 6 - result
3915 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 7 - result
3916 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
3918 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
3919 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
3920 b.ge .L192_dec_prepretail @ do prepretail
3922 .L192_dec_main_loop: @ main loop start - GHASH the 8 ciphertext blocks held in res0-res7 while running AES-CTR on counter blocks 8k+8 to 8k+15
3923 rev64 $res1b, $res1b @ GHASH block 8k+1
3924 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3925 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 - swap 64-bit halves of acc_l
3927 rev64 $res0b, $res0b @ GHASH block 8k
3928 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
3929 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
3931 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
3932 ext $h7.16b, $h7.16b, $h7.16b, #8 @ swap 64-bit halves of h7
3933 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
3934 ext $h8.16b, $h8.16b, $h8.16b, #8 @ swap 64-bit halves of h8
3935 rev64 $res4b, $res4b @ GHASH block 8k+4
3936 rev64 $res3b, $res3b @ GHASH block 8k+3
3938 eor $res0b, $res0b, $acc_lb @ PRE 1 - xor acc_l into block 8k
3939 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
3940 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
3942 rev64 $res5b, $res5b @ GHASH block 8k+5
3944 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
3945 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
3946 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
3948 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
3949 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
3950 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
3952 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
3953 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
3954 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
3956 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
3957 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
3958 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3960 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
3961 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low (product overwrites h7, which is reloaded at the top of each pass)
3962 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
3963 ext $h5.16b, $h5.16b, $h5.16b, #8 @ swap 64-bit halves of h5
3964 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
3965 ext $h6.16b, $h6.16b, $h6.16b, #8 @ swap 64-bit halves of h6
3967 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
3968 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
3969 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
3971 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
3972 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
3973 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
3975 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
3976 rev64 $res2b, $res2b @ GHASH block 8k+2
3977 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
3979 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
3980 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
3981 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
3982 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
3984 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
3985 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
3986 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
3988 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
3989 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
3990 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
3992 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
3993 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
3994 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
3996 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
3997 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
3998 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
4000 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
4001 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
4002 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
4004 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
4005 ext $h3.16b, $h3.16b, $h3.16b, #8 @ swap 64-bit halves of h3
4006 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
4007 ext $h4.16b, $h4.16b, $h4.16b, #8 @ swap 64-bit halves of h4
4008 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
4009 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
4011 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
4012 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
4013 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
4015 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
4016 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
4018 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
4019 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
4020 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4022 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
4023 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
4024 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
4026 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
4027 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
4029 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
4030 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
4031 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
4033 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
4034 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
4035 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
4037 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
4038 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
4039 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
4041 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
4042 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
4043 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
4045 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
4046 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
4047 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
4049 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
4050 ext $h1.16b, $h1.16b, $h1.16b, #8 @ swap 64-bit halves of h1
4051 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
4052 ext $h2.16b, $h2.16b, $h2.16b, #8 @ swap 64-bit halves of h2
4053 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
4054 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
4056 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
4057 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
4058 rev64 $res7b, $res7b @ GHASH block 8k+7
4060 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
4061 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
4062 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
4064 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
4065 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
4066 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
4068 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
4069 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
4070 rev64 $res6b, $res6b @ GHASH block 8k+6
4072 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
4073 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
4074 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
4075 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
4077 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
4078 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
4079 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
4081 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
4082 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
4083 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
4085 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
4086 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
4087 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
4089 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
4090 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
4091 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
4093 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
4094 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
4095 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
4097 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
4098 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
4099 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
4101 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
4102 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
4103 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
4105 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
4106 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
4107 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
4109 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
4110 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
4111 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
4113 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
4114 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
4115 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
4117 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
4118 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
4119 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
4121 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
4122 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
4123 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
4125 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
4126 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
4127 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
4129 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
4130 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
4131 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
4133 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
4134 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 (staged in h1, copied to ctr0 at the end of the pass)
4135 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
4137 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
4138 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
4139 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
4141 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
4142 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
4143 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
4145 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
4146 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
4148 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
4149 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
4150 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
4152 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 (staged in h2, copied to ctr1 at the end of the pass)
4153 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
4154 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
4156 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
4157 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
4158 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4160 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
4161 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
4162 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
4164 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 (staged in h3, copied to ctr2 at the end of the pass)
4165 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
4166 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
4168 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
4169 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
4170 ldr $rk12q, [$cc, #192] @ load rk12
4172 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
4173 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
4174 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
4176 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11
4177 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4178 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11
4180 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
4181 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11
4182 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11
4184 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 8k+8 - result
4185 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 (staged in h4, copied to ctr3 once block 8k+11 is stored)
4186 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
4188 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11
4189 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11
4190 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
4192 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11
4193 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11
4194 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
4196 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 8k+9 - result
4197 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
4198 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 8k+11 - result
4200 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 8k+10 - result
4201 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 8k+15 - result
4202 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
4204 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 8k+13 - result
4205 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
4206 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
4208 eor3 $ctr4b, $res4b, $ctr4b, $rk12 @ AES block 8k+12 - result
4209 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
4210 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL - flags are consumed by the b.lt that closes the loop
4212 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 8k+14 - result
4213 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
4214 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
4216 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
4217 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
4219 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
4220 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
4221 b.lt .L192_dec_main_loop @ next pass while input_ptr is below main_end_input_ptr; otherwise fall through to prepretail
4223 .L192_dec_prepretail: @ PREPRETAIL
4224 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
4225 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
4226 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
4228 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
4229 ext $h7.16b, $h7.16b, $h7.16b, #8
4230 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
4231 ext $h8.16b, $h8.16b, $h8.16b, #8
4232 rev64 $res0b, $res0b @ GHASH block 8k
4233 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
4235 rev64 $res3b, $res3b @ GHASH block 8k+3
4236 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
4237 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
4239 eor $res0b, $res0b, $acc_lb @ PRE 1
4240 rev64 $res2b, $res2b @ GHASH block 8k+2
4241 rev64 $res1b, $res1b @ GHASH block 8k+1
4243 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
4244 ext $h5.16b, $h5.16b, $h5.16b, #8
4245 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
4246 ext $h6.16b, $h6.16b, $h6.16b, #8
4247 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
4249 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
4250 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
4251 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
4253 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
4254 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
4255 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
4257 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
4258 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
4259 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
4261 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
4262 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
4263 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
4265 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
4266 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
4267 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
4269 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
4270 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
4271 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
4273 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
4274 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
4275 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
4277 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
4278 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
4279 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
4281 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
4282 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
4283 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
4285 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
4286 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
4287 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
4288 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
4290 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
4291 rev64 $res5b, $res5b @ GHASH block 8k+5
4292 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
4294 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
4295 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
4296 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
4298 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
4299 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
4300 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
4302 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
4303 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
4304 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
4306 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
4307 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
4308 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
4310 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
4311 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
4312 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
4314 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
4315 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
4316 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
4318 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
4319 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4320 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
4322 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
4323 ext $h3.16b, $h3.16b, $h3.16b, #8
4324 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
4325 ext $h4.16b, $h4.16b, $h4.16b, #8
4326 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
4327 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
4329 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
4330 ext $h1.16b, $h1.16b, $h1.16b, #8
4331 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
4332 ext $h2.16b, $h2.16b, $h2.16b, #8
4333 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
4334 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
4336 rev64 $res7b, $res7b @ GHASH block 8k+7
4338 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
4339 rev64 $res4b, $res4b @ GHASH block 8k+4
4341 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
4342 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
4343 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
4345 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
4346 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
4347 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
4349 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
4350 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
4351 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
4353 rev64 $res6b, $res6b @ GHASH block 8k+6
4354 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
4355 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
4356 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
4358 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
4359 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
4360 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
4362 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
4363 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
4364 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
4366 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
4367 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
4368 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
4370 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
4372 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
4373 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
4374 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
4376 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
4377 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
4378 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
4380 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
4381 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
4382 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
4384 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
4385 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
4387 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
4388 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
4389 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
4391 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
4392 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
4393 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
4395 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
4396 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
4397 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
4399 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
4400 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
4401 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
4403 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
4404 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
4405 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
4407 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
4408 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
4409 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
4411 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
4412 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
4413 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
4415 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
4416 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
4417 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
4419 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
4420 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
4421 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
4423 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
4424 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
4426 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
4427 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4428 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
4430 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
4431 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
4432 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
4434 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
4435 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
4436 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
4438 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
4439 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
4440 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
4442 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
4443 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
4444 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
4446 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
4447 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
4448 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
4450 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
4451 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
4452 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
4454 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
4455 ldr $rk12q, [$cc, #192] @ load rk12
4456 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4458 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
4459 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
4460 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
4462 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
4463 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
4464 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
4466 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11
4467 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
4468 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11
4470 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11
4471 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
4472 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
4474 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11
4475 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11
4476 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
4478 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11
4479 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11
4480 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11
4482 .L192_dec_tail: @ TAIL
4484 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
4486 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h
4487 ext $h5.16b, $h5.16b, $h5.16b, #8
4488 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
4490 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k
4491 ext $h8.16b, $h8.16b, $h8.16b, #8
4495 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h
4496 ext $h6.16b, $h6.16b, $h6.16b, #8
4497 ext $h7.16b, $h7.16b, $h7.16b, #8
4498 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
4500 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result
4501 cmp $main_end_input_ptr, #112
4502 b.gt .L192_dec_blocks_more_than_7
4506 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4512 cmp $main_end_input_ptr, #96
4518 b.gt .L192_dec_blocks_more_than_6
4527 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4528 cmp $main_end_input_ptr, #80
4529 b.gt .L192_dec_blocks_more_than_5
4536 cmp $main_end_input_ptr, #64
4538 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4539 b.gt .L192_dec_blocks_more_than_4
4541 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4546 cmp $main_end_input_ptr, #48
4547 b.gt .L192_dec_blocks_more_than_3
4549 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4551 cmp $main_end_input_ptr, #32
4554 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
4555 b.gt .L192_dec_blocks_more_than_2
4557 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4560 cmp $main_end_input_ptr, #16
4561 b.gt .L192_dec_blocks_more_than_1
4563 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
4564 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
4565 b .L192_dec_blocks_less_than_1
4566 .L192_dec_blocks_more_than_7: @ blocks left > 7
4567 rev64 $res0b, $res1b @ GHASH final-7 block
4569 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid
4570 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4572 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high
4573 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid
4574 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
4576 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low
4578 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid
4579 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
4581 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result
4583 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid
4584 movi $t0.8b, #0 @ suppress further partial tag feed in
4585 .L192_dec_blocks_more_than_6: @ blocks left > 6
4587 rev64 $res0b, $res1b @ GHASH final-6 block
4589 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4591 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
4592 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid
4594 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid
4595 movi $t0.8b, #0 @ suppress further partial tag feed in
4596 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
4598 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
4599 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result
4601 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
4602 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid
4603 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
4605 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
4606 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
4607 .L192_dec_blocks_more_than_5: @ blocks left > 5
4609 rev64 $res0b, $res1b @ GHASH final-5 block
4611 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4613 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid
4615 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid
4617 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid
4618 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
4620 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
4622 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
4623 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
4625 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid
4627 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
4628 movi $t0.8b, #0 @ suppress further partial tag feed in
4629 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
4631 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
4632 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result
4633 .L192_dec_blocks_more_than_4: @ blocks left > 4
4635 rev64 $res0b, $res1b @ GHASH final-4 block
4637 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4638 movi $t0.8b, #0 @ suppress further partial tag feed in
4640 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
4641 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
4642 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
4644 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
4646 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
4648 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
4649 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
4650 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
4652 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result
4654 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
4655 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
4656 .L192_dec_blocks_more_than_3: @ blocks left > 3
4658 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
4659 ext $h4.16b, $h4.16b, $h4.16b, #8
4660 rev64 $res0b, $res1b @ GHASH final-3 block
4661 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
4663 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4665 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
4666 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
4668 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
4669 movi $t0.8b, #0 @ suppress further partial tag feed in
4670 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
4672 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
4673 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
4674 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result
4676 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
4677 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
4679 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid
4681 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
4683 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
4684 .L192_dec_blocks_more_than_2: @ blocks left > 2
4686 rev64 $res0b, $res1b @ GHASH final-2 block
4687 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
4688 ext $h3.16b, $h3.16b, $h3.16b, #8
4690 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4692 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
4693 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
4695 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
4697 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
4699 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
4700 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
4702 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
4703 movi $t0.8b, #0 @ suppress further partial tag feed in
4705 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
4706 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
4708 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
4709 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result
4710 .L192_dec_blocks_more_than_1: @ blocks left > 1
4712 rev64 $res0b, $res1b @ GHASH final-1 block
4713 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext
4714 ldr $h2q, [$current_tag, #64] @ load h1l | h1h
4715 ext $h2.16b, $h2.16b, $h2.16b, #8
4717 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4718 movi $t0.8b, #0 @ suppress further partial tag feed in
4719 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
4721 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
4722 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
4723 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
4725 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
4727 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result
4729 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
4731 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid
4733 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
4735 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
4737 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
4738 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
4739 .L192_dec_blocks_less_than_1: @ blocks left <= 1
4741 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b
4742 and $bit_length, $bit_length, #127 @ bit_length %= 128
4744 sub $bit_length, $bit_length, #128 @ bit_length -= 128
4745 str $rtmp_ctrq, [$counter] @ store the updated counter
4747 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
4748 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
4750 and $bit_length, $bit_length, #127 @ bit_length %= 128
4752 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
4753 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
4754 cmp $bit_length, #64
4756 csel $temp2_x, $temp1_x, $temp0_x, lt
4757 csel $temp3_x, $temp0_x, xzr, lt
4758 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
4759 ext $h1.16b, $h1.16b, $h1.16b, #8
4761 mov $ctr0.d[1], $temp3_x
4762 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
4764 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
4766 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
4767 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
4769 rev64 $res0b, $res1b @ GHASH final block
4771 st1 { $res4b}, [$output_ptr] @ store all 16B
4773 eor $res0b, $res0b, $t0.16b @ feed in partial tag
4775 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
4776 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
4778 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
4779 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
4780 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
4782 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
4783 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
4785 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
4786 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
4787 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
4789 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
4790 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4792 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up
4794 eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid
4796 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
4797 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4799 eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low
4800 ext $acc_lb, $acc_lb, $acc_lb, #8
4801 rev64 $acc_lb, $acc_lb
4802 st1 { $acc_l.16b }, [$current_tag]
4804 mov x0, $byte_length
4806 ldp d10, d11, [sp, #16]
4807 ldp d12, d13, [sp, #32]
4808 ldp d14, d15, [sp, #48]
4809 ldp d8, d9, [sp], #80
4815 .size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel
# Register aliases interpolated into the assembly template that follows.
# NOTE(review): these appear to belong to the 256-bit encrypt kernel below
# (it is the code that uses $rk13/$rk14) - confirm against the enclosing scope.
#
# General-purpose scratch/pointer registers: x4..x7 and x13..x14.
4821 my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
4822 my ($temp2_x,$temp3_x)=map("x$_",(13..14));
# v0..v7 hold the eight counter/AES blocks and v8..v15 the eight result blocks.
# The same registers are named several times below so the template can pick
# the operand view it needs: ".16b" vector form, bare "vN", and the "dN"/"qN"
# scalar views.
4823 my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
4824 my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
4825 my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
4826 my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
4827 my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));
# $ctr_t0..$ctr_t7 are a second set of names for v8..v15, i.e. the SAME
# physical registers as $res0..$res7. The kernel loads input blocks through
# the $ctr_t* names and then writes the XORed output to the matching $res*
# name, so each result overwrites the input it was computed from.
4829 my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
4830 my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
4831 my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));
# GHASH accumulators (high / mid / low partial products): v17, v18, v19.
4833 my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
4834 my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));
# Hash-key registers. Both groups (h1..h4 with h12k/h34k, and h5..h8 with
# h56k/h78k) map onto the same six registers v20..v25, e.g. $h1 and $h5 are
# both v20. Only one group's value can be live in a given register at a time;
# the assembly reloads the required entry from [$current_tag, #offset] before
# each use.
4836 my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
4837 my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
4838 my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
4839 my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));
# q-view of the byte-reversed running counter and d-view of the counter
# increment (registers 30 and 31).
4861 my $rtmp_ctrq="q30";
4863 my $rctr_incd="d31";
# The GHASH reduction constant has no register of its own: it aliases $t0
# ($t0/$t0d are declared outside this span), so loading it overwrites
# whatever $t0 held.
4865 my $mod_constantd=$t0d;
4866 my $mod_constant=$t0;
# Round keys rotate through just three registers: rkN lives in v(26 + N % 3),
# so rk0/rk3/rk6/rk9/rk12 all name v26, and so on. The assembly re-loads
# keys from [$cc, #offset] with ldp/ldr as the rounds progress, matching the
# "reload constants" approach described in the file header.
4868 my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
4869 my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
4870 my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
4871 my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
4872 my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
4873 my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
4874 my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
4875 my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
4876 my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
4877 my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
4881 #########################################################################################
4882 # size_t unroll8_eor3_aes_gcm_enc_256_kernel(const uint8_t * plaintext,
4883 # uint64_t plaintext_length,
4884 # uint8_t * ciphertext,
4886 # unsigned char ivec[16],
4890 .global unroll8_eor3_aes_gcm_enc_256_kernel
4891 .type unroll8_eor3_aes_gcm_enc_256_kernel,%function
4893 unroll8_eor3_aes_gcm_enc_256_kernel:
4894 AARCH64_VALID_CALL_TARGET
4895 cbz x1, .L256_enc_ret
4896 stp d8, d9, [sp, #-80]!
4897 lsr $byte_length, $bit_length, #3
4900 stp d10, d11, [sp, #16]
4901 stp d12, d13, [sp, #32]
4902 stp d14, d15, [sp, #48]
4903 mov x5, #0xc200000000000000
4904 stp x5, xzr, [sp, #64]
4905 add $modulo_constant, sp, #64
4907 ld1 { $ctr0b}, [$counter] @ CTR block 0
4909 mov $main_end_input_ptr, $byte_length
4911 mov $constant_temp, #0x100000000 @ set up counter increment
4912 movi $rctr_inc.16b, #0x0
4913 mov $rctr_inc.d[1], $constant_temp
4914 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
4916 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
4918 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
4920 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
4922 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
4924 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
4925 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
4927 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
4928 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
4930 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
4931 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
4933 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
4934 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
4936 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
4937 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
4938 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
4940 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
4941 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
4943 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
4945 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
4946 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
4947 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
4949 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
4950 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
4951 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
4953 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
4954 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
4955 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
4957 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
4958 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
4959 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
4961 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
4962 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
4964 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
4966 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
4968 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
4969 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
4970 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
4972 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
4973 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
4974 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
4976 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
4977 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
4978 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
4980 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
4981 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
4982 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4984 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
4986 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
4987 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
4988 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
4990 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
4991 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
4993 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
4994 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
4995 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
4997 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
4998 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
5000 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
5001 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
5002 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
5004 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
5005 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
5006 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5008 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
5009 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
5010 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
5012 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
5013 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
5014 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
5016 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
5017 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
5018 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
5020 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
5021 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
5022 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
5024 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
5025 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
5026 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5028 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
5029 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
5031 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
5032 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
5033 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
5035 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
5036 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
5038 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
5040 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8
5041 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8
5043 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8
5044 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8
5045 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8
5047 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8
5048 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8
5049 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8
5051 ld1 { $acc_lb}, [$current_tag]
5052 ext $acc_lb, $acc_lb, $acc_lb, #8
5053 rev64 $acc_lb, $acc_lb
5054 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5056 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9
5057 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9
5058 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9
5060 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9
5061 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9
5062 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9
5064 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9
5066 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10
5067 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10
5068 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9
5070 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10
5071 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10
5072 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10
5074 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10
5075 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10
5076 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10
5078 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11
5079 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5080 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11
5082 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11
5083 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11
5084 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11
5086 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11
5087 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11
5088 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11
5090 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
5091 ldr $rk14q, [$cc, #224] @ load rk14
5093 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12
5094 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12
5095 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12
5097 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12
5098 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12
5099 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12
5101 aese $ctr2b, $rk13 @ AES block 2 - round 13
5102 aese $ctr1b, $rk13 @ AES block 1 - round 13
5103 aese $ctr4b, $rk13 @ AES block 4 - round 13
5105 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 12
5106 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12
5108 aese $ctr0b, $rk13 @ AES block 0 - round 13
5109 aese $ctr5b, $rk13 @ AES block 5 - round 13
5111 aese $ctr6b, $rk13 @ AES block 6 - round 13
5112 aese $ctr7b, $rk13 @ AES block 7 - round 13
5113 aese $ctr3b, $rk13 @ AES block 3 - round 13
5115 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
5116 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
5117 b.ge .L256_enc_tail @ handle tail
5119 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
5121 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
5123 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 0 - result
5124 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
5125 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
5127 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 1 - result
5128 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 3 - result
5130 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
5131 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
5132 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
5134 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
5135 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 2 - result
5136 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
5138 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
5139 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
5140 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
5142 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
5144 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
5145 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
5147 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result
5149 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result
5150 eor3 $res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block 6 - result
5151 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result
5153 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
5154 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
5156 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
5157 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
5158 b.ge .L256_enc_prepretail @ do prepretail
5160 .L256_enc_main_loop: @ main loop start
5161 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
5163 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
5164 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
5165 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
5166 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
5168 rev64 $res3b, $res3b @ GHASH block 8k+3
5169 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
5170 ext $h5.16b, $h5.16b, $h5.16b, #8
5171 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
5172 ext $h6.16b, $h6.16b, $h6.16b, #8
5173 rev64 $res1b, $res1b @ GHASH block 8k+1
5175 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
5176 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
5177 rev64 $res0b, $res0b @ GHASH block 8k
5179 rev64 $res4b, $res4b @ GHASH block 8k+4
5180 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
5181 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
5182 ext $h7.16b, $h7.16b, $h7.16b, #8
5183 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
5184 ext $h8.16b, $h8.16b, $h8.16b, #8
5186 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
5187 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
5188 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
5190 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
5191 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
5192 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
5194 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
5195 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
5196 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
5198 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
5199 eor $res0b, $res0b, $acc_lb @ PRE 1
5200 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
5202 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
5203 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
5204 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
5206 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
5207 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
5208 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
5210 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
5211 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
5212 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
5214 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
5215 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
5216 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
5218 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
5219 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
5220 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
5222 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
5223 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
5224 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
5226 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
5227 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
5228 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
5230 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
5231 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
5232 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
5234 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
5235 rev64 $res6b, $res6b @ GHASH block 8k+6
5236 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
5238 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
5239 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
5240 rev64 $res2b, $res2b @ GHASH block 8k+2
5242 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
5243 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
5244 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
5246 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
5247 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
5248 rev64 $res5b, $res5b @ GHASH block 8k+5
5250 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
5251 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
5252 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
5253 ext $h3.16b, $h3.16b, $h3.16b, #8
5254 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
5255 ext $h4.16b, $h4.16b, $h4.16b, #8
5257 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
5258 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
5259 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
5261 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
5262 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
5263 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
5265 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
5266 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
5267 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
5269 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
5270 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
5271 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
5273 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
5274 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
5275 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5277 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
5278 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
5279 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
5281 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
5282 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
5283 rev64 $res7b, $res7b @ GHASH block 8k+7
5285 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
5286 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
5287 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
5289 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
5290 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
5291 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
5293 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
5294 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
5295 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
5297 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
5298 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
5299 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
5301 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
5302 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
5303 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
5305 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
5306 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
5307 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
5309 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5310 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
5311 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
5313 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
5314 ext $h1.16b, $h1.16b, $h1.16b, #8
5315 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
5316 ext $h2.16b, $h2.16b, $h2.16b, #8
5317 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
5318 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
5320 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
5321 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
5322 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
5323 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
5325 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
5326 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
5327 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
5329 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
5330 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
5331 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
5333 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
5334 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
5335 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
5337 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
5338 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
5339 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
5341 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
5342 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
5343 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
5345 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
5346 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
5347 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
5349 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
5350 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
5351 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
5353 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
5354 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
5355 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
5357 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
5358 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
5359 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
5361 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
5362 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
5363 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
5365 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5366 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
5367 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
5369 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
5370 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
5371 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
5373 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
5374 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
5375 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
5377 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
5379 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
5380 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
5381 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
5383 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
5384 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
5385 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
5387 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
5388 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
5389 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
5391 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
5392 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
5393 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
5395 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
5397 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5398 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16
5400 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
5401 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
5402 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11
5404 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11
5405 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
5406 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11
5408 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11
5409 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11
5411 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
5412 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11
5414 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12
5415 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11
5417 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12
5418 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12
5419 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17
5421 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
5422 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11
5423 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
5425 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12
5426 ldr $rk14q, [$cc, #224] @ load rk14
5427 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13
5429 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
5430 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12
5431 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12
5433 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
5434 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12
5435 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
5437 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
5438 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13
5439 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13
5441 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18
5442 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
5443 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13
5445 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12
5446 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13
5447 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
5449 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 8k+10 - result
5450 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19
5451 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
5453 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13
5454 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13
5455 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result
5457 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
5458 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
5459 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13
5461 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result
5462 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
5463 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 8k+11 - result
5465 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
5466 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 8k+9 - result
5467 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 8k+8 - result
5469 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
5470 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
5471 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
5473 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result
5474 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low
5475 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
5477 eor3 $res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block 6 - result
5478 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
5479 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
5481 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
5482 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
5483 b.lt .L256_enc_main_loop
5485 .L256_enc_prepretail: @ PREPRETAIL - AES rounds 0-13 on ctr0-ctr7 interleaved with GHASH and modulo reduction of res0-res7; no message data is loaded or stored in this section
5486 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
5487 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
5488 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
5490 rev64 $res2b, $res2b @ GHASH block 8k+2
5492 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
5493 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
5495 rev64 $res5b, $res5b @ GHASH block 8k+5
5496 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
5497 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
5499 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
5501 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
5502 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
5503 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
5505 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
5506 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
5508 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
5509 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
5510 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
5512 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 - swap 64-bit halves of acc_l
5513 rev64 $res0b, $res0b @ GHASH block 8k
5514 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
5516 rev64 $res1b, $res1b @ GHASH block 8k+1
5517 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
5518 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
5520 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
5521 ext $h7.16b, $h7.16b, $h7.16b, #8 @ swap 64-bit halves of h7
5522 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
5523 ext $h8.16b, $h8.16b, $h8.16b, #8 @ swap 64-bit halves of h8
5524 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
5526 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
5527 ext $h5.16b, $h5.16b, $h5.16b, #8 @ swap 64-bit halves of h5
5528 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
5529 ext $h6.16b, $h6.16b, $h6.16b, #8 @ swap 64-bit halves of h6
5530 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
5531 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
5533 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
5534 eor $res0b, $res0b, $acc_lb @ PRE 1 - fold acc_l into GHASH block 8k
5536 rev64 $res3b, $res3b @ GHASH block 8k+3
5537 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
5539 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
5540 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
5541 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
5543 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
5544 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
5545 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
5547 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
5548 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
5549 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
5551 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
5552 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
5553 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
5555 rev64 $res6b, $res6b @ GHASH block 8k+6
5556 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
5557 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
5559 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
5560 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
5561 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
5563 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
5564 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
5566 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
5567 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
5568 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
5570 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
5571 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
5572 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
5574 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
5575 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
5576 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
5578 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
5579 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
5580 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
5582 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
5583 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
5584 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
5586 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
5587 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
5588 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
5590 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
5591 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
5592 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
5594 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
5595 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
5596 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
5598 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
5599 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
5600 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
5602 rev64 $res4b, $res4b @ GHASH block 8k+4
5603 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
5604 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
5606 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
5607 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
5608 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5610 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
5611 ext $h3.16b, $h3.16b, $h3.16b, #8 @ swap 64-bit halves of h3
5612 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
5613 ext $h4.16b, $h4.16b, $h4.16b, #8 @ swap 64-bit halves of h4
5614 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
5615 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
5617 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
5618 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
5620 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
5621 rev64 $res7b, $res7b @ GHASH block 8k+7
5622 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
5624 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
5625 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
5626 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
5628 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
5629 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
5630 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
5632 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
5633 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
5634 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
5635 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
5637 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
5638 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
5639 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
5641 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
5642 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
5643 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
5644 ext $h1.16b, $h1.16b, $h1.16b, #8 @ swap 64-bit halves of h1
5645 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
5646 ext $h2.16b, $h2.16b, $h2.16b, #8 @ swap 64-bit halves of h2
5648 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5649 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
5650 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
5652 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
5653 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
5655 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
5656 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
5657 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
5659 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
5660 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
5661 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
5663 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
5664 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
5665 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
5667 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
5668 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
5669 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
5671 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
5672 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
5673 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
5675 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
5676 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
5677 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
5679 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
5680 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
5681 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
5683 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
5684 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
5685 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
5687 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
5688 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
5689 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
5691 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
5692 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
5693 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
5695 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5696 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
5697 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
5699 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
5700 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
5701 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
5703 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
5705 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
5706 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
5707 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
5709 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
5710 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
5712 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
5713 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
5714 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
5716 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
5717 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
5718 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
5720 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
5721 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
5722 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
5724 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
5725 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
5726 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11
5728 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5729 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
5730 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11
5732 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
5733 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11
5734 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11
5736 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11
5737 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11
5738 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11
5740 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
5741 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11
5742 ldr $rk14q, [$cc, #224] @ load rk14 (not applied within this section)
5744 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12
5745 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12
5746 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12
5748 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12
5749 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12
5750 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
5752 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12
5753 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
5755 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12
5756 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12
5757 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 (round 13 is aese only, no aesmc, for all eight blocks)
5759 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low
5760 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13
5761 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13
5763 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13
5764 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13
5765 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13
5767 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13
5768 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13
5769 .L256_enc_tail: @ TAIL - load h5-h8 with h56k/h78k, produce the first leftover result block in res1, then branch on the number of bytes left
5771 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k, h8l | h8h
5772 ext $h8.16b, $h8.16b, $h8.16b, #8 @ swap 64-bit halves of h8
5773 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
5775 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
5777 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h, h6k | h5k
5778 ext $h5.16b, $h5.16b, $h5.16b, #8 @ swap 64-bit halves of h5
5780 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
5781 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h, h7l | h7h
5782 ext $h6.16b, $h6.16b, $h6.16b, #8 @ swap 64-bit halves of h6
5783 ext $h7.16b, $h7.16b, $h7.16b, #8 @ swap 64-bit halves of h7
5786 cmp $main_end_input_ptr, #112 @ more than 7 blocks (7 x 16 bytes) left?
5787 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result
5788 b.gt .L256_enc_blocks_more_than_7
5799 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 7 blocks left: step rtmp_ctr back by one increment
5803 cmp $main_end_input_ptr, #96 @ more than 6 blocks left?
5804 b.gt .L256_enc_blocks_more_than_6
5808 cmp $main_end_input_ptr, #80 @ more than 5 blocks left?
5814 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 6 blocks left: step rtmp_ctr back by one increment
5815 b.gt .L256_enc_blocks_more_than_5
5818 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 5 blocks left: step rtmp_ctr back by one increment
5823 cmp $main_end_input_ptr, #64 @ more than 4 blocks left?
5825 b.gt .L256_enc_blocks_more_than_4
5827 cmp $main_end_input_ptr, #48 @ more than 3 blocks left?
5832 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 4 blocks left: step rtmp_ctr back by one increment
5833 b.gt .L256_enc_blocks_more_than_3
5835 cmp $main_end_input_ptr, #32 @ more than 2 blocks left?
5837 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
5840 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 3 blocks left: step rtmp_ctr back by one increment
5841 b.gt .L256_enc_blocks_more_than_2
5845 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 2 blocks left: step rtmp_ctr back by one increment
5846 cmp $main_end_input_ptr, #16 @ more than 1 block left?
5847 b.gt .L256_enc_blocks_more_than_1
5849 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ not more than 1 block left: step rtmp_ctr back by one increment
5850 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
5851 b .L256_enc_blocks_less_than_1
5852 .L256_enc_blocks_more_than_7: @ blocks left > 7 - store final-7 result, start acc_h/acc_m/acc_l from its GHASH terms (h8, upper half of h78k), compute final-6 result from ctr1
5853 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
5855 rev64 $res0b, $res1b @ GHASH final-7 block
5857 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5859 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
5861 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high
5862 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid (upper half of res0 into rk4v)
5863 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid (upper half of h78k into acc_m)
5865 movi $t0.8b, #0 @ suppress further partial tag feed in
5867 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid (upper half xor lower half of res0)
5868 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result
5870 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid
5871 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low
5872 .L256_enc_blocks_more_than_6: @ blocks left > 6 - store final-6 result, add its GHASH terms (h7, lower half of h78k) to acc_h/acc_m/acc_l, compute final-5 result from ctr2
5874 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
5876 rev64 $res0b, $res1b @ GHASH final-6 block
5878 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5880 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
5881 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid (upper half of res0 into rk4v)
5882 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
5884 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
5886 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
5888 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid (upper half xor lower half of res0)
5890 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid (lower half of h78k)
5891 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result
5893 movi $t0.8b, #0 @ suppress further partial tag feed in
5895 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
5896 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
5897 .L256_enc_blocks_more_than_5: @ blocks left > 5 - store final-5 result, add its GHASH terms (h6, upper half of h56k) to acc_h/acc_m/acc_l, compute final-4 result from ctr3
5899 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
5901 rev64 $res0b, $res1b @ GHASH final-5 block
5903 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5905 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid (upper half of res0 into rk4v)
5907 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
5909 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
5910 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid (upper half xor lower half of res0)
5912 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid (copy into upper half for the pmull2 below)
5914 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
5915 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
5917 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid (upper half of h56k)
5918 movi $t0.8b, #0 @ suppress further partial tag feed in
5919 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
5921 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
5922 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result
5923 .L256_enc_blocks_more_than_4: @ blocks left > 4
5925 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
5927 rev64 $res0b, $res1b @ GHASH final-4 block
5929 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
5931 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5933 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
5934 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
5936 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result
5937 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
5939 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
5940 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
5942 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
5944 movi $t0.8b, #0 @ suppress further partial tag feed in
5946 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
5947 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
5948 .L256_enc_blocks_more_than_3: @ blocks left > 3
5950 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
5952 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
5953 ext $h4.16b, $h4.16b, $h4.16b, #8
5954 rev64 $res0b, $res1b @ GHASH final-3 block
5956 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5958 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
5959 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
5961 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
5962 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
5963 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
5965 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid
5966 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
5968 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
5969 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
5971 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result
5972 movi $t0.8b, #0 @ suppress further partial tag feed in
5974 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
5975 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
5976 .L256_enc_blocks_more_than_2: @ blocks left > 2
5978 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
5979 ext $h3.16b, $h3.16b, $h3.16b, #8
5981 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
5983 rev64 $res0b, $res1b @ GHASH final-2 block
5984 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
5986 eor $res0b, $res0b, $t0.16b @ feed in partial tag
5988 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
5990 movi $t0.8b, #0 @ suppress further partial tag feed in
5992 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
5993 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result
5995 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
5997 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
5999 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
6000 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
6002 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
6003 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
6004 .L256_enc_blocks_more_than_1: @ blocks left > 1
6006 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
6008 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
6009 ext $h2.16b, $h2.16b, $h2.16b, #8
6010 rev64 $res0b, $res1b @ GHASH final-1 block
6011 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
6013 eor $res0b, $res0b, $t0.16b @ feed in partial tag
6014 movi $t0.8b, #0 @ suppress further partial tag feed in
6016 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
6017 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
6019 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result
6020 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
6022 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
6023 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
6025 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
6027 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
6028 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid
6030 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
6032 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
6033 .L256_enc_blocks_less_than_1: @ blocks left <= 1
6035 and $bit_length, $bit_length, #127 @ bit_length %= 128
6037 sub $bit_length, $bit_length, #128 @ bit_length -= 128
6039 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
6041 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
6042 and $bit_length, $bit_length, #127 @ bit_length %= 128
6044 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
6045 cmp $bit_length, #64
6046 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
6048 csel $temp3_x, $temp0_x, xzr, lt
6049 csel $temp2_x, $temp1_x, $temp0_x, lt
6051 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
6052 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
6053 ext $h1.16b, $h1.16b, $h1.16b, #8
6055 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
6056 mov $ctr0.d[1], $temp3_x
6058 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
6060 rev64 $res0b, $res1b @ GHASH final block
6062 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b
6063 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
6064 str $rtmp_ctrq, [$counter] @ store the updated counter
6066 eor $res0b, $res0b, $t0.16b @ feed in partial tag
6067 st1 { $res1b}, [$output_ptr] @ store all 16B
6069 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
6070 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
6071 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
6073 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
6074 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
6076 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
6078 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
6080 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
6081 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
6083 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6085 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
6086 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
6088 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
6090 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
6091 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6093 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low
6094 ext $acc_lb, $acc_lb, $acc_lb, #8
6095 rev64 $acc_lb, $acc_lb
6096 st1 { $acc_l.16b }, [$current_tag]
6097 mov x0, $byte_length @ return sizes
6099 ldp d10, d11, [sp, #16]
6100 ldp d12, d13, [sp, #32]
6101 ldp d14, d15, [sp, #48]
6102 ldp d8, d9, [sp], #80
6108 .size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel
6112 #########################################################################################
6113 # size_t unroll8_eor3_aes_gcm_dec_256_kernel(const uint8_t * ciphertext,
6114 # uint64_t plaintext_length, /* length in bits, not bytes: the kernel derives byte_length as bit_length >> 3 */
6115 # uint8_t * plaintext,
6117 # unsigned char ivec[16],
6121 .global unroll8_eor3_aes_gcm_dec_256_kernel
6122 .type unroll8_eor3_aes_gcm_dec_256_kernel,%function
6124 unroll8_eor3_aes_gcm_dec_256_kernel:
6125 AARCH64_VALID_CALL_TARGET
6126 cbz x1, .L256_dec_ret
6127 stp d8, d9, [sp, #-80]!
6128 lsr $byte_length, $bit_length, #3
6131 stp d10, d11, [sp, #16]
6132 stp d12, d13, [sp, #32]
6133 stp d14, d15, [sp, #48]
6134 mov x5, #0xc200000000000000
6135 stp x5, xzr, [sp, #64]
6136 add $modulo_constant, sp, #64
6138 ld1 { $ctr0b}, [$counter] @ CTR block 0
6140 mov $constant_temp, #0x100000000 @ set up counter increment
6141 movi $rctr_inc.16b, #0x0
6142 mov $rctr_inc.d[1], $constant_temp
6143 mov $main_end_input_ptr, $byte_length
6145 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
6147 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter
6149 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0
6151 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1
6152 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1
6154 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2
6155 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2
6156 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6158 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3
6159 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3
6161 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4
6162 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4
6164 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0
6166 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5
6167 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5
6169 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0
6170 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0
6172 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6
6173 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6
6175 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7
6176 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0
6178 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0
6179 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0
6181 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0
6182 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0
6183 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6185 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1
6186 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1
6187 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1
6189 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1
6190 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1
6191 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1
6193 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1
6194 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1
6196 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2
6197 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2
6198 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2
6200 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2
6201 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2
6202 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2
6204 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2
6205 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2
6206 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6208 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3
6209 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3
6211 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3
6212 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3
6214 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3
6215 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3
6216 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3
6218 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3
6220 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4
6221 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4
6223 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4
6224 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4
6225 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4
6227 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4
6228 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4
6229 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4
6231 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5
6232 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5
6234 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6235 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5
6236 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5
6238 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5
6240 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5
6241 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5
6243 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5
6245 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6
6246 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6
6247 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6
6249 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6
6250 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6
6251 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6
6253 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6
6254 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6
6255 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6257 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7
6258 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7
6260 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7
6261 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7
6262 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7
6264 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7
6265 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7
6266 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7
6268 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
6269 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8
6270 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8
6272 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8
6273 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8
6274 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8
6276 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8
6277 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8
6278 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8
6280 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9
6282 ld1 { $acc_lb}, [$current_tag]
6283 ext $acc_lb, $acc_lb, $acc_lb, #8
6284 rev64 $acc_lb, $acc_lb
6285 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6286 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr
6287 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr
6289 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9
6290 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9
6292 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9
6293 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9
6295 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9
6297 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9
6298 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9
6300 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10
6301 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10
6302 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10
6304 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10
6305 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10
6306 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10
6308 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10
6309 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10
6310 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6312 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11
6313 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7
6315 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11
6316 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11
6317 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11
6319 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11
6320 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11
6321 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11
6323 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11
6324 ldr $rk14q, [$cc, #224] @ load rk14
6326 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12
6327 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12
6328 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12
6330 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
6331 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12
6332 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12
6334 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 12
6335 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12
6336 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12
6338 aese $ctr5b, $rk13 @ AES block 5 - round 13
6339 aese $ctr1b, $rk13 @ AES block 1 - round 13
6340 aese $ctr2b, $rk13 @ AES block 2 - round 13
6342 aese $ctr0b, $rk13 @ AES block 0 - round 13
6343 aese $ctr4b, $rk13 @ AES block 4 - round 13
6344 aese $ctr6b, $rk13 @ AES block 6 - round 13
6346 aese $ctr3b, $rk13 @ AES block 3 - round 13
6347 aese $ctr7b, $rk13 @ AES block 7 - round 13
6348 b.ge .L256_dec_tail @ handle tail
6350 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
6352 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
6354 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
6356 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
6357 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks
6359 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 1 - result
6360 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 0 - result
6361 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
6363 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8
6364 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8
6365 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 3 - result
6367 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 5 - result
6369 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 4 - result
6370 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9
6371 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9
6373 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 2 - result
6374 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
6376 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10
6377 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10
6379 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 6 - result
6381 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11
6382 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11
6383 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
6385 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 7 - result
6386 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
6388 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12
6389 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12
6390 b.ge .L256_dec_prepretail @ do prepretail
6392 .L256_dec_main_loop: @ main loop start
6393 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
6394 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6395 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
6397 rev64 $res1b, $res1b @ GHASH block 8k+1
6398 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
6399 ext $h7.16b, $h7.16b, $h7.16b, #8
6400 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
6401 ext $h8.16b, $h8.16b, $h8.16b, #8
6403 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
6404 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
6405 rev64 $res0b, $res0b @ GHASH block 8k
6407 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
6408 rev64 $res4b, $res4b @ GHASH block 8k+4
6409 rev64 $res3b, $res3b @ GHASH block 8k+3
6411 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
6412 rev64 $res7b, $res7b @ GHASH block 8k+7
6414 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
6415 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
6416 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
6418 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
6419 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
6420 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
6422 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
6423 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
6424 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6426 eor $res0b, $res0b, $acc_lb @ PRE 1
6427 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
6428 ext $h5.16b, $h5.16b, $h5.16b, #8
6429 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
6430 ext $h6.16b, $h6.16b, $h6.16b, #8
6431 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
6433 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
6434 rev64 $res2b, $res2b @ GHASH block 8k+2
6435 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
6437 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
6438 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
6439 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
6441 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
6442 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
6443 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
6445 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
6446 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
6447 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
6449 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
6450 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
6451 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
6453 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
6454 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
6455 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
6457 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6458 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
6459 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
6461 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
6462 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
6463 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
6465 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
6466 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
6467 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
6469 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
6470 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
6471 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
6473 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
6474 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
6475 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
6477 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
6478 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
6479 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
6481 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
6482 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
6483 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
6485 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
6486 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
6487 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
6489 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
6490 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
6491 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
6492 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
6494 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6495 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
6496 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
6498 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
6499 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
6500 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
6502 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
6503 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
6504 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
6506 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
6507 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
6508 rev64 $res5b, $res5b @ GHASH block 8k+5
6510 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
6511 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
6512 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
6514 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
6515 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
6516 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
6518 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
6519 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
6520 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
6522 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
6523 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
6524 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
6526 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
6527 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
6528 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
6530 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
6531 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
6532 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
6534 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
6535 ext $h3.16b, $h3.16b, $h3.16b, #8
6536 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
6537 ext $h4.16b, $h4.16b, $h4.16b, #8
6538 rev64 $res6b, $res6b @ GHASH block 8k+6
6539 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
6541 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
6542 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
6543 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6545 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
6546 ext $h1.16b, $h1.16b, $h1.16b, #8
6547 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
6548 ext $h2.16b, $h2.16b, $h2.16b, #8
6549 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
6550 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
6552 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
6553 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
6554 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
6556 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
6557 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
6558 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
6559 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
6561 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
6562 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
6563 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
6565 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
6566 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
6567 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
6569 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
6570 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
6571 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
6573 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
6574 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
6575 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
6577 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
6578 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
6579 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
6581 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6582 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
6583 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
6585 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
6586 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
6587 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
6589 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
6590 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
6591 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
6593 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
6594 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
6595 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
6597 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
6598 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
6599 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
6601 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
6602 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
6603 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
6605 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
6606 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
6607 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
6609 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
6610 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
6611 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
6613 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
6614 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
6615 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
6617 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
6618 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
6619 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
6621 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
6622 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
6623 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
6625 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
6626 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16
6627 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
6629 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16
6630 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11
6631 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6633 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11
6634 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11
6636 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
6637 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17
6638 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11
6640 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
6641 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11
6642 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6644 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11
6645 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17
6646 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11
6648 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12
6649 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12
6650 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12
6652 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18
6653 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18
6654 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
6656 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
6657 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12
6658 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11
6660 ldr $rk14q, [$cc, #224] @ load rk14
6661 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12
6662 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12
6664 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
6665 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12
6666 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12
6668 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
6669 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13
6670 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13
6672 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
6673 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13
6674 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13
6676 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19
6677 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 8k+10 - result
6678 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 8k+9 - result
6680 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6681 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13
6683 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19
6684 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
6685 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13
6687 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 8k+13 - result
6688 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 8k+8 - result
6689 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13
6691 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
6692 mov $ctr0.16b, $h1.16b @ CTR block 8k+16
6693 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 8k+12 - result
6695 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low
6696 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 8k+11 - result
6697 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
6699 mov $ctr3.16b, $h4.16b @ CTR block 8k+19
6700 mov $ctr2.16b, $h3.16b @ CTR block 8k+18
6701 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13
6703 mov $ctr1.16b, $h2.16b @ CTR block 8k+17
6704 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
6705 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 8k+15 - result
6707 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 8k+14 - result
6708 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20
6709 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20
6711 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
6712 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
6713 b.lt .L256_dec_main_loop
6715 .L256_dec_prepretail: @ PREPRETAIL
6716 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6717 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13
6718 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13
6720 rev64 $res4b, $res4b @ GHASH block 8k+4
6721 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k
6722 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k
6724 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14
6725 rev64 $res0b, $res0b @ GHASH block 8k
6726 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14
6728 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
6729 ldr $h7q, [$current_tag, #176] @ load h7l | h7h
6730 ext $h7.16b, $h7.16b, $h7.16b, #8
6731 ldr $h8q, [$current_tag, #208] @ load h8l | h8h
6732 ext $h8.16b, $h8.16b, $h8.16b, #8
6733 rev64 $res1b, $res1b @ GHASH block 8k+1
6735 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15
6736 rev64 $res2b, $res2b @ GHASH block 8k+2
6737 ldr $h5q, [$current_tag, #128] @ load h5l | h5h
6738 ext $h5.16b, $h5.16b, $h5.16b, #8
6739 ldr $h6q, [$current_tag, #160] @ load h6l | h6h
6740 ext $h6.16b, $h6.16b, $h6.16b, #8
6742 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0
6743 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0
6744 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0
6746 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0
6747 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0
6748 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0
6750 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1
6751 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0
6752 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0
6754 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6755 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1
6756 eor $res0b, $res0b, $acc_lb @ PRE 1
6758 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1
6759 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1
6760 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1
6762 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1
6763 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1
6764 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1
6766 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high
6767 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
6768 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low
6770 rev64 $res3b, $res3b @ GHASH block 8k+3
6771 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low
6773 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2
6774 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2
6775 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2
6777 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2
6778 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2
6779 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high
6781 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2
6782 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3
6784 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3
6785 rev64 $res6b, $res6b @ GHASH block 8k+6
6787 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3
6788 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2
6789 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3
6791 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high
6792 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid
6793 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2
6795 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6796 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3
6797 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high
6799 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3
6800 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high
6801 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid
6803 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3
6804 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low
6805 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3
6807 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high
6808 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
6809 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid
6811 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid
6812 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low
6813 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low
6815 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid
6816 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4
6817 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4
6819 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low
6820 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
6821 ext $h1.16b, $h1.16b, $h1.16b, #8
6822 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
6823 ext $h2.16b, $h2.16b, $h2.16b, #8
6824 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4
6826 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4
6827 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4
6828 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid
6830 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
6831 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5
6832 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4
6834 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5
6835 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4
6836 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4
6838 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5
6839 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid
6840 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5
6842 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5
6843 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5
6844 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid
6846 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5
6847 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5
6848 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6850 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
6851 ext $h3.16b, $h3.16b, $h3.16b, #8
6852 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
6853 ext $h4.16b, $h4.16b, $h4.16b, #8
6854 rev64 $res7b, $res7b @ GHASH block 8k+7
6855 rev64 $res5b, $res5b @ GHASH block 8k+5
6857 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid
6859 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
6861 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6
6862 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
6863 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
6864 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6
6866 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6
6867 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6
6869 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high
6870 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high
6871 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low
6873 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid
6874 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low
6875 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
6877 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7
6878 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high
6879 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6
6881 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6
6882 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6
6883 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6
6885 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6886 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low
6887 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7
6889 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7
6890 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7
6892 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7
6893 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7
6894 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high
6896 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7
6897 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid
6898 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7
6900 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8
6901 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8
6902 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8
6904 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8
6905 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8
6906 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8
6908 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8
6909 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9
6910 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
6912 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9
6913 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9
6914 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
6916 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9
6917 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9
6918 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid
6920 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8
6921 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid
6922 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high
6924 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid
6925 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid
6926 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low
6928 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6929 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low
6930 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid
6932 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9
6933 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9
6934 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9
6936 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high
6937 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low
6938 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
6940 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid
6942 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10
6943 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10
6944 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10
6946 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10
6947 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10
6948 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10
6950 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
6952 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10
6953 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10
6954 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6956 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6958 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11
6959 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11
6960 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11
6962 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
6963 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11
6965 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11
6966 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11
6967 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11
6969 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11
6970 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12
6972 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid
6974 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13
6975 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12
6976 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12
6978 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
6979 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12
6980 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12
6982 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12
6983 ldr $rk14q, [$cc, #224] @ load rk14
6984 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12
6986 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13
6987 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6988 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12
6990 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13
6991 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13
6992 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13
6994 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13
6995 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low
6996 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15
6998 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13
6999 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13
7000 .L256_dec_tail: @ TAIL
7002 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag
7003 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
7004 cmp $main_end_input_ptr, #112
7006 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
7008 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k
7009 ext $h8.16b, $h8.16b, $h8.16b, #8
7012 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h
7013 ext $h5.16b, $h5.16b, $h5.16b, #8
7015 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result
7016 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h
7017 ext $h6.16b, $h6.16b, $h6.16b, #8
7018 ext $h7.16b, $h7.16b, $h7.16b, #8
7019 b.gt .L256_dec_blocks_more_than_7
7022 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7033 cmp $main_end_input_ptr, #96
7035 b.gt .L256_dec_blocks_more_than_6
7041 cmp $main_end_input_ptr, #80
7042 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7046 b.gt .L256_dec_blocks_more_than_5
7048 cmp $main_end_input_ptr, #64
7050 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7056 b.gt .L256_dec_blocks_more_than_4
7058 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7060 cmp $main_end_input_ptr, #48
7064 b.gt .L256_dec_blocks_more_than_3
7066 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
7067 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7070 cmp $main_end_input_ptr, #32
7072 b.gt .L256_dec_blocks_more_than_2
7074 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7077 cmp $main_end_input_ptr, #16
7078 b.gt .L256_dec_blocks_more_than_1
7080 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s
7081 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
7082 b .L256_dec_blocks_less_than_1
7083 .L256_dec_blocks_more_than_7: @ blocks left > 7
7084 rev64 $res0b, $res1b @ GHASH final-7 block
7085 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
7086 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
7088 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid
7090 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7092 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid
7093 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result
7095 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high
7097 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid
7098 movi $t0.8b, #0 @ suppress further partial tag feed in
7100 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low
7101 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid
7102 .L256_dec_blocks_more_than_6: @ blocks left > 6
7104 rev64 $res0b, $res1b @ GHASH final-6 block
7106 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7107 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
7108 movi $t0.8b, #0 @ suppress further partial tag feed in
7110 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid
7111 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
7112 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high
7114 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low
7116 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result
7117 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low
7118 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid
7120 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid
7122 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid
7123 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high
7124 .L256_dec_blocks_more_than_5: @ blocks left > 5
7126 rev64 $res0b, $res1b @ GHASH final-5 block
7128 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7130 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high
7131 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid
7133 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
7135 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid
7136 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
7138 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low
7139 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid
7141 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid
7143 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high
7144 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result
7145 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low
7147 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid
7148 movi $t0.8b, #0 @ suppress further partial tag feed in
7149 .L256_dec_blocks_more_than_4: @ blocks left > 4
7151 rev64 $res0b, $res1b @ GHASH final-4 block
7153 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7155 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid
7156 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
7158 movi $t0.8b, #0 @ suppress further partial tag feed in
7160 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low
7161 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high
7163 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid
7165 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high
7167 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid
7169 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low
7170 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
7172 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid
7173 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result
7174 .L256_dec_blocks_more_than_3: @ blocks left > 3
7176 ldr $h4q, [$current_tag, #112] @ load h4l | h4h
7177 ext $h4.16b, $h4.16b, $h4.16b, #8
7178 rev64 $res0b, $res1b @ GHASH final-3 block
7180 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7181 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
7182 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k
7184 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid
7185 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
7187 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result
7189 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid
7191 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid
7192 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low
7193 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high
7195 movi $t0.8b, #0 @ suppress further partial tag feed in
7196 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid
7197 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low
7199 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high
7201 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid
7202 .L256_dec_blocks_more_than_2: @ blocks left > 2
7204 rev64 $res0b, $res1b @ GHASH final-2 block
7206 ldr $h3q, [$current_tag, #80] @ load h3l | h3h
7207 ext $h3.16b, $h3.16b, $h3.16b, #8
7208 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
7210 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7212 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid
7214 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low
7215 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
7216 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result
7218 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid
7219 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low
7220 movi $t0.8b, #0 @ suppress further partial tag feed in
7222 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid
7223 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high
7225 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid
7226 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high
7227 .L256_dec_blocks_more_than_1: @ blocks left > 1
7229 rev64 $res0b, $res1b @ GHASH final-1 block
7231 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7233 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid
7234 ldr $h2q, [$current_tag, #64] @ load h2l | h2h
7235 ext $h2.16b, $h2.16b, $h2.16b, #8
7237 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid
7238 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext
7239 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
7241 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k
7242 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low
7244 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid
7246 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low
7248 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result
7249 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high
7251 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid
7253 movi $t0.8b, #0 @ suppress further partial tag feed in
7254 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high
7256 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid
7257 .L256_dec_blocks_less_than_1: @ blocks left <= 1
7259 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored
7260 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff
7261 and $bit_length, $bit_length, #127 @ bit_length %= 128
7263 sub $bit_length, $bit_length, #128 @ bit_length -= 128
7264 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b
7265 str $rtmp_ctrq, [$counter] @ store the updated counter
7267 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128])
7269 and $bit_length, $bit_length, #127 @ bit_length %= 128
7271 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block
7272 cmp $bit_length, #64
7273 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff
7275 csel $temp3_x, $temp0_x, xzr, lt
7276 csel $temp2_x, $temp1_x, $temp0_x, lt
7278 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block
7279 mov $ctr0.d[1], $temp3_x
7281 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits
7282 ldr $h1q, [$current_tag, #32] @ load h1l | h1h
7283 ext $h1.16b, $h1.16b, $h1.16b, #8
7284 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing
7286 rev64 $res0b, $res1b @ GHASH final block
7288 eor $res0b, $res0b, $t0.16b @ feed in partial tag
7290 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid
7291 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high
7293 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid
7295 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low
7296 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high
7298 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid
7300 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid
7301 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant
7302 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low
7304 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid
7305 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up
7307 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
7308 st1 { $res4b}, [$output_ptr] @ store all 16B
7310 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up
7312 eor $t11.16b, $acc_hb, $t11.16b @ MODULO - fold into mid
7313 eor $acc_mb, $acc_mb, $t11.16b @ MODULO - fold into mid
7315 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
7317 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
7318 eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low
7320 eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low
7321 ext $acc_lb, $acc_lb, $acc_lb, #8
7322 rev64 $acc_lb, $acc_lb
7323 st1 { $acc_l.16b }, [$current_tag]
7324 mov x0, $byte_length
7326 ldp d10, d11, [sp, #16]
7327 ldp d12, d13, [sp, #32]
7328 ldp d14, d15, [sp, #48]
7329 ldp d8, d9, [sp], #80
7335 .size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel
7341 .asciz "AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian\@arm.com>"
# Base 32-bit instruction words, keyed by mnemonic, for rax1, eor3, bcax
# and xar.  Bits 0-4, 5-9 and 16-20 are zero in every constant here; the
# encoder that indexes this table ORs operand values in at bit 0, bit 5,
# bit 16 and bit 10.
# NOTE(review): the opening of this list is not visible in this excerpt;
# presumably it initialises the %opcode hash that is indexed with
# $opcode{$mnemonic} further down -- confirm.
7348 "rax1" => 0xce608c00, "eor3" => 0xce000000,
7349 "bcax" => 0xce200000, "xar" => 0xce800000 );
# Turns one instruction (mnemonic plus operand text) into a raw ".inst"
# word with the original text kept in a trailing // comment.
#   $mnemonic - key into %opcode (rax1, eor3, bcax or xar)
#   $arg      - the operand text following the mnemonic
# NOTE(review): the enclosing sub line is not visible in this excerpt; the
# two-argument shape matches the unsha3($1,$2) call made by the
# post-processing loop -- confirm.
7352 my ($mnemonic,$arg)=@_;
# The pattern captures the number after a leading q or v in each
# comma-separated operand: $1 and $2 are required, $3 is optional, and $4
# is only attempted when $3 matched.  The fourth operand may also start
# with '#', and its capture accepts digits and '-'.
# The word is built as base opcode | $1 | $2<<5 | $3<<16 | eval($4)<<10.
# $4 goes through a string eval rather than being used directly,
# presumably so a fourth operand written as a small subtraction still
# yields a number -- TODO confirm.  Captures that did not participate
# ($3/$4 on shorter operand lists) are undefined at this point.
# NOTE(review): the two %s slots of the format are filled by arguments
# that continue past the last line shown here.
7354 $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv#]([0-9\-]+))?)?/
7356 sprintf ".inst\t0x%08x\t//%s %s",
7357 $opcode{$mnemonic}|$1|($2<<5)|($3<<16)|(eval($4)<<10),
# Rewrites an operand pair of the form "qN#lo|hi, qM#lo|hi" as an
# "ins vX.d[i],vY.d[j]" instruction.  "lo" selects lane 0 and anything
# else ("hi") lane 1.  Register numbers below 8 are kept as they are;
# 8 and above have 8 added.  When $arg does not match, the && short-circuits
# and no instruction text is produced.
# NOTE(review): the enclosing sub line and the assignment of $arg are not
# visible in this excerpt, and the reason for the +8 renumbering cannot be
# determined from here -- confirm against the callers.
7363 $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
7364 sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
7365 $3<8?$3:$3+8,($4 eq "lo")?0:1;
# Post-process the accumulated $code text one line at a time; each line is
# edited in place in $_.
7368 foreach(split("\n",$code)) {
# Only the first "@" followed by a whitespace character is replaced (no /g),
# and that whitespace character is consumed together with the "@".
7369 s/@\s/\/\//o; # old->new style commentary
# Every backtick-quoted span is replaced by the result of evaluating its
# contents as Perl.
7370 s/\`([^\`]*)\`/eval($1)/ge;
# Precedence is (match and subst) or subst: on a line containing ld1r every
# ".16b" becomes ".2d"; the second substitution runs only when the line has
# no ld1r or that first substitution changed nothing.  It replaces an
# eor3/rax1/xar/bcax whose operands start with "v" by the return value of
# unsha3(mnemonic, operands).
7372 m/\bld1r\b/ and s/\.16b/.2d/g or
7373 s/\b(eor3|rax1|xar|bcax)\s+(v.*)/unsha3($1,$2)/ge;
# Closing STDOUT explicitly makes a failed final flush fatal, with the OS
# error text appended to the message.
7378 close STDOUT or die "error closing STDOUT: $!"; # enforce flush