# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# The generated code of this file depends on the following RISC-V extensions:
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Bit-manipulation extension ('Zvbb')
# - RISC-V Vector GCM/GMAC extension ('Zvkg')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin/../../perlasm";
# riscv.pm provides the vector instruction helpers used below.
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";
################################################################################
# void rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt(const unsigned char *in,
#                                             unsigned char *out, size_t length,
#                                             const AES_KEY *key1,
#                                             const AES_KEY *key2,
#                                             const unsigned char iv[16])
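#
# key1 is the data (block-encryption) key and key2 is the tweak key used to
# encrypt the IV, as the tweak computation below shows. An illustrative C
# caller (a sketch only; `data_key`/`tweak_key` are placeholder buffers):
#
#   AES_KEY key1, key2;
#   AES_set_encrypt_key(data_key, 128, &key1);
#   AES_set_encrypt_key(tweak_key, 128, &key2);
#   rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt(in, out, len, &key1, &key2, iv);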
my ($INPUT, $OUTPUT, $LENGTH, $KEY1, $KEY2, $IV) = ("a0", "a1", "a2", "a3", "a4", "a5");
my ($TAIL_LENGTH) = ("a6");

my ($T0, $T1, $T2) = ("t0", "t1", "t2");
my ($STORE_LEN32) = ("t3");
# The register choices for $VL and $LEN32 are an assumption; the code below
# only requires two scratch registers distinct from the ones above.
my ($VL) = ("a7");
my ($LEN32) = ("t4");
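# $TAIL_LENGTH holds the partial-block byte count (zero for block-aligned
# input). $LEN32 and $STORE_LEN32 are e32 element counts: in the encrypt
# path $STORE_LEN32 lags one block behind $LENGTH whenever a tail exists,
# so the last full block is withheld for ciphertext stealing.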
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));
# Load number of rounds
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V28, $IV]}
@{[vle32_v $V29, $KEY2]}
@{[vaesz_vs $V28, $V29]}

@{[vle32_v $V29, $KEY2]}
@{[vaesem_vs $V28, $V29]}

@{[vle32_v $V29, $KEY2]}
@{[vaesef_vs $V28, $V29]}
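# v28 now holds the initial XTS tweak: the 16-byte IV encrypted with key2.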
# prepare the input data (v24), iv (v28), bit-reversed iv (v16) and the
# bit-reversed iv multiplier (v20)
sub init_first_round {
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
@{[vle32_v $V24, $INPUT]}

# We could simplify the initialization steps if we have `block <= 1`.

# Note: we use `vgmul` for the GF(2^128) multiplication. `vgmul` uses a
# different ordering of the coefficients, so we have to run the data
# through `vbrev8` whenever we use `vgmul`.
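# For example, multiplying a tweak by `x` in XTS is a one-bit left shift of
# the 128-bit value, reduced by x^128 + x^7 + x^2 + x + 1, whereas `vgmul`
# follows the GCM/GHASH convention in which the bits within each byte are
# reflected. Reversing the operands with `vbrev8` before the multiply (and
# reversing the product back afterwards) converts between the two
# conventions.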
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vbrev8_v $V0, $V28]}
@{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}

# v16: [r-IV0, r-IV0, ...]
@{[vaesz_vs $V16, $V0]}

# Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8.
@{[vsetvli "zero", $T0, "e32", "m1", "ta", "ma"]}
# v2: [`1`, `1`, `1`, `1`, ...]
# v3: [`0`, `1`, `2`, `3`, ...]
@{[vsetvli "zero", $T0, "e64", "m2", "ta", "ma"]}
# v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...]
@{[vzext_vf2 $V4, $V2]}
# v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...]
@{[vzext_vf2 $V6, $V3]}

@{[vsetvli "zero", $T0, "e32", "m2", "ta", "ma"]}
# v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...]
@{[vwsll_vv $V8, $V4, $V6]}

# Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16.
@{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
@{[vbrev8_v $V8, $V8]}
@{[vgmul_vv $V16, $V8]}

# Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28.
# Reverse the bit order back.
@{[vbrev8_v $V28, $V16]}
# Prepare the x^n multiplier in v20, where `n` is the number of AES-XTS
# blocks in a LMUL=4 register group:
#   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
#     = (VLEN/32)
# We can compute `n` with `vsetvli` using `e32, m1`, since VLMAX is exactly
# VLEN/32 in that configuration.
@{[vsetvli $T0, "zero", "e32", "m1", "ta", "ma"]}

@{[vsetivli "zero", 2, "e64", "m1", "ta", "ma"]}

@{[vsetivli "zero", 1, "e64", "m1", "tu", "ma"]}
@{[vmv_v_x $V0, $T0]}
@{[vsetivli "zero", 2, "e64", "m1", "ta", "ma"]}
@{[vbrev8_v $V0, $V0]}
@{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}

@{[vaesz_vs $V20, $V0]}
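# v20 now holds x^n (in bit-reversed form) splatted across the register
# group; the main loops below advance every tweak in v16 by n blocks with a
# single `vgmul` of v16 by v20.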
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vbrev8_v $V16, $V28]}
# prepare the xts enc last block's input (v24) and iv (v28)
sub handle_xts_enc_last_block {
bnez $TAIL_LENGTH, 1f
ret
1:

# slide down the second-to-last block
@{[vsetivli "zero", 4, "e32", "m4", "ta", "ma"]}
@{[vslidedown_vx $V24, $V24, $VL]}
@{[vslidedown_vx $V16, $V16, $VL]}

@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vmv_v_v $V25, $V24]}

# load the last block into v24
# note: we have to load the last block before storing the second-to-last
# block, to support in-place operation
@{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
@{[vle8_v $V24, $INPUT]}

# set up the `x` multiplier in byte-reversed order
# 0b00000010 => 0b01000000 (0x40)
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vsetivli "zero", 1, "e8", "m1", "tu", "ma"]}
@{[vmv_v_x $V28, $T0]}

# compute the IV for the last block
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vgmul_vv $V16, $V28]}
@{[vbrev8_v $V28, $V16]}

# store the second-to-last block's ciphertext head; it becomes the final
# partial ciphertext block
@{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "ta", "ma"]}
@{[vse8_v $V25, $OUTPUT]}
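# Ciphertext stealing is now set up: v24 holds the tail plaintext merged
# (by the tail-undisturbed load above) with the stolen ciphertext bytes,
# ready to be encrypted by the caller as the last full block using the iv
# in v28.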
# prepare the xts dec second-to-last block's input (v24) and iv (v29), and
# the last block's iv (v28)
sub handle_xts_dec_last_block {
bnez $TAIL_LENGTH, 1f
ret
1:

# load the second-to-last block's ciphertext
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V24, $INPUT]}
addi $INPUT, $INPUT, 16

# set up the `x` multiplier in byte-reversed order
# 0b00000010 => 0b01000000 (0x40)
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vsetivli "zero", 1, "e8", "m1", "tu", "ma"]}
@{[vmv_v_x $V20, $T0]}

# slide down the third-to-last block
@{[vsetivli "zero", 4, "e32", "m4", "ta", "ma"]}
@{[vslidedown_vx $V16, $V16, $VL]}

# compute the IV for the last block
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vgmul_vv $V16, $V20]}
@{[vbrev8_v $V28, $V16]}

# compute the IV for the second-to-last block
@{[vgmul_vv $V16, $V20]}
@{[vbrev8_v $V29, $V16]}

# compute the IV for the second-to-last block
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vgmul_vv $V16, $V20]}
@{[vbrev8_v $V29, $V16]}
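# Note the ciphertext-stealing order swap on decryption: the tweak
# generated first (v28) is applied to the reassembled last block, while the
# tweak generated after it (v29) is applied to the second-to-last
# ciphertext block.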
# Load all 11 round keys to v1-v11 registers.
sub aes_128_load_key {
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V1, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V2, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V3, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V4, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V5, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V6, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V7, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V8, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V9, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V10, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V11, $KEY1]}
# Load all 15 round keys to v1-v15 registers.
sub aes_256_load_key {
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V1, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V2, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V3, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V4, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V5, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V6, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V7, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V8, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V9, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V10, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V11, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V12, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V13, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V14, $KEY1]}
addi $KEY1, $KEY1, 16
@{[vle32_v $V15, $KEY1]}
# aes-128 enc with round keys v1-v11
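# Zvkned round semantics: `vaesz.vs` XORs in the first round key, each
# `vaesem.vs` performs one middle encryption round (SubBytes, ShiftRows,
# MixColumns, AddRoundKey), and `vaesef.vs` performs the final round, which
# omits MixColumns.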
@{[vaesz_vs $V24, $V1]}
@{[vaesem_vs $V24, $V2]}
@{[vaesem_vs $V24, $V3]}
@{[vaesem_vs $V24, $V4]}
@{[vaesem_vs $V24, $V5]}
@{[vaesem_vs $V24, $V6]}
@{[vaesem_vs $V24, $V7]}
@{[vaesem_vs $V24, $V8]}
@{[vaesem_vs $V24, $V9]}
@{[vaesem_vs $V24, $V10]}
@{[vaesef_vs $V24, $V11]}

# aes-128 dec with round keys v1-v11
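# Decryption mirrors this with the round keys consumed in reverse order:
# `vaesz.vs` XORs in the last round key, each `vaesdm.vs` performs one
# middle decryption round, and `vaesdf.vs` performs the final decryption
# round.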
@{[vaesz_vs $V24, $V11]}
@{[vaesdm_vs $V24, $V10]}
@{[vaesdm_vs $V24, $V9]}
@{[vaesdm_vs $V24, $V8]}
@{[vaesdm_vs $V24, $V7]}
@{[vaesdm_vs $V24, $V6]}
@{[vaesdm_vs $V24, $V5]}
@{[vaesdm_vs $V24, $V4]}
@{[vaesdm_vs $V24, $V3]}
@{[vaesdm_vs $V24, $V2]}
@{[vaesdf_vs $V24, $V1]}

# aes-256 enc with round keys v1-v15
@{[vaesz_vs $V24, $V1]}
@{[vaesem_vs $V24, $V2]}
@{[vaesem_vs $V24, $V3]}
@{[vaesem_vs $V24, $V4]}
@{[vaesem_vs $V24, $V5]}
@{[vaesem_vs $V24, $V6]}
@{[vaesem_vs $V24, $V7]}
@{[vaesem_vs $V24, $V8]}
@{[vaesem_vs $V24, $V9]}
@{[vaesem_vs $V24, $V10]}
@{[vaesem_vs $V24, $V11]}
@{[vaesem_vs $V24, $V12]}
@{[vaesem_vs $V24, $V13]}
@{[vaesem_vs $V24, $V14]}
@{[vaesef_vs $V24, $V15]}

# aes-256 dec with round keys v1-v15
@{[vaesz_vs $V24, $V15]}
@{[vaesdm_vs $V24, $V14]}
@{[vaesdm_vs $V24, $V13]}
@{[vaesdm_vs $V24, $V12]}
@{[vaesdm_vs $V24, $V11]}
@{[vaesdm_vs $V24, $V10]}
@{[vaesdm_vs $V24, $V9]}
@{[vaesdm_vs $V24, $V8]}
@{[vaesdm_vs $V24, $V7]}
@{[vaesdm_vs $V24, $V6]}
@{[vaesdm_vs $V24, $V5]}
@{[vaesdm_vs $V24, $V4]}
@{[vaesdm_vs $V24, $V3]}
@{[vaesdm_vs $V24, $V2]}
@{[vaesdf_vs $V24, $V1]}
.globl rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt
.type rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,\@function
rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt:
# aes block size is 16 bytes
andi $TAIL_LENGTH, $LENGTH, 15
mv $STORE_LEN32, $LENGTH
beqz $TAIL_LENGTH, 1f
sub $LENGTH, $LENGTH, $TAIL_LENGTH
addi $STORE_LEN32, $LENGTH, -16
1:
# Convert the byte lengths into e32 element counts.
srli $LEN32, $LENGTH, 2
srli $STORE_LEN32, $STORE_LEN32, 2
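# For example, with LENGTH=100: TAIL_LENGTH=4, LENGTH becomes 96 and
# STORE_LEN32 becomes (96-16)/4=20, so the main loop encrypts 96 bytes
# (LEN32=24 e32 elements) but stores only 80, withholding the last full
# block for ciphertext stealing.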
# Load number of rounds
beq $T0, $T1, aes_xts_enc_256
beq $T0, $T2, aes_xts_enc_128
.size rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt

aes_xts_enc_128:
@{[init_first_round]}
@{[aes_128_load_key]}

@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}

.Lenc_blocks_128:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
# load plaintext into v24
@{[vle32_v $V24, $INPUT]}
@{[vgmul_vv $V16, $V20]}
# reverse the iv's bit order back
@{[vbrev8_v $V28, $V16]}
@{[vxor_vv $V24, $V24, $V28]}
sub $LEN32, $LEN32, $VL
add $INPUT, $INPUT, $T0
@{[vxor_vv $V24, $V24, $V28]}

@{[vsetvli "zero", $STORE_LEN32, "e32", "m4", "ta", "ma"]}
@{[vse32_v $V24, $OUTPUT]}
add $OUTPUT, $OUTPUT, $T0
sub $STORE_LEN32, $STORE_LEN32, $VL

bnez $LEN32, .Lenc_blocks_128

@{[handle_xts_enc_last_block]}

@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V28]}
@{[vxor_vv $V24, $V24, $V28]}

# store the last full block's ciphertext
addi $OUTPUT, $OUTPUT, -16
@{[vse32_v $V24, $OUTPUT]}
ret
.size aes_xts_enc_128,.-aes_xts_enc_128
aes_xts_enc_256:
@{[init_first_round]}
@{[aes_256_load_key]}

@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}

.Lenc_blocks_256:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
# load plaintext into v24
@{[vle32_v $V24, $INPUT]}
@{[vgmul_vv $V16, $V20]}
# reverse the iv's bit order back
@{[vbrev8_v $V28, $V16]}
@{[vxor_vv $V24, $V24, $V28]}
sub $LEN32, $LEN32, $VL
add $INPUT, $INPUT, $T0
@{[vxor_vv $V24, $V24, $V28]}

@{[vsetvli "zero", $STORE_LEN32, "e32", "m4", "ta", "ma"]}
@{[vse32_v $V24, $OUTPUT]}
add $OUTPUT, $OUTPUT, $T0
sub $STORE_LEN32, $STORE_LEN32, $VL

bnez $LEN32, .Lenc_blocks_256

@{[handle_xts_enc_last_block]}

@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V28]}
@{[vxor_vv $V24, $V24, $V28]}

# store the last full block's ciphertext
addi $OUTPUT, $OUTPUT, -16
@{[vse32_v $V24, $OUTPUT]}
ret
.size aes_xts_enc_256,.-aes_xts_enc_256
################################################################################
# void rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt(const unsigned char *in,
#                                             unsigned char *out, size_t length,
#                                             const AES_KEY *key1,
#                                             const AES_KEY *key2,
#                                             const unsigned char iv[16])
.globl rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt
.type rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,\@function
rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt:
# aes block size is 16 bytes
andi $TAIL_LENGTH, $LENGTH, 15
beqz $TAIL_LENGTH, 1f
sub $LENGTH, $LENGTH, $TAIL_LENGTH
addi $LENGTH, $LENGTH, -16
1:
# Convert the byte length into an e32 element count.
srli $LEN32, $LENGTH, 2
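# For example, with LENGTH=100: TAIL_LENGTH=4 and LENGTH becomes
# 100-4-16=80 (LEN32=20 e32 elements), so the main loop stops before the
# last full block, which is decrypted together with the 4-byte tail.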
# Load number of rounds
beq $T0, $T1, aes_xts_dec_256
beq $T0, $T2, aes_xts_dec_128
.size rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt

aes_xts_dec_128:
@{[init_first_round]}
@{[aes_128_load_key]}

@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}

.Ldec_blocks_128:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
# load ciphertext into v24
@{[vle32_v $V24, $INPUT]}
@{[vgmul_vv $V16, $V20]}
# reverse the iv's bit order back
@{[vbrev8_v $V28, $V16]}
@{[vxor_vv $V24, $V24, $V28]}
sub $LEN32, $LEN32, $VL
add $INPUT, $INPUT, $T0
@{[vxor_vv $V24, $V24, $V28]}

@{[vse32_v $V24, $OUTPUT]}
add $OUTPUT, $OUTPUT, $T0

bnez $LEN32, .Ldec_blocks_128

@{[handle_xts_dec_last_block]}

## xts second to last block
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V29]}
@{[vxor_vv $V24, $V24, $V29]}
@{[vmv_v_v $V25, $V24]}

# load the last block's ciphertext
@{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
@{[vle8_v $V24, $INPUT]}

# store the recovered tail plaintext (the partial last block)
addi $T0, $OUTPUT, 16
@{[vse8_v $V25, $T0]}

@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V28]}
@{[vxor_vv $V24, $V24, $V28]}

# store the last full block's plaintext
@{[vse32_v $V24, $OUTPUT]}
ret
.size aes_xts_dec_128,.-aes_xts_dec_128
aes_xts_dec_256:
@{[init_first_round]}
@{[aes_256_load_key]}

@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}

.Ldec_blocks_256:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
# load ciphertext into v24
@{[vle32_v $V24, $INPUT]}
@{[vgmul_vv $V16, $V20]}
# reverse the iv's bit order back
@{[vbrev8_v $V28, $V16]}
@{[vxor_vv $V24, $V24, $V28]}
sub $LEN32, $LEN32, $VL
add $INPUT, $INPUT, $T0
@{[vxor_vv $V24, $V24, $V28]}

@{[vse32_v $V24, $OUTPUT]}
add $OUTPUT, $OUTPUT, $T0

bnez $LEN32, .Ldec_blocks_256

@{[handle_xts_dec_last_block]}

## xts second to last block
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V29]}
@{[vxor_vv $V24, $V24, $V29]}
@{[vmv_v_v $V25, $V24]}

# load the last block's ciphertext
@{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
@{[vle8_v $V24, $INPUT]}

# store the recovered tail plaintext (the partial last block)
addi $T0, $OUTPUT, 16
@{[vse8_v $V25, $T0]}

@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vxor_vv $V24, $V24, $V28]}
@{[vxor_vv $V24, $V24, $V28]}

# store the last full block's plaintext
@{[vse32_v $V24, $OUTPUT]}
ret
.size aes_xts_dec_256,.-aes_xts_dec_256
close STDOUT or die "error closing STDOUT: $!";