#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# The generated code of this file depends on the following RISC-V extensions:
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)
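#
# Note: the input, output, and IV buffers below are plain byte pointers that
# are accessed with 32-bit element vector loads/stores and no alignment
# fixup, which is why misaligned load/store support (Zicclsm) is required.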
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___
################################################################################
# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
#                                             unsigned char *out, size_t blocks,
#                                             const void *key,
#                                             const unsigned char ivec[16]);
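#
# A minimal calling sketch from C, for illustration only (the key-schedule
# setup shown here is an assumption modeled on OpenSSL's AES_KEY usage, not
# something this file defines):
#
#     /* AES-128: expanded key schedule; round count sits at byte offset 240. */
#     AES_KEY ks;
#     AES_set_encrypt_key(user_key, 128, &ks);
#
#     unsigned char ivec[16];   /* IV0 || IV1 || IV2 || 32-bit BE counter */
#     rv64i_zvkb_zvkned_ctr32_encrypt_blocks(in, out, blocks, &ks, ivec);
#
# As with OpenSSL's other ctr32 primitives, only the low 32-bit big-endian
# counter word is incremented; the caller is expected to handle any carry
# beyond 32 bits.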
my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4");
my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3");
my ($VL) = ("t4");
my ($LEN32) = ("t5");
my ($CTR) = ("t6");
my ($MASK) = ("v0");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));
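
# Vector register roles in the code below:
#   v0      mask selecting the counter word (4th word) of every block
#   v1-v15  expanded round keys (11, 13, or 15 of them, by key size)
#   v16     little-endian counter blocks, one per 128-bit element group
#   v20     per-block counter offsets, later reused for plaintext
#   v24     AES state, then ciphertext
#   v31     the 128-bit IV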
# Prepare the AES ctr input data into v16.
sub init_aes_ctr_input {
    my $code=<<___;
# Setup mask into v0
# The mask pattern for 4*N-th elements
# mask v0: [000100010001....]
# Note:
#   We could set up the mask just for the actual element count instead of
#   the VLMAX.
li $T0, 0b10001000
@{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]}
@{[vmv_v_x $MASK, $T0]}
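# With SEW=8, splatting 0b10001000 sets bits 3 and 7 of every byte of v0, so
# when v0 is later read as a mask for e32 elements, exactly elements
# 3, 7, 11, ... (the counter word of each 128-bit block) are active.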
# Load the IV.
# v31: [IV0, IV1, IV2, big-endian count]
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V31, $IVP]}
# Convert the big-endian counter into little-endian.
@{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]}
@{[vrev8_v $V31, $V31, $MASK]}
# Splat the IV to v16: vaesz.vs XORs the scalar element group (v31) into
# vd, so zeroing v16 first replicates the IV across all element groups.
@{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
@{[vmv_v_i $V16, 0]}
@{[vaesz_vs $V16, $V31]}
# Prepare the ctr pattern into v20.
# v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
@{[viota_m $V20, $MASK, $MASK]}
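# viota.m writes to each active element the count of set mask bits below it:
# element 3 sees none (0), element 7 sees one (1), and so on, which yields
# the per-block offsets 0, 1, 2, ... in the counter lanes.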
# v16: [IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...]
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
@{[vadd_vv $V16, $V16, $V20, $MASK]}
___

    return $code;
}
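
# After init_aes_ctr_input, v16 holds [IV0, IV1, IV2, ctr+n] for the n-th
# block. The counter word is kept little-endian so it can be bumped with a
# plain masked vadd, and is byte-reversed back to big-endian just before
# each encryption pass.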
$code .= <<___;
.p2align 3
.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks
.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function
rv64i_zvkb_zvkned_ctr32_encrypt_blocks:
beqz $BLOCK_NUM, 1f

# Load the number of rounds (32-bit value at byte offset 240 of the key
# schedule).
lwu $T0, 240($KEYP)
li $T1, 14
li $T2, 12
li $T3, 10

# Total length in 32-bit words: 4 words per 128-bit block.
slli $LEN32, $BLOCK_NUM, 2

# Dispatch on the round count: 14/12/10 rounds for AES-256/192/128.
beq $T0, $T1, ctr32_encrypt_blocks_256
beq $T0, $T2, ctr32_encrypt_blocks_192
beq $T0, $T3, ctr32_encrypt_blocks_128

1:
ret
.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_128:
# Load all 11 round keys to v1-v11 registers.
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V11, $KEYP]}
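# The full key schedule stays resident in v1-v11 for the whole loop, so no
# round key is reloaded per iteration.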

@{[init_aes_ctr_input]}

##### AES body
j 2f

1:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
# Increase ctr in v16 by the number of blocks processed last iteration.
@{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
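# With LMUL=4, one register group holds vl 32-bit words, so each iteration
# processes vl/4 128-bit AES blocks in parallel.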
# Load plaintext into v20.
@{[vle32_v $V20, $INP]}
# Bookkeeping: bytes processed (vl*4), blocks processed (vl/4), words left.
slli $T0, $VL, 2
srli $CTR, $VL, 2
sub $LEN32, $LEN32, $VL
add $INP, $INP, $T0

# Prepare the AES ctr input into v24.
# The ctr data uses big-endian form.
@{[vmv_v_v $V24, $V16]}
@{[vrev8_v $V24, $V24, $MASK]}
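# v24 now holds the big-endian counter blocks that AES encrypts; v16 keeps
# the little-endian working copy for the next increment.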
@{[vaesz_vs $V24, $V1]}
@{[vaesem_vs $V24, $V2]}
@{[vaesem_vs $V24, $V3]}
@{[vaesem_vs $V24, $V4]}
@{[vaesem_vs $V24, $V5]}
@{[vaesem_vs $V24, $V6]}
@{[vaesem_vs $V24, $V7]}
@{[vaesem_vs $V24, $V8]}
@{[vaesem_vs $V24, $V9]}
@{[vaesem_vs $V24, $V10]}
@{[vaesef_vs $V24, $V11]}
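# Zvkned round pattern: vaesz.vs for the initial AddRoundKey, vaesem.vs for
# the middle rounds, and vaesef.vs (final round, no MixColumns) to finish.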
# ciphertext = plaintext ^ encrypted counter
@{[vxor_vv $V24, $V24, $V20]}
# Store the ciphertext.
@{[vse32_v $V24, $OUTP]}
add $OUTP, $OUTP, $T0

# Loop if there are more blocks to process.
bnez $LEN32, 1b

ret
.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_192:
# Load all 13 round keys to v1-v13 registers.
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V13, $KEYP]}

@{[init_aes_ctr_input]}

##### AES body
j 2f

1:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
# Increase ctr in v16.
@{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
# Load plaintext into v20.
@{[vle32_v $V20, $INP]}
slli $T0, $VL, 2
srli $CTR, $VL, 2
sub $LEN32, $LEN32, $VL
add $INP, $INP, $T0

# Prepare the AES ctr input into v24.
# The ctr data uses big-endian form.
@{[vmv_v_v $V24, $V16]}
@{[vrev8_v $V24, $V24, $MASK]}

@{[vaesz_vs $V24, $V1]}
@{[vaesem_vs $V24, $V2]}
@{[vaesem_vs $V24, $V3]}
@{[vaesem_vs $V24, $V4]}
@{[vaesem_vs $V24, $V5]}
@{[vaesem_vs $V24, $V6]}
@{[vaesem_vs $V24, $V7]}
@{[vaesem_vs $V24, $V8]}
@{[vaesem_vs $V24, $V9]}
@{[vaesem_vs $V24, $V10]}
@{[vaesem_vs $V24, $V11]}
@{[vaesem_vs $V24, $V12]}
@{[vaesef_vs $V24, $V13]}
# ciphertext = plaintext ^ encrypted counter
@{[vxor_vv $V24, $V24, $V20]}
# Store the ciphertext.
@{[vse32_v $V24, $OUTP]}
add $OUTP, $OUTP, $T0

bnez $LEN32, 1b

ret
.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_256:
# Load all 15 round keys to v1-v15 registers.
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V13, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V14, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $V15, $KEYP]}

@{[init_aes_ctr_input]}

##### AES body
j 2f

1:
@{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
# Increase ctr in v16.
@{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
# Load plaintext into v20.
@{[vle32_v $V20, $INP]}
slli $T0, $VL, 2
srli $CTR, $VL, 2
sub $LEN32, $LEN32, $VL
add $INP, $INP, $T0

# Prepare the AES ctr input into v24.
# The ctr data uses big-endian form.
@{[vmv_v_v $V24, $V16]}
@{[vrev8_v $V24, $V24, $MASK]}

@{[vaesz_vs $V24, $V1]}
@{[vaesem_vs $V24, $V2]}
@{[vaesem_vs $V24, $V3]}
@{[vaesem_vs $V24, $V4]}
@{[vaesem_vs $V24, $V5]}
@{[vaesem_vs $V24, $V6]}
@{[vaesem_vs $V24, $V7]}
@{[vaesem_vs $V24, $V8]}
@{[vaesem_vs $V24, $V9]}
@{[vaesem_vs $V24, $V10]}
@{[vaesem_vs $V24, $V11]}
@{[vaesem_vs $V24, $V12]}
@{[vaesem_vs $V24, $V13]}
@{[vaesem_vs $V24, $V14]}
@{[vaesef_vs $V24, $V15]}
# ciphertext = plaintext ^ encrypted counter
@{[vxor_vv $V24, $V24, $V20]}
# Store the ciphertext.
@{[vse32_v $V24, $OUTP]}
add $OUTP, $OUTP, $T0

bnez $LEN32, 1b

ret
.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256
___

print $code;

close STDOUT or die "error closing STDOUT: $!";