2 # Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
9 # This file is dual-licensed and is also available under the following
12 # Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
18 # 1. Redistributions of source code must retain the above copyright
19 # notice, this list of conditions and the following disclaimer.
20 # 2. Redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution.
24 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 use lib "$Bin/../../perlasm";
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested output file.  Use the three-argument
# form of open so a filename beginning with '>' or '|' cannot alter the
# open mode, and die on failure instead of silently emitting the
# generated assembly to the wrong place.
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
55 ################################################################################
56 # void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
57 # void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
58 # void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
60 # input: H: 128-bit H - secret parameter E(K, 0^128)
61 # output: Htable: Preprocessed key data for gcm_gmult_rv64i_zbc* and
62 # gcm_ghash_rv64i_zbc*
64 # All callers of this function revert the byte-order unconditionally
65 # on little-endian machines. So we need to revert the byte-order back.
66 # Additionally we reverse the bits of each byte.
69 my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
73 .globl gcm_init_rv64i_zbc
74 .type gcm_init_rv64i_zbc,\@function
78 @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
79 @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
80 @{[sd_rev8_rv64i $VAL0, $Htable, 0, $TMP0]}
81 @{[sd_rev8_rv64i $VAL1, $Htable, 8, $TMP0]}
83 .size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
88 my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
92 .globl gcm_init_rv64i_zbc__zbb
93 .type gcm_init_rv64i_zbc__zbb,\@function
94 gcm_init_rv64i_zbc__zbb:
97 @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
98 @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
99 @{[rev8 $VAL0, $VAL0]}
100 @{[rev8 $VAL1, $VAL1]}
104 .size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
109 my ($Htable,$H,$TMP0,$TMP1) = ("a0","a1","t0","t1");
113 .globl gcm_init_rv64i_zbc__zbkb
114 .type gcm_init_rv64i_zbc__zbkb,\@function
115 gcm_init_rv64i_zbc__zbkb:
118 @{[brev8 $TMP0, $TMP0]}
119 @{[brev8 $TMP1, $TMP1]}
120 @{[rev8 $TMP0, $TMP0]}
121 @{[rev8 $TMP1, $TMP1]}
125 .size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
129 ################################################################################
130 # void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
131 # void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
133 # input: Xi: current hash value
135 # output: Xi: next hash value Xi
137 # Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit manip)
138 # extensions. Using the no-Karatsuba approach and clmul for the final reduction.
139 # This results in an implementation with minimized number of instructions.
140 # HW with clmul latencies higher than 2 cycles might observe a performance
141 # improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
142 # might observe a performance improvement with additionally converting the
143 # reduction to shift&xor. For a full discussion of these estimates see
144 # https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
146 my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
147 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
151 .globl gcm_gmult_rv64i_zbc
152 .type gcm_gmult_rv64i_zbc,\@function
154 # Load Xi and bit-reverse it
157 @{[brev8_rv64i $x0, $z0, $z1, $z2]}
158 @{[brev8_rv64i $x1, $z0, $z1, $z2]}
160 # Load the key (already bit-reversed)
164 # Load the reduction constant
165 la $polymod, Lpolymod
166 lbu $polymod, 0($polymod)
168 # Multiplication (without Karatsuba)
169 @{[clmulh $z3, $x1, $y1]}
170 @{[clmul $z2, $x1, $y1]}
171 @{[clmulh $t1, $x0, $y1]}
172 @{[clmul $z1, $x0, $y1]}
174 @{[clmulh $t1, $x1, $y0]}
175 @{[clmul $t0, $x1, $y0]}
178 @{[clmulh $t1, $x0, $y0]}
179 @{[clmul $z0, $x0, $y0]}
182 # Reduction with clmul
183 @{[clmulh $t1, $z3, $polymod]}
184 @{[clmul $t0, $z3, $polymod]}
187 @{[clmulh $t1, $z2, $polymod]}
188 @{[clmul $t0, $z2, $polymod]}
192 # Bit-reverse Xi back and store it
193 @{[brev8_rv64i $x0, $z0, $z1, $z2]}
194 @{[brev8_rv64i $x1, $z0, $z1, $z2]}
198 .size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
203 my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
204 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
208 .globl gcm_gmult_rv64i_zbc__zbkb
209 .type gcm_gmult_rv64i_zbc__zbkb,\@function
210 gcm_gmult_rv64i_zbc__zbkb:
211 # Load Xi and bit-reverse it
217 # Load the key (already bit-reversed)
221 # Load the reduction constant
222 la $polymod, Lpolymod
223 lbu $polymod, 0($polymod)
225 # Multiplication (without Karatsuba)
226 @{[clmulh $z3, $x1, $y1]}
227 @{[clmul $z2, $x1, $y1]}
228 @{[clmulh $t1, $x0, $y1]}
229 @{[clmul $z1, $x0, $y1]}
231 @{[clmulh $t1, $x1, $y0]}
232 @{[clmul $t0, $x1, $y0]}
235 @{[clmulh $t1, $x0, $y0]}
236 @{[clmul $z0, $x0, $y0]}
239 # Reduction with clmul
240 @{[clmulh $t1, $z3, $polymod]}
241 @{[clmul $t0, $z3, $polymod]}
244 @{[clmulh $t1, $z2, $polymod]}
245 @{[clmul $t0, $z2, $polymod]}
249 # Bit-reverse Xi back and store it
255 .size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
259 ################################################################################
260 # void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
261 # const u8 *inp, size_t len);
262 # void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
263 # const u8 *inp, size_t len);
265 # input: Xi: current hash value
267 # inp: pointer to input data
268 # len: length of input data in bytes (multiple of block size)
269 # output: Xi: Xi+1 (next hash value Xi)
271 my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
272 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
276 .globl gcm_ghash_rv64i_zbc
277 .type gcm_ghash_rv64i_zbc,\@function
279 # Load Xi and bit-reverse it
282 @{[brev8_rv64i $x0, $z0, $z1, $z2]}
283 @{[brev8_rv64i $x1, $z0, $z1, $z2]}
285 # Load the key (already bit-reversed)
289 # Load the reduction constant
290 la $polymod, Lpolymod
291 lbu $polymod, 0($polymod)
294 # Load the input data, bit-reverse them, and XOR them with Xi
299 @{[brev8_rv64i $t0, $z0, $z1, $z2]}
300 @{[brev8_rv64i $t1, $z0, $z1, $z2]}
304 # Multiplication (without Karatsuba)
305 @{[clmulh $z3, $x1, $y1]}
306 @{[clmul $z2, $x1, $y1]}
307 @{[clmulh $t1, $x0, $y1]}
308 @{[clmul $z1, $x0, $y1]}
310 @{[clmulh $t1, $x1, $y0]}
311 @{[clmul $t0, $x1, $y0]}
314 @{[clmulh $t1, $x0, $y0]}
315 @{[clmul $z0, $x0, $y0]}
318 # Reduction with clmul
319 @{[clmulh $t1, $z3, $polymod]}
320 @{[clmul $t0, $z3, $polymod]}
323 @{[clmulh $t1, $z2, $polymod]}
324 @{[clmul $t0, $z2, $polymod]}
328 # Iterate over all blocks
331 # Bit-reverse final Xi back and store it
332 @{[brev8_rv64i $x0, $z0, $z1, $z2]}
333 @{[brev8_rv64i $x1, $z0, $z1, $z2]}
337 .size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
342 my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
343 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
347 .globl gcm_ghash_rv64i_zbc__zbkb
348 .type gcm_ghash_rv64i_zbc__zbkb,\@function
349 gcm_ghash_rv64i_zbc__zbkb:
350 # Load Xi and bit-reverse it
356 # Load the key (already bit-reversed)
360 # Load the reduction constant
361 la $polymod, Lpolymod
362 lbu $polymod, 0($polymod)
365 # Load the input data, bit-reverse them, and XOR them with Xi
375 # Multiplication (without Karatsuba)
376 @{[clmulh $z3, $x1, $y1]}
377 @{[clmul $z2, $x1, $y1]}
378 @{[clmulh $t1, $x0, $y1]}
379 @{[clmul $z1, $x0, $y1]}
381 @{[clmulh $t1, $x1, $y0]}
382 @{[clmul $t0, $x1, $y0]}
385 @{[clmulh $t1, $x0, $y0]}
386 @{[clmul $z0, $x0, $y0]}
389 # Reduction with clmul
390 @{[clmulh $t1, $z3, $polymod]}
391 @{[clmul $t0, $z3, $polymod]}
394 @{[clmulh $t1, $z2, $polymod]}
395 @{[clmul $t0, $z2, $polymod]}
399 # Iterate over all blocks
400 bnez $len, Lstep_zkbk
402 # Bit-reverse final Xi back and store it
408 .size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
415 .dword 0xAAAAAAAAAAAAAAAA
416 .dword 0xCCCCCCCCCCCCCCCC
417 .dword 0xF0F0F0F0F0F0F0F0
418 .size Lbrev8_const,.-Lbrev8_const
422 .size Lpolymod,.-Lpolymod
427 close STDOUT or die "error closing STDOUT: $!";