2 # Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
use lib "$Bin/../../perlasm";

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested output file.  Three-arg open is used so
# that a filename starting with '>' or '|' cannot alter the open mode, and
# a failed open dies instead of silently emitting assembly to the terminal.
if (defined $output) {
    open STDOUT, '>', $output or die "Can't open $output: $!";
}
28 ################################################################################
29 # void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
30 # void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
31 # void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
33 # input: H: 128-bit H - secret parameter E(K, 0^128)
34 # output: Htable: Preprocessed key data for gcm_gmult_rv64i_zbc* and
35 # gcm_ghash_rv64i_zbc*
37 # All callers of this function revert the byte-order unconditionally
38 # on little-endian machines. So we need to revert the byte-order back.
39 # Additionally we reverse the bits of each byte.
# Register map for gcm_init_rv64i_zbc: a0 = Htable (out), a1 = H (in),
# a2/a3 hold the two 64-bit halves of H, t0-t2 are scratch for the helpers.
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
.globl gcm_init_rv64i_zbc
.type gcm_init_rv64i_zbc,\@function
# Reverse the bits of each byte of both halves of H using the plain rv64i
# helper sequence (no Zbb/Zbkb instructions available in this variant).
# NOTE(review): the loads of H[0]/H[1] into a2/a3 are not visible in this
# excerpt -- confirm against the full file.
@{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
@{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
# Store with a byte swap (see sd_rev8_rv64i), undoing the unconditional
# byte-order reversal the callers perform on little-endian machines.
@{[sd_rev8_rv64i $VAL0, $Htable, 0, $TMP0]}
@{[sd_rev8_rv64i $VAL1, $Htable, 8, $TMP0]}
.size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
# Register map for the Zbb variant: same argument registers as the plain
# rv64i version (a0 = Htable, a1 = H), t0-t2 scratch.
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
.globl gcm_init_rv64i_zbc__zbb
.type gcm_init_rv64i_zbc__zbb,\@function
gcm_init_rv64i_zbc__zbb:
# Reverse the bits of each byte of both halves of H (helper sequence; the
# loads feeding a2/a3 are elided from this excerpt).
@{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
@{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
# Zbb rev8: byte-swap each half in a single instruction instead of doing
# the swap during the store as the plain rv64i variant does.
@{[rev8 $VAL0, $VAL0]}
@{[rev8 $VAL1, $VAL1]}
.size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
# Register map for the Zbkb variant: a0 = Htable, a1 = H; t0/t1 carry the
# two halves of H (fewer scratch registers needed, since Zbkb has brev8).
my ($Htable,$H,$TMP0,$TMP1) = ("a0","a1","t0","t1");
.globl gcm_init_rv64i_zbc__zbkb
.type gcm_init_rv64i_zbc__zbkb,\@function
gcm_init_rv64i_zbc__zbkb:
# Zbkb brev8: reverse the bits within each byte, one instruction per half.
@{[brev8 $TMP0, $TMP0]}
@{[brev8 $TMP1, $TMP1]}
# rev8: byte-swap each half, undoing the callers' little-endian reversal.
@{[rev8 $TMP0, $TMP0]}
@{[rev8 $TMP1, $TMP1]}
.size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
102 ################################################################################
103 # void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
104 # void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
106 # input: Xi: current hash value
108 # output: Xi: next hash value Xi
110 # Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit manip)
111 # extensions. Using the no-Karatsuba approach and clmul for the final reduction.
112 # This results in an implementation with minimized number of instructions.
113 # HW with clmul latencies higher than 2 cycles might observe a performance
114 # improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
115 # might observe a performance improvement with additionally converting the
# reduction to shift&xor. For a full discussion of these estimates see
117 # https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
# Register map for gcm_gmult_rv64i_zbc: a0 = Xi (in/out), a1 = Htable;
# a4/a5 = the two halves of Xi, a6/a7 = the two halves of the key H.
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
# z0..z3 hold the 256-bit carry-less product, t0/t1 are accumulation
# scratch, t6 holds the single-byte reduction constant.
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
.globl gcm_gmult_rv64i_zbc
.type gcm_gmult_rv64i_zbc,\@function
# Load Xi and bit-reverse it
# (the ld instructions feeding a4/a5 are elided from this excerpt)
@{[brev8_rv64i $x0, $z0, $z1, $z2]}
@{[brev8_rv64i $x1, $z0, $z1, $z2]}
# Load the key (already bit-reversed)
# Load the reduction constant
# NOTE(review): only one byte is loaded (lbu) -- presumably the reduced
# GHASH polynomial constant stored at Lpolymod; the .byte directive is not
# visible in this excerpt, confirm against the full file.
la $polymod, Lpolymod
lbu $polymod, 0($polymod)
# Multiplication (without Karatsuba)
# Four 64x64 -> 128-bit carry-less products (clmulh = high half,
# clmul = low half); the xor instructions that fold t0/t1 into z0..z3
# are elided from this excerpt.
@{[clmulh $z3, $x1, $y1]}
@{[clmul $z2, $x1, $y1]}
@{[clmulh $t1, $x0, $y1]}
@{[clmul $z1, $x0, $y1]}
@{[clmulh $t1, $x1, $y0]}
@{[clmul $t0, $x1, $y0]}
@{[clmulh $t1, $x0, $y0]}
@{[clmul $z0, $x0, $y0]}
# Reduction with clmul
# Fold the top 128 bits (z3:z2) back into the low 128 bits using two
# carry-less multiplications by the reduction constant (see the note on
# latency trade-offs in the header comment above this function).
@{[clmulh $t1, $z3, $polymod]}
@{[clmul $t0, $z3, $polymod]}
@{[clmulh $t1, $z2, $polymod]}
@{[clmul $t0, $z2, $polymod]}
# Bit-reverse Xi back and store it
# (the sd instructions writing back to a0 are elided from this excerpt)
@{[brev8_rv64i $x0, $z0, $z1, $z2]}
@{[brev8_rv64i $x1, $z0, $z1, $z2]}
.size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
# Register map, identical to gcm_gmult_rv64i_zbc: a0 = Xi (in/out),
# a1 = Htable, a4/a5 = Xi halves, a6/a7 = key halves.
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
# z0..z3 = 256-bit product, t0/t1 = scratch, t6 = reduction constant.
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
.globl gcm_gmult_rv64i_zbc__zbkb
.type gcm_gmult_rv64i_zbc__zbkb,\@function
gcm_gmult_rv64i_zbc__zbkb:
# Load Xi and bit-reverse it
# (loads and the Zbkb brev8 instructions are elided from this excerpt)
# Load the key (already bit-reversed)
# Load the reduction constant
la $polymod, Lpolymod
lbu $polymod, 0($polymod)
# Multiplication (without Karatsuba)
# Same four-product schoolbook carry-less multiply as the non-Zbkb
# variant; the accumulating xor instructions are elided from this excerpt.
@{[clmulh $z3, $x1, $y1]}
@{[clmul $z2, $x1, $y1]}
@{[clmulh $t1, $x0, $y1]}
@{[clmul $z1, $x0, $y1]}
@{[clmulh $t1, $x1, $y0]}
@{[clmul $t0, $x1, $y0]}
@{[clmulh $t1, $x0, $y0]}
@{[clmul $z0, $x0, $y0]}
# Reduction with clmul
# Fold z3:z2 back into the low 128 bits via the reduction constant.
@{[clmulh $t1, $z3, $polymod]}
@{[clmul $t0, $z3, $polymod]}
@{[clmulh $t1, $z2, $polymod]}
@{[clmul $t0, $z2, $polymod]}
# Bit-reverse Xi back and store it
.size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
# Mask constants with alternating 1-bit, 2-bit and 4-bit groups -- the
# classic SWAR masks used by the brev8_rv64i helper to reverse the bits
# within each byte on plain rv64i hardware.
.dword 0xAAAAAAAAAAAAAAAA
.dword 0xCCCCCCCCCCCCCCCC
.dword 0xF0F0F0F0F0F0F0F0
.size Lbrev8_const,.-Lbrev8_const
# NOTE(review): the Lpolymod label and its .byte payload (the GHASH
# reduction constant read via lbu above) are not visible in this excerpt;
# only the closing .size directive appears here.
.size Lpolymod,.-Lpolymod
247 close STDOUT or die "error closing STDOUT: $!";