crypto/modes/asm/ghash-riscv64.pl

   1 #! /usr/bin/env perl
   2 # Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
   3 #
   4 # Licensed under the Apache License 2.0 (the "License").  You may not use
   5 # this file except in compliance with the License.  You can obtain a copy
   6 # in the file LICENSE in the source distribution or at
   7 # https://www.openssl.org/source/license.html
   8
   9 # This file is dual-licensed and is also available under the following
  10 # terms:
  11 #
  12 # Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
  13 # All rights reserved.
  14 #
  15 # Redistribution and use in source and binary forms, with or without
  16 # modification, are permitted provided that the following conditions
  17 # are met:
  18 # 1. Redistributions of source code must retain the above copyright
  19 #    notice, this list of conditions and the following disclaimer.
  20 # 2. Redistributions in binary form must reproduce the above copyright
  21 #    notice, this list of conditions and the following disclaimer in the
  22 #    documentation and/or other materials provided with the distribution.
  23 #
  24 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35
  36 use strict;
  37 use warnings;
  38
  39 use FindBin qw($Bin);
  40 use lib "$Bin";
  41 use lib "$Bin/../../perlasm";
  42 use riscv;
  43
  44 # $output is the last argument if it looks like a file (it has an extension)
  45 # $flavour is the first argument if it doesn't look like a file
     # ($flavour is consumed from @ARGV here but not referenced again in this file.)
  46 my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  47 my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
  48
     # Redirect the generated assembly to $output when one was given.  The
     # three-argument form keeps the file name from ever being parsed as part
     # of the open mode, and the explicit check reports the real reason for a
     # failure right here instead of continuing with an unusable STDOUT.
  49 if ($output) {
         open STDOUT, '>', $output or die "can't open $output: $!";
     }
  50
     # Accumulator for the generated assembly; it is printed once at the end.
  51 my $code=<<___;
  52 .text
  53 ___
  54
  55 ################################################################################
  56 # void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
  57 # void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
  58 # void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
  59 #
  60 # input:  H: 128-bit H - secret parameter E(K, 0^128)
  61 # output: Htable: Preprocessed key data for gcm_gmult_rv64i_zbc* and
  62 #                 gcm_ghash_rv64i_zbc*
  63 #
  64 # All callers of this function reverse the byte order unconditionally
  65 # on little-endian machines, so we need to reverse the byte order back.
  66 # Additionally, we reverse the bits of each byte.
  67
  68 {
     # gcm_init_rv64i_zbc: variant that uses only the perlasm helpers with the
     # _rv64i suffix.  It loads the two 64-bit words of H, passes each through
     # brev8_rv64i (value register plus three scratch registers) and stores
     # them into Htable[0] through sd_rev8_rv64i (value, base, offset, scratch).
     # Both helpers come from the riscv perlasm module and are not shown here.
  69 my ($Htable, $H)                      = ("a0", "a1");        # arguments
     my ($h_word0, $h_word1)               = ("a2", "a3");        # words of H
     my ($scratch0, $scratch1, $scratch2)  = ("t0", "t1", "t2");  # temporaries
  70
  71 $code .= <<___;
  72 .p2align 3
  73 .globl gcm_init_rv64i_zbc
  74 .type gcm_init_rv64i_zbc,\@function
  75 gcm_init_rv64i_zbc:
  76     ld      $h_word0,0($H)
  77     ld      $h_word1,8($H)
  78     @{[brev8_rv64i   $h_word0, $scratch0, $scratch1, $scratch2]}
  79     @{[brev8_rv64i   $h_word1, $scratch0, $scratch1, $scratch2]}
  80     @{[sd_rev8_rv64i $h_word0, $Htable, 0, $scratch0]}
  81     @{[sd_rev8_rv64i $h_word1, $Htable, 8, $scratch0]}
  82     ret
  83 .size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
  84 ___
  85 }
  86
  87 {
     # gcm_init_rv64i_zbc__zbb: same job as gcm_init_rv64i_zbc, but the byte
     # swap is emitted through the rev8 helper (in place on each word) and the
     # result is written with plain sd instructions.  The per-byte bit reversal
     # still goes through brev8_rv64i with three scratch registers.
  88 my ($Htable, $H)                      = ("a0", "a1");        # arguments
     my ($h_word0, $h_word1)               = ("a2", "a3");        # words of H
     my ($scratch0, $scratch1, $scratch2)  = ("t0", "t1", "t2");  # temporaries
  89
  90 $code .= <<___;
  91 .p2align 3
  92 .globl gcm_init_rv64i_zbc__zbb
  93 .type gcm_init_rv64i_zbc__zbb,\@function
  94 gcm_init_rv64i_zbc__zbb:
  95     ld      $h_word0,0($H)
  96     ld      $h_word1,8($H)
  97     @{[brev8_rv64i $h_word0, $scratch0, $scratch1, $scratch2]}
  98     @{[brev8_rv64i $h_word1, $scratch0, $scratch1, $scratch2]}
  99     @{[rev8 $h_word0, $h_word0]}
  100     @{[rev8 $h_word1, $h_word1]}
  101     sd      $h_word0,0($Htable)
  102     sd      $h_word1,8($Htable)
  103     ret
  104 .size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
  105 ___
  106 }
 107
 108 {
 109 my ($Htable,$H,$TMP0,$TMP1) = ("a0","a1","t0","t1");
 110
 111 $code .= <<___;
 112 .p2align 3
 113 .globl gcm_init_rv64i_zbc__zbkb
 114 .type gcm_init_rv64i_zbc__zbkb,\@function
 115 gcm_init_rv64i_zbc__zbkb:
 116     ld      $TMP0,0($H)
 117     ld      $TMP1,8($H)
 118     @{[brev8 $TMP0, $TMP0]}
 119     @{[brev8 $TMP1, $TMP1]}
 120     @{[rev8 $TMP0, $TMP0]}
 121     @{[rev8 $TMP1, $TMP1]}
 122     sd      $TMP0,0($Htable)
 123     sd      $TMP1,8($Htable)
 124     ret
 125 .size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
 126 ___
 127 }
 128
 129 ################################################################################
 130 # void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
 131 # void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
 132 #
 133 # input:  Xi: current hash value
 134 #         Htable: copy of H
 135 # output: Xi: next hash value Xi
 136 #
 137 # Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit manip)
 138 # extensions. Using the no-Karatsuba approach and clmul for the final reduction.
 139 # This results in an implementation with minimized number of instructions.
 140 # HW with clmul latencies higher than 2 cycles might observe a performance
 141 # improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
 142 # might observe a performance improvement with additionally converting the
  143 # reduction to shift&xor. For a full discussion of these estimates, see
 144 # https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
  145 {
      # gcm_gmult_rv64i_zbc: one multiplication of Xi by the key in Htable[0],
      # reduced with the constant at Lpolymod, result written back to Xi.
      #
      # Register roles:
      #   $Xi, $Htable  a0, a1  pointer arguments
      #   $x1:$x0       a5:a4   Xi; $x1 is the upper 64-bit limb
      #   $y1:$y0       a7:a6   key words from Htable offsets 8 and 0
      #   $z3..$z0      t3..t0  256-bit carry-less product, $z0 lowest limb
      #   $t0, $t1      t4, t5  partial-product scratch
      #   $polymod      t6      address of Lpolymod, then the byte loaded from it
      # Beware: the Perl names $t0/$t1 are registers t4/t5; the hardware
      # registers t0..t3 are $z0..$z3.
      #
      # $z0..$z2 double as the scratch operands of brev8_rv64i, both before
      # the product is formed and after it has been folded into $x1:$x0.
      #
      # Multiplication (clmul = low half, clmulh = high half of a 64x64 product):
      #   z0 = lo(x0*y0)
      #   z1 = hi(x0*y0) ^ lo(x0*y1) ^ lo(x1*y0)
      #   z2 = hi(x0*y1) ^ hi(x1*y0) ^ lo(x1*y1)
      #   z3 = hi(x1*y1)
      # Reduction: z3*polymod is folded into z2:z1, then z2*polymod into
      # z1:z0, and the final xors place the result directly in x1:x0.
      #
      # Only a- and t-registers are touched and no stack access is emitted.
  146 my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
  147 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
  148
  149 $code .= <<___;
  150 .p2align 3
  151 .globl gcm_gmult_rv64i_zbc
  152 .type gcm_gmult_rv64i_zbc,\@function
  153 gcm_gmult_rv64i_zbc:
  154     # Load Xi and bit-reverse it
  155     ld        $x0, 0($Xi)
  156     ld        $x1, 8($Xi)
  157     @{[brev8_rv64i $x0, $z0, $z1, $z2]}
  158     @{[brev8_rv64i $x1, $z0, $z1, $z2]}
  159
  160     # Load the key (already bit-reversed)
  161     ld        $y0, 0($Htable)
  162     ld        $y1, 8($Htable)
  163
  164     # Load the reduction constant
  165     la        $polymod, Lpolymod
  166     lbu       $polymod, 0($polymod)
  167
  168     # Multiplication (without Karatsuba)
  169     @{[clmulh $z3, $x1, $y1]}
  170     @{[clmul  $z2, $x1, $y1]}
  171     @{[clmulh $t1, $x0, $y1]}
  172     @{[clmul  $z1, $x0, $y1]}
  173     xor       $z2, $z2, $t1
  174     @{[clmulh $t1, $x1, $y0]}
  175     @{[clmul  $t0, $x1, $y0]}
  176     xor       $z2, $z2, $t1
  177     xor       $z1, $z1, $t0
  178     @{[clmulh $t1, $x0, $y0]}
  179     @{[clmul  $z0, $x0, $y0]}
  180     xor       $z1, $z1, $t1
  181
  182     # Reduction with clmul
  183     @{[clmulh $t1, $z3, $polymod]}
  184     @{[clmul  $t0, $z3, $polymod]}
  185     xor       $z2, $z2, $t1
  186     xor       $z1, $z1, $t0
  187     @{[clmulh $t1, $z2, $polymod]}
  188     @{[clmul  $t0, $z2, $polymod]}
  189     xor       $x1, $z1, $t1
  190     xor       $x0, $z0, $t0
  191
  192     # Bit-reverse Xi back and store it
  193     @{[brev8_rv64i $x0, $z0, $z1, $z2]}
  194     @{[brev8_rv64i $x1, $z0, $z1, $z2]}
  195     sd        $x0, 0($Xi)
  196     sd        $x1, 8($Xi)
  197     ret
  198 .size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
  199 ___
  200 }
 201
  202 {
      # gcm_gmult_rv64i_zbc__zbkb: same multiply-and-reduce sequence as
      # gcm_gmult_rv64i_zbc; the only difference is that Xi is bit-reversed
      # through the two-operand brev8 helper, applied in place, so no scratch
      # registers are needed for that step.
      #
      # Register roles:
      #   $Xi, $Htable  a0, a1  pointer arguments
      #   $x1:$x0       a5:a4   Xi; $x1 is the upper 64-bit limb
      #   $y1:$y0       a7:a6   key words from Htable offsets 8 and 0
      #   $z3..$z0      t3..t0  256-bit carry-less product, $z0 lowest limb
      #   $t0, $t1      t4, t5  partial-product scratch
      #   $polymod      t6      address of Lpolymod, then the byte loaded from it
      # Beware: the Perl names $t0/$t1 are registers t4/t5; the hardware
      # registers t0..t3 are $z0..$z3.
      #
      # Multiplication (clmul = low half, clmulh = high half of a 64x64 product):
      #   z0 = lo(x0*y0)
      #   z1 = hi(x0*y0) ^ lo(x0*y1) ^ lo(x1*y0)
      #   z2 = hi(x0*y1) ^ hi(x1*y0) ^ lo(x1*y1)
      #   z3 = hi(x1*y1)
      # Reduction: z3*polymod is folded into z2:z1, then z2*polymod into
      # z1:z0, and the final xors place the result directly in x1:x0.
  203 my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
  204 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
  205
  206 $code .= <<___;
  207 .p2align 3
  208 .globl gcm_gmult_rv64i_zbc__zbkb
  209 .type gcm_gmult_rv64i_zbc__zbkb,\@function
  210 gcm_gmult_rv64i_zbc__zbkb:
  211     # Load Xi and bit-reverse it
  212     ld        $x0, 0($Xi)
  213     ld        $x1, 8($Xi)
  214     @{[brev8  $x0, $x0]}
  215     @{[brev8  $x1, $x1]}
  216
  217     # Load the key (already bit-reversed)
  218     ld        $y0, 0($Htable)
  219     ld        $y1, 8($Htable)
  220
  221     # Load the reduction constant
  222     la        $polymod, Lpolymod
  223     lbu       $polymod, 0($polymod)
  224
  225     # Multiplication (without Karatsuba)
  226     @{[clmulh $z3, $x1, $y1]}
  227     @{[clmul  $z2, $x1, $y1]}
  228     @{[clmulh $t1, $x0, $y1]}
  229     @{[clmul  $z1, $x0, $y1]}
  230     xor       $z2, $z2, $t1
  231     @{[clmulh $t1, $x1, $y0]}
  232     @{[clmul  $t0, $x1, $y0]}
  233     xor       $z2, $z2, $t1
  234     xor       $z1, $z1, $t0
  235     @{[clmulh $t1, $x0, $y0]}
  236     @{[clmul  $z0, $x0, $y0]}
  237     xor       $z1, $z1, $t1
  238
  239     # Reduction with clmul
  240     @{[clmulh $t1, $z3, $polymod]}
  241     @{[clmul  $t0, $z3, $polymod]}
  242     xor       $z2, $z2, $t1
  243     xor       $z1, $z1, $t0
  244     @{[clmulh $t1, $z2, $polymod]}
  245     @{[clmul  $t0, $z2, $polymod]}
  246     xor       $x1, $z1, $t1
  247     xor       $x0, $z0, $t0
  248
  249     # Bit-reverse Xi back and store it
  250     @{[brev8  $x0, $x0]}
  251     @{[brev8  $x1, $x1]}
  252     sd        $x0, 0($Xi)
  253     sd        $x1, 8($Xi)
  254     ret
  255 .size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
  256 ___
  257 }
 258
 259 ################################################################################
 260 # void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
 261 #                          const u8 *inp, size_t len);
 262 # void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
 263 #                                const u8 *inp, size_t len);
 264 #
 265 # input:  Xi: current hash value
 266 #         Htable: copy of H
 267 #         inp: pointer to input data
  268 #         len: length of input data in bytes (multiple of block size)
 269 # output: Xi: Xi+1 (next hash value Xi)
  270 {
      # gcm_ghash_rv64i_zbc: absorbs len bytes from inp into Xi, 16 bytes per
      # iteration.  Xi, the key and the reduction constant are loaded once
      # before the loop; every pass of Lstep xors one bit-reversed input block
      # into Xi and then runs the same multiply-and-reduce sequence as the
      # gmult routines.  Xi is bit-reversed back and stored only after the loop.
      #
      # Register roles:
      #   $Xi, $Htable, $inp, $len  a0..a3  arguments; $inp and $len are
      #                                     advanced by +16 / -16 per block
      #   $x1:$x0       a5:a4   running Xi; $x1 is the upper 64-bit limb
      #   $y1:$y0       a7:a6   key words from Htable offsets 8 and 0
      #   $z3..$z0      t3..t0  256-bit carry-less product, $z0 lowest limb
      #   $t0, $t1      t4, t5  input block, then partial-product scratch
      #   $polymod      t6      address of Lpolymod, then the byte loaded from it
      # Beware: the Perl names $t0/$t1 are registers t4/t5; the hardware
      # registers t0..t3 are $z0..$z3, which also serve as the scratch
      # operands of brev8_rv64i.
      #
      # Multiplication (clmul = low half, clmulh = high half of a 64x64 product):
      #   z0 = lo(x0*y0)
      #   z1 = hi(x0*y0) ^ lo(x0*y1) ^ lo(x1*y0)
      #   z2 = hi(x0*y1) ^ hi(x1*y0) ^ lo(x1*y1)
      #   z3 = hi(x1*y1)
      # Reduction: z3*polymod is folded into z2:z1, then z2*polymod into
      # z1:z0, and the final xors place the result directly in x1:x0.
      #
      # NOTE: $len is tested only by the bnez at the bottom of the loop, after
      # a block has already been consumed, so the routine relies on len being
      # a non-zero multiple of 16 on entry.
  271 my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
  272 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
  273
  274 $code .= <<___;
  275 .p2align 3
  276 .globl gcm_ghash_rv64i_zbc
  277 .type gcm_ghash_rv64i_zbc,\@function
  278 gcm_ghash_rv64i_zbc:
  279     # Load Xi and bit-reverse it
  280     ld        $x0, 0($Xi)
  281     ld        $x1, 8($Xi)
  282     @{[brev8_rv64i $x0, $z0, $z1, $z2]}
  283     @{[brev8_rv64i $x1, $z0, $z1, $z2]}
  284
  285     # Load the key (already bit-reversed)
  286     ld        $y0, 0($Htable)
  287     ld        $y1, 8($Htable)
  288
  289     # Load the reduction constant
  290     la        $polymod, Lpolymod
  291     lbu       $polymod, 0($polymod)
  292
  293 Lstep:
  294     # Load the input data, bit-reverse them, and XOR them with Xi
  295     ld        $t0, 0($inp)
  296     ld        $t1, 8($inp)
  297     add       $inp, $inp, 16
  298     add       $len, $len, -16
  299     @{[brev8_rv64i $t0, $z0, $z1, $z2]}
  300     @{[brev8_rv64i $t1, $z0, $z1, $z2]}
  301     xor       $x0, $x0, $t0
  302     xor       $x1, $x1, $t1
  303
  304     # Multiplication (without Karatsuba)
  305     @{[clmulh $z3, $x1, $y1]}
  306     @{[clmul  $z2, $x1, $y1]}
  307     @{[clmulh $t1, $x0, $y1]}
  308     @{[clmul  $z1, $x0, $y1]}
  309     xor       $z2, $z2, $t1
  310     @{[clmulh $t1, $x1, $y0]}
  311     @{[clmul  $t0, $x1, $y0]}
  312     xor       $z2, $z2, $t1
  313     xor       $z1, $z1, $t0
  314     @{[clmulh $t1, $x0, $y0]}
  315     @{[clmul  $z0, $x0, $y0]}
  316     xor       $z1, $z1, $t1
  317
  318     # Reduction with clmul
  319     @{[clmulh $t1, $z3, $polymod]}
  320     @{[clmul  $t0, $z3, $polymod]}
  321     xor       $z2, $z2, $t1
  322     xor       $z1, $z1, $t0
  323     @{[clmulh $t1, $z2, $polymod]}
  324     @{[clmul  $t0, $z2, $polymod]}
  325     xor       $x1, $z1, $t1
  326     xor       $x0, $z0, $t0
  327
  328     # Iterate over all blocks
  329     bnez      $len, Lstep
  330
  331     # Bit-reverse final Xi back and store it
  332     @{[brev8_rv64i $x0, $z0, $z1, $z2]}
  333     @{[brev8_rv64i $x1, $z0, $z1, $z2]}
  334     sd        $x0, 0($Xi)
  335     sd        $x1, 8($Xi)
  336     ret
  337 .size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
  338 ___
  339 }
 340
  341 {
      # gcm_ghash_rv64i_zbc__zbkb: same block loop as gcm_ghash_rv64i_zbc, with
      # every bit reversal emitted through the two-operand brev8 helper applied
      # in place instead of brev8_rv64i with scratch registers.
      #
      # Register roles:
      #   $Xi, $Htable, $inp, $len  a0..a3  arguments; $inp and $len are
      #                                     advanced by +16 / -16 per block
      #   $x1:$x0       a5:a4   running Xi; $x1 is the upper 64-bit limb
      #   $y1:$y0       a7:a6   key words from Htable offsets 8 and 0
      #   $z3..$z0      t3..t0  256-bit carry-less product, $z0 lowest limb
      #   $t0, $t1      t4, t5  input block, then partial-product scratch
      #   $polymod      t6      address of Lpolymod, then the byte loaded from it
      # Beware: the Perl names $t0/$t1 are registers t4/t5; the hardware
      # registers t0..t3 are $z0..$z3.
      #
      # Multiplication (clmul = low half, clmulh = high half of a 64x64 product):
      #   z0 = lo(x0*y0)
      #   z1 = hi(x0*y0) ^ lo(x0*y1) ^ lo(x1*y0)
      #   z2 = hi(x0*y1) ^ hi(x1*y0) ^ lo(x1*y1)
      #   z3 = hi(x1*y1)
      # Reduction: z3*polymod is folded into z2:z1, then z2*polymod into
      # z1:z0, and the final xors place the result directly in x1:x0.
      #
      # NOTE: $len is tested only by the bnez at the bottom of the loop, after
      # a block has already been consumed, so the routine relies on len being
      # a non-zero multiple of 16 on entry.  The loop label is spelled
      # Lstep_zkbk (sic); it is referenced only by that bnez.
  342 my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
  343 my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
  344
  345 $code .= <<___;
  346 .p2align 3
  347 .globl gcm_ghash_rv64i_zbc__zbkb
  348 .type gcm_ghash_rv64i_zbc__zbkb,\@function
  349 gcm_ghash_rv64i_zbc__zbkb:
  350     # Load Xi and bit-reverse it
  351     ld        $x0, 0($Xi)
  352     ld        $x1, 8($Xi)
  353     @{[brev8  $x0, $x0]}
  354     @{[brev8  $x1, $x1]}
  355
  356     # Load the key (already bit-reversed)
  357     ld        $y0, 0($Htable)
  358     ld        $y1, 8($Htable)
  359
  360     # Load the reduction constant
  361     la        $polymod, Lpolymod
  362     lbu       $polymod, 0($polymod)
  363
  364 Lstep_zkbk:
  365     # Load the input data, bit-reverse them, and XOR them with Xi
  366     ld        $t0, 0($inp)
  367     ld        $t1, 8($inp)
  368     add       $inp, $inp, 16
  369     add       $len, $len, -16
  370     @{[brev8  $t0, $t0]}
  371     @{[brev8  $t1, $t1]}
  372     xor       $x0, $x0, $t0
  373     xor       $x1, $x1, $t1
  374
  375     # Multiplication (without Karatsuba)
  376     @{[clmulh $z3, $x1, $y1]}
  377     @{[clmul  $z2, $x1, $y1]}
  378     @{[clmulh $t1, $x0, $y1]}
  379     @{[clmul  $z1, $x0, $y1]}
  380     xor       $z2, $z2, $t1
  381     @{[clmulh $t1, $x1, $y0]}
  382     @{[clmul  $t0, $x1, $y0]}
  383     xor       $z2, $z2, $t1
  384     xor       $z1, $z1, $t0
  385     @{[clmulh $t1, $x0, $y0]}
  386     @{[clmul  $z0, $x0, $y0]}
  387     xor       $z1, $z1, $t1
  388
  389     # Reduction with clmul
  390     @{[clmulh $t1, $z3, $polymod]}
  391     @{[clmul  $t0, $z3, $polymod]}
  392     xor       $z2, $z2, $t1
  393     xor       $z1, $z1, $t0
  394     @{[clmulh $t1, $z2, $polymod]}
  395     @{[clmul  $t0, $z2, $polymod]}
  396     xor       $x1, $z1, $t1
  397     xor       $x0, $z0, $t0
  398
  399     # Iterate over all blocks
  400     bnez      $len, Lstep_zkbk
  401
  402     # Bit-reverse final Xi back and store it
  403     @{[brev8  $x0, $x0]}
  404     @{[brev8  $x1, $x1]}
  405     sd $x0,  0($Xi)
  406     sd $x1,  8($Xi)
  407     ret
  408 .size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
  409 ___
  410 }
 411
 412 $code .= <<___;
 413 .p2align 3
 414 Lbrev8_const:
 415     .dword  0xAAAAAAAAAAAAAAAA
 416     .dword  0xCCCCCCCCCCCCCCCC
 417     .dword  0xF0F0F0F0F0F0F0F0
 418 .size Lbrev8_const,.-Lbrev8_const
 419
 420 Lpolymod:
 421     .byte 0x87
 422 .size Lpolymod,.-Lpolymod
 423 ___
 424
 425 print $code;
 426
 427 close STDOUT or die "error closing STDOUT: $!";