ecstresstest.c: Fix memory leak on error

[openssl.git] / crypto / aes / asm / aes-sparcv9.pl
diff --git a/crypto/aes/asm/aes-sparcv9.pl b/crypto/aes/asm/aes-sparcv9.pl

index 30f38d7a5a2ec060b97513cf16939c3ca146434b..d15640e3d76b12a099a17837300dde4e708d171c 100755 (executable)
--- a/crypto/aes/asm/aes-sparcv9.pl
+++ b/crypto/aes/asm/aes-sparcv9.pl
@@ -1,12 +1,19 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
  #
  # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  # project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
+# forms are granted according to the License.
  # ====================================================================
  #
-# Version 1.0
+# Version 1.1
  #
  # The major reason for undertaken effort was to mitigate the hazard of
  # cache-timing attack. This is [currently and initially!] addressed in
@@ -16,6 +23,9 @@
  # is an initial draft and one should expect more countermeasures to
  # be implemented...
  #
+# Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the
+# last round.
+#
  # Even though performance was not the primary goal [on the contrary,
  # extra shifts "induced" by compressed S-box and longer loop epilogue
  # "induced" by scheduling for L2 have negative effect on performance],
@@ -27,10 +37,10 @@
  # optimal decrypt procedure]. Compared to GNU C generated code both
  # procedures are more than 60% faster:-)
  
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
+$output = pop and open STDOUT,">$output";
+
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
  $locals=16;
  
  $acc0="%l0";
@@ -71,14 +81,19 @@ sub _data_word()
      while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
  }
  
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#ifndef __ASSEMBLER__
+# define __ASSEMBLER__ 1
+#endif
+#include "crypto/sparc_arch.h"
+
+#ifdef  __arch64__
  .register      %g2,#scratch
  .register      %g3,#scratch
-___
-$code.=<<___;
+#endif
  .section       ".text",#alloc,#execinstr
  
-.align 64
+.align 256
  AES_Te:
  ___
  &_data_word(
@@ -364,20 +379,28 @@ _sparcv9_AES_encrypt:
         ld      [$key+28],$t3                   !
                 srlx    $acc9,8,$acc9
                 xor     $acc5,$s1,$s1
+       ldx     [$tbl+2048+0],%g0               ! prefetch te4
                 srlx    $acc10,16,$acc10
                 xor     $acc6,$s1,$s1
+       ldx     [$tbl+2048+32],%g0              ! prefetch te4
                 srlx    $acc11,24,$acc11
                 xor     $acc7,$s1,$s1
+       ldx     [$tbl+2048+64],%g0              ! prefetch te4
                 srlx    $acc13,8,$acc13
                 xor     $acc8,$s2,$s2
+       ldx     [$tbl+2048+96],%g0              ! prefetch te4
                 srlx    $acc14,16,$acc14        !
                 xor     $acc9,$s2,$s2
+       ldx     [$tbl+2048+128],%g0             ! prefetch te4
                 srlx    $acc15,24,$acc15
                 xor     $acc10,$s2,$s2
+       ldx     [$tbl+2048+160],%g0             ! prefetch te4
         srl     $s0,21,$acc0
                 xor     $acc11,$s2,$s2
+       ldx     [$tbl+2048+192],%g0             ! prefetch te4
                 xor     $acc12,$acc14,$acc14
                 xor     $acc13,$s3,$s3
+       ldx     [$tbl+2048+224],%g0             ! prefetch te4
         srl     $s1,13,$acc1                    !
                 xor     $acc14,$s3,$s3
                 xor     $acc15,$s3,$s3
@@ -512,10 +535,9 @@ AES_encrypt:
         ld      [%i0+12],%o3
  
  1:     call    .+8
-       mov     %i2,%o5
-       sub     %o7,1b-AES_Te,%o4
+       add     %o7,AES_Te-1b,%o4
         call    _sparcv9_AES_encrypt
-       nop
+       mov     %i2,%o5
  
         st      %o0,[%i1+0]
         st      %o1,[%i1+4]
@@ -573,10 +595,9 @@ AES_encrypt:
         or      %l4,%l6,%o3
  
  1:     call    .+8
-       mov     %i2,%o5
-       sub     %o7,1b-AES_Te,%o4
+       add     %o7,AES_Te-1b,%o4
         call    _sparcv9_AES_encrypt
-       nop
+       mov     %i2,%o5
  
         srl     %o0,24,%l0
         srl     %o0,16,%l1
@@ -618,7 +639,7 @@ AES_encrypt:
  ___
  
  $code.=<<___;
-.align 64
+.align 256
  AES_Td:
  ___
  &_data_word(
@@ -904,20 +925,28 @@ _sparcv9_AES_decrypt:
         ld      [$key+28],$t3                   !
                 srlx    $acc9,8,$acc9
                 xor     $acc5,$s1,$s1
+       ldx     [$tbl+2048+0],%g0               ! prefetch td4
                 srlx    $acc10,16,$acc10
                 xor     $acc6,$s1,$s1
+       ldx     [$tbl+2048+32],%g0              ! prefetch td4
                 srlx    $acc11,24,$acc11
                 xor     $acc7,$s1,$s1
+       ldx     [$tbl+2048+64],%g0              ! prefetch td4
                 srlx    $acc13,8,$acc13
                 xor     $acc8,$s2,$s2
+       ldx     [$tbl+2048+96],%g0              ! prefetch td4
                 srlx    $acc14,16,$acc14        !
                 xor     $acc9,$s2,$s2
+       ldx     [$tbl+2048+128],%g0             ! prefetch td4
                 srlx    $acc15,24,$acc15
                 xor     $acc10,$s2,$s2
+       ldx     [$tbl+2048+160],%g0             ! prefetch td4
         srl     $s0,21,$acc0
                 xor     $acc11,$s2,$s2
+       ldx     [$tbl+2048+192],%g0             ! prefetch td4
                 xor     $acc12,$acc14,$acc14
                 xor     $acc13,$s3,$s3
+       ldx     [$tbl+2048+224],%g0             ! prefetch td4
         and     $acc0,2040,$acc0                !
                 xor     $acc14,$s3,$s3
                 xor     $acc15,$s3,$s3
@@ -1052,10 +1081,9 @@ AES_decrypt:
         ld      [%i0+12],%o3
  
  1:     call    .+8
-       mov     %i2,%o5
-       sub     %o7,1b-AES_Td,%o4
+       add     %o7,AES_Td-1b,%o4
         call    _sparcv9_AES_decrypt
-       nop
+       mov     %i2,%o5
  
         st      %o0,[%i1+0]
         st      %o1,[%i1+4]
@@ -1113,10 +1141,9 @@ AES_decrypt:
         or      %l4,%l6,%o3
  
  1:     call    .+8
-       mov     %i2,%o5
-       sub     %o7,1b-AES_Td,%o4
+       add     %o7,AES_Td-1b,%o4
         call    _sparcv9_AES_decrypt
-       nop
+       mov     %i2,%o5
  
         srl     %o0,24,%l0
         srl     %o0,16,%l1
@@ -1161,6 +1188,7 @@ ___
  # As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
  # undesired effect, so just omit them and sacrifice some portion of
  # percent in performance...
-$code =~ s/fmovs.*$//gem;
+$code =~ s/fmovs.*$//gm;
  
  print $code;
+close STDOUT or die "error closing STDOUT: $!";        # ensure flush