-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
+# forms are granted according to the License.
# ====================================================================
#
-# Version 1.0
+# Version 1.1
#
# The major reason for undertaken effort was to mitigate the hazard of
# cache-timing attack. This is [currently and initially!] addressed in
# is an initial draft and one should expect more countermeasures to
# be implemented...
#
+# Version 1.1 prefetches T[ed]4 in order to mitigate the attack on the
+# last round.
+#
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by compressed S-box and longer loop epilogue
# "induced" by scheduling for L2 have negative effect on performance],
# optimal decrypt procedure]. Compared to GNU C generated code both
# procedures are more than 60% faster:-)
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
+# The last command-line argument, if any, names the output file and STDOUT
+# is redirected to it.  Die if that file cannot be opened rather than
+# silently emitting the generated code on the inherited STDOUT.
+$output = pop and (open STDOUT,">$output" or die "can't open $output: $!");
+
+$frame="STACK_FRAME";	# symbolic name in place of the removed numeric frame size; presumably a macro from crypto/sparc_arch.h -- TODO confirm
+$bias="STACK_BIAS";	# symbolic name in place of the removed numeric bias; presumably a macro from crypto/sparc_arch.h -- TODO confirm
$locals=16;
$acc0="%l0";
while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
}
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#ifndef __ASSEMBLER__
+# define __ASSEMBLER__ 1
+#endif
+#include "crypto/sparc_arch.h"
+
+#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
-___
-$code.=<<___;
+#endif
.section ".text",#alloc,#execinstr
-.align 64
+.align 256
AES_Te:
___
&_data_word(
ld [$key+28],$t3 !
srlx $acc9,8,$acc9
xor $acc5,$s1,$s1
+ ldx [$tbl+2048+0],%g0 ! prefetch te4
srlx $acc10,16,$acc10
xor $acc6,$s1,$s1
+ ldx [$tbl+2048+32],%g0 ! prefetch te4
srlx $acc11,24,$acc11
xor $acc7,$s1,$s1
+ ldx [$tbl+2048+64],%g0 ! prefetch te4
srlx $acc13,8,$acc13
xor $acc8,$s2,$s2
+ ldx [$tbl+2048+96],%g0 ! prefetch te4
srlx $acc14,16,$acc14 !
xor $acc9,$s2,$s2
+ ldx [$tbl+2048+128],%g0 ! prefetch te4
srlx $acc15,24,$acc15
xor $acc10,$s2,$s2
+ ldx [$tbl+2048+160],%g0 ! prefetch te4
srl $s0,21,$acc0
xor $acc11,$s2,$s2
+ ldx [$tbl+2048+192],%g0 ! prefetch te4
xor $acc12,$acc14,$acc14
xor $acc13,$s3,$s3
+ ldx [$tbl+2048+224],%g0 ! prefetch te4
srl $s1,13,$acc1 !
xor $acc14,$s3,$s3
xor $acc15,$s3,$s3
ld [%i0+12],%o3
1: call .+8
- mov %i2,%o5
- sub %o7,1b-AES_Te,%o4
+ add %o7,AES_Te-1b,%o4
call _sparcv9_AES_encrypt
- nop
+ mov %i2,%o5
st %o0,[%i1+0]
st %o1,[%i1+4]
or %l4,%l6,%o3
1: call .+8
- mov %i2,%o5
- sub %o7,1b-AES_Te,%o4
+ add %o7,AES_Te-1b,%o4
call _sparcv9_AES_encrypt
- nop
+ mov %i2,%o5
srl %o0,24,%l0
srl %o0,16,%l1
___
$code.=<<___;
-.align 64
+.align 256
AES_Td:
___
&_data_word(
ld [$key+28],$t3 !
srlx $acc9,8,$acc9
xor $acc5,$s1,$s1
+ ldx [$tbl+2048+0],%g0 ! prefetch td4
srlx $acc10,16,$acc10
xor $acc6,$s1,$s1
+ ldx [$tbl+2048+32],%g0 ! prefetch td4
srlx $acc11,24,$acc11
xor $acc7,$s1,$s1
+ ldx [$tbl+2048+64],%g0 ! prefetch td4
srlx $acc13,8,$acc13
xor $acc8,$s2,$s2
+ ldx [$tbl+2048+96],%g0 ! prefetch td4
srlx $acc14,16,$acc14 !
xor $acc9,$s2,$s2
+ ldx [$tbl+2048+128],%g0 ! prefetch td4
srlx $acc15,24,$acc15
xor $acc10,$s2,$s2
+ ldx [$tbl+2048+160],%g0 ! prefetch td4
srl $s0,21,$acc0
xor $acc11,$s2,$s2
+ ldx [$tbl+2048+192],%g0 ! prefetch td4
xor $acc12,$acc14,$acc14
xor $acc13,$s3,$s3
+ ldx [$tbl+2048+224],%g0 ! prefetch td4
and $acc0,2040,$acc0 !
xor $acc14,$s3,$s3
xor $acc15,$s3,$s3
ld [%i0+12],%o3
1: call .+8
- mov %i2,%o5
- sub %o7,1b-AES_Td,%o4
+ add %o7,AES_Td-1b,%o4
call _sparcv9_AES_decrypt
- nop
+ mov %i2,%o5
st %o0,[%i1+0]
st %o1,[%i1+4]
or %l4,%l6,%o3
1: call .+8
- mov %i2,%o5
- sub %o7,1b-AES_Td,%o4
+ add %o7,AES_Td-1b,%o4
call _sparcv9_AES_decrypt
- nop
+ mov %i2,%o5
srl %o0,24,%l0
srl %o0,16,%l1
# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
# undesired effect, so just omit them and sacrifice some portion of
# percent in performance...
-$code =~ s/fmovs.*$//gem;
+$code =~ s/fmovs.*$//gm;	# /m makes $ match at every line end, so each fmovs and the rest of its line is deleted
print $code;
+close STDOUT or die "error closing STDOUT: $!"; # ensure flush