-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# saturates at ~15.5x single-process result on 8-core processor,
# or ~20.5GBps per 2.85GHz socket.
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
+$output=pop and open STDOUT,">$output";
-$output=shift;
-open STDOUT,">$output";
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
$Zhi="%o0"; # 64-bit values
$Zlo="%o1";
$inp="%i2";
$len="%i3";
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#ifndef __ASSEMBLER__
+# define __ASSEMBLER__ 1
+#endif
+#include "crypto/sparc_arch.h"
+
+#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
-___
-$code.=<<___;
+#endif
+
.section ".text",#alloc,#execinstr
.align 64
add $inp,16,$inp
cmp $inp,$len
- be,pn `$bits==64?"%xcc":"%icc"`,.Ldone
+ be,pn SIZE_T_CC,.Ldone
and $Zlo,0xf,$remi
ldx [$Htblo+$nhi],$Tlo
or $V,%lo(0xA0406080),$V
or %l0,%lo(0x20C0E000),%l0
sllx $V,32,$V
- or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
stx $V,[%i0+16]
ret
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing
xmulx $Xlo,$Hlo,$C0
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
.align 32
gcm_ghash_vis3:
save %sp,-$frame,%sp
+ nop
+ srln $len,0,$len ! needed on v8+, "nop" on v9
ldx [$Xip+8],$C2 ! load Xi
ldx [$Xip+0],$C3
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
and $inp,7,$shl
andn $inp,7,$inp
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
\f
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# The idea is to keep open the option of producing a "universal" binary and
# to let the programmer detect at run-time whether the current CPU is VIS capable.
sub unvis3 {
print $_,"\n";
}
-close STDOUT;
+close STDOUT or die "error closing STDOUT: $!";