-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
#
# IALU(*) FMA
#
-# UltraSPARC III 11.9(**)
-# SPARC T3 7.85
-# SPARC T4 1.67(***) 6.55
-# SPARC64 X 5.54 3.64
+# UltraSPARC III 12.3(**)
+# SPARC T3 7.92
+# SPARC T4 1.70(***) 6.55
+# SPARC64 X 5.60 3.64
#
# (*) Comparison to compiler-generated code is really problematic,
# because latter's performance varies too much depending on too
my ($h0,$h1,$h2,$h3, $t0,$t1,$t2) = map("%o$_",(0..5,7));
my ($d0,$d1,$d2,$d3) = map("%g$_",(1..4));
+my $output = pop;
+open STDOUT,">$stdout";
+
$code.=<<___;
#include "sparc_arch.h"
SPARC_LOAD_ADDRESS(OPENSSL_sparcv9cap_P,%g1)
ld [%g1],%g1
- and %g1,SPARCV9_FMADD|SPARCV9_PREFER_FPU|SPARCV9_VIS3,%g1
- cmp %g1,SPARCV9_FMADD|SPARCV9_PREFER_FPU
+ and %g1,SPARCV9_FMADD|SPARCV9_VIS3,%g1
+ cmp %g1,SPARCV9_FMADD
be .Lpoly1305_init_fma
nop
.Lno_key:
ret
restore %g0,%g0,%o0 ! return 0
+.type poly1305_init,#function
.size poly1305_init,.-poly1305_init
.globl poly1305_blocks
.align 32
poly1305_blocks:
save %sp,-STACK_FRAME,%sp
- andn $len,15,$len
+ srln $len,4,$len
brz,pn $len,.Lno_data
nop
umul $r1,$h0,$d1
umul $r2,$h0,$d2
umul $r3,$h0,$d3
- sub $len,16,$len
+ sub $len,1,$len
add $inp,16,$inp
umul $s3,$h1,$t0
addcc $t0,$d0,$h0
addccc %g0,$h1,$h1
addccc %g0,$h2,$h2
+ addccc %g0,$h3,$h3
brnz,pt $len,.Loop
- addc %g0,$h3,$h3
+ addc %g0,$h4,$h4
st $h1,[$ctx+0] ! store hash value
st $h0,[$ctx+4]
.Lno_data:
ret
restore
+.type poly1305_blocks,#function
.size poly1305_blocks,.-poly1305_blocks
___
########################################################################
.align 32
poly1305_blocks_vis3:
save %sp,-STACK_FRAME,%sp
- andn $len,15,$len
+ srln $len,4,$len
brz,pn $len,.Lno_data
nop
neg $shr,$shl
srlx $R1,2,$S1
+ b .Loop_vis3
add $R1,$S1,$S1
.Loop_vis3:
.Linp_aligned_vis3:
addcc $D0,$H0,$H0 ! accumulate input
- sub $len,16,$len
+ sub $len,1,$len
addxccc $D1,$H1,$H1
add $inp,16,$inp
add $T1,$T0,$T0
addcc $T0,$D0,$H0
+ addxccc %g0,$D1,$H1
brnz,pt $len,.Loop_vis3
- addxc %g0,$D1,$H1
+ addxc %g0,$H2,$H2
stx $H0,[$ctx+0] ! store hash value
stx $H1,[$ctx+8]
ret
restore
+.type poly1305_blocks_vis3,#function
.size poly1305_blocks_vis3,.-poly1305_blocks_vis3
___
}
ret
restore
+.type poly1305_emit,#function
.size poly1305_emit,.-poly1305_emit
___
.Lno_key_fma:
ret
restore %g0,%g0,%o0 ! return 0
+.type poly1305_init_fma,#function
.size poly1305_init_fma,.-poly1305_init_fma
.align 32
poly1305_blocks_fma:
save %sp,-STACK_FRAME-48,%sp
- srlx $len,4,$len
+ srln $len,4,$len
brz,pn $len,.Labort
sub $len,1,$len
.Labort:
ret
restore
+.type poly1305_blocks_fma,#function
.size poly1305_blocks_fma,.-poly1305_blocks_fma
___
{
ret
restore
+.type poly1305_emit_fma,#function
.size poly1305_emit_fma,.-poly1305_emit_fma
___
}