# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
-# I let hardware handle unaligned input, except on page boundaries
+# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank. The module is big-endian [which is
# not big deal as there're no little-endian targets left around].
+#
+# (*) Does this mean that this module is inappropriate for PPC403? Does
+# anybody know whether pre-POWER3 can sustain unaligned loads?
-# gcc-4.0.0 -m64 -m32
-# --------------------------
-# sha1 +76% +59%
+# -m64 -m32
+# ----------------------------------
+# PPC970,gcc-4.0.0 +76% +59%
+# Power6,xlc-7 +68% +33%
$output = shift;
if ($output =~ /64\.s/) {
$SIZE_T =8;
- $RZONE =288;
$UCMP ="cmpld";
$STU ="stdu";
$POP ="ld";
$PUSH ="std";
} elsif ($output =~ /32\.s/) {
$SIZE_T =4;
- $RZONE =224;
$UCMP ="cmplw";
$STU ="stwu";
$POP ="lwz";
$PUSH ="stw";
} else { die "nonsense $output"; }
-( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
- die "can't call ../perlasm/ppc-xlate.pl: $!";
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+( defined shift || open STDOUT,"| $^X $xlate $output" ) ||
+ die "can't call $xlate: $!";
$FRAME=24*$SIZE_T;
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i==0);
- lwz @X[$i],$i*4($inp)
+ lwz @X[$i],`$i*4`($inp)
___
$code.=<<___ if ($i<15);
- lwz @X[$j],$j*4($inp)
+ lwz @X[$j],`$j*4`($inp)
add $f,$K,$e
rotlwi $e,$a,5
add $f,$f,@X[$i]
$code=<<___;
.text
-.globl .sha1_block_asm_data_order
+.globl .sha1_block_data_order
.align 4
-.sha1_block_asm_data_order:
+.sha1_block_data_order:
mflr r0
- $STU $sp,`-($FRAME+64+$RZONE)`($sp)
+ $STU $sp,`-($FRAME+64)`($sp)
$PUSH r0,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
- addi $sp,$sp,`$FRAME+64+$RZONE`
+ addi $sp,$sp,`$FRAME+64`
blr
___
$code.=<<___;
.align 4
Lunaligned:
- li $t1,4096
- subf $t1,$inp,$t1
+ subfic $t1,$inp,4096
andi. $t1,$t1,4095 ; distance to closest page boundary
srwi. $t1,$t1,6 ; t1/=64
beq Lcross_page
$UCMP $num,$t1
ble- Laligned ; didn't cross the page boundary
mtctr $t1
- subf $num,$t1,$num
+ subfc $num,$t1,$num
bl Lsha1_block_private
Lcross_page:
li $t1,16
bdnz- Lsha1_block_private
blr
___
+$code.=<<___;
+.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;