X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-ppc.pl;h=8aa5a37865571614da68c087f9e9a8b30fe13bac;hp=704887b7abf46d3bf3f4f647114387ee5a58284c;hb=76c15d790e07f6cc098be2d7b7f6ddc8acd11ca6;hpb=4dca00cec8be66dc12c8652a5b456dcc9e692f62 diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl index 704887b7ab..8aa5a37865 100755 --- a/crypto/sha/asm/sha1-ppc.pl +++ b/crypto/sha/asm/sha1-ppc.pl @@ -2,8 +2,9 @@ # ==================================================================== # Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # I let hardware handle unaligned input(*), except on page boundaries @@ -17,27 +18,35 @@ # -m64 -m32 # ---------------------------------- # PPC970,gcc-4.0.0 +76% +59% +# Power6,xlc-7 +68% +33% -$output = shift; +$flavour = shift; -if ($output =~ /64\.s/) { +if ($flavour =~ /64/) { $SIZE_T =8; + $LRSAVE =2*$SIZE_T; $UCMP ="cmpld"; $STU ="stdu"; $POP ="ld"; $PUSH ="std"; -} elsif ($output =~ /32\.s/) { +} elsif ($flavour =~ /32/) { $SIZE_T =4; + $LRSAVE =$SIZE_T; $UCMP ="cmplw"; $STU ="stwu"; $POP ="lwz"; $PUSH ="stw"; -} else { die "nonsense $output"; } +} else { die "nonsense $flavour"; } -( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) || - die "can't call ../perlasm/ppc-xlate.pl: $!"; +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; -$FRAME=24*$SIZE_T; +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=24*$SIZE_T+64; +$LOCALS=6*$SIZE_T; $K ="r0"; $sp ="r1"; @@ -150,14 +159,14 @@ ___ } $code=<<___; +.machine "any" .text -.globl .sha1_block_asm_data_order +.globl .sha1_block_data_order .align 4 -.sha1_block_asm_data_order: +.sha1_block_data_order: + $STU $sp,-$FRAME($sp) mflr r0 - $STU $sp,`-($FRAME+64)`($sp) - $PUSH r0,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r17,`$FRAME-$SIZE_T*15`($sp) @@ -175,6 +184,7 @@ $code=<<___; $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) lwz $A,0($ctx) lwz $B,4($ctx) lwz $C,8($ctx) @@ -185,37 +195,14 @@ $code=<<___; Laligned: mtctr $num bl Lsha1_block_private -Ldone: - $POP r0,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,`$FRAME+64` - blr -___ + b Ldone -# PowerPC specification allows an implementation to be ill-behaved -# upon unaligned access which crosses page boundary. "Better safe -# than sorry" principle makes me treat it specially. But I don't -# look for particular offending word, but rather for 64-byte input -# block which crosses the boundary. Once found that block is aligned -# and hashed separately... -$code.=<<___; +; PowerPC specification allows an implementation to be ill-behaved +; upon unaligned access which crosses page boundary. "Better safe +; than sorry" principle makes me treat it specially. But I don't +; look for particular offending word, but rather for 64-byte input +; block which crosses the boundary. Once found that block is aligned +; and hashed separately... .align 4 Lunaligned: subfic $t1,$inp,4096 @@ -230,7 +217,7 @@ Lunaligned: Lcross_page: li $t1,16 mtctr $t1 - addi r20,$sp,$FRAME ; spot below the frame + addi r20,$sp,$LOCALS ; spot within the frame Lmemcpy: lbz r16,0($inp) lbz r17,1($inp) @@ -244,15 +231,40 @@ Lmemcpy: addi r20,r20,4 bdnz Lmemcpy - $PUSH $inp,`$FRAME-$SIZE_T*19`($sp) + $PUSH $inp,`$FRAME-$SIZE_T*18`($sp) li $t1,1 - addi $inp,$sp,$FRAME + addi $inp,$sp,$LOCALS mtctr $t1 bl Lsha1_block_private - $POP $inp,`$FRAME-$SIZE_T*19`($sp) + $POP $inp,`$FRAME-$SIZE_T*18`($sp) addic. $num,$num,-1 bne- Lunaligned - b Ldone + +Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) + $POP r17,`$FRAME-$SIZE_T*15`($sp) + $POP r18,`$FRAME-$SIZE_T*14`($sp) + $POP r19,`$FRAME-$SIZE_T*13`($sp) + $POP r20,`$FRAME-$SIZE_T*12`($sp) + $POP r21,`$FRAME-$SIZE_T*11`($sp) + $POP r22,`$FRAME-$SIZE_T*10`($sp) + $POP r23,`$FRAME-$SIZE_T*9`($sp) + $POP r24,`$FRAME-$SIZE_T*8`($sp) + $POP r25,`$FRAME-$SIZE_T*7`($sp) + $POP r26,`$FRAME-$SIZE_T*6`($sp) + $POP r27,`$FRAME-$SIZE_T*5`($sp) + $POP r28,`$FRAME-$SIZE_T*4`($sp) + $POP r29,`$FRAME-$SIZE_T*3`($sp) + $POP r30,`$FRAME-$SIZE_T*2`($sp) + $POP r31,`$FRAME-$SIZE_T*1`($sp) + mtlr r0 + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 ___ # This is private block function, which uses tailored calling @@ -302,6 +314,12 @@ $code.=<<___; addi $inp,$inp,`16*4` bdnz- Lsha1_block_private blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size .sha1_block_data_order,.-.sha1_block_data_order +___ +$code.=<<___; +.asciz "SHA1 block transform for PPC, CRYPTOGAMS by " ___ $code =~ s/\`([^\`]*)\`/eval $1/gem;