X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-armv4-large.pl;h=feeec9372de95bca34020da25ec3be2e764fd2cc;hp=f99606a32ea50e22197fdd4b7643519b1d29339d;hb=c981086d40b6cc7f59ca1273c92d77e2dfacd156;hpb=5727f1f790b880e323eae4849132d61996710035

diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index f99606a32e..feeec9372d 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -37,9 +37,11 @@
 # modes are limited. As result it takes more instructions to do
 # the same job in Thumb, therefore the code is never twice as
 # small and always slower.
-# [***] which is also ~35% better than compiler generated code.
+# [***] which is also ~35% better than compiler generated code. Dual-
+# issue Cortex A8 core was measured to process input block in
+# ~990 cycles.
 
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 $ctx="r0";
@@ -236,6 +238,7 @@ $code.=<<___;
 .LK_60_79: .word 0xca62c1d6
 .size sha1_block_data_order,.-sha1_block_data_order
 .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by "
+.align 2
 ___
 
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4