sha1-armv4-large.pl: add performance data for Cortex A8 core.

[openssl.git] / crypto / sha / asm / sha1-armv4-large.pl
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl

index f99606a32ea50e22197fdd4b7643519b1d29339d..feeec9372de95bca34020da25ec3be2e764fd2cc 100644 (file)
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -37,9 +37,11 @@
  #      modes are limited. As result it takes more instructions to do
  #      the same job in Thumb, therefore the code is never twice as
  #      small and always slower.
-# [***]        which is also ~35% better than compiler generated code.
+# [***]        which is also ~35% better than compiler generated code. Dual-
+#      issue Cortex A8 core was measured to process input block in
+#      ~990 cycles.
  
  
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
  open STDOUT,">$output";
  
  $ctx="r0";
@@ -236,6 +238,7 @@ $code.=<<___;
  .LK_60_79:     .word   0xca62c1d6
  .size  sha1_block_data_order,.-sha1_block_data_order
  .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
  ___
  
  $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4