# modes are limited. As result it takes more instructions to do
# the same job in Thumb, therefore the code is never twice as
# small and always slower.
-# [***] which is also ~35% better than compiler generated code.
+# [***] which is also ~35% better than compiler generated code. Dual-
+# issue Cortex A8 core was measured to process input block in
+# ~990 cycles.
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$ctx="r0";
.LK_60_79: .word 0xca62c1d6
.size sha1_block_data_order,.-sha1_block_data_order
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
___
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4