projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
ARMv4 assembly pack: implement support for Thumb2.
[openssl.git]
/
crypto
/
sha
/
asm
/
sha1-armv4-large.pl
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index 356b52fc1be10ca72a6bb807c49567a7b199a165..9d34e04f7b632d25b146ce16f49be87fa3bb6829 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -181,7 +181,12 @@ $code=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif

 .global	sha1_block_data_order
 .type	sha1_block_data_order,%function
@@ -189,7 +194,8 @@ $code=<<___;
 .align	5
 sha1_block_data_order:
 #if __ARM_MAX_ARCH__>=7
-	sub	r3,pc,#8		@ sha1_block_data_order
+.Lsha1_block:
+	adr	r3,.Lsha1_block
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
 #ifdef __APPLE__
@@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_00_15(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 	sub	sp,sp,#25*4
 ___
@@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_20_39(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp			@ preserve carry
+#endif
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
@@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_40_59(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]
 	ldr	$K,.LK_60_79
@@ -283,7 +304,7 @@ $code.=<<___;
 .LK_60_79:	.word	0xca62c1d6
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha1_block_data_order
+.word	OPENSSL_armcap_P-.Lsha1_block
 #endif
 .asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
@@ -458,6 +479,7 @@ sub Xuplast_80 ()
 	&teq	($inp,$len);
 	&sub	($K_XX_XX,$K_XX_XX,16);	# rewind $K_XX_XX
+	&it	("eq");
 	&subeq	($inp,$inp,64);		# reload last block to avoid SEGV
 	&vld1_8	("{@X[-4&7]-@X[-3&7]}","[$inp]!");
 	 eval(shift(@insns));
@@ -508,12 +530,12 @@ sha1_block_data_order_neon:
 @ dmb				@ errata #451034 on early Cortex A8
 @ vstmdb	sp!,{d8-d15}	@ ABI specification says so
 	mov	$saved_sp,sp
-	sub	sp,sp,#64		@ alloca
+	sub	$Xfer,sp,#64
 	adr	$K_XX_XX,.LK_00_19
-	bic	sp,sp,#15		@ align for 128-bit stores
+	bic	$Xfer,$Xfer,#15		@ align for 128-bit stores
 	ldmia	$ctx,{$a,$b,$c,$d,$e}	@ load context
-	mov	$Xfer,sp
+	mov	sp,$Xfer		@ alloca
 	vld1.8	{@X[-4&7]-@X[-3&7]},[$inp]!	@ handles unaligned
 	veor	$zero,$zero,$zero
@@ -560,10 +582,13 @@ $code.=<<___;
 	add	$b,$b,$t0
 	add	$c,$c,$t1
 	add	$d,$d,$Xfer
+	it	eq
 	moveq	sp,$saved_sp
 	add	$e,$e,$Ki
+	it	ne
 	ldrne	$Ki,[sp]
 	stmia	$ctx,{$a,$b,$c,$d,$e}
+	itt	ne
 	addne	$Xfer,sp,#3*16
 	bne	.Loop_neon
@@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__) && !defined(__APPLE__)
+#  define INST(a,b,c,d)	.byte	c,d|0xf,a,b
+# else
+#  define INST(a,b,c,d)	.byte	a,b,c,d|0x10
+# endif
+
 .type	sha1_block_data_order_armv8,%function
 .align	5
 sha1_block_data_order_armv8:
@@ -677,7 +709,10 @@ ___
 # since ARMv7 instructions are always encoded little-endian.
 # correct solution is to use .inst directive, but older
 # assemblers don't implement it:-(
-	sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+
+	# this fix-up provides Thumb encoding in conjunction with INST
+	$word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+	sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
 			$word&0xff,($word>>8)&0xff,
 			($word>>16)&0xff,($word>>24)&0xff,
 			$mnemonic,$arg;