X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-armv4-large.pl;h=9d34e04f7b632d25b146ce16f49be87fa3bb6829;hp=356b52fc1be10ca72a6bb807c49567a7b199a165;hb=11208dcfb9105e8afa37233185decefd45e89e17;hpb=e7a68985d5b734890489a9d47f82210c384e8c3a diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index 356b52fc1b..9d34e04f7b 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -181,7 +181,12 @@ $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) && !defined(__APPLE__) +.syntax unified +.thumb +#else .code 32 +#endif .global sha1_block_data_order .type sha1_block_data_order,%function @@ -189,7 +194,8 @@ $code=<<___; .align 5 sha1_block_data_order: #if __ARM_MAX_ARCH__>=7 - sub r3,pc,#8 @ sha1_block_data_order +.Lsha1_block: + adr r3,.Lsha1_block ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P #ifdef __APPLE__ @@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) { &BODY_00_15(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp +#endif bne .L_00_15 @ [((11+4)*5+2)*3] sub sp,sp,#25*4 ___ @@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) { &BODY_20_39(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp @ preserve carry +#endif bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes @@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) { &BODY_40_59(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp +#endif bne .L_40_59 @ [+((12+5)*5+2)*4] ldr $K,.LK_60_79 @@ -283,7 +304,7 @@ $code.=<<___; .LK_60_79: .word 0xca62c1d6 #if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha1_block_data_order +.word OPENSSL_armcap_P-.Lsha1_block #endif .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 5 @@ -458,6 +479,7 @@ sub Xuplast_80 () &teq ($inp,$len); &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX + &it ("eq"); &subeq ($inp,$inp,64); # reload last block to avoid SEGV &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); eval(shift(@insns)); @@ -508,12 +530,12 @@ sha1_block_data_order_neon: @ dmb @ errata #451034 on early Cortex A8 @ vstmdb sp!,{d8-d15} @ ABI specification says so mov $saved_sp,sp - sub sp,sp,#64 @ alloca + sub $Xfer,sp,#64 adr $K_XX_XX,.LK_00_19 - bic sp,sp,#15 @ align for 128-bit stores + bic $Xfer,$Xfer,#15 @ align for 128-bit stores ldmia $ctx,{$a,$b,$c,$d,$e} @ load context - mov $Xfer,sp + mov sp,$Xfer @ alloca vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned veor $zero,$zero,$zero @@ -560,10 +582,13 @@ $code.=<<___; add $b,$b,$t0 add $c,$c,$t1 add $d,$d,$Xfer + it eq moveq sp,$saved_sp add $e,$e,$Ki + it ne ldrne $Ki,[sp] stmia $ctx,{$a,$b,$c,$d,$e} + itt ne addne $Xfer,sp,#3*16 bne .Loop_neon @@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); $code.=<<___; #if __ARM_MAX_ARCH__>=7 + +# if defined(__thumb2__) && !defined(__APPLE__) +# define INST(a,b,c,d) .byte c,d|0xf,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d|0x10 +# endif + .type sha1_block_data_order_armv8,%function .align 5 sha1_block_data_order_armv8: @@ -677,7 +709,10 @@ ___ # since ARMv7 instructions are always encoded little-endian. # correct solution is to use .inst directive, but older # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + + # this fix-up provides Thumb encoding in conjunction with INST + $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000); + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", $word&0xff,($word>>8)&0xff, ($word>>16)&0xff,($word>>24)&0xff, $mnemonic,$arg;