sha/asm/sha256-armv4.pl: one of the "universal" flag combinations didn't compile.
[openssl.git] / crypto / sha / asm / sha256-armv4.pl
index f14c9c3cb5a190b34786b2eeda36106c114da78b..5e56992d8ef6d2be9de7a7df96f168c480e2e887 100644 (file)
@@ -5,6 +5,8 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA256 block procedure for ARMv4. May 2007.
 #
 # Add ARMv8 code path performing at 2.0 cpb on Apple A7.
 
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}
 
 $ctx="r0";     $t0="r0";
 $inp="r1";     $t4="r1";
@@ -71,7 +85,9 @@ $code.=<<___ if ($i<16);
        eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
        add     $a,$a,$t2                       @ h+=Maj(a,b,c) from the past
        eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`  @ Sigma1(e)
+# ifndef __ARMEB__
        rev     $t1,$t1
+# endif
 #else
        @ ldrb  $t1,[$inp,#3]                   @ $i
        add     $a,$a,$t2                       @ h+=Maj(a,b,c) from the past
@@ -151,10 +167,20 @@ ___
 }
 
 $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
 
 .text
-.code  32
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code   32
+#endif
 
 .type  K256,%object
 .align 5
@@ -177,25 +203,33 @@ K256:
 .word  0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 .size  K256,.-K256
 .word  0                               @ terminator
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
-.word  OPENSSL_armcap_P-sha256_block_data_order
+.word  OPENSSL_armcap_P-.Lsha256_block_data_order
 #endif
 .align 5
 
 .global        sha256_block_data_order
 .type  sha256_block_data_order,%function
 sha256_block_data_order:
+.Lsha256_block_data_order:
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
        sub     r3,pc,#8                @ sha256_block_data_order
-       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
-#if __ARM_MAX_ARCH__>=7
+#else
+       adr     r3,.Lsha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
+#ifdef __APPLE__
+       ldr     r12,[r12]
+#endif
        tst     r12,#ARMV8_SHA256
        bne     .LARMv8
        tst     r12,#ARMV7_NEON
        bne     .LNEON
 #endif
+       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
        stmdb   sp!,{$ctx,$inp,$len,r4-r11,lr}
        ldmia   $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
        sub     $Ktbl,r3,#256+32        @ K256
@@ -213,6 +247,9 @@ for($i=0;$i<16;$i++)        { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
 $code.=".Lrounds_16_xx:\n";
 for (;$i<32;$i++)      { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
 $code.=<<___;
+#if __ARM_ARCH__>=7
+       ite     eq                      @ Thumb2 thing, sanity check in ARM
+#endif
        ldreq   $t3,[sp,#16*4]          @ pull ctx
        bne     .Lrounds_16_xx
 
@@ -429,16 +466,20 @@ $code.=<<___;
 .arch  armv7-a
 .fpu   neon
 
+.global        sha256_block_data_order_neon
 .type  sha256_block_data_order_neon,%function
-.align 4
+.align 5
+.skip  16
 sha256_block_data_order_neon:
 .LNEON:
        stmdb   sp!,{r4-r12,lr}
 
+       sub     $H,sp,#16*4+16
+       adr     $Ktbl,K256
+       bic     $H,$H,#15               @ align for 128-bit stores
        mov     $t2,sp
-       sub     sp,sp,#16*4+16          @ alloca
-       sub     $Ktbl,r3,#256+32        @ K256
-       bic     sp,sp,#15               @ align for 128-bit stores
+       mov     sp,$H                   @ alloca
+       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
 
        vld1.8          {@X[0]},[$inp]!
        vld1.8          {@X[1]},[$inp]!
@@ -490,11 +531,13 @@ $code.=<<___;
        ldr             $t0,[sp,#72]
        sub             $Ktbl,$Ktbl,#256        @ rewind $Ktbl
        teq             $inp,$t0
+       it              eq
        subeq           $inp,$inp,#64           @ avoid SEGV
        vld1.8          {@X[0]},[$inp]!         @ load next input block
        vld1.8          {@X[1]},[$inp]!
        vld1.8          {@X[2]},[$inp]!
        vld1.8          {@X[3]},[$inp]!
+       it              ne
        strne           $inp,[sp,#68]
        mov             $Xfer,sp
 ___
@@ -526,10 +569,12 @@ $code.=<<___;
        str     $D,[$t1],#4
        stmia   $t1,{$E-$H}
 
+       ittte   ne
        movne   $Xfer,sp
        ldrne   $t1,[sp,#0]
        eorne   $t2,$t2,$t2
        ldreq   sp,[sp,#76]                     @ restore original sp
+       itt     ne
        eorne   $t3,$B,$C
        bne     .L_00_48
 
@@ -548,14 +593,24 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
 my $Ktbl="r3";
 
 $code.=<<___;
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# if defined(__thumb2__)
+#  define INST(a,b,c,d)        .byte   c,d|0xc,a,b
+# else
+#  define INST(a,b,c,d)        .byte   a,b,c,d
+# endif
+
 .type  sha256_block_data_order_armv8,%function
 .align 5
 sha256_block_data_order_armv8:
 .LARMv8:
        vld1.32 {$ABCD,$EFGH},[$ctx]
-       sub     $Ktbl,r3,#sha256_block_data_order-K256
+       sub     $Ktbl,$Ktbl,#256+32
+       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
+       b       .Loop_v8
 
+.align 4
 .Loop_v8:
        vld1.8          {@MSG[0]-@MSG[1]},[$inp]!
        vld1.8          {@MSG[2]-@MSG[3]},[$inp]!
@@ -607,6 +662,7 @@ $code.=<<___;
 
        vadd.i32        $ABCD,$ABCD,$ABCD_SAVE
        vadd.i32        $EFGH,$EFGH,$EFGH_SAVE
+       it              ne
        bne             .Loop_v8
 
        vst1.32         {$ABCD,$EFGH},[$ctx]
@@ -619,11 +675,19 @@ ___
 $code.=<<___;
 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .comm   OPENSSL_armcap_P,4,4
 #endif
 ___
 
+open SELF,$0;
+while(<SELF>) {
+       next if (/^#!/);
+       last if (!s/^#/@/ and !/^$/);
+       print;
+}
+close SELF;
+
 {   my  %opcode = (
        "sha256h"       => 0xf3000c40,  "sha256h2"      => 0xf3100c40,
        "sha256su0"     => 0xf3ba03c0,  "sha256su1"     => 0xf3200c40   );
@@ -638,7 +702,7 @@ ___
            # since ARMv7 instructions are always encoded little-endian.
            # correct solution is to use .inst directive, but older
            # assemblers don't implement it:-(
-           sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+           sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
                        $word&0xff,($word>>8)&0xff,
                        ($word>>16)&0xff,($word>>24)&0xff,
                        $mnemonic,$arg;