sha/asm/sha512-armv4.pl: adapt for use in Linux kernel context.
author Andy Polyakov <appro@openssl.org>
Sat, 28 Mar 2015 14:27:34 +0000 (15:27 +0100)
committer Andy Polyakov <appro@openssl.org>
Sun, 29 Mar 2015 13:24:20 +0000 (15:24 +0200)
Follow-up to sha256-armv4.pl in cooperation with Ard Biesheuvel
(Linaro) and Sami Tolvanen (Google).

Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/sha/asm/sha512-armv4.pl

index daab297..a2b11a8 100644 (file)
@@ -5,6 +5,8 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA512 block procedure for ARMv4. September 2007.
@@ -136,6 +138,9 @@ $code.=<<___;
        teq     $t0,#$magic
 
        ldr     $t3,[sp,#$Coff+0]       @ c.lo
+#if __ARM_ARCH__>=7
+       it      eq                      @ Thumb2 thing, sanity check in ARM
+#endif
        orreq   $Ktbl,$Ktbl,#1
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@@ -173,7 +178,17 @@ $code.=<<___;
 ___
 }
 $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH  vstmdb  sp!,{d8-d15}
+# define VFP_ABI_POP   vldmia  sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
 #ifdef __ARMEL__
 # define LO 0
 # define HI 4
@@ -185,7 +200,18 @@ $code=<<___;
 #endif
 
 .text
+#if __ARM_ARCH__<7
 .code  32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
 .type  K512,%object
 .align 5
 K512:
@@ -230,7 +256,7 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size  K512,.-K512
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
 .word  OPENSSL_armcap_P-sha512_block_data_order
 .skip  32-4
@@ -241,14 +267,18 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .global        sha512_block_data_order
 .type  sha512_block_data_order,%function
 sha512_block_data_order:
+#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ sha512_block_data_order
-       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
-#if __ARM_MAX_ARCH__>=7
+#else
+       adr     r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#1
        bne     .LNEON
 #endif
+       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
        stmdb   sp!,{r4-r12,lr}
        sub     $Ktbl,r3,#672           @ K512
        sub     sp,sp,#9*8
@@ -362,6 +392,9 @@ $code.=<<___;
 ___
        &BODY_00_15(0x17);
 $code.=<<___;
+#if __ARM_ARCH__>=7
+       ittt    eq                      @ Thumb2 thing, sanity check in ARM
+#endif
        ldreq   $t0,[sp,#`$Xoff+8*(16-1)`+0]
        ldreq   $t1,[sp,#`$Xoff+8*(16-1)`+4]
        beq     .L16_79
@@ -446,6 +479,7 @@ $code.=<<___;
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
 #endif
+.size  sha512_block_data_order,.-sha512_block_data_order
 ___
 
 {
@@ -552,11 +586,15 @@ $code.=<<___;
 .arch  armv7-a
 .fpu   neon
 
+.global        sha512_block_data_order_neon
+.type  sha512_block_data_order_neon,%function
 .align 4
+sha512_block_data_order_neon:
 .LNEON:
        dmb                             @ errata #451034 on early Cortex A8
-       vstmdb  sp!,{d8-d15}            @ ABI specification says so
-       sub     $Ktbl,r3,#672           @ K512
+       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
+       VFP_ABI_PUSH
+       adrl    $Ktbl,K512
        vldmia  $ctx,{$A-$H}            @ load context
 .Loop_neon:
 ___
@@ -581,16 +619,16 @@ $code.=<<___;
        sub             $Ktbl,#640      @ rewind K512
        bne             .Loop_neon
 
-       vldmia  sp!,{d8-d15}            @ epilogue
+       VFP_ABI_POP
        ret                             @ bx lr
+.size  sha512_block_data_order_neon,.-sha512_block_data_order_neon
 #endif
 ___
 }
 $code.=<<___;
-.size  sha512_block_data_order,.-sha512_block_data_order
 .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .comm  OPENSSL_armcap_P,4,4
 #endif
 ___
@@ -598,5 +636,14 @@ ___
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
 $code =~ s/\bret\b/bx  lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+       next if (/^#!/);
+       last if (!s/^#/@/ and !/^$/);
+       print;
+}
+close SELF;
+
 print $code;
 close STDOUT; # enforce flush