sha/asm/sha512-armv4.pl: adapt for use in Linux kernel context.

[openssl.git] / crypto / sha / asm / sha512-armv4.pl
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl

index daab297db589206f3c27786c52d7fdf408f25d7b..a2b11a84435776aca76fc4f479a95468a252964d 100644 (file)
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -5,6 +5,8 @@
  # project. The module is, however, dual licensed under OpenSSL and
  # CRYPTOGAMS licenses depending on where you obtain it. For further
  # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
  # ====================================================================
  
  # SHA512 block procedure for ARMv4. September 2007.
@@ -136,6 +138,9 @@ $code.=<<___;
         teq     $t0,#$magic
  
         ldr     $t3,[sp,#$Coff+0]       @ c.lo
+#if __ARM_ARCH__>=7
+       it      eq                      @ Thumb2 thing, sanity check in ARM
+#endif
         orreq   $Ktbl,$Ktbl,#1
         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@@ -173,7 +178,17 @@ $code.=<<___;
  ___
  }
  $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH  vstmdb  sp!,{d8-d15}
+# define VFP_ABI_POP   vldmia  sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
  #ifdef __ARMEL__
  # define LO 0
  # define HI 4
@@ -185,7 +200,18 @@ $code=<<___;
  #endif
  
  .text
+#if __ARM_ARCH__<7
  .code  32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
  .type  K512,%object
  .align 5
  K512:
@@ -230,7 +256,7 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
  WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
  WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
  .size  K512,.-K512
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-sha512_block_data_order
  .skip  32-4
@@ -241,14 +267,18 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
  .global        sha512_block_data_order
  .type  sha512_block_data_order,%function
  sha512_block_data_order:
+#if __ARM_ARCH__<7
         sub     r3,pc,#8                @ sha512_block_data_order
-       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
-#if __ARM_MAX_ARCH__>=7
+#else
+       adr     r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
         ldr     r12,.LOPENSSL_armcap
         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
         tst     r12,#1
         bne     .LNEON
  #endif
+       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
         stmdb   sp!,{r4-r12,lr}
         sub     $Ktbl,r3,#672           @ K512
         sub     sp,sp,#9*8
@@ -362,6 +392,9 @@ $code.=<<___;
  ___
         &BODY_00_15(0x17);
  $code.=<<___;
+#if __ARM_ARCH__>=7
+       ittt    eq                      @ Thumb2 thing, sanity check in ARM
+#endif
         ldreq   $t0,[sp,#`$Xoff+8*(16-1)`+0]
         ldreq   $t1,[sp,#`$Xoff+8*(16-1)`+4]
         beq     .L16_79
@@ -446,6 +479,7 @@ $code.=<<___;
         moveq   pc,lr                   @ be binary compatible with V4, yet
         bx      lr                      @ interoperable with Thumb ISA:-)
  #endif
+.size  sha512_block_data_order,.-sha512_block_data_order
  ___
  
  {
@@ -552,11 +586,15 @@ $code.=<<___;
  .arch  armv7-a
  .fpu   neon
  
+.global        sha512_block_data_order_neon
+.type  sha512_block_data_order_neon,%function
  .align 4
+sha512_block_data_order_neon:
  .LNEON:
         dmb                             @ errata #451034 on early Cortex A8
-       vstmdb  sp!,{d8-d15}            @ ABI specification says so
-       sub     $Ktbl,r3,#672           @ K512
+       add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
+       VFP_ABI_PUSH
+       adrl    $Ktbl,K512
         vldmia  $ctx,{$A-$H}            @ load context
  .Loop_neon:
  ___
@@ -581,16 +619,16 @@ $code.=<<___;
         sub             $Ktbl,#640      @ rewind K512
         bne             .Loop_neon
  
-       vldmia  sp!,{d8-d15}            @ epilogue
+       VFP_ABI_POP
         ret                             @ bx lr
+.size  sha512_block_data_order_neon,.-sha512_block_data_order_neon
  #endif
  ___
  }
  $code.=<<___;
-.size  sha512_block_data_order,.-sha512_block_data_order
  .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
  .comm  OPENSSL_armcap_P,4,4
  #endif
  ___
@@ -598,5 +636,14 @@ ___
  $code =~ s/\`([^\`]*)\`/eval $1/gem;
  $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
  $code =~ s/\bret\b/bx  lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+       next if (/^#!/);
+       last if (!s/^#/@/ and !/^$/);
+       print;
+}
+close SELF;
+
  print $code;
  close STDOUT; # enforce flush