Allow ILP32 compilation in AArch64 assembly pack.

[openssl.git] / crypto / sha / asm / sha512-armv8.pl
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl

index bd7a0a5662e0a236f8f35dcdedd83954c9a7d1f8..7d69f0f4958dad08329eb4bc603986fcd24acf10 100644 (file)
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -14,8 +14,10 @@
  #
  #              SHA256-hw       SHA256(*)       SHA512
  # Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
  #
  #              SHA256-hw       SHA256(*)       SHA512
  # Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
-# Cortex-A53   2.38            15.6 (+110%)    10.1 (+190%(***))
+# Cortex-A53   2.38            15.5 (+115%)    10.0 (+150%(***))
  # Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
  # Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
+# Denver       2.01            10.5 (+26%)     6.70 (+8%)
+# X-Gene                       20.0 (+100%)    12.8 (+300%(***))
  # 
  # (*)  Software SHA256 results are of lesser relevance, presented
  #      mostly for informational purposes.
  # 
  # (*)  Software SHA256 results are of lesser relevance, presented
  #      mostly for informational purposes.
@@ -25,11 +27,18 @@
  # (***)        Super-impressive coefficients over gcc-generated code are
  #      indication of some compiler "pathology", most notably code
  #      generated with -mgeneral-regs-only is significantly faster
  # (***)        Super-impressive coefficients over gcc-generated code are
  #      indication of some compiler "pathology", most notably code
  #      generated with -mgeneral-regs-only is significantly faster
-#      and lags behind assembly only by 50-90%.
+#      and the gap is only 40-90%.
  
  $flavour=shift;
  $output=shift;
  
  $flavour=shift;
  $output=shift;
-open STDOUT,">$output";
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
  
  if ($output =~ /512/) {
         $BITS=512;
  
  if ($output =~ /512/) {
         $BITS=512;
@@ -153,13 +162,18 @@ $code.=<<___;
  
  .text
  
  
  .text
  
+.extern        OPENSSL_armcap_P
  .globl $func
  .type  $func,%function
  .align 6
  $func:
  ___
  $code.=<<___   if ($SZ==4);
  .globl $func
  .type  $func,%function
  .align 6
  $func:
  ___
  $code.=<<___   if ($SZ==4);
+#ifdef __ILP32__
+       ldrsw   x16,.LOPENSSL_armcap_P
+#else
         ldr     x16,.LOPENSSL_armcap_P
         ldr     x16,.LOPENSSL_armcap_P
+#endif
         adr     x17,.LOPENSSL_armcap_P
         add     x16,x16,x17
         ldr     w16,[x16]
         adr     x17,.LOPENSSL_armcap_P
         add     x16,x16,x17
         ldr     w16,[x16]
@@ -182,7 +196,7 @@ $code.=<<___;
         ldp     $E,$F,[$ctx,#4*$SZ]
         add     $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
         ldp     $G,$H,[$ctx,#6*$SZ]
         ldp     $E,$F,[$ctx,#4*$SZ]
         add     $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
         ldp     $G,$H,[$ctx,#6*$SZ]
-       adr     $Ktbl,K$BITS
+       adr     $Ktbl,.LK$BITS
         stp     $ctx,$num,[x29,#96]
  
  .Loop:
         stp     $ctx,$num,[x29,#96]
  
  .Loop:
@@ -232,8 +246,8 @@ $code.=<<___;
  .size  $func,.-$func
  
  .align 6
  .size  $func,.-$func
  
  .align 6
-.type  K$BITS,%object
-K$BITS:
+.type  .LK$BITS,%object
+.LK$BITS:
  ___
  $code.=<<___ if ($SZ==8);
         .quad   0x428a2f98d728ae22,0x7137449123ef65cd
  ___
  $code.=<<___ if ($SZ==8);
         .quad   0x428a2f98d728ae22,0x7137449123ef65cd
@@ -298,10 +312,14 @@ $code.=<<___ if ($SZ==4);
         .long   0       //terminator
  ___
  $code.=<<___;
         .long   0       //terminator
  ___
  $code.=<<___;
-.size  K$BITS,.-K$BITS
+.size  .LK$BITS,.-.LK$BITS
  .align 3
  .LOPENSSL_armcap_P:
  .align 3
  .LOPENSSL_armcap_P:
+#ifdef __ILP32__
+       .long   OPENSSL_armcap_P-.
+#else
         .quad   OPENSSL_armcap_P-.
         .quad   OPENSSL_armcap_P-.
+#endif
  .asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
  ___
  .asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
  ___
@@ -323,7 +341,7 @@ sha256_block_armv8:
         add             x29,sp,#0
  
         ld1.32          {$ABCD,$EFGH},[$ctx]
         add             x29,sp,#0
  
         ld1.32          {$ABCD,$EFGH},[$ctx]
-       adr             $Ktbl,K256
+       adr             $Ktbl,.LK256
  
  .Loop_hw:
         ld1             {@MSG[0]-@MSG[3]},[$inp],#64
  
  .Loop_hw:
         ld1             {@MSG[0]-@MSG[3]},[$inp],#64