modes/asm/ghash-armv4.pl: improve interoperability with Android NDK.
[openssl.git] / crypto / modes / asm / ghashv8-armx.pl
index 3750d251f047c2e289877bad8519c9faf1dff6aa..cb4537b2217c7e9f5909ddefe46cd1a981d60909 100644 (file)
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 #
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -28,6 +35,7 @@
 # Cortex-A53   1.01            8.39
 # Cortex-A57   1.17            7.61
 # Denver       0.71            6.02
+# Mongoose     1.10            8.06
 #
 # (*)  presented for reference/comparison purposes;
 
@@ -144,10 +152,10 @@ gcm_gmult_v8:
 #endif
        vext.8          $IN,$t1,$t1,#8
 
-       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
        veor            $t1,$t1,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
-       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
 
        vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
        veor            $t2,$Xl,$Xh
@@ -235,7 +243,7 @@ $code.=<<___;
 #endif
        vext.8          $In,$t1,$t1,#8
        veor            $IN,$IN,$Xl             @ I[i]^=Xi
-       vpmull.p64      $Xln,$H,$In             @ H·Ii+1
+       vpmull.p64      $Xln,$H,$In             @ H·Ii+1
        veor            $t1,$t1,$In             @ Karatsuba pre-processing
        vpmull2.p64     $Xhn,$H,$In
        b               .Loop_mod2x_v8
@@ -244,14 +252,14 @@ $code.=<<___;
 .Loop_mod2x_v8:
        vext.8          $t2,$IN,$IN,#8
        subs            $len,$len,#32           @ is there more data?
-       vpmull.p64      $Xl,$H2,$IN             @ H^2.lo·Xi.lo
+       vpmull.p64      $Xl,$H2,$IN             @ H^2.lo·Xi.lo
        cclr            $inc,lo                 @ is it time to zero $inc?
 
         vpmull.p64     $Xmn,$Hhl,$t1
        veor            $t2,$t2,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H2,$IN             @ H^2.hi·Xi.hi
+       vpmull2.p64     $Xh,$H2,$IN             @ H^2.hi·Xi.hi
        veor            $Xl,$Xl,$Xln            @ accumulate
-       vpmull2.p64     $Xm,$Hhl,$t2            @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xm,$Hhl,$t2            @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
         vld1.64        {$t0},[$inp],$inc       @ load [rotated] I[i+2]
 
        veor            $Xh,$Xh,$Xhn
@@ -276,7 +284,7 @@ $code.=<<___;
         vext.8         $In,$t1,$t1,#8
         vext.8         $IN,$t0,$t0,#8
        veor            $Xl,$Xm,$t2
-        vpmull.p64     $Xln,$H,$In             @ H·Ii+1
+        vpmull.p64     $Xln,$H,$In             @ H·Ii+1
        veor            $IN,$IN,$Xh             @ accumulate $IN early
 
        vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase of reduction
@@ -300,10 +308,10 @@ $code.=<<___;
        veor            $IN,$IN,$Xl             @ inp^=Xi
        veor            $t1,$t0,$t2             @ $t1 is rotated inp^Xi
 
-       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
        veor            $t1,$t1,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
-       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
 
        vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
        veor            $t2,$Xl,$Xh