ppccap.c: engage new multiplication and squaring subroutines.

author Andy Polyakov <appro@openssl.org>

Fri, 9 Sep 2016 15:25:04 +0000 (17:25 +0200)

committer Andy Polyakov <appro@openssl.org>

Mon, 24 Oct 2016 18:00:40 +0000 (20:00 +0200)
author Andy Polyakov <appro@openssl.org>
Fri, 9 Sep 2016 15:25:04 +0000 (17:25 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 24 Oct 2016 18:00:40 +0000 (20:00 +0200)
diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf

index 8bb4de75f90ce7099914702c18af8402d2146b3a..ed50910eeddc7b570a2f348b430bc54780b67c11 100644 (file)
--- a/Configurations/00-base-templates.conf
+++ b/Configurations/00-base-templates.conf
@@ -278,7 +278,7 @@
      ppc32_asm => {
         template        => 1,
         cpuid_asm_src   => "ppccpuid.s ppccap.c",
-       bn_asm_src      => "bn-ppc.s ppc-mont.s ppc64-mont.s",
+       bn_asm_src      => "bn-ppc.s ppc-mont.s",
         aes_asm_src     => "aes_core.c aes_cbc.c aes-ppc.s vpaes-ppc.s aesp8-ppc.s",
         sha1_asm_src    => "sha1-ppc.s sha256-ppc.s sha512-ppc.s sha256p8-ppc.s sha512p8-ppc.s",
         modes_asm_src   => "ghashp8-ppc.s",
diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl

index ce0b061a7d66fa745dcd00b10caea72b74d08aa4..fdc049ae0e3b70a8c24ff0939aa4014878f25d4b 100644 (file)
--- a/crypto/bn/asm/ppc-mont.pl
+++ b/crypto/bn/asm/ppc-mont.pl
@@ -134,10 +134,7 @@ $code=<<___;
  .globl .bn_mul_mont_int
  .align 5
  .bn_mul_mont_int:
-       cmpwi   $num,4
         mr      $rp,r3          ; $rp is reassigned
-       li      r3,0
-       bltlr
  ___
  $code.=<<___ if ($BNSZ==4);
         cmpwi   $num,32         ; longer key performance is not better
diff --git a/crypto/ppccap.c b/crypto/ppccap.c

index 28cfa199e5297092688147510e7bd8f06458819c..b2b898e797a021aaf36088820bd040470cee2871 100644 (file)
--- a/crypto/ppccap.c
+++ b/crypto/ppccap.c
@@ -35,38 +35,24 @@ static sigset_t all_masked;
  int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                  const BN_ULONG *np, const BN_ULONG *n0, int num)
  {
-    int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap,
-                          const BN_ULONG *bp, const BN_ULONG *np,
-                          const BN_ULONG *n0, int num);
      int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                          const BN_ULONG *np, const BN_ULONG *n0, int num);
+    int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                          const BN_ULONG *np, const BN_ULONG *n0, int num);
  
-    if (sizeof(size_t) == 4) {
-# if 1 || (defined(__APPLE__) && defined(__MACH__))
-        if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64))
-            return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
-# else
-        /*
-         * boundary of 32 was experimentally determined on Linux 2.6.22,
-         * might have to be adjusted on AIX...
-         */
-        if (num >= 32 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) {
-            sigset_t oset;
-            int ret;
-
-            sigprocmask(SIG_SETMASK, &all_masked, &oset);
-            ret = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
-            sigprocmask(SIG_SETMASK, &oset, NULL);
-
-            return ret;
-        }
-# endif
-    } else if ((OPENSSL_ppccap_P & PPC_FPU64))
-        /*
-         * this is a "must" on POWER6, but run-time detection is not
-         * implemented yet...
-         */
-        return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
+    if (num < 4)
+        return 0;
+
+    if ((num & 3) == 0)
+        return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
+
+    /*
+     * There used to be [optional] call to bn_mul_mont_fpu64 here,
+     * but above subroutine is faster on contemporary processors.
+     * Formulation means that there might be old processors where
+     * FPU code path would be faster, POWER6 perhaps, but there was
+     * no opportunity to figure it out...
+     */
  
      return bn_mul_mont_int(rp, ap, bp, np, n0, num);
  }
author	Andy Polyakov <appro@openssl.org>
	Fri, 9 Sep 2016 15:25:04 +0000 (17:25 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Mon, 24 Oct 2016 18:00:40 +0000 (20:00 +0200)
Configurations/00-base-templates.conf		patch \| blob \| history
crypto/bn/asm/ppc-mont.pl		patch \| blob \| history
crypto/ppccap.c		patch \| blob \| history