bn/bn_add.c: address performance regression.
author: Andy Polyakov <appro@openssl.org>
Wed, 15 Nov 2017 11:25:02 +0000 (12:25 +0100)
committer: Andy Polyakov <appro@openssl.org>
Thu, 16 Nov 2017 12:57:55 +0000 (13:57 +0100)
A performance regression was reported for EC key generation between
1.0.2 and 1.1.x [in GH#2891]. It naturally depends on the platform;
values between 6 and 9% were observed.

Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/4743)

crypto/bn/asm/x86_64-gcc.c
crypto/bn/bn_add.c

index 7a3b1a1..d38f337 100644 (file)
@@ -225,9 +225,10 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   "       adcq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
                   "       lea     1(%2),%2        \n"
-                  "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
-                  "+r"(i)
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");
 
@@ -251,9 +252,10 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   "       sbbq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
                   "       lea     1(%2),%2        \n"
-                  "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
-                  "+r"(i)
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");
 
index 6479650..7cdefa7 100644 (file)
@@ -141,9 +141,13 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
         borrow &= (t1 == 0);
     }
 
+    while (max && *--rp == 0)
+        max--;
+
     r->top = max;
     r->neg = 0;
-    bn_correct_top(r);
+    bn_pollute(r);
+
     return 1;
 }