bn/bn_add.c: address performance regression.
[openssl.git] / crypto / bn / asm / x86_64-gcc.c
index 7a3b1a12b2b24e5cda1df2e3c5bcfaacbf87993a..d38f3371647732b3242f6fc00de3163a7c623240 100644 (file)
@@ -225,9 +225,10 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   "       adcq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
                   "       lea     1(%2),%2        \n"
-                  "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
-                  "+r"(i)
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");
 
@@ -251,9 +252,10 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   "       sbbq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
                   "       lea     1(%2),%2        \n"
-                  "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
-                  "+r"(i)
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");