Fix for CVE-2014-3570 (with minor bn_asm.c revamp).
[openssl.git] / crypto / bn / asm / x86_64-gcc.c
index d34d2dce09a745ca0c76fe0ce3b111c9c9b57da6..7f7e5c2f0ae57a3c82c80e566c04df94b25c1a6a 100644 (file)
@@ -2,7 +2,7 @@
 #if !(defined(__GNUC__) && __GNUC__>=2)
 # include "../bn_asm.c"        /* kind of dirty hack for Sun Studio */
 #else
-/*
+/*-
  * x86_64 BIGNUM accelerator version 0.1, December 2002.
  *
  * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -64,7 +64,7 @@
 #undef mul
 #undef mul_add
 
-/*
+/*-
  * "m"(a), "+m"(r)     is the way to favor DirectPath ยต-code;
  * "g"(0)              let the compiler to decide where does it
  *                     want to keep the value of zero;
@@ -189,7 +189,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
 
        if (n <= 0) return 0;
 
-       asm (
+       asm volatile (
        "       subq    %0,%0           \n"     /* clear carry */
        "       jmp     1f              \n"
        ".p2align 4                     \n"
@@ -199,9 +199,9 @@ BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
        "       lea     1(%2),%2        \n"
        "       loop    1b              \n"
        "       sbbq    %0,%0           \n"
-               : "=r"(ret),"+c"(n),"+r"(i)
+               : "=&r"(ret),"+c"(n),"+r"(i)
                : "r"(rp),"r"(ap),"r"(bp)
-               : "cc"
+               : "cc", "memory"
        );
 
   return ret&1;
@@ -214,7 +214,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
 
        if (n <= 0) return 0;
 
-       asm (
+       asm volatile (
        "       subq    %0,%0           \n"     /* clear borrow */
        "       jmp     1f              \n"
        ".p2align 4                     \n"
@@ -224,9 +224,9 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
        "       lea     1(%2),%2        \n"
        "       loop    1b              \n"
        "       sbbq    %0,%0           \n"
-               : "=r"(ret),"+c"(n),"+r"(i)
+               : "=&r"(ret),"+c"(n),"+r"(i)
                : "r"(rp),"r"(ap),"r"(bp)
-               : "cc"
+               : "cc", "memory"
        );
 
   return ret&1;
@@ -276,77 +276,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+/*
+ * Keep in mind that carrying into high part of multiplication result
+ * can not overflow, because it cannot be all-ones.
+ */
 #if 0
 /* original macros are kept for reference purposes */
-#define mul_add_c(a,b,c0,c1,c2) {      \
-       BN_ULONG ta=(a),tb=(b);         \
-       t1 = ta * tb;                   \
-       t2 = BN_UMULT_HIGH(ta,tb);      \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
-
-#define mul_add_c2(a,b,c0,c1,c2) {     \
-       BN_ULONG ta=(a),tb=(b),t0;      \
-       t1 = BN_UMULT_HIGH(ta,tb);      \
-       t0 = ta * tb;                   \
-       t2 = t1+t1; c2 += (t2<t1)?1:0;  \
-       t1 = t0+t0; t2 += (t1<t0)?1:0;  \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi, tt;                    \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; tt = hi+((c0<lo)?1:0);        \
+       c1 += tt; c2 += (c1<tt)?1:0;            \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a)[i];                   \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,ta);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
 #else
 #define mul_add_c(a,b,c0,c1,c2)        do {    \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %3"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a),"m"(b)         \
                : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 
 #define sqr_add_c(a,i,c0,c1,c2)        do {    \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %2"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a[i])             \
                : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 
 #define mul_add_c2(a,b,c0,c1,c2) do {  \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %3"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a),"m"(b)         \
                : "cc");                \
-       asm ("addq %0,%0; adcq %2,%1"   \
-               : "+d"(t2),"+r"(c2)     \
-               : "g"(0)                \
-               : "cc");                \
-       asm ("addq %0,%0; adcq %2,%1"   \
-               : "+a"(t1),"+d"(t2)     \
-               : "g"(0)                \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 #endif
 
@@ -355,7 +354,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 
 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -459,7 +457,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -499,7 +496,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -575,7 +571,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;