Fix for CVE-2014-3570 (with minor bn_asm.c revamp).
[openssl.git] / crypto / bn / asm / x86_64-gcc.c
index c6d12f4..7f7e5c2 100644 (file)
@@ -276,77 +276,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+/*
+ * Keep in mind that carrying into high part of multiplication result
+ * can not overflow, because it cannot be all-ones.
+ */
 #if 0
 /* original macros are kept for reference purposes */
-#define mul_add_c(a,b,c0,c1,c2) {      \
-       BN_ULONG ta=(a),tb=(b);         \
-       t1 = ta * tb;                   \
-       t2 = BN_UMULT_HIGH(ta,tb);      \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
-
-#define mul_add_c2(a,b,c0,c1,c2) {     \
-       BN_ULONG ta=(a),tb=(b),t0;      \
-       t1 = BN_UMULT_HIGH(ta,tb);      \
-       t0 = ta * tb;                   \
-       t2 = t1+t1; c2 += (t2<t1)?1:0;  \
-       t1 = t0+t0; t2 += (t1<t0)?1:0;  \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi, tt;                    \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; tt = hi+((c0<lo)?1:0);        \
+       c1 += tt; c2 += (c1<tt)?1:0;            \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a)[i];                   \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,ta);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
 #else
 #define mul_add_c(a,b,c0,c1,c2)        do {    \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %3"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a),"m"(b)         \
                : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 
 #define sqr_add_c(a,i,c0,c1,c2)        do {    \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %2"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a[i])             \
                : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 
 #define mul_add_c2(a,b,c0,c1,c2) do {  \
+       BN_ULONG t1,t2;                 \
        asm ("mulq %3"                  \
                : "=a"(t1),"=d"(t2)     \
                : "a"(a),"m"(b)         \
                : "cc");                \
-       asm ("addq %0,%0; adcq %2,%1"   \
-               : "+d"(t2),"+r"(c2)     \
-               : "g"(0)                \
-               : "cc");                \
-       asm ("addq %0,%0; adcq %2,%1"   \
-               : "+a"(t1),"+d"(t2)     \
-               : "g"(0)                \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-       asm ("addq %2,%0; adcq %3,%1"   \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
+       asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
        } while (0)
 #endif
 
@@ -355,7 +354,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 
 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -459,7 +457,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -499,7 +496,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;
@@ -575,7 +571,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        {
-       BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
 
        c1=0;