index c6d12f4..7f7e5c2 100644 (file)
@@ -276,77 +276,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */

+/*
+ * Keep in mind that carrying into high part of multiplication result
+ * can not overflow, because it cannot be all-ones.
+ */
#if 0
/* original macros are kept for reference purposes */
-       BN_ULONG ta=(a),tb=(b);         \
-       t1 = ta * tb;                   \
-       t2 = BN_UMULT_HIGH(ta,tb);      \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
-
-       BN_ULONG ta=(a),tb=(b),t0;      \
-       t1 = BN_UMULT_HIGH(ta,tb);      \
-       t0 = ta * tb;                   \
-       t2 = t1+t1; c2 += (t2<t1)?1:0;  \
-       t1 = t0+t0; t2 += (t1<t0)?1:0;  \
-       c0 += t1; t2 += (c0<t1)?1:0;    \
-       c1 += t2; c2 += (c1<t2)?1:0;    \
-       }
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi, tt;                    \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; tt = hi+((c0<lo)?1:0);        \
+       c1 += tt; c2 += (c1<tt)?1:0;            \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+       BN_ULONG ta = (a)[i];                   \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,ta);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
#else
+       BN_ULONG t1,t2;                 \
asm ("mulq %3"                  \
: "=a"(t1),"=d"(t2)     \
: "a"(a),"m"(b)         \
: "cc");                \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
} while (0)

+       BN_ULONG t1,t2;                 \
asm ("mulq %2"                  \
: "=a"(t1),"=d"(t2)     \
: "a"(a[i])             \
: "cc");                \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
} while (0)

+       BN_ULONG t1,t2;                 \
asm ("mulq %3"                  \
: "=a"(t1),"=d"(t2)     \
: "a"(a),"m"(b)         \
: "cc");                \
-               : "+d"(t2),"+r"(c2)     \
-               : "g"(0)                \
-               : "cc");                \
-               : "+a"(t1),"+d"(t2)     \
-               : "g"(0)                \
-               : "cc");                \
-               : "+r"(c0),"+d"(t2)     \
-               : "a"(t1),"g"(0)        \
-               : "cc");                \
-               : "+r"(c1),"+r"(c2)     \
-               : "d"(t2),"g"(0)        \
-               : "cc");                \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
+               : "+r"(c0),"+r"(c1),"+r"(c2)            \
+               : "r"(t1),"r"(t2),"g"(0)                \
+               : "cc");                                \
} while (0)
#endif

@@ -355,7 +354,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)

void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
-       BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;

c1=0;
@@ -459,7 +457,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
-       BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;

c1=0;
@@ -499,7 +496,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)

void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
-       BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;

c1=0;
@@ -575,7 +571,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)

void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
-       BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;

c1=0;