+ for (;;)
+ {
+ cpy_mul_add(r[0], r[1], a[0], w, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[1], r[2], a[1], w, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[2], r[3], a[2], w, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[3], r[4], a[3], w, c);
+ if (--num == 0) break;
+ a += 4;
+ r += 4;
+ }
+
+ return c;
+ }
+#else
+
+/*
+ * cpy_mul_add(r, b, a, bl, bh, c) -- half-word (non-BN_LLONG) variant.
+ *
+ * Computes the double-width product a * w, where the caller pre-split w
+ * into bl = LBITS(w) and bh = HBITS(w), then adds the incoming carry c
+ * and the addend word b.  The low result word is stored into r and the
+ * high word becomes the new carry:  (c, r) = a*w + b + c.
+ *
+ * Unlike mul_add (which accumulates into r in place), this reads its
+ * addend from a separate word b and writes into r, which lets the caller
+ * shift the result array down one word while multiplying -- see
+ * BN_mul_add_rshift below.
+ */
+#define cpy_mul_add(r, b, a, bl, bh, c) { \
+ BN_ULONG l,h; \
+ \
+ h=(a); \
+ l=LBITS(h); \
+ h=HBITS(h); \
+ mul64(l,h,(bl),(bh)); \
+ \
+ /* non-multiply part: add carry c, then addend b, carrying into h */ \
+ l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+ (c)=(b); \
+ l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+ (c)=h&BN_MASK2; \
+ (r)=l; \
+ }
+
+static BN_ULONG BN_mul_add_rshift(BN_ULONG *r, BN_ULONG *a, int num, BN_ULONG w)
+/* ret = (ret + a * w) >> BN_BITS2 -- a multiply-accumulate fused with a
+ * one-word right shift: for i = 0..num-2, r[i] = a[i+1]*w + r[i+1] + c,
+ * with carry c propagating upward.  The low word r[0] + a[0]*w is
+ * computed only to seed the carry and is then overwritten by the first
+ * loop iteration; in a Montgomery-style reduction w is chosen so that
+ * this dropped word is 0.  Returns the final carry out of the top word.
+ * NOTE(review): the previous comment read "<< shift * BN_BITS2", which
+ * contradicts the code -- the result moves DOWN one word, not up.
+ * NOTE(review): the last processed word reads r[i+1], so r must provide
+ * at least num+1 words -- confirm against callers.
+ */
+ {
+ BN_ULONG c = 0;
+ BN_ULONG bl, bh;
+
+ /* split w into half words for the mul64()-based macros */
+ bl = LBITS(w);
+ bh = HBITS(w);
+
+ /* low word: only the carry out of r[0] + a[0]*w is kept */
+ mul_add(r[0], a[0], bl, bh, c);
+ if (--num == 0) return c;
+ a++;
+
+ /* 4-way unrolled: r[i] = a[i]*w + r[i+1] + c (a already advanced) */
+ for (;;)
+ {
+ cpy_mul_add(r[0], r[1], a[0], bl, bh, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[1], r[2], a[1], bl, bh, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[2], r[3], a[2], bl, bh, c);
+ if (--num == 0) break;
+ cpy_mul_add(r[3], r[4], a[3], bl, bh, c);
+ if (--num == 0) break;
+ a += 4;
+ r += 4;
+ }
+ return c;
+ }
+#endif /* BN_LLONG */
+
+
+
+int BN_mont_red(BIGNUM *y, BN_MONTGOMERY *mont)