X-Git-Url: https://git.openssl.org/?a=blobdiff_plain;f=crypto%2Fbn%2Fbn_asm.c;h=be8aa3ffc5a4717cf43f81597f4f3a2f5d8a22bf;hb=e102a3dcfdff8f3dbe81286926525318611fc23a;hp=c9eb0e9d0545b6a0fa129a49cd81befc48e969e0;hpb=31b8d8684441e6cd5138832bb1b2ddb10acd6ba6;p=openssl.git diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index c9eb0e9d05..be8aa3ffc5 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -56,107 +56,95 @@ * [including the GNU Public Licence.] */ +#ifndef BN_DEBUG +# undef NDEBUG /* avoid conflicting definitions */ +# define NDEBUG +#endif + #include +#include #include "cryptlib.h" #include "bn_lcl.h" -#ifdef BN_LLONG +#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) -BN_ULONG bn_mul_add_words(rp,ap,num,w) -BN_ULONG *rp,*ap; -int num; -BN_ULONG w; +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return(c1); - for (;;) + while (num&~3) { mul_add(rp[0],ap[0],w,c1); - if (--num == 0) break; mul_add(rp[1],ap[1],w,c1); - if (--num == 0) break; mul_add(rp[2],ap[2],w,c1); - if (--num == 0) break; mul_add(rp[3],ap[3],w,c1); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } + if (num) + { + mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; + mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; + mul_add(rp[2],ap[2],w,c1); return c1; } return(c1); } -BN_ULONG bn_mul_words(rp,ap,num,w) -BN_ULONG *rp,*ap; -int num; -BN_ULONG w; +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return(c1); - for (;;) + while (num&~3) { mul(rp[0],ap[0],w,c1); - if (--num == 0) break; mul(rp[1],ap[1],w,c1); - if (--num == 0) break; mul(rp[2],ap[2],w,c1); - if (--num == 0) break; mul(rp[3],ap[3],w,c1); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } + if (num) + { + mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; + mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; + mul(rp[2],ap[2],w,c1); } return(c1); } -void bn_sqr_words(r,a,n) -BN_ULONG *r,*a; -int n; +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { - bn_check_num(n); + assert(n >= 0); if (n <= 0) return; - for (;;) + while (n&~3) { - BN_ULLONG t; - - t=(BN_ULLONG)(a[0])*(a[0]); - r[0]=Lw(t); r[1]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[1])*(a[1]); - r[2]=Lw(t); r[3]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[2])*(a[2]); - r[4]=Lw(t); r[5]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[3])*(a[3]); - r[6]=Lw(t); r[7]=Hw(t); - if (--n == 0) break; - - a+=4; - r+=8; + sqr(r[0],r[1],a[0]); + sqr(r[2],r[3],a[1]); + sqr(r[4],r[5],a[2]); + sqr(r[6],r[7],a[3]); + a+=4; r+=8; n-=4; + } + if (n) + { + sqr(r[0],r[1],a[0]); if (--n == 0) return; + sqr(r[2],r[3],a[1]); if (--n == 0) return; + sqr(r[4],r[5],a[2]); } } -#else +#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ -BN_ULONG bn_mul_add_words(rp,ap,num,w) -BN_ULONG *rp,*ap; -int num; -BN_ULONG w; +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c=0; BN_ULONG bl,bh; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); @@ -178,15 +166,12 @@ BN_ULONG w; return(c); } -BN_ULONG bn_mul_words(rp,ap,num,w) -BN_ULONG *rp,*ap; -int num; -BN_ULONG w; +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG carry=0; BN_ULONG bl,bh; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); @@ -208,11 +193,9 @@ BN_ULONG w; return(carry); } -void bn_sqr_words(r,a,n) -BN_ULONG *r,*a; -int n; +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { - bn_check_num(n); + assert(n >= 0); if (n <= 0) return; for (;;) { @@ -233,22 +216,20 @@ int n; } } -#endif +#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ #if defined(BN_LLONG) && defined(BN_DIV2W) -BN_ULONG bn_div_words(h,l,d) -BN_ULONG h,l,d; +BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { return((BN_ULONG)(((((BN_ULLONG)h)< (BN_ULONG)1< (BN_ULONG)1<= d) h-=d; @@ -281,18 +257,20 @@ BN_ULONG h,l,d; else q=h/dh; + th=q*dh; + tl=dl*q; for (;;) { - t=(h-q*dh); + t=h-th; if ((t&BN_MASK2h) || - ((dl*q) <= ( - (t<>BN_BITS4)))) break; q--; + th-=dh; + tl-=dl; } - th=q*dh; - tl=q*dl; t=(tl>>BN_BITS4); tl=(tl<= 0); if (n <= 0) return((BN_ULONG)0); for (;;) @@ -355,14 +331,12 @@ int n; } return((BN_ULONG)ll); } -#else -BN_ULONG bn_add_words(r,a,b,n) -BN_ULONG *r,*a,*b; -int n; +#else /* !BN_LLONG */ +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG c,l,t; - bn_check_num(n); + assert(n >= 0); if (n <= 0) return((BN_ULONG)0); c=0; @@ -406,16 +380,14 @@ int n; } return((BN_ULONG)c); } -#endif +#endif /* !BN_LLONG */ -BN_ULONG bn_sub_words(r,a,b,n) -BN_ULONG *r,*a,*b; -int n; +BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG t1,t2; int c=0; - bn_check_num(n); + assert(n >= 0); if (n <= 0) return((BN_ULONG)0); for (;;) @@ -447,13 +419,18 @@ int n; return(c); } -#ifdef BN_COMBA +#ifdef BN_MUL_COMBA #undef bn_mul_comba8 #undef bn_mul_comba4 #undef bn_sqr_comba8 #undef bn_sqr_comba4 +/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ +/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ +/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ +/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ + #ifdef BN_LLONG #define mul_add_c(a,b,c0,c1,c2) \ t=(BN_ULLONG)a*b; \ @@ -481,7 +458,39 @@ int n; #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) -#else + +#elif defined(BN_UMULT_HIGH) + +#define mul_add_c(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b); \ + t1 = ta * tb; \ + t2 = BN_UMULT_HIGH(ta,tb); \ + c0 += t1; t2 += (c0