X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fbn_asm.c;h=741cf813dbe9e5905b815d2d55ac1868bb723e91;hp=3329cc18e6ca6989ae70a4084593935688b472a0;hb=682b112abc7489a0e24039b9516457f13bb91309;hpb=707ef52e21e07cd1ea7a370cf568da222bcb07cf;ds=sidebyside diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 3329cc18e6..741cf813db 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -68,13 +68,14 @@ #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul_add(rp[0],ap[0],w,c1); @@ -83,23 +84,24 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) mul_add(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; - mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; - mul_add(rp[2],ap[2],w,c1); return c1; + mul_add(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul(rp[0],ap[0],w,c1); @@ -108,19 +110,21 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) mul(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; - mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; - mul(rp[2],ap[2],w,c1); + mul(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; + +#ifndef OPENSSL_SMALL_FOOTPRINT while (n&~3) { sqr(r[0],r[1],a[0]); @@ -129,17 +133,17 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) sqr(r[6],r[7],a[3]); a+=4; r+=8; n-=4; } - if (n) +#endif + while (n) { - sqr(r[0],r[1],a[0]); if (--n == 0) return; - sqr(r[2],r[3],a[1]); if (--n == 0) return; - sqr(r[4],r[5],a[2]); + sqr(r[0],r[1],a[0]); + a++; r+=2; n--; } } #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c=0; BN_ULONG bl,bh; @@ -150,23 +154,25 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul_add(rp[0],ap[0],bl,bh,c); - if (--num == 0) break; mul_add(rp[1],ap[1],bl,bh,c); - if (--num == 0) break; mul_add(rp[2],ap[2],bl,bh,c); - if (--num == 0) break; mul_add(rp[3],ap[3],bl,bh,c); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul_add(rp[0],ap[0],bl,bh,c); + ap++; rp++; num--; } return(c); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG carry=0; BN_ULONG bl,bh; @@ -177,42 +183,43 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul(rp[0],ap[0],bl,bh,carry); - if (--num == 0) break; mul(rp[1],ap[1],bl,bh,carry); - if (--num == 0) break; mul(rp[2],ap[2],bl,bh,carry); - if (--num == 0) break; mul(rp[3],ap[3],bl,bh,carry); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul(rp[0],ap[0],bl,bh,carry); + ap++; rp++; num--; } return(carry); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; - for (;;) + +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { sqr64(r[0],r[1],a[0]); - if (--n == 0) break; - sqr64(r[2],r[3],a[1]); - if (--n == 0) break; - sqr64(r[4],r[5],a[2]); - if (--n == 0) break; - sqr64(r[6],r[7],a[3]); - if (--n == 0) break; - - a+=4; - r+=8; + a+=4; r+=8; n-=4; + } +#endif + while (n) + { + sqr64(r[0],r[1],a[0]); + a++; r+=2; n--; } } @@ -227,7 +234,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) #else -/* Divide h-l by d and return the result. */ +/* Divide h,l by d and return the result. */ /* I need to test this some more :-( */ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { @@ -237,13 +244,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) if (d == 0) return(BN_MASK2); i=BN_num_bits_word(d); - if ((i != BN_BITS2) && (h > (BN_ULONG)1<= d) h-=d; @@ -301,43 +303,42 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ #ifdef BN_LLONG -BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULLONG ll=0; assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { ll+=(BN_ULLONG)a[0]+b[0]; r[0]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[1]+b[1]; r[1]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[2]+b[2]; r[2]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[3]+b[3]; r[3]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + ll+=(BN_ULLONG)a[0]+b[0]; + r[0]=(BN_ULONG)ll&BN_MASK2; + ll>>=BN_BITS2; + a++; b++; r++; n--; } return((BN_ULONG)ll); } #else /* !BN_LLONG */ -BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG c,l,t; @@ -345,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) if (n <= 0) return((BN_ULONG)0); c=0; - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t=a[0]; t=(t+c)&BN_MASK2; @@ -353,41 +355,42 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) l=(t+b[0])&BN_MASK2; c+=(l < t); r[0]=l; - if (--n <= 0) break; - t=a[1]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[1])&BN_MASK2; c+=(l < t); r[1]=l; - if (--n <= 0) break; - t=a[2]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[2])&BN_MASK2; c+=(l < t); r[2]=l; - if (--n <= 0) break; - t=a[3]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[3])&BN_MASK2; c+=(l < t); r[3]=l; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while(n) + { + t=a[0]; + t=(t+c)&BN_MASK2; + c=(t < c); + l=(t+b[0])&BN_MASK2; + c+=(l < t); + r[0]=l; + a++; b++; r++; n--; } return((BN_ULONG)c); } #endif /* !BN_LLONG */ -BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG t1,t2; int c=0; @@ -395,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t1=a[0]; t2=b[0]; r[0]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[1]; t2=b[1]; r[1]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[2]; t2=b[2]; r[2]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[3]; t2=b[3]; r[3]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + t1=a[0]; t2=b[0]; + r[0]=(t1-t2-c)&BN_MASK2; + if (t1 != t2) c=(t1 < t2); + a++; b++; r++; n--; } return(c); } -#ifdef BN_MUL_COMBA +#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) #undef bn_mul_comba8 #undef bn_mul_comba4 @@ -464,6 +466,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) +#elif defined(BN_UMULT_LOHI) + +#define mul_add_c(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b); \ + BN_UMULT_LOHI(t1,t2,ta,tb); \ + c0 += t1; t2 += (c0=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;i=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;i