RT2163: Remove some unneeded #include's
[openssl.git] / crypto / bn / bn_mul.c
1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  * 
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  * 
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  * 
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from 
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  * 
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * 
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63
64 #include <assert.h>
65 #include "cryptlib.h"
66 #include "bn_lcl.h"
67
68 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
69 /* Here follows specialised variants of bn_add_words() and
70    bn_sub_words().  They have the property performing operations on
71    arrays of different sizes.  The sizes of those arrays is expressed through
72    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
73    which is the delta between the two lengths, calculated as len(a)-len(b).
74    All lengths are the number of BN_ULONGs...  For the operations that require
75    a result array as parameter, it must have the length cl+abs(dl).
76    These functions should probably end up in bn_asm.c as soon as there are
77    assembler counterparts for the systems that use assembler files.  */
78
79 BN_ULONG bn_sub_part_words(BN_ULONG *r,
80         const BN_ULONG *a, const BN_ULONG *b,
81         int cl, int dl)
82         {
83         BN_ULONG c, t;
84
85         assert(cl >= 0);
86         c = bn_sub_words(r, a, b, cl);
87
88         if (dl == 0)
89                 return c;
90
91         r += cl;
92         a += cl;
93         b += cl;
94
95         if (dl < 0)
96                 {
97                 for (;;)
98                         {
99                         t = b[0];
100                         r[0] = (0-t-c)&BN_MASK2;
101                         if (t != 0) c=1;
102                         if (++dl >= 0) break;
103
104                         t = b[1];
105                         r[1] = (0-t-c)&BN_MASK2;
106                         if (t != 0) c=1;
107                         if (++dl >= 0) break;
108
109                         t = b[2];
110                         r[2] = (0-t-c)&BN_MASK2;
111                         if (t != 0) c=1;
112                         if (++dl >= 0) break;
113
114                         t = b[3];
115                         r[3] = (0-t-c)&BN_MASK2;
116                         if (t != 0) c=1;
117                         if (++dl >= 0) break;
118
119                         b += 4;
120                         r += 4;
121                         }
122                 }
123         else
124                 {
125                 int save_dl = dl;
126                 while(c)
127                         {
128                         t = a[0];
129                         r[0] = (t-c)&BN_MASK2;
130                         if (t != 0) c=0;
131                         if (--dl <= 0) break;
132
133                         t = a[1];
134                         r[1] = (t-c)&BN_MASK2;
135                         if (t != 0) c=0;
136                         if (--dl <= 0) break;
137
138                         t = a[2];
139                         r[2] = (t-c)&BN_MASK2;
140                         if (t != 0) c=0;
141                         if (--dl <= 0) break;
142
143                         t = a[3];
144                         r[3] = (t-c)&BN_MASK2;
145                         if (t != 0) c=0;
146                         if (--dl <= 0) break;
147
148                         save_dl = dl;
149                         a += 4;
150                         r += 4;
151                         }
152                 if (dl > 0)
153                         {
154                         if (save_dl > dl)
155                                 {
156                                 switch (save_dl - dl)
157                                         {
158                                 case 1:
159                                         r[1] = a[1];
160                                         if (--dl <= 0) break;
161                                 case 2:
162                                         r[2] = a[2];
163                                         if (--dl <= 0) break;
164                                 case 3:
165                                         r[3] = a[3];
166                                         if (--dl <= 0) break;
167                                         }
168                                 a += 4;
169                                 r += 4;
170                                 }
171                         }
172                 if (dl > 0)
173                         {
174                         for(;;)
175                                 {
176                                 r[0] = a[0];
177                                 if (--dl <= 0) break;
178                                 r[1] = a[1];
179                                 if (--dl <= 0) break;
180                                 r[2] = a[2];
181                                 if (--dl <= 0) break;
182                                 r[3] = a[3];
183                                 if (--dl <= 0) break;
184
185                                 a += 4;
186                                 r += 4;
187                                 }
188                         }
189                 }
190         return c;
191         }
192 #endif
193
194 BN_ULONG bn_add_part_words(BN_ULONG *r,
195         const BN_ULONG *a, const BN_ULONG *b,
196         int cl, int dl)
197         {
198         BN_ULONG c, l, t;
199
200         assert(cl >= 0);
201         c = bn_add_words(r, a, b, cl);
202
203         if (dl == 0)
204                 return c;
205
206         r += cl;
207         a += cl;
208         b += cl;
209
210         if (dl < 0)
211                 {
212                 int save_dl = dl;
213                 while (c)
214                         {
215                         l=(c+b[0])&BN_MASK2;
216                         c=(l < c);
217                         r[0]=l;
218                         if (++dl >= 0) break;
219
220                         l=(c+b[1])&BN_MASK2;
221                         c=(l < c);
222                         r[1]=l;
223                         if (++dl >= 0) break;
224
225                         l=(c+b[2])&BN_MASK2;
226                         c=(l < c);
227                         r[2]=l;
228                         if (++dl >= 0) break;
229
230                         l=(c+b[3])&BN_MASK2;
231                         c=(l < c);
232                         r[3]=l;
233                         if (++dl >= 0) break;
234
235                         save_dl = dl;
236                         b+=4;
237                         r+=4;
238                         }
239                 if (dl < 0)
240                         {
241                         if (save_dl < dl)
242                                 {
243                                 switch (dl - save_dl)
244                                         {
245                                 case 1:
246                                         r[1] = b[1];
247                                         if (++dl >= 0) break;
248                                 case 2:
249                                         r[2] = b[2];
250                                         if (++dl >= 0) break;
251                                 case 3:
252                                         r[3] = b[3];
253                                         if (++dl >= 0) break;
254                                         }
255                                 b += 4;
256                                 r += 4;
257                                 }
258                         }
259                 if (dl < 0)
260                         {
261                         for(;;)
262                                 {
263                                 r[0] = b[0];
264                                 if (++dl >= 0) break;
265                                 r[1] = b[1];
266                                 if (++dl >= 0) break;
267                                 r[2] = b[2];
268                                 if (++dl >= 0) break;
269                                 r[3] = b[3];
270                                 if (++dl >= 0) break;
271
272                                 b += 4;
273                                 r += 4;
274                                 }
275                         }
276                 }
277         else
278                 {
279                 int save_dl = dl;
280                 while (c)
281                         {
282                         t=(a[0]+c)&BN_MASK2;
283                         c=(t < c);
284                         r[0]=t;
285                         if (--dl <= 0) break;
286
287                         t=(a[1]+c)&BN_MASK2;
288                         c=(t < c);
289                         r[1]=t;
290                         if (--dl <= 0) break;
291
292                         t=(a[2]+c)&BN_MASK2;
293                         c=(t < c);
294                         r[2]=t;
295                         if (--dl <= 0) break;
296
297                         t=(a[3]+c)&BN_MASK2;
298                         c=(t < c);
299                         r[3]=t;
300                         if (--dl <= 0) break;
301
302                         save_dl = dl;
303                         a+=4;
304                         r+=4;
305                         }
306                 if (dl > 0)
307                         {
308                         if (save_dl > dl)
309                                 {
310                                 switch (save_dl - dl)
311                                         {
312                                 case 1:
313                                         r[1] = a[1];
314                                         if (--dl <= 0) break;
315                                 case 2:
316                                         r[2] = a[2];
317                                         if (--dl <= 0) break;
318                                 case 3:
319                                         r[3] = a[3];
320                                         if (--dl <= 0) break;
321                                         }
322                                 a += 4;
323                                 r += 4;
324                                 }
325                         }
326                 if (dl > 0)
327                         {
328                         for(;;)
329                                 {
330                                 r[0] = a[0];
331                                 if (--dl <= 0) break;
332                                 r[1] = a[1];
333                                 if (--dl <= 0) break;
334                                 r[2] = a[2];
335                                 if (--dl <= 0) break;
336                                 r[3] = a[3];
337                                 if (--dl <= 0) break;
338
339                                 a += 4;
340                                 r += 4;
341                                 }
342                         }
343                 }
344         return c;
345         }
346
347 #ifdef BN_RECURSION
348 /* Karatsuba recursive multiplication algorithm
349  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
350
351 /* r is 2*n2 words in size,
352  * a and b are both n2 words in size.
353  * n2 must be a power of 2.
354  * We multiply and return the result.
355  * t must be 2*n2 words in size
356  * We calculate
357  * a[0]*b[0]
358  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
359  * a[1]*b[1]
360  */
361 /* dnX may not be positive, but n2/2+dnX has to be */
362 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
363         int dna, int dnb, BN_ULONG *t)
364         {
365         int n=n2/2,c1,c2;
366         int tna=n+dna, tnb=n+dnb;
367         unsigned int neg,zero;
368         BN_ULONG ln,lo,*p;
369
370 # ifdef BN_MUL_COMBA
371 #  if 0
372         if (n2 == 4)
373                 {
374                 bn_mul_comba4(r,a,b);
375                 return;
376                 }
377 #  endif
378         /* Only call bn_mul_comba 8 if n2 == 8 and the
379          * two arrays are complete [steve]
380          */
381         if (n2 == 8 && dna == 0 && dnb == 0)
382                 {
383                 bn_mul_comba8(r,a,b);
384                 return; 
385                 }
386 # endif /* BN_MUL_COMBA */
387         /* Else do normal multiply */
388         if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
389                 {
390                 bn_mul_normal(r,a,n2+dna,b,n2+dnb);
391                 if ((dna + dnb) < 0)
392                         memset(&r[2*n2 + dna + dnb], 0,
393                                 sizeof(BN_ULONG) * -(dna + dnb));
394                 return;
395                 }
396         /* r=(a[0]-a[1])*(b[1]-b[0]) */
397         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
398         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
399         zero=neg=0;
400         switch (c1*3+c2)
401                 {
402         case -4:
403                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
404                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
405                 break;
406         case -3:
407                 zero=1;
408                 break;
409         case -2:
410                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
411                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
412                 neg=1;
413                 break;
414         case -1:
415         case 0:
416         case 1:
417                 zero=1;
418                 break;
419         case 2:
420                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
421                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
422                 neg=1;
423                 break;
424         case 3:
425                 zero=1;
426                 break;
427         case 4:
428                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
429                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
430                 break;
431                 }
432
433 # ifdef BN_MUL_COMBA
434         if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
435                                                extra args to do this well */
436                 {
437                 if (!zero)
438                         bn_mul_comba4(&(t[n2]),t,&(t[n]));
439                 else
440                         memset(&(t[n2]),0,8*sizeof(BN_ULONG));
441                 
442                 bn_mul_comba4(r,a,b);
443                 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
444                 }
445         else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
446                                                     take extra args to do this
447                                                     well */
448                 {
449                 if (!zero)
450                         bn_mul_comba8(&(t[n2]),t,&(t[n]));
451                 else
452                         memset(&(t[n2]),0,16*sizeof(BN_ULONG));
453                 
454                 bn_mul_comba8(r,a,b);
455                 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
456                 }
457         else
458 # endif /* BN_MUL_COMBA */
459                 {
460                 p= &(t[n2*2]);
461                 if (!zero)
462                         bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
463                 else
464                         memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
465                 bn_mul_recursive(r,a,b,n,0,0,p);
466                 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
467                 }
468
469         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
470          * r[10] holds (a[0]*b[0])
471          * r[32] holds (b[1]*b[1])
472          */
473
474         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
475
476         if (neg) /* if t[32] is negative */
477                 {
478                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
479                 }
480         else
481                 {
482                 /* Might have a carry */
483                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
484                 }
485
486         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
487          * r[10] holds (a[0]*b[0])
488          * r[32] holds (b[1]*b[1])
489          * c1 holds the carry bits
490          */
491         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
492         if (c1)
493                 {
494                 p= &(r[n+n2]);
495                 lo= *p;
496                 ln=(lo+c1)&BN_MASK2;
497                 *p=ln;
498
499                 /* The overflow will stop before we over write
500                  * words we should not overwrite */
501                 if (ln < (BN_ULONG)c1)
502                         {
503                         do      {
504                                 p++;
505                                 lo= *p;
506                                 ln=(lo+1)&BN_MASK2;
507                                 *p=ln;
508                                 } while (ln == 0);
509                         }
510                 }
511         }
512
513 /* n+tn is the word length
514  * t needs to be n*4 is size, as does r */
515 /* tnX may not be negative but less than n */
516 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
517              int tna, int tnb, BN_ULONG *t)
518         {
519         int i,j,n2=n*2;
520         int c1,c2,neg;
521         BN_ULONG ln,lo,*p;
522
523         if (n < 8)
524                 {
525                 bn_mul_normal(r,a,n+tna,b,n+tnb);
526                 return;
527                 }
528
529         /* r=(a[0]-a[1])*(b[1]-b[0]) */
530         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
531         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
532         neg=0;
533         switch (c1*3+c2)
534                 {
535         case -4:
536                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
537                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
538                 break;
539         case -3:
540                 /* break; */
541         case -2:
542                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
543                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
544                 neg=1;
545                 break;
546         case -1:
547         case 0:
548         case 1:
549                 /* break; */
550         case 2:
551                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
552                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
553                 neg=1;
554                 break;
555         case 3:
556                 /* break; */
557         case 4:
558                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
559                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
560                 break;
561                 }
562                 /* The zero case isn't yet implemented here. The speedup
563                    would probably be negligible. */
564 # if 0
565         if (n == 4)
566                 {
567                 bn_mul_comba4(&(t[n2]),t,&(t[n]));
568                 bn_mul_comba4(r,a,b);
569                 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
570                 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
571                 }
572         else
573 # endif
574         if (n == 8)
575                 {
576                 bn_mul_comba8(&(t[n2]),t,&(t[n]));
577                 bn_mul_comba8(r,a,b);
578                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
579                 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
580                 }
581         else
582                 {
583                 p= &(t[n2*2]);
584                 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
585                 bn_mul_recursive(r,a,b,n,0,0,p);
586                 i=n/2;
587                 /* If there is only a bottom half to the number,
588                  * just do it */
589                 if (tna > tnb)
590                         j = tna - i;
591                 else
592                         j = tnb - i;
593                 if (j == 0)
594                         {
595                         bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
596                                 i,tna-i,tnb-i,p);
597                         memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
598                         }
599                 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
600                                 {
601                                 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
602                                         i,tna-i,tnb-i,p);
603                                 memset(&(r[n2+tna+tnb]),0,
604                                         sizeof(BN_ULONG)*(n2-tna-tnb));
605                                 }
606                 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
607                         {
608                         memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
609                         if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
610                                 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
611                                 {
612                                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
613                                 }
614                         else
615                                 {
616                                 for (;;)
617                                         {
618                                         i/=2;
619                                         /* these simplified conditions work
620                                          * exclusively because difference
621                                          * between tna and tnb is 1 or 0 */
622                                         if (i < tna || i < tnb)
623                                                 {
624                                                 bn_mul_part_recursive(&(r[n2]),
625                                                         &(a[n]),&(b[n]),
626                                                         i,tna-i,tnb-i,p);
627                                                 break;
628                                                 }
629                                         else if (i == tna || i == tnb)
630                                                 {
631                                                 bn_mul_recursive(&(r[n2]),
632                                                         &(a[n]),&(b[n]),
633                                                         i,tna-i,tnb-i,p);
634                                                 break;
635                                                 }
636                                         }
637                                 }
638                         }
639                 }
640
641         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
642          * r[10] holds (a[0]*b[0])
643          * r[32] holds (b[1]*b[1])
644          */
645
646         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
647
648         if (neg) /* if t[32] is negative */
649                 {
650                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
651                 }
652         else
653                 {
654                 /* Might have a carry */
655                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
656                 }
657
658         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
659          * r[10] holds (a[0]*b[0])
660          * r[32] holds (b[1]*b[1])
661          * c1 holds the carry bits
662          */
663         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
664         if (c1)
665                 {
666                 p= &(r[n+n2]);
667                 lo= *p;
668                 ln=(lo+c1)&BN_MASK2;
669                 *p=ln;
670
671                 /* The overflow will stop before we over write
672                  * words we should not overwrite */
673                 if (ln < (BN_ULONG)c1)
674                         {
675                         do      {
676                                 p++;
677                                 lo= *p;
678                                 ln=(lo+1)&BN_MASK2;
679                                 *p=ln;
680                                 } while (ln == 0);
681                         }
682                 }
683         }
684
685 /* a and b must be the same size, which is n2.
686  * r needs to be n2 words and t needs to be n2*2
687  */
688 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
689              BN_ULONG *t)
690         {
691         int n=n2/2;
692
693         bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
694         if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
695                 {
696                 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
697                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
698                 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
699                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
700                 }
701         else
702                 {
703                 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
704                 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
705                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
706                 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
707                 }
708         }
709
710 /* a and b must be the same size, which is n2.
711  * r needs to be n2 words and t needs to be n2*2
712  * l is the low words of the output.
713  * t needs to be n2*3
714  */
715 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
716              BN_ULONG *t)
717         {
718         int i,n;
719         int c1,c2;
720         int neg,oneg,zero;
721         BN_ULONG ll,lc,*lp,*mp;
722
723         n=n2/2;
724
725         /* Calculate (al-ah)*(bh-bl) */
726         neg=zero=0;
727         c1=bn_cmp_words(&(a[0]),&(a[n]),n);
728         c2=bn_cmp_words(&(b[n]),&(b[0]),n);
729         switch (c1*3+c2)
730                 {
731         case -4:
732                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
733                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
734                 break;
735         case -3:
736                 zero=1;
737                 break;
738         case -2:
739                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
740                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
741                 neg=1;
742                 break;
743         case -1:
744         case 0:
745         case 1:
746                 zero=1;
747                 break;
748         case 2:
749                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
750                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
751                 neg=1;
752                 break;
753         case 3:
754                 zero=1;
755                 break;
756         case 4:
757                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
758                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
759                 break;
760                 }
761                 
762         oneg=neg;
763         /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
764         /* r[10] = (a[1]*b[1]) */
765 # ifdef BN_MUL_COMBA
766         if (n == 8)
767                 {
768                 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
769                 bn_mul_comba8(r,&(a[n]),&(b[n]));
770                 }
771         else
772 # endif
773                 {
774                 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
775                 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
776                 }
777
778         /* s0 == low(al*bl)
779          * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
780          * We know s0 and s1 so the only unknown is high(al*bl)
781          * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
782          * high(al*bl) == s1 - (r[0]+l[0]+t[0])
783          */
784         if (l != NULL)
785                 {
786                 lp= &(t[n2+n]);
787                 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
788                 }
789         else
790                 {
791                 c1=0;
792                 lp= &(r[0]);
793                 }
794
795         if (neg)
796                 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
797         else
798                 {
799                 bn_add_words(&(t[n2]),lp,&(t[0]),n);
800                 neg=0;
801                 }
802
803         if (l != NULL)
804                 {
805                 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
806                 }
807         else
808                 {
809                 lp= &(t[n2+n]);
810                 mp= &(t[n2]);
811                 for (i=0; i<n; i++)
812                         lp[i]=((~mp[i])+1)&BN_MASK2;
813                 }
814
815         /* s[0] = low(al*bl)
816          * t[3] = high(al*bl)
817          * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
818          * r[10] = (a[1]*b[1])
819          */
820         /* R[10] = al*bl
821          * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
822          * R[32] = ah*bh
823          */
824         /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
825          * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
826          * R[3]=r[1]+(carry/borrow)
827          */
828         if (l != NULL)
829                 {
830                 lp= &(t[n2]);
831                 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
832                 }
833         else
834                 {
835                 lp= &(t[n2+n]);
836                 c1=0;
837                 }
838         c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
839         if (oneg)
840                 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
841         else
842                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
843
844         c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
845         c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
846         if (oneg)
847                 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
848         else
849                 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
850         
851         if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
852                 {
853                 i=0;
854                 if (c1 > 0)
855                         {
856                         lc=c1;
857                         do      {
858                                 ll=(r[i]+lc)&BN_MASK2;
859                                 r[i++]=ll;
860                                 lc=(lc > ll);
861                                 } while (lc);
862                         }
863                 else
864                         {
865                         lc= -c1;
866                         do      {
867                                 ll=r[i];
868                                 r[i++]=(ll-lc)&BN_MASK2;
869                                 lc=(lc > ll);
870                                 } while (lc);
871                         }
872                 }
873         if (c2 != 0) /* Add starting at r[1] */
874                 {
875                 i=n;
876                 if (c2 > 0)
877                         {
878                         lc=c2;
879                         do      {
880                                 ll=(r[i]+lc)&BN_MASK2;
881                                 r[i++]=ll;
882                                 lc=(lc > ll);
883                                 } while (lc);
884                         }
885                 else
886                         {
887                         lc= -c2;
888                         do      {
889                                 ll=r[i];
890                                 r[i++]=(ll-lc)&BN_MASK2;
891                                 lc=(lc > ll);
892                                 } while (lc);
893                         }
894                 }
895         }
896 #endif /* BN_RECURSION */
897
898 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
899         {
900         int ret=0;
901         int top,al,bl;
902         BIGNUM *rr;
903 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
904         int i;
905 #endif
906 #ifdef BN_RECURSION
907         BIGNUM *t=NULL;
908         int j=0,k;
909 #endif
910
911         bn_check_top(a);
912         bn_check_top(b);
913         bn_check_top(r);
914
915         al=a->top;
916         bl=b->top;
917
918         if ((al == 0) || (bl == 0))
919                 {
920                 BN_zero(r);
921                 return(1);
922                 }
923         top=al+bl;
924
925         BN_CTX_start(ctx);
926         if ((r == a) || (r == b))
927                 {
928                 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
929                 }
930         else
931                 rr = r;
932         rr->neg=a->neg^b->neg;
933
934 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
935         i = al-bl;
936 #endif
937 #ifdef BN_MUL_COMBA
938         if (i == 0)
939                 {
940 # if 0
941                 if (al == 4)
942                         {
943                         if (bn_wexpand(rr,8) == NULL) goto err;
944                         rr->top=8;
945                         bn_mul_comba4(rr->d,a->d,b->d);
946                         goto end;
947                         }
948 # endif
949                 if (al == 8)
950                         {
951                         if (bn_wexpand(rr,16) == NULL) goto err;
952                         rr->top=16;
953                         bn_mul_comba8(rr->d,a->d,b->d);
954                         goto end;
955                         }
956                 }
957 #endif /* BN_MUL_COMBA */
958 #ifdef BN_RECURSION
959         if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
960                 {
961                 if (i >= -1 && i <= 1)
962                         {
963                         /* Find out the power of two lower or equal
964                            to the longest of the two numbers */
965                         if (i >= 0)
966                                 {
967                                 j = BN_num_bits_word((BN_ULONG)al);
968                                 }
969                         if (i == -1)
970                                 {
971                                 j = BN_num_bits_word((BN_ULONG)bl);
972                                 }
973                         j = 1<<(j-1);
974                         assert(j <= al || j <= bl);
975                         k = j+j;
976                         t = BN_CTX_get(ctx);
977                         if (t == NULL)
978                                 goto err;
979                         if (al > j || bl > j)
980                                 {
981                                 if (bn_wexpand(t,k*4) == NULL) goto err;
982                                 if (bn_wexpand(rr,k*4) == NULL) goto err;
983                                 bn_mul_part_recursive(rr->d,a->d,b->d,
984                                         j,al-j,bl-j,t->d);
985                                 }
986                         else    /* al <= j || bl <= j */
987                                 {
988                                 if (bn_wexpand(t,k*2) == NULL) goto err;
989                                 if (bn_wexpand(rr,k*2) == NULL) goto err;
990                                 bn_mul_recursive(rr->d,a->d,b->d,
991                                         j,al-j,bl-j,t->d);
992                                 }
993                         rr->top=top;
994                         goto end;
995                         }
996 #if 0
997                 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
998                         {
999                         BIGNUM *tmp_bn = (BIGNUM *)b;
1000                         if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1001                         tmp_bn->d[bl]=0;
1002                         bl++;
1003                         i--;
1004                         }
1005                 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1006                         {
1007                         BIGNUM *tmp_bn = (BIGNUM *)a;
1008                         if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1009                         tmp_bn->d[al]=0;
1010                         al++;
1011                         i++;
1012                         }
1013                 if (i == 0)
1014                         {
1015                         /* symmetric and > 4 */
1016                         /* 16 or larger */
1017                         j=BN_num_bits_word((BN_ULONG)al);
1018                         j=1<<(j-1);
1019                         k=j+j;
1020                         t = BN_CTX_get(ctx);
1021                         if (al == j) /* exact multiple */
1022                                 {
1023                                 if (bn_wexpand(t,k*2) == NULL) goto err;
1024                                 if (bn_wexpand(rr,k*2) == NULL) goto err;
1025                                 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1026                                 }
1027                         else
1028                                 {
1029                                 if (bn_wexpand(t,k*4) == NULL) goto err;
1030                                 if (bn_wexpand(rr,k*4) == NULL) goto err;
1031                                 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1032                                 }
1033                         rr->top=top;
1034                         goto end;
1035                         }
1036 #endif
1037                 }
1038 #endif /* BN_RECURSION */
1039         if (bn_wexpand(rr,top) == NULL) goto err;
1040         rr->top=top;
1041         bn_mul_normal(rr->d,a->d,al,b->d,bl);
1042
1043 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1044 end:
1045 #endif
1046         bn_correct_top(rr);
1047         if (r != rr) BN_copy(r,rr);
1048         ret=1;
1049 err:
1050         bn_check_top(r);
1051         BN_CTX_end(ctx);
1052         return(ret);
1053         }
1054
1055 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1056         {
1057         BN_ULONG *rr;
1058
1059         if (na < nb)
1060                 {
1061                 int itmp;
1062                 BN_ULONG *ltmp;
1063
1064                 itmp=na; na=nb; nb=itmp;
1065                 ltmp=a;   a=b;   b=ltmp;
1066
1067                 }
1068         rr= &(r[na]);
1069         if (nb <= 0)
1070                 {
1071                 (void)bn_mul_words(r,a,na,0);
1072                 return;
1073                 }
1074         else
1075                 rr[0]=bn_mul_words(r,a,na,b[0]);
1076
1077         for (;;)
1078                 {
1079                 if (--nb <= 0) return;
1080                 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1081                 if (--nb <= 0) return;
1082                 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1083                 if (--nb <= 0) return;
1084                 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1085                 if (--nb <= 0) return;
1086                 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1087                 rr+=4;
1088                 r+=4;
1089                 b+=4;
1090                 }
1091         }
1092
1093 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1094         {
1095         bn_mul_words(r,a,n,b[0]);
1096
1097         for (;;)
1098                 {
1099                 if (--n <= 0) return;
1100                 bn_mul_add_words(&(r[1]),a,n,b[1]);
1101                 if (--n <= 0) return;
1102                 bn_mul_add_words(&(r[2]),a,n,b[2]);
1103                 if (--n <= 0) return;
1104                 bn_mul_add_words(&(r[3]),a,n,b[3]);
1105                 if (--n <= 0) return;
1106                 bn_mul_add_words(&(r[4]),a,n,b[4]);
1107                 r+=4;
1108                 b+=4;
1109                 }
1110         }