mark all block comments that need format preserving so that
[openssl.git] / crypto / bn / bn_mul.c
1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  * 
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  * 
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  * 
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from 
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  * 
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * 
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63
64 #include <assert.h>
65 #include "cryptlib.h"
66 #include "bn_lcl.h"
67
68 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
69 /* Here follows specialised variants of bn_add_words() and
70    bn_sub_words().  They have the property performing operations on
71    arrays of different sizes.  The sizes of those arrays is expressed through
72    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
73    which is the delta between the two lengths, calculated as len(a)-len(b).
74    All lengths are the number of BN_ULONGs...  For the operations that require
75    a result array as parameter, it must have the length cl+abs(dl).
76    These functions should probably end up in bn_asm.c as soon as there are
77    assembler counterparts for the systems that use assembler files.  */
78
79 BN_ULONG bn_sub_part_words(BN_ULONG *r,
80         const BN_ULONG *a, const BN_ULONG *b,
81         int cl, int dl)
82         {
83         BN_ULONG c, t;
84
85         assert(cl >= 0);
86         c = bn_sub_words(r, a, b, cl);
87
88         if (dl == 0)
89                 return c;
90
91         r += cl;
92         a += cl;
93         b += cl;
94
95         if (dl < 0)
96                 {
97                 for (;;)
98                         {
99                         t = b[0];
100                         r[0] = (0-t-c)&BN_MASK2;
101                         if (t != 0) c=1;
102                         if (++dl >= 0) break;
103
104                         t = b[1];
105                         r[1] = (0-t-c)&BN_MASK2;
106                         if (t != 0) c=1;
107                         if (++dl >= 0) break;
108
109                         t = b[2];
110                         r[2] = (0-t-c)&BN_MASK2;
111                         if (t != 0) c=1;
112                         if (++dl >= 0) break;
113
114                         t = b[3];
115                         r[3] = (0-t-c)&BN_MASK2;
116                         if (t != 0) c=1;
117                         if (++dl >= 0) break;
118
119                         b += 4;
120                         r += 4;
121                         }
122                 }
123         else
124                 {
125                 int save_dl = dl;
126                 while(c)
127                         {
128                         t = a[0];
129                         r[0] = (t-c)&BN_MASK2;
130                         if (t != 0) c=0;
131                         if (--dl <= 0) break;
132
133                         t = a[1];
134                         r[1] = (t-c)&BN_MASK2;
135                         if (t != 0) c=0;
136                         if (--dl <= 0) break;
137
138                         t = a[2];
139                         r[2] = (t-c)&BN_MASK2;
140                         if (t != 0) c=0;
141                         if (--dl <= 0) break;
142
143                         t = a[3];
144                         r[3] = (t-c)&BN_MASK2;
145                         if (t != 0) c=0;
146                         if (--dl <= 0) break;
147
148                         save_dl = dl;
149                         a += 4;
150                         r += 4;
151                         }
152                 if (dl > 0)
153                         {
154                         if (save_dl > dl)
155                                 {
156                                 switch (save_dl - dl)
157                                         {
158                                 case 1:
159                                         r[1] = a[1];
160                                         if (--dl <= 0) break;
161                                 case 2:
162                                         r[2] = a[2];
163                                         if (--dl <= 0) break;
164                                 case 3:
165                                         r[3] = a[3];
166                                         if (--dl <= 0) break;
167                                         }
168                                 a += 4;
169                                 r += 4;
170                                 }
171                         }
172                 if (dl > 0)
173                         {
174                         for(;;)
175                                 {
176                                 r[0] = a[0];
177                                 if (--dl <= 0) break;
178                                 r[1] = a[1];
179                                 if (--dl <= 0) break;
180                                 r[2] = a[2];
181                                 if (--dl <= 0) break;
182                                 r[3] = a[3];
183                                 if (--dl <= 0) break;
184
185                                 a += 4;
186                                 r += 4;
187                                 }
188                         }
189                 }
190         return c;
191         }
192 #endif
193
194 BN_ULONG bn_add_part_words(BN_ULONG *r,
195         const BN_ULONG *a, const BN_ULONG *b,
196         int cl, int dl)
197         {
198         BN_ULONG c, l, t;
199
200         assert(cl >= 0);
201         c = bn_add_words(r, a, b, cl);
202
203         if (dl == 0)
204                 return c;
205
206         r += cl;
207         a += cl;
208         b += cl;
209
210         if (dl < 0)
211                 {
212                 int save_dl = dl;
213                 while (c)
214                         {
215                         l=(c+b[0])&BN_MASK2;
216                         c=(l < c);
217                         r[0]=l;
218                         if (++dl >= 0) break;
219
220                         l=(c+b[1])&BN_MASK2;
221                         c=(l < c);
222                         r[1]=l;
223                         if (++dl >= 0) break;
224
225                         l=(c+b[2])&BN_MASK2;
226                         c=(l < c);
227                         r[2]=l;
228                         if (++dl >= 0) break;
229
230                         l=(c+b[3])&BN_MASK2;
231                         c=(l < c);
232                         r[3]=l;
233                         if (++dl >= 0) break;
234
235                         save_dl = dl;
236                         b+=4;
237                         r+=4;
238                         }
239                 if (dl < 0)
240                         {
241                         if (save_dl < dl)
242                                 {
243                                 switch (dl - save_dl)
244                                         {
245                                 case 1:
246                                         r[1] = b[1];
247                                         if (++dl >= 0) break;
248                                 case 2:
249                                         r[2] = b[2];
250                                         if (++dl >= 0) break;
251                                 case 3:
252                                         r[3] = b[3];
253                                         if (++dl >= 0) break;
254                                         }
255                                 b += 4;
256                                 r += 4;
257                                 }
258                         }
259                 if (dl < 0)
260                         {
261                         for(;;)
262                                 {
263                                 r[0] = b[0];
264                                 if (++dl >= 0) break;
265                                 r[1] = b[1];
266                                 if (++dl >= 0) break;
267                                 r[2] = b[2];
268                                 if (++dl >= 0) break;
269                                 r[3] = b[3];
270                                 if (++dl >= 0) break;
271
272                                 b += 4;
273                                 r += 4;
274                                 }
275                         }
276                 }
277         else
278                 {
279                 int save_dl = dl;
280                 while (c)
281                         {
282                         t=(a[0]+c)&BN_MASK2;
283                         c=(t < c);
284                         r[0]=t;
285                         if (--dl <= 0) break;
286
287                         t=(a[1]+c)&BN_MASK2;
288                         c=(t < c);
289                         r[1]=t;
290                         if (--dl <= 0) break;
291
292                         t=(a[2]+c)&BN_MASK2;
293                         c=(t < c);
294                         r[2]=t;
295                         if (--dl <= 0) break;
296
297                         t=(a[3]+c)&BN_MASK2;
298                         c=(t < c);
299                         r[3]=t;
300                         if (--dl <= 0) break;
301
302                         save_dl = dl;
303                         a+=4;
304                         r+=4;
305                         }
306                 if (dl > 0)
307                         {
308                         if (save_dl > dl)
309                                 {
310                                 switch (save_dl - dl)
311                                         {
312                                 case 1:
313                                         r[1] = a[1];
314                                         if (--dl <= 0) break;
315                                 case 2:
316                                         r[2] = a[2];
317                                         if (--dl <= 0) break;
318                                 case 3:
319                                         r[3] = a[3];
320                                         if (--dl <= 0) break;
321                                         }
322                                 a += 4;
323                                 r += 4;
324                                 }
325                         }
326                 if (dl > 0)
327                         {
328                         for(;;)
329                                 {
330                                 r[0] = a[0];
331                                 if (--dl <= 0) break;
332                                 r[1] = a[1];
333                                 if (--dl <= 0) break;
334                                 r[2] = a[2];
335                                 if (--dl <= 0) break;
336                                 r[3] = a[3];
337                                 if (--dl <= 0) break;
338
339                                 a += 4;
340                                 r += 4;
341                                 }
342                         }
343                 }
344         return c;
345         }
346
347 #ifdef BN_RECURSION
348 /* Karatsuba recursive multiplication algorithm
349  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
350
351 /*-
352  * r is 2*n2 words in size,
353  * a and b are both n2 words in size.
354  * n2 must be a power of 2.
355  * We multiply and return the result.
356  * t must be 2*n2 words in size
357  * We calculate
358  * a[0]*b[0]
359  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
360  * a[1]*b[1]
361  */
362 /* dnX may not be positive, but n2/2+dnX has to be */
363 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
364         int dna, int dnb, BN_ULONG *t)
365         {
366         int n=n2/2,c1,c2;
367         int tna=n+dna, tnb=n+dnb;
368         unsigned int neg,zero;
369         BN_ULONG ln,lo,*p;
370
371 # ifdef BN_MUL_COMBA
372 #  if 0
373         if (n2 == 4)
374                 {
375                 bn_mul_comba4(r,a,b);
376                 return;
377                 }
378 #  endif
379         /* Only call bn_mul_comba 8 if n2 == 8 and the
380          * two arrays are complete [steve]
381          */
382         if (n2 == 8 && dna == 0 && dnb == 0)
383                 {
384                 bn_mul_comba8(r,a,b);
385                 return; 
386                 }
387 # endif /* BN_MUL_COMBA */
388         /* Else do normal multiply */
389         if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
390                 {
391                 bn_mul_normal(r,a,n2+dna,b,n2+dnb);
392                 if ((dna + dnb) < 0)
393                         memset(&r[2*n2 + dna + dnb], 0,
394                                 sizeof(BN_ULONG) * -(dna + dnb));
395                 return;
396                 }
397         /* r=(a[0]-a[1])*(b[1]-b[0]) */
398         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
399         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
400         zero=neg=0;
401         switch (c1*3+c2)
402                 {
403         case -4:
404                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
405                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
406                 break;
407         case -3:
408                 zero=1;
409                 break;
410         case -2:
411                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
412                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
413                 neg=1;
414                 break;
415         case -1:
416         case 0:
417         case 1:
418                 zero=1;
419                 break;
420         case 2:
421                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
422                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
423                 neg=1;
424                 break;
425         case 3:
426                 zero=1;
427                 break;
428         case 4:
429                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
430                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
431                 break;
432                 }
433
434 # ifdef BN_MUL_COMBA
435         if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
436                                                extra args to do this well */
437                 {
438                 if (!zero)
439                         bn_mul_comba4(&(t[n2]),t,&(t[n]));
440                 else
441                         memset(&(t[n2]),0,8*sizeof(BN_ULONG));
442                 
443                 bn_mul_comba4(r,a,b);
444                 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
445                 }
446         else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
447                                                     take extra args to do this
448                                                     well */
449                 {
450                 if (!zero)
451                         bn_mul_comba8(&(t[n2]),t,&(t[n]));
452                 else
453                         memset(&(t[n2]),0,16*sizeof(BN_ULONG));
454                 
455                 bn_mul_comba8(r,a,b);
456                 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
457                 }
458         else
459 # endif /* BN_MUL_COMBA */
460                 {
461                 p= &(t[n2*2]);
462                 if (!zero)
463                         bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
464                 else
465                         memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
466                 bn_mul_recursive(r,a,b,n,0,0,p);
467                 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
468                 }
469
470         /*-
471          * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
472          * r[10] holds (a[0]*b[0])
473          * r[32] holds (b[1]*b[1])
474          */
475
476         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
477
478         if (neg) /* if t[32] is negative */
479                 {
480                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
481                 }
482         else
483                 {
484                 /* Might have a carry */
485                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
486                 }
487
488         /*-
489          * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
490          * r[10] holds (a[0]*b[0])
491          * r[32] holds (b[1]*b[1])
492          * c1 holds the carry bits
493          */
494         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
495         if (c1)
496                 {
497                 p= &(r[n+n2]);
498                 lo= *p;
499                 ln=(lo+c1)&BN_MASK2;
500                 *p=ln;
501
502                 /* The overflow will stop before we over write
503                  * words we should not overwrite */
504                 if (ln < (BN_ULONG)c1)
505                         {
506                         do      {
507                                 p++;
508                                 lo= *p;
509                                 ln=(lo+1)&BN_MASK2;
510                                 *p=ln;
511                                 } while (ln == 0);
512                         }
513                 }
514         }
515
516 /* n+tn is the word length
517  * t needs to be n*4 is size, as does r */
518 /* tnX may not be negative but less than n */
519 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
520              int tna, int tnb, BN_ULONG *t)
521         {
522         int i,j,n2=n*2;
523         int c1,c2,neg;
524         BN_ULONG ln,lo,*p;
525
526         if (n < 8)
527                 {
528                 bn_mul_normal(r,a,n+tna,b,n+tnb);
529                 return;
530                 }
531
532         /* r=(a[0]-a[1])*(b[1]-b[0]) */
533         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
534         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
535         neg=0;
536         switch (c1*3+c2)
537                 {
538         case -4:
539                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
540                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
541                 break;
542         case -3:
543                 /* break; */
544         case -2:
545                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
546                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
547                 neg=1;
548                 break;
549         case -1:
550         case 0:
551         case 1:
552                 /* break; */
553         case 2:
554                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
555                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
556                 neg=1;
557                 break;
558         case 3:
559                 /* break; */
560         case 4:
561                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
562                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
563                 break;
564                 }
565                 /* The zero case isn't yet implemented here. The speedup
566                    would probably be negligible. */
567 # if 0
568         if (n == 4)
569                 {
570                 bn_mul_comba4(&(t[n2]),t,&(t[n]));
571                 bn_mul_comba4(r,a,b);
572                 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
573                 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
574                 }
575         else
576 # endif
577         if (n == 8)
578                 {
579                 bn_mul_comba8(&(t[n2]),t,&(t[n]));
580                 bn_mul_comba8(r,a,b);
581                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
582                 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
583                 }
584         else
585                 {
586                 p= &(t[n2*2]);
587                 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
588                 bn_mul_recursive(r,a,b,n,0,0,p);
589                 i=n/2;
590                 /* If there is only a bottom half to the number,
591                  * just do it */
592                 if (tna > tnb)
593                         j = tna - i;
594                 else
595                         j = tnb - i;
596                 if (j == 0)
597                         {
598                         bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
599                                 i,tna-i,tnb-i,p);
600                         memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
601                         }
602                 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
603                                 {
604                                 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
605                                         i,tna-i,tnb-i,p);
606                                 memset(&(r[n2+tna+tnb]),0,
607                                         sizeof(BN_ULONG)*(n2-tna-tnb));
608                                 }
609                 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
610                         {
611                         memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
612                         if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
613                                 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
614                                 {
615                                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
616                                 }
617                         else
618                                 {
619                                 for (;;)
620                                         {
621                                         i/=2;
622                                         /* these simplified conditions work
623                                          * exclusively because difference
624                                          * between tna and tnb is 1 or 0 */
625                                         if (i < tna || i < tnb)
626                                                 {
627                                                 bn_mul_part_recursive(&(r[n2]),
628                                                         &(a[n]),&(b[n]),
629                                                         i,tna-i,tnb-i,p);
630                                                 break;
631                                                 }
632                                         else if (i == tna || i == tnb)
633                                                 {
634                                                 bn_mul_recursive(&(r[n2]),
635                                                         &(a[n]),&(b[n]),
636                                                         i,tna-i,tnb-i,p);
637                                                 break;
638                                                 }
639                                         }
640                                 }
641                         }
642                 }
643
644         /*-
645          * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
646          * r[10] holds (a[0]*b[0])
647          * r[32] holds (b[1]*b[1])
648          */
649
650         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
651
652         if (neg) /* if t[32] is negative */
653                 {
654                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
655                 }
656         else
657                 {
658                 /* Might have a carry */
659                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
660                 }
661
662         /*-
663          * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
664          * r[10] holds (a[0]*b[0])
665          * r[32] holds (b[1]*b[1])
666          * c1 holds the carry bits
667          */
668         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
669         if (c1)
670                 {
671                 p= &(r[n+n2]);
672                 lo= *p;
673                 ln=(lo+c1)&BN_MASK2;
674                 *p=ln;
675
676                 /* The overflow will stop before we over write
677                  * words we should not overwrite */
678                 if (ln < (BN_ULONG)c1)
679                         {
680                         do      {
681                                 p++;
682                                 lo= *p;
683                                 ln=(lo+1)&BN_MASK2;
684                                 *p=ln;
685                                 } while (ln == 0);
686                         }
687                 }
688         }
689
690 /*-
691  * a and b must be the same size, which is n2.
692  * r needs to be n2 words and t needs to be n2*2
693  */
694 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
695              BN_ULONG *t)
696         {
697         int n=n2/2;
698
699         bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
700         if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
701                 {
702                 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
703                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
704                 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
705                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
706                 }
707         else
708                 {
709                 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
710                 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
711                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
712                 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
713                 }
714         }
715
716 /*-
717  * a and b must be the same size, which is n2.
718  * r needs to be n2 words and t needs to be n2*2
719  * l is the low words of the output.
720  * t needs to be n2*3
721  */
722 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
723              BN_ULONG *t)
724         {
725         int i,n;
726         int c1,c2;
727         int neg,oneg,zero;
728         BN_ULONG ll,lc,*lp,*mp;
729
730         n=n2/2;
731
732         /* Calculate (al-ah)*(bh-bl) */
733         neg=zero=0;
734         c1=bn_cmp_words(&(a[0]),&(a[n]),n);
735         c2=bn_cmp_words(&(b[n]),&(b[0]),n);
736         switch (c1*3+c2)
737                 {
738         case -4:
739                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
740                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
741                 break;
742         case -3:
743                 zero=1;
744                 break;
745         case -2:
746                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
747                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
748                 neg=1;
749                 break;
750         case -1:
751         case 0:
752         case 1:
753                 zero=1;
754                 break;
755         case 2:
756                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
757                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
758                 neg=1;
759                 break;
760         case 3:
761                 zero=1;
762                 break;
763         case 4:
764                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
765                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
766                 break;
767                 }
768                 
769         oneg=neg;
770         /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
771         /* r[10] = (a[1]*b[1]) */
772 # ifdef BN_MUL_COMBA
773         if (n == 8)
774                 {
775                 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
776                 bn_mul_comba8(r,&(a[n]),&(b[n]));
777                 }
778         else
779 # endif
780                 {
781                 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
782                 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
783                 }
784
785         /*-
786          * s0 == low(al*bl)
787          * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
788          * We know s0 and s1 so the only unknown is high(al*bl)
789          * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
790          * high(al*bl) == s1 - (r[0]+l[0]+t[0])
791          */
792         if (l != NULL)
793                 {
794                 lp= &(t[n2+n]);
795                 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
796                 }
797         else
798                 {
799                 c1=0;
800                 lp= &(r[0]);
801                 }
802
803         if (neg)
804                 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
805         else
806                 {
807                 bn_add_words(&(t[n2]),lp,&(t[0]),n);
808                 neg=0;
809                 }
810
811         if (l != NULL)
812                 {
813                 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
814                 }
815         else
816                 {
817                 lp= &(t[n2+n]);
818                 mp= &(t[n2]);
819                 for (i=0; i<n; i++)
820                         lp[i]=((~mp[i])+1)&BN_MASK2;
821                 }
822
823         /*-
824          * s[0] = low(al*bl)
825          * t[3] = high(al*bl)
826          * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
827          * r[10] = (a[1]*b[1])
828          */
829         /*-
830          * R[10] = al*bl
831          * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
832          * R[32] = ah*bh
833          */
834         /*-
835          * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
836          * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
837          * R[3]=r[1]+(carry/borrow)
838          */
839         if (l != NULL)
840                 {
841                 lp= &(t[n2]);
842                 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
843                 }
844         else
845                 {
846                 lp= &(t[n2+n]);
847                 c1=0;
848                 }
849         c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
850         if (oneg)
851                 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
852         else
853                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
854
855         c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
856         c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
857         if (oneg)
858                 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
859         else
860                 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
861         
862         if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
863                 {
864                 i=0;
865                 if (c1 > 0)
866                         {
867                         lc=c1;
868                         do      {
869                                 ll=(r[i]+lc)&BN_MASK2;
870                                 r[i++]=ll;
871                                 lc=(lc > ll);
872                                 } while (lc);
873                         }
874                 else
875                         {
876                         lc= -c1;
877                         do      {
878                                 ll=r[i];
879                                 r[i++]=(ll-lc)&BN_MASK2;
880                                 lc=(lc > ll);
881                                 } while (lc);
882                         }
883                 }
884         if (c2 != 0) /* Add starting at r[1] */
885                 {
886                 i=n;
887                 if (c2 > 0)
888                         {
889                         lc=c2;
890                         do      {
891                                 ll=(r[i]+lc)&BN_MASK2;
892                                 r[i++]=ll;
893                                 lc=(lc > ll);
894                                 } while (lc);
895                         }
896                 else
897                         {
898                         lc= -c2;
899                         do      {
900                                 ll=r[i];
901                                 r[i++]=(ll-lc)&BN_MASK2;
902                                 lc=(lc > ll);
903                                 } while (lc);
904                         }
905                 }
906         }
907 #endif /* BN_RECURSION */
908
909 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
910         {
911         int ret=0;
912         int top,al,bl;
913         BIGNUM *rr;
914 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
915         int i;
916 #endif
917 #ifdef BN_RECURSION
918         BIGNUM *t=NULL;
919         int j=0,k;
920 #endif
921
922         bn_check_top(a);
923         bn_check_top(b);
924         bn_check_top(r);
925
926         al=a->top;
927         bl=b->top;
928
929         if ((al == 0) || (bl == 0))
930                 {
931                 BN_zero(r);
932                 return(1);
933                 }
934         top=al+bl;
935
936         BN_CTX_start(ctx);
937         if ((r == a) || (r == b))
938                 {
939                 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
940                 }
941         else
942                 rr = r;
943         rr->neg=a->neg^b->neg;
944
945 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
946         i = al-bl;
947 #endif
948 #ifdef BN_MUL_COMBA
949         if (i == 0)
950                 {
951 # if 0
952                 if (al == 4)
953                         {
954                         if (bn_wexpand(rr,8) == NULL) goto err;
955                         rr->top=8;
956                         bn_mul_comba4(rr->d,a->d,b->d);
957                         goto end;
958                         }
959 # endif
960                 if (al == 8)
961                         {
962                         if (bn_wexpand(rr,16) == NULL) goto err;
963                         rr->top=16;
964                         bn_mul_comba8(rr->d,a->d,b->d);
965                         goto end;
966                         }
967                 }
968 #endif /* BN_MUL_COMBA */
969 #ifdef BN_RECURSION
970         if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
971                 {
972                 if (i >= -1 && i <= 1)
973                         {
974                         /* Find out the power of two lower or equal
975                            to the longest of the two numbers */
976                         if (i >= 0)
977                                 {
978                                 j = BN_num_bits_word((BN_ULONG)al);
979                                 }
980                         if (i == -1)
981                                 {
982                                 j = BN_num_bits_word((BN_ULONG)bl);
983                                 }
984                         j = 1<<(j-1);
985                         assert(j <= al || j <= bl);
986                         k = j+j;
987                         t = BN_CTX_get(ctx);
988                         if (t == NULL)
989                                 goto err;
990                         if (al > j || bl > j)
991                                 {
992                                 if (bn_wexpand(t,k*4) == NULL) goto err;
993                                 if (bn_wexpand(rr,k*4) == NULL) goto err;
994                                 bn_mul_part_recursive(rr->d,a->d,b->d,
995                                         j,al-j,bl-j,t->d);
996                                 }
997                         else    /* al <= j || bl <= j */
998                                 {
999                                 if (bn_wexpand(t,k*2) == NULL) goto err;
1000                                 if (bn_wexpand(rr,k*2) == NULL) goto err;
1001                                 bn_mul_recursive(rr->d,a->d,b->d,
1002                                         j,al-j,bl-j,t->d);
1003                                 }
1004                         rr->top=top;
1005                         goto end;
1006                         }
1007 #if 0
1008                 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1009                         {
1010                         BIGNUM *tmp_bn = (BIGNUM *)b;
1011                         if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1012                         tmp_bn->d[bl]=0;
1013                         bl++;
1014                         i--;
1015                         }
1016                 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1017                         {
1018                         BIGNUM *tmp_bn = (BIGNUM *)a;
1019                         if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1020                         tmp_bn->d[al]=0;
1021                         al++;
1022                         i++;
1023                         }
1024                 if (i == 0)
1025                         {
1026                         /* symmetric and > 4 */
1027                         /* 16 or larger */
1028                         j=BN_num_bits_word((BN_ULONG)al);
1029                         j=1<<(j-1);
1030                         k=j+j;
1031                         t = BN_CTX_get(ctx);
1032                         if (al == j) /* exact multiple */
1033                                 {
1034                                 if (bn_wexpand(t,k*2) == NULL) goto err;
1035                                 if (bn_wexpand(rr,k*2) == NULL) goto err;
1036                                 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1037                                 }
1038                         else
1039                                 {
1040                                 if (bn_wexpand(t,k*4) == NULL) goto err;
1041                                 if (bn_wexpand(rr,k*4) == NULL) goto err;
1042                                 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1043                                 }
1044                         rr->top=top;
1045                         goto end;
1046                         }
1047 #endif
1048                 }
1049 #endif /* BN_RECURSION */
1050         if (bn_wexpand(rr,top) == NULL) goto err;
1051         rr->top=top;
1052         bn_mul_normal(rr->d,a->d,al,b->d,bl);
1053
1054 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1055 end:
1056 #endif
1057         bn_correct_top(rr);
1058         if (r != rr) BN_copy(r,rr);
1059         ret=1;
1060 err:
1061         bn_check_top(r);
1062         BN_CTX_end(ctx);
1063         return(ret);
1064         }
1065
1066 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1067         {
1068         BN_ULONG *rr;
1069
1070         if (na < nb)
1071                 {
1072                 int itmp;
1073                 BN_ULONG *ltmp;
1074
1075                 itmp=na; na=nb; nb=itmp;
1076                 ltmp=a;   a=b;   b=ltmp;
1077
1078                 }
1079         rr= &(r[na]);
1080         if (nb <= 0)
1081                 {
1082                 (void)bn_mul_words(r,a,na,0);
1083                 return;
1084                 }
1085         else
1086                 rr[0]=bn_mul_words(r,a,na,b[0]);
1087
1088         for (;;)
1089                 {
1090                 if (--nb <= 0) return;
1091                 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1092                 if (--nb <= 0) return;
1093                 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1094                 if (--nb <= 0) return;
1095                 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1096                 if (--nb <= 0) return;
1097                 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1098                 rr+=4;
1099                 r+=4;
1100                 b+=4;
1101                 }
1102         }
1103
1104 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1105         {
1106         bn_mul_words(r,a,n,b[0]);
1107
1108         for (;;)
1109                 {
1110                 if (--n <= 0) return;
1111                 bn_mul_add_words(&(r[1]),a,n,b[1]);
1112                 if (--n <= 0) return;
1113                 bn_mul_add_words(&(r[2]),a,n,b[2]);
1114                 if (--n <= 0) return;
1115                 bn_mul_add_words(&(r[3]),a,n,b[3]);
1116                 if (--n <= 0) return;
1117                 bn_mul_add_words(&(r[4]),a,n,b[4]);
1118                 r+=4;
1119                 b+=4;
1120                 }
1121         }