b7f8a51675b35830e68538f72d1887e23a92cd4b
[openssl.git] / crypto / bn / bn_mul.c
1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  * 
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  * 
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  * 
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from 
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  * 
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * 
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58
59 #include <stdio.h>
60 #include <assert.h>
61 #include "cryptlib.h"
62 #include "bn_lcl.h"
63
64 /* Here follows specialised variants of bn_add_words() and
65    bn_sub_words().  They have the property performing operations on
66    arrays of different sizes.  The sizes of those arrays is expressed through
67    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
68    which is the delta between the two lengths, calculated as len(a)-len(b).
69    All lengths are the number of BN_ULONGs...  For the operations that require
70    a result array as parameter, it must have the length cl+abs(dl).
71    These functions should probably end up in bn_asm.c as soon as there are
72    assembler counterparts for the systems that use assembler files.  */
73
74 BN_ULONG bn_sub_part_words(BN_ULONG *r,
75         const BN_ULONG *a, const BN_ULONG *b,
76         int cl, int dl)
77         {
78         BN_ULONG c, t;
79
80         assert(cl >= 0);
81         c = bn_sub_words(r, a, b, cl);
82
83         if (dl == 0)
84                 return c;
85
86         r += cl;
87         a += cl;
88         b += cl;
89
90         if (dl < 0)
91                 {
92 #ifdef BN_COUNT
93                 fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
94 #endif
95                 for (;;)
96                         {
97                         t = b[0];
98                         r[0] = (0-t-c)&BN_MASK2;
99                         if (t != 0) c=1;
100                         if (++dl >= 0) break;
101
102                         t = b[1];
103                         r[1] = (0-t-c)&BN_MASK2;
104                         if (t != 0) c=1;
105                         if (++dl >= 0) break;
106
107                         t = b[2];
108                         r[2] = (0-t-c)&BN_MASK2;
109                         if (t != 0) c=1;
110                         if (++dl >= 0) break;
111
112                         t = b[3];
113                         r[3] = (0-t-c)&BN_MASK2;
114                         if (t != 0) c=1;
115                         if (++dl >= 0) break;
116
117                         b += 4;
118                         r += 4;
119                         }
120                 }
121         else
122                 {
123                 int save_dl = dl;
124 #ifdef BN_COUNT
125                 fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
126 #endif
127                 while(c)
128                         {
129                         t = a[0];
130                         r[0] = (t-c)&BN_MASK2;
131                         if (t != 0) c=0;
132                         if (--dl <= 0) break;
133
134                         t = a[1];
135                         r[1] = (t-c)&BN_MASK2;
136                         if (t != 0) c=0;
137                         if (--dl <= 0) break;
138
139                         t = a[2];
140                         r[2] = (t-c)&BN_MASK2;
141                         if (t != 0) c=0;
142                         if (--dl <= 0) break;
143
144                         t = a[3];
145                         r[3] = (t-c)&BN_MASK2;
146                         if (t != 0) c=0;
147                         if (--dl <= 0) break;
148
149                         save_dl = dl;
150                         a += 4;
151                         r += 4;
152                         }
153                 if (dl > 0)
154                         {
155 #ifdef BN_COUNT
156                         fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
157 #endif
158                         if (save_dl > dl)
159                                 {
160                                 switch (save_dl - dl)
161                                         {
162                                 case 1:
163                                         r[1] = a[1];
164                                         if (--dl <= 0) break;
165                                 case 2:
166                                         r[2] = a[2];
167                                         if (--dl <= 0) break;
168                                 case 3:
169                                         r[3] = a[3];
170                                         if (--dl <= 0) break;
171                                         }
172                                 a += 4;
173                                 r += 4;
174                                 }
175                         }
176                 if (dl > 0)
177                         {
178 #ifdef BN_COUNT
179                         fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
180 #endif
181                         for(;;)
182                                 {
183                                 r[0] = a[0];
184                                 if (--dl <= 0) break;
185                                 r[1] = a[1];
186                                 if (--dl <= 0) break;
187                                 r[2] = a[2];
188                                 if (--dl <= 0) break;
189                                 r[3] = a[3];
190                                 if (--dl <= 0) break;
191
192                                 a += 4;
193                                 r += 4;
194                                 }
195                         }
196                 }
197         return c;
198         }
199
200 BN_ULONG bn_add_part_words(BN_ULONG *r,
201         const BN_ULONG *a, const BN_ULONG *b,
202         int cl, int dl)
203         {
204         BN_ULONG c, l, t;
205
206         assert(cl >= 0);
207         c = bn_add_words(r, a, b, cl);
208
209         if (dl == 0)
210                 return c;
211
212         r += cl;
213         a += cl;
214         b += cl;
215
216         if (dl < 0)
217                 {
218                 int save_dl = dl;
219 #ifdef BN_COUNT
220                 fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
221 #endif
222                 while (c)
223                         {
224                         l=(c+b[0])&BN_MASK2;
225                         c=(l < c);
226                         r[0]=l;
227                         if (++dl >= 0) break;
228
229                         l=(c+b[1])&BN_MASK2;
230                         c=(l < c);
231                         r[1]=l;
232                         if (++dl >= 0) break;
233
234                         l=(c+b[2])&BN_MASK2;
235                         c=(l < c);
236                         r[2]=l;
237                         if (++dl >= 0) break;
238
239                         l=(c+b[3])&BN_MASK2;
240                         c=(l < c);
241                         r[3]=l;
242                         if (++dl >= 0) break;
243
244                         save_dl = dl;
245                         b+=4;
246                         r+=4;
247                         }
248                 if (dl < 0)
249                         {
250 #ifdef BN_COUNT
251                         fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
252 #endif
253                         if (save_dl < dl)
254                                 {
255                                 switch (dl - save_dl)
256                                         {
257                                 case 1:
258                                         r[1] = b[1];
259                                         if (++dl >= 0) break;
260                                 case 2:
261                                         r[2] = b[2];
262                                         if (++dl >= 0) break;
263                                 case 3:
264                                         r[3] = b[3];
265                                         if (++dl >= 0) break;
266                                         }
267                                 b += 4;
268                                 r += 4;
269                                 }
270                         }
271                 if (dl < 0)
272                         {
273 #ifdef BN_COUNT
274                         fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
275 #endif
276                         for(;;)
277                                 {
278                                 r[0] = b[0];
279                                 if (++dl >= 0) break;
280                                 r[1] = b[1];
281                                 if (++dl >= 0) break;
282                                 r[2] = b[2];
283                                 if (++dl >= 0) break;
284                                 r[3] = b[3];
285                                 if (++dl >= 0) break;
286
287                                 b += 4;
288                                 r += 4;
289                                 }
290                         }
291                 }
292         else
293                 {
294                 int save_dl = dl;
295 #ifdef BN_COUNT
296                 fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
297 #endif
298                 while (c)
299                         {
300                         t=(a[0]+c)&BN_MASK2;
301                         c=(t < c);
302                         r[0]=t;
303                         if (--dl <= 0) break;
304
305                         t=(a[1]+c)&BN_MASK2;
306                         c=(t < c);
307                         r[1]=t;
308                         if (--dl <= 0) break;
309
310                         t=(a[2]+c)&BN_MASK2;
311                         c=(t < c);
312                         r[2]=t;
313                         if (--dl <= 0) break;
314
315                         t=(a[3]+c)&BN_MASK2;
316                         c=(t < c);
317                         r[3]=t;
318                         if (--dl <= 0) break;
319
320                         save_dl = dl;
321                         a+=4;
322                         r+=4;
323                         }
324 #ifdef BN_COUNT
325                 fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
326 #endif
327                 if (dl > 0)
328                         {
329                         if (save_dl > dl)
330                                 {
331                                 switch (save_dl - dl)
332                                         {
333                                 case 1:
334                                         r[1] = a[1];
335                                         if (--dl <= 0) break;
336                                 case 2:
337                                         r[2] = a[2];
338                                         if (--dl <= 0) break;
339                                 case 3:
340                                         r[3] = a[3];
341                                         if (--dl <= 0) break;
342                                         }
343                                 a += 4;
344                                 r += 4;
345                                 }
346                         }
347                 if (dl > 0)
348                         {
349 #ifdef BN_COUNT
350                         fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
351 #endif
352                         for(;;)
353                                 {
354                                 r[0] = a[0];
355                                 if (--dl <= 0) break;
356                                 r[1] = a[1];
357                                 if (--dl <= 0) break;
358                                 r[2] = a[2];
359                                 if (--dl <= 0) break;
360                                 r[3] = a[3];
361                                 if (--dl <= 0) break;
362
363                                 a += 4;
364                                 r += 4;
365                                 }
366                         }
367                 }
368         return c;
369         }
370
371 #ifdef BN_RECURSION
372 /* Karatsuba recursive multiplication algorithm
373  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
374
375 /* r is 2*n2 words in size,
376  * a and b are both n2 words in size.
377  * n2 must be a power of 2.
378  * We multiply and return the result.
379  * t must be 2*n2 words in size
380  * We calculate
381  * a[0]*b[0]
382  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
383  * a[1]*b[1]
384  */
385 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
386         int dna, int dnb, BN_ULONG *t)
387         {
388         int n=n2/2,c1,c2;
389         int tna=n+dna, tnb=n+dnb;
390         unsigned int neg,zero;
391         BN_ULONG ln,lo,*p;
392
393 # ifdef BN_COUNT
394         fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2);
395 # endif
396 # ifdef BN_MUL_COMBA
397 #  if 0
398         if (n2 == 4)
399                 {
400                 bn_mul_comba4(r,a,b);
401                 return;
402                 }
403 #  endif
404         if (n2 == 8)
405                 {
406                 bn_mul_comba8(r,a,b);
407                 return; 
408                 }
409 # endif /* BN_MUL_COMBA */
410         if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
411                 {
412                 /* This should not happen */
413                 bn_mul_normal(r,a,n2,b,n2);
414                 return;
415                 }
416         /* r=(a[0]-a[1])*(b[1]-b[0]) */
417         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
418         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
419         zero=neg=0;
420         switch (c1*3+c2)
421                 {
422         case -4:
423                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
424                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
425                 break;
426         case -3:
427                 zero=1;
428                 break;
429         case -2:
430                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
431                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
432                 neg=1;
433                 break;
434         case -1:
435         case 0:
436         case 1:
437                 zero=1;
438                 break;
439         case 2:
440                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
441                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
442                 neg=1;
443                 break;
444         case 3:
445                 zero=1;
446                 break;
447         case 4:
448                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
449                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
450                 break;
451                 }
452
453 # ifdef BN_MUL_COMBA
454         if (n == 4)
455                 {
456                 if (!zero)
457                         bn_mul_comba4(&(t[n2]),t,&(t[n]));
458                 else
459                         memset(&(t[n2]),0,8*sizeof(BN_ULONG));
460                 
461                 bn_mul_comba4(r,a,b);
462                 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
463                 }
464         else if (n == 8)
465                 {
466                 if (!zero)
467                         bn_mul_comba8(&(t[n2]),t,&(t[n]));
468                 else
469                         memset(&(t[n2]),0,16*sizeof(BN_ULONG));
470                 
471                 bn_mul_comba8(r,a,b);
472                 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
473                 }
474         else
475 # endif /* BN_MUL_COMBA */
476                 {
477                 p= &(t[n2*2]);
478                 if (!zero)
479                         bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
480                 else
481                         memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
482                 bn_mul_recursive(r,a,b,n,0,0,p);
483                 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
484                 }
485
486         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
487          * r[10] holds (a[0]*b[0])
488          * r[32] holds (b[1]*b[1])
489          */
490
491         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
492
493         if (neg) /* if t[32] is negative */
494                 {
495                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
496                 }
497         else
498                 {
499                 /* Might have a carry */
500                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
501                 }
502
503         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
504          * r[10] holds (a[0]*b[0])
505          * r[32] holds (b[1]*b[1])
506          * c1 holds the carry bits
507          */
508         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
509         if (c1)
510                 {
511                 p= &(r[n+n2]);
512                 lo= *p;
513                 ln=(lo+c1)&BN_MASK2;
514                 *p=ln;
515
516                 /* The overflow will stop before we over write
517                  * words we should not overwrite */
518                 if (ln < (BN_ULONG)c1)
519                         {
520                         do      {
521                                 p++;
522                                 lo= *p;
523                                 ln=(lo+1)&BN_MASK2;
524                                 *p=ln;
525                                 } while (ln == 0);
526                         }
527                 }
528         }
529
530 /* n+tn is the word length
531  * t needs to be n*4 is size, as does r */
532 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
533              int tna, int tnb, BN_ULONG *t)
534         {
535         int i,j,n2=n*2;
536         unsigned int c1,c2,neg,zero;
537         BN_ULONG ln,lo,*p;
538
539 # ifdef BN_COUNT
540         fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n",
541                 tna, n, tnb, n);
542 # endif
543         if (n < 8)
544                 {
545                 bn_mul_normal(r,a,n+tna,b,n+tnb);
546                 return;
547                 }
548
549         /* r=(a[0]-a[1])*(b[1]-b[0]) */
550         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
551         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
552         zero=neg=0;
553         switch (c1*3+c2)
554                 {
555         case -4:
556                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
557                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
558                 break;
559         case -3:
560                 zero=1;
561                 /* break; */
562         case -2:
563                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
564                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
565                 neg=1;
566                 break;
567         case -1:
568         case 0:
569         case 1:
570                 zero=1;
571                 /* break; */
572         case 2:
573                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
574                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
575                 neg=1;
576                 break;
577         case 3:
578                 zero=1;
579                 /* break; */
580         case 4:
581                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
582                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
583                 break;
584                 }
585                 /* The zero case isn't yet implemented here. The speedup
586                    would probably be negligible. */
587 # if 0
588         if (n == 4)
589                 {
590                 bn_mul_comba4(&(t[n2]),t,&(t[n]));
591                 bn_mul_comba4(r,a,b);
592                 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
593                 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
594                 }
595         else
596 # endif
597         if (n == 8)
598                 {
599                 bn_mul_comba8(&(t[n2]),t,&(t[n]));
600                 bn_mul_comba8(r,a,b);
601                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
602                 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
603                 }
604         else
605                 {
606                 p= &(t[n2*2]);
607                 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
608                 bn_mul_recursive(r,a,b,n,0,0,p);
609                 i=n/2;
610                 /* If there is only a bottom half to the number,
611                  * just do it */
612                 if (tna > tnb)
613                         j = tna - i;
614                 else
615                         j = tnb - i;
616                 if (j == 0)
617                         {
618                         bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
619                                 i,tna-i,tnb-i,p);
620                         memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
621                         }
622                 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
623                                 {
624                                 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
625                                         i,tna-i,tnb-i,p);
626                                 memset(&(r[n2+tna+tnb]),0,
627                                         sizeof(BN_ULONG)*(n2-tna-tnb));
628                                 }
629                 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
630                         {
631                         memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
632                         if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
633                                 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
634                                 {
635                                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
636                                 }
637                         else
638                                 {
639                                 for (;;)
640                                         {
641                                         i/=2;
642                                         if (i < tna && i < tnb)
643                                                 {
644                                                 bn_mul_part_recursive(&(r[n2]),
645                                                         &(a[n]),&(b[n]),
646                                                         i,tna-i,tnb-i,p);
647                                                 break;
648                                                 }
649                                         else if (i <= tna && i <= tnb)
650                                                 {
651                                                 bn_mul_recursive(&(r[n2]),
652                                                         &(a[n]),&(b[n]),
653                                                         i,tna-i,tnb-i,p);
654                                                 break;
655                                                 }
656                                         }
657                                 }
658                         }
659                 }
660
661         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
662          * r[10] holds (a[0]*b[0])
663          * r[32] holds (b[1]*b[1])
664          */
665
666         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
667
668         if (neg) /* if t[32] is negative */
669                 {
670                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
671                 }
672         else
673                 {
674                 /* Might have a carry */
675                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
676                 }
677
678         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
679          * r[10] holds (a[0]*b[0])
680          * r[32] holds (b[1]*b[1])
681          * c1 holds the carry bits
682          */
683         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
684         if (c1)
685                 {
686                 p= &(r[n+n2]);
687                 lo= *p;
688                 ln=(lo+c1)&BN_MASK2;
689                 *p=ln;
690
691                 /* The overflow will stop before we over write
692                  * words we should not overwrite */
693                 if (ln < c1)
694                         {
695                         do      {
696                                 p++;
697                                 lo= *p;
698                                 ln=(lo+1)&BN_MASK2;
699                                 *p=ln;
700                                 } while (ln == 0);
701                         }
702                 }
703         }
704
705 /* a and b must be the same size, which is n2.
706  * r needs to be n2 words and t needs to be n2*2
707  */
708 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
709              BN_ULONG *t)
710         {
711         int n=n2/2;
712
713 # ifdef BN_COUNT
714         fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
715 # endif
716
717         bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
718         if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
719                 {
720                 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
721                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
722                 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
723                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
724                 }
725         else
726                 {
727                 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
728                 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
729                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
730                 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
731                 }
732         }
733
734 /* a and b must be the same size, which is n2.
735  * r needs to be n2 words and t needs to be n2*2
736  * l is the low words of the output.
737  * t needs to be n2*3
738  */
739 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
740              BN_ULONG *t)
741         {
742         int i,n;
743         int c1,c2;
744         int neg,oneg,zero;
745         BN_ULONG ll,lc,*lp,*mp;
746
747 # ifdef BN_COUNT
748         fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
749 # endif
750         n=n2/2;
751
752         /* Calculate (al-ah)*(bh-bl) */
753         neg=zero=0;
754         c1=bn_cmp_words(&(a[0]),&(a[n]),n);
755         c2=bn_cmp_words(&(b[n]),&(b[0]),n);
756         switch (c1*3+c2)
757                 {
758         case -4:
759                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
760                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
761                 break;
762         case -3:
763                 zero=1;
764                 break;
765         case -2:
766                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
767                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
768                 neg=1;
769                 break;
770         case -1:
771         case 0:
772         case 1:
773                 zero=1;
774                 break;
775         case 2:
776                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
777                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
778                 neg=1;
779                 break;
780         case 3:
781                 zero=1;
782                 break;
783         case 4:
784                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
785                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
786                 break;
787                 }
788                 
789         oneg=neg;
790         /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
791         /* r[10] = (a[1]*b[1]) */
792 # ifdef BN_MUL_COMBA
793         if (n == 8)
794                 {
795                 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
796                 bn_mul_comba8(r,&(a[n]),&(b[n]));
797                 }
798         else
799 # endif
800                 {
801                 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
802                 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
803                 }
804
805         /* s0 == low(al*bl)
806          * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
807          * We know s0 and s1 so the only unknown is high(al*bl)
808          * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
809          * high(al*bl) == s1 - (r[0]+l[0]+t[0])
810          */
811         if (l != NULL)
812                 {
813                 lp= &(t[n2+n]);
814                 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
815                 }
816         else
817                 {
818                 c1=0;
819                 lp= &(r[0]);
820                 }
821
822         if (neg)
823                 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
824         else
825                 {
826                 bn_add_words(&(t[n2]),lp,&(t[0]),n);
827                 neg=0;
828                 }
829
830         if (l != NULL)
831                 {
832                 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
833                 }
834         else
835                 {
836                 lp= &(t[n2+n]);
837                 mp= &(t[n2]);
838                 for (i=0; i<n; i++)
839                         lp[i]=((~mp[i])+1)&BN_MASK2;
840                 }
841
842         /* s[0] = low(al*bl)
843          * t[3] = high(al*bl)
844          * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
845          * r[10] = (a[1]*b[1])
846          */
847         /* R[10] = al*bl
848          * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
849          * R[32] = ah*bh
850          */
851         /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
852          * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
853          * R[3]=r[1]+(carry/borrow)
854          */
855         if (l != NULL)
856                 {
857                 lp= &(t[n2]);
858                 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
859                 }
860         else
861                 {
862                 lp= &(t[n2+n]);
863                 c1=0;
864                 }
865         c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
866         if (oneg)
867                 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
868         else
869                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
870
871         c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
872         c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
873         if (oneg)
874                 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
875         else
876                 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
877         
878         if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
879                 {
880                 i=0;
881                 if (c1 > 0)
882                         {
883                         lc=c1;
884                         do      {
885                                 ll=(r[i]+lc)&BN_MASK2;
886                                 r[i++]=ll;
887                                 lc=(lc > ll);
888                                 } while (lc);
889                         }
890                 else
891                         {
892                         lc= -c1;
893                         do      {
894                                 ll=r[i];
895                                 r[i++]=(ll-lc)&BN_MASK2;
896                                 lc=(lc > ll);
897                                 } while (lc);
898                         }
899                 }
900         if (c2 != 0) /* Add starting at r[1] */
901                 {
902                 i=n;
903                 if (c2 > 0)
904                         {
905                         lc=c2;
906                         do      {
907                                 ll=(r[i]+lc)&BN_MASK2;
908                                 r[i++]=ll;
909                                 lc=(lc > ll);
910                                 } while (lc);
911                         }
912                 else
913                         {
914                         lc= -c2;
915                         do      {
916                                 ll=r[i];
917                                 r[i++]=(ll-lc)&BN_MASK2;
918                                 lc=(lc > ll);
919                                 } while (lc);
920                         }
921                 }
922         }
923 #endif /* BN_RECURSION */
924
925 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
926         {
927         int ret=0;
928         int top,al,bl;
929         BIGNUM *rr;
930 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
931         int i;
932 #endif
933 #ifdef BN_RECURSION
934         BIGNUM *t;
935         int j,k;
936 #endif
937
938 #ifdef BN_COUNT
939         fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
940 #endif
941
942         bn_check_top(a);
943         bn_check_top(b);
944         bn_check_top(r);
945
946         al=a->top;
947         bl=b->top;
948
949         if ((al == 0) || (bl == 0))
950                 {
951                 BN_zero(r);
952                 return(1);
953                 }
954         top=al+bl;
955
956         BN_CTX_start(ctx);
957         if ((r == a) || (r == b))
958                 {
959                 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
960                 }
961         else
962                 rr = r;
963         rr->neg=a->neg^b->neg;
964
965 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
966         i = al-bl;
967 #endif
968 #ifdef BN_MUL_COMBA
969         if (i == 0)
970                 {
971 # if 0
972                 if (al == 4)
973                         {
974                         if (bn_wexpand(rr,8) == NULL) goto err;
975                         rr->top=8;
976                         bn_mul_comba4(rr->d,a->d,b->d);
977                         goto end;
978                         }
979 # endif
980                 if (al == 8)
981                         {
982                         if (bn_wexpand(rr,16) == NULL) goto err;
983                         rr->top=16;
984                         bn_mul_comba8(rr->d,a->d,b->d);
985                         goto end;
986                         }
987                 }
988 #endif /* BN_MUL_COMBA */
989 #ifdef BN_RECURSION
990         if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
991                 {
992                 if (i >= -1 && i <= 1)
993                         {
994                         int sav_j =0;
995                         /* Find out the power of two lower or equal
996                            to the longest of the two numbers */
997                         if (i >= 0)
998                                 {
999                                 j = BN_num_bits_word((BN_ULONG)al);
1000                                 }
1001                         if (i == -1)
1002                                 {
1003                                 j = BN_num_bits_word((BN_ULONG)bl);
1004                                 }
1005                         sav_j = j;
1006                         j = 1<<(j-1);
1007                         assert(j <= al || j <= bl);
1008                         k = j+j;
1009                         t = BN_CTX_get(ctx);
1010                         if (al > j || bl > j)
1011                                 {
1012                                 bn_wexpand(t,k*4);
1013                                 bn_wexpand(rr,k*4);
1014                                 bn_mul_part_recursive(rr->d,a->d,b->d,
1015                                         j,al-j,bl-j,t->d);
1016                                 }
1017                         else    /* al <= j || bl <= j */
1018                                 {
1019                                 bn_wexpand(t,k*2);
1020                                 bn_wexpand(rr,k*2);
1021                                 bn_mul_recursive(rr->d,a->d,b->d,
1022                                         j,al-j,bl-j,t->d);
1023                                 }
1024                         rr->top=top;
1025                         goto end;
1026                         }
1027 #if 0
1028                 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1029                         {
1030                         BIGNUM *tmp_bn = (BIGNUM *)b;
1031                         bn_wexpand(tmp_bn,al);
1032                         tmp_bn->d[bl]=0;
1033                         bl++;
1034                         i--;
1035                         }
1036                 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1037                         {
1038                         BIGNUM *tmp_bn = (BIGNUM *)a;
1039                         bn_wexpand(tmp_bn,bl);
1040                         tmp_bn->d[al]=0;
1041                         al++;
1042                         i++;
1043                         }
1044                 if (i == 0)
1045                         {
1046                         /* symmetric and > 4 */
1047                         /* 16 or larger */
1048                         j=BN_num_bits_word((BN_ULONG)al);
1049                         j=1<<(j-1);
1050                         k=j+j;
1051                         t = BN_CTX_get(ctx);
1052                         if (al == j) /* exact multiple */
1053                                 {
1054                                 bn_wexpand(t,k*2);
1055                                 bn_wexpand(rr,k*2);
1056                                 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1057                                 }
1058                         else
1059                                 {
1060                                 bn_wexpand(t,k*4);
1061                                 bn_wexpand(rr,k*4);
1062                                 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1063                                 }
1064                         rr->top=top;
1065                         goto end;
1066                         }
1067 #endif
1068                 }
1069 #endif /* BN_RECURSION */
1070         if (bn_wexpand(rr,top) == NULL) goto err;
1071         rr->top=top;
1072         bn_mul_normal(rr->d,a->d,al,b->d,bl);
1073
1074 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1075 end:
1076 #endif
1077         bn_fix_top(rr);
1078         if (r != rr) BN_copy(r,rr);
1079         ret=1;
1080 err:
1081         BN_CTX_end(ctx);
1082         return(ret);
1083         }
1084
1085 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1086         {
1087         BN_ULONG *rr;
1088
1089 #ifdef BN_COUNT
1090         fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1091 #endif
1092
1093         if (na < nb)
1094                 {
1095                 int itmp;
1096                 BN_ULONG *ltmp;
1097
1098                 itmp=na; na=nb; nb=itmp;
1099                 ltmp=a;   a=b;   b=ltmp;
1100
1101                 }
1102         rr= &(r[na]);
1103         rr[0]=bn_mul_words(r,a,na,b[0]);
1104
1105         for (;;)
1106                 {
1107                 if (--nb <= 0) return;
1108                 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1109                 if (--nb <= 0) return;
1110                 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1111                 if (--nb <= 0) return;
1112                 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1113                 if (--nb <= 0) return;
1114                 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1115                 rr+=4;
1116                 r+=4;
1117                 b+=4;
1118                 }
1119         }
1120
1121 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1122         {
1123 #ifdef BN_COUNT
1124         fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1125 #endif
1126         bn_mul_words(r,a,n,b[0]);
1127
1128         for (;;)
1129                 {
1130                 if (--n <= 0) return;
1131                 bn_mul_add_words(&(r[1]),a,n,b[1]);
1132                 if (--n <= 0) return;
1133                 bn_mul_add_words(&(r[2]),a,n,b[2]);
1134                 if (--n <= 0) return;
1135                 bn_mul_add_words(&(r[3]),a,n,b[3]);
1136                 if (--n <= 0) return;
1137                 bn_mul_add_words(&(r[4]),a,n,b[4]);
1138                 r+=4;
1139                 b+=4;
1140                 }
1141         }