eb5d5256137ea5861f3cf71d2f07ae83b75afbe9
[openssl.git] / crypto / bn / bn_mul.c
1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  * 
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  * 
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  * 
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from 
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  * 
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * 
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63
64 #include <stdio.h>
65 #include <assert.h>
66 #include "cryptlib.h"
67 #include "bn_lcl.h"
68
69 /* Here follows specialised variants of bn_add_words() and
70    bn_sub_words().  They have the property performing operations on
71    arrays of different sizes.  The sizes of those arrays is expressed through
72    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
73    which is the delta between the two lengths, calculated as len(a)-len(b).
74    All lengths are the number of BN_ULONGs...  For the operations that require
75    a result array as parameter, it must have the length cl+abs(dl).
76    These functions should probably end up in bn_asm.c as soon as there are
77    assembler counterparts for the systems that use assembler files.  */
78
79 BN_ULONG bn_sub_part_words(BN_ULONG *r,
80         const BN_ULONG *a, const BN_ULONG *b,
81         int cl, int dl)
82         {
83         BN_ULONG c, t;
84
85         assert(cl >= 0);
86         c = bn_sub_words(r, a, b, cl);
87
88         if (dl == 0)
89                 return c;
90
91         r += cl;
92         a += cl;
93         b += cl;
94
95         if (dl < 0)
96                 {
97 #ifdef BN_COUNT
98                 fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
99 #endif
100                 for (;;)
101                         {
102                         t = b[0];
103                         r[0] = (0-t-c)&BN_MASK2;
104                         if (t != 0) c=1;
105                         if (++dl >= 0) break;
106
107                         t = b[1];
108                         r[1] = (0-t-c)&BN_MASK2;
109                         if (t != 0) c=1;
110                         if (++dl >= 0) break;
111
112                         t = b[2];
113                         r[2] = (0-t-c)&BN_MASK2;
114                         if (t != 0) c=1;
115                         if (++dl >= 0) break;
116
117                         t = b[3];
118                         r[3] = (0-t-c)&BN_MASK2;
119                         if (t != 0) c=1;
120                         if (++dl >= 0) break;
121
122                         b += 4;
123                         r += 4;
124                         }
125                 }
126         else
127                 {
128                 int save_dl = dl;
129 #ifdef BN_COUNT
130                 fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
131 #endif
132                 while(c)
133                         {
134                         t = a[0];
135                         r[0] = (t-c)&BN_MASK2;
136                         if (t != 0) c=0;
137                         if (--dl <= 0) break;
138
139                         t = a[1];
140                         r[1] = (t-c)&BN_MASK2;
141                         if (t != 0) c=0;
142                         if (--dl <= 0) break;
143
144                         t = a[2];
145                         r[2] = (t-c)&BN_MASK2;
146                         if (t != 0) c=0;
147                         if (--dl <= 0) break;
148
149                         t = a[3];
150                         r[3] = (t-c)&BN_MASK2;
151                         if (t != 0) c=0;
152                         if (--dl <= 0) break;
153
154                         save_dl = dl;
155                         a += 4;
156                         r += 4;
157                         }
158                 if (dl > 0)
159                         {
160 #ifdef BN_COUNT
161                         fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
162 #endif
163                         if (save_dl > dl)
164                                 {
165                                 switch (save_dl - dl)
166                                         {
167                                 case 1:
168                                         r[1] = a[1];
169                                         if (--dl <= 0) break;
170                                 case 2:
171                                         r[2] = a[2];
172                                         if (--dl <= 0) break;
173                                 case 3:
174                                         r[3] = a[3];
175                                         if (--dl <= 0) break;
176                                         }
177                                 a += 4;
178                                 r += 4;
179                                 }
180                         }
181                 if (dl > 0)
182                         {
183 #ifdef BN_COUNT
184                         fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
185 #endif
186                         for(;;)
187                                 {
188                                 r[0] = a[0];
189                                 if (--dl <= 0) break;
190                                 r[1] = a[1];
191                                 if (--dl <= 0) break;
192                                 r[2] = a[2];
193                                 if (--dl <= 0) break;
194                                 r[3] = a[3];
195                                 if (--dl <= 0) break;
196
197                                 a += 4;
198                                 r += 4;
199                                 }
200                         }
201                 }
202         return c;
203         }
204
205 BN_ULONG bn_add_part_words(BN_ULONG *r,
206         const BN_ULONG *a, const BN_ULONG *b,
207         int cl, int dl)
208         {
209         BN_ULONG c, l, t;
210
211         assert(cl >= 0);
212         c = bn_add_words(r, a, b, cl);
213
214         if (dl == 0)
215                 return c;
216
217         r += cl;
218         a += cl;
219         b += cl;
220
221         if (dl < 0)
222                 {
223                 int save_dl = dl;
224 #ifdef BN_COUNT
225                 fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
226 #endif
227                 while (c)
228                         {
229                         l=(c+b[0])&BN_MASK2;
230                         c=(l < c);
231                         r[0]=l;
232                         if (++dl >= 0) break;
233
234                         l=(c+b[1])&BN_MASK2;
235                         c=(l < c);
236                         r[1]=l;
237                         if (++dl >= 0) break;
238
239                         l=(c+b[2])&BN_MASK2;
240                         c=(l < c);
241                         r[2]=l;
242                         if (++dl >= 0) break;
243
244                         l=(c+b[3])&BN_MASK2;
245                         c=(l < c);
246                         r[3]=l;
247                         if (++dl >= 0) break;
248
249                         save_dl = dl;
250                         b+=4;
251                         r+=4;
252                         }
253                 if (dl < 0)
254                         {
255 #ifdef BN_COUNT
256                         fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
257 #endif
258                         if (save_dl < dl)
259                                 {
260                                 switch (dl - save_dl)
261                                         {
262                                 case 1:
263                                         r[1] = b[1];
264                                         if (++dl >= 0) break;
265                                 case 2:
266                                         r[2] = b[2];
267                                         if (++dl >= 0) break;
268                                 case 3:
269                                         r[3] = b[3];
270                                         if (++dl >= 0) break;
271                                         }
272                                 b += 4;
273                                 r += 4;
274                                 }
275                         }
276                 if (dl < 0)
277                         {
278 #ifdef BN_COUNT
279                         fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
280 #endif
281                         for(;;)
282                                 {
283                                 r[0] = b[0];
284                                 if (++dl >= 0) break;
285                                 r[1] = b[1];
286                                 if (++dl >= 0) break;
287                                 r[2] = b[2];
288                                 if (++dl >= 0) break;
289                                 r[3] = b[3];
290                                 if (++dl >= 0) break;
291
292                                 b += 4;
293                                 r += 4;
294                                 }
295                         }
296                 }
297         else
298                 {
299                 int save_dl = dl;
300 #ifdef BN_COUNT
301                 fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
302 #endif
303                 while (c)
304                         {
305                         t=(a[0]+c)&BN_MASK2;
306                         c=(t < c);
307                         r[0]=t;
308                         if (--dl <= 0) break;
309
310                         t=(a[1]+c)&BN_MASK2;
311                         c=(t < c);
312                         r[1]=t;
313                         if (--dl <= 0) break;
314
315                         t=(a[2]+c)&BN_MASK2;
316                         c=(t < c);
317                         r[2]=t;
318                         if (--dl <= 0) break;
319
320                         t=(a[3]+c)&BN_MASK2;
321                         c=(t < c);
322                         r[3]=t;
323                         if (--dl <= 0) break;
324
325                         save_dl = dl;
326                         a+=4;
327                         r+=4;
328                         }
329 #ifdef BN_COUNT
330                 fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
331 #endif
332                 if (dl > 0)
333                         {
334                         if (save_dl > dl)
335                                 {
336                                 switch (save_dl - dl)
337                                         {
338                                 case 1:
339                                         r[1] = a[1];
340                                         if (--dl <= 0) break;
341                                 case 2:
342                                         r[2] = a[2];
343                                         if (--dl <= 0) break;
344                                 case 3:
345                                         r[3] = a[3];
346                                         if (--dl <= 0) break;
347                                         }
348                                 a += 4;
349                                 r += 4;
350                                 }
351                         }
352                 if (dl > 0)
353                         {
354 #ifdef BN_COUNT
355                         fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
356 #endif
357                         for(;;)
358                                 {
359                                 r[0] = a[0];
360                                 if (--dl <= 0) break;
361                                 r[1] = a[1];
362                                 if (--dl <= 0) break;
363                                 r[2] = a[2];
364                                 if (--dl <= 0) break;
365                                 r[3] = a[3];
366                                 if (--dl <= 0) break;
367
368                                 a += 4;
369                                 r += 4;
370                                 }
371                         }
372                 }
373         return c;
374         }
375
376 #ifdef BN_RECURSION
377 /* Karatsuba recursive multiplication algorithm
378  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
379
380 /* r is 2*n2 words in size,
381  * a and b are both n2 words in size.
382  * n2 must be a power of 2.
383  * We multiply and return the result.
384  * t must be 2*n2 words in size
385  * We calculate
386  * a[0]*b[0]
387  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
388  * a[1]*b[1]
389  */
390 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
391         int dna, int dnb, BN_ULONG *t)
392         {
393         int n=n2/2,c1,c2;
394         int tna=n+dna, tnb=n+dnb;
395         unsigned int neg,zero;
396         BN_ULONG ln,lo,*p;
397
398 # ifdef BN_COUNT
399         fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2);
400 # endif
401 # ifdef BN_MUL_COMBA
402 #  if 0
403         if (n2 == 4)
404                 {
405                 bn_mul_comba4(r,a,b);
406                 return;
407                 }
408 #  endif
409         if (n2 == 8)
410                 {
411                 bn_mul_comba8(r,a,b);
412                 return; 
413                 }
414 # endif /* BN_MUL_COMBA */
415         if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
416                 {
417                 /* This should not happen */
418                 bn_mul_normal(r,a,n2,b,n2);
419                 return;
420                 }
421         /* r=(a[0]-a[1])*(b[1]-b[0]) */
422         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
423         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
424         zero=neg=0;
425         switch (c1*3+c2)
426                 {
427         case -4:
428                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
429                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
430                 break;
431         case -3:
432                 zero=1;
433                 break;
434         case -2:
435                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
436                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
437                 neg=1;
438                 break;
439         case -1:
440         case 0:
441         case 1:
442                 zero=1;
443                 break;
444         case 2:
445                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
446                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
447                 neg=1;
448                 break;
449         case 3:
450                 zero=1;
451                 break;
452         case 4:
453                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
454                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
455                 break;
456                 }
457
458 # ifdef BN_MUL_COMBA
459         if (n == 4)
460                 {
461                 if (!zero)
462                         bn_mul_comba4(&(t[n2]),t,&(t[n]));
463                 else
464                         memset(&(t[n2]),0,8*sizeof(BN_ULONG));
465                 
466                 bn_mul_comba4(r,a,b);
467                 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
468                 }
469         else if (n == 8)
470                 {
471                 if (!zero)
472                         bn_mul_comba8(&(t[n2]),t,&(t[n]));
473                 else
474                         memset(&(t[n2]),0,16*sizeof(BN_ULONG));
475                 
476                 bn_mul_comba8(r,a,b);
477                 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
478                 }
479         else
480 # endif /* BN_MUL_COMBA */
481                 {
482                 p= &(t[n2*2]);
483                 if (!zero)
484                         bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
485                 else
486                         memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
487                 bn_mul_recursive(r,a,b,n,0,0,p);
488                 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
489                 }
490
491         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
492          * r[10] holds (a[0]*b[0])
493          * r[32] holds (b[1]*b[1])
494          */
495
496         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
497
498         if (neg) /* if t[32] is negative */
499                 {
500                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
501                 }
502         else
503                 {
504                 /* Might have a carry */
505                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
506                 }
507
508         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
509          * r[10] holds (a[0]*b[0])
510          * r[32] holds (b[1]*b[1])
511          * c1 holds the carry bits
512          */
513         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
514         if (c1)
515                 {
516                 p= &(r[n+n2]);
517                 lo= *p;
518                 ln=(lo+c1)&BN_MASK2;
519                 *p=ln;
520
521                 /* The overflow will stop before we over write
522                  * words we should not overwrite */
523                 if (ln < (BN_ULONG)c1)
524                         {
525                         do      {
526                                 p++;
527                                 lo= *p;
528                                 ln=(lo+1)&BN_MASK2;
529                                 *p=ln;
530                                 } while (ln == 0);
531                         }
532                 }
533         }
534
535 /* n+tn is the word length
536  * t needs to be n*4 is size, as does r */
537 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
538              int tna, int tnb, BN_ULONG *t)
539         {
540         int i,j,n2=n*2;
541         unsigned int c1,c2,neg,zero;
542         BN_ULONG ln,lo,*p;
543
544 # ifdef BN_COUNT
545         fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n",
546                 tna, n, tnb, n);
547 # endif
548         if (n < 8)
549                 {
550                 bn_mul_normal(r,a,n+tna,b,n+tnb);
551                 return;
552                 }
553
554         /* r=(a[0]-a[1])*(b[1]-b[0]) */
555         c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
556         c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
557         zero=neg=0;
558         switch (c1*3+c2)
559                 {
560         case -4:
561                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
562                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
563                 break;
564         case -3:
565                 zero=1;
566                 /* break; */
567         case -2:
568                 bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
569                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
570                 neg=1;
571                 break;
572         case -1:
573         case 0:
574         case 1:
575                 zero=1;
576                 /* break; */
577         case 2:
578                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
579                 bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
580                 neg=1;
581                 break;
582         case 3:
583                 zero=1;
584                 /* break; */
585         case 4:
586                 bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
587                 bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
588                 break;
589                 }
590                 /* The zero case isn't yet implemented here. The speedup
591                    would probably be negligible. */
592 # if 0
593         if (n == 4)
594                 {
595                 bn_mul_comba4(&(t[n2]),t,&(t[n]));
596                 bn_mul_comba4(r,a,b);
597                 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
598                 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
599                 }
600         else
601 # endif
602         if (n == 8)
603                 {
604                 bn_mul_comba8(&(t[n2]),t,&(t[n]));
605                 bn_mul_comba8(r,a,b);
606                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
607                 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
608                 }
609         else
610                 {
611                 p= &(t[n2*2]);
612                 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
613                 bn_mul_recursive(r,a,b,n,0,0,p);
614                 i=n/2;
615                 /* If there is only a bottom half to the number,
616                  * just do it */
617                 if (tna > tnb)
618                         j = tna - i;
619                 else
620                         j = tnb - i;
621                 if (j == 0)
622                         {
623                         bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
624                                 i,tna-i,tnb-i,p);
625                         memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
626                         }
627                 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
628                                 {
629                                 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
630                                         i,tna-i,tnb-i,p);
631                                 memset(&(r[n2+tna+tnb]),0,
632                                         sizeof(BN_ULONG)*(n2-tna-tnb));
633                                 }
634                 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
635                         {
636                         memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
637                         if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
638                                 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
639                                 {
640                                 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
641                                 }
642                         else
643                                 {
644                                 for (;;)
645                                         {
646                                         i/=2;
647                                         if (i < tna && i < tnb)
648                                                 {
649                                                 bn_mul_part_recursive(&(r[n2]),
650                                                         &(a[n]),&(b[n]),
651                                                         i,tna-i,tnb-i,p);
652                                                 break;
653                                                 }
654                                         else if (i <= tna && i <= tnb)
655                                                 {
656                                                 bn_mul_recursive(&(r[n2]),
657                                                         &(a[n]),&(b[n]),
658                                                         i,tna-i,tnb-i,p);
659                                                 break;
660                                                 }
661                                         }
662                                 }
663                         }
664                 }
665
666         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
667          * r[10] holds (a[0]*b[0])
668          * r[32] holds (b[1]*b[1])
669          */
670
671         c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
672
673         if (neg) /* if t[32] is negative */
674                 {
675                 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
676                 }
677         else
678                 {
679                 /* Might have a carry */
680                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
681                 }
682
683         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
684          * r[10] holds (a[0]*b[0])
685          * r[32] holds (b[1]*b[1])
686          * c1 holds the carry bits
687          */
688         c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
689         if (c1)
690                 {
691                 p= &(r[n+n2]);
692                 lo= *p;
693                 ln=(lo+c1)&BN_MASK2;
694                 *p=ln;
695
696                 /* The overflow will stop before we over write
697                  * words we should not overwrite */
698                 if (ln < c1)
699                         {
700                         do      {
701                                 p++;
702                                 lo= *p;
703                                 ln=(lo+1)&BN_MASK2;
704                                 *p=ln;
705                                 } while (ln == 0);
706                         }
707                 }
708         }
709
710 /* a and b must be the same size, which is n2.
711  * r needs to be n2 words and t needs to be n2*2
712  */
713 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
714              BN_ULONG *t)
715         {
716         int n=n2/2;
717
718 # ifdef BN_COUNT
719         fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
720 # endif
721
722         bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
723         if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
724                 {
725                 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
726                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
727                 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
728                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
729                 }
730         else
731                 {
732                 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
733                 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
734                 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
735                 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
736                 }
737         }
738
739 /* a and b must be the same size, which is n2.
740  * r needs to be n2 words and t needs to be n2*2
741  * l is the low words of the output.
742  * t needs to be n2*3
743  */
744 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
745              BN_ULONG *t)
746         {
747         int i,n;
748         int c1,c2;
749         int neg,oneg,zero;
750         BN_ULONG ll,lc,*lp,*mp;
751
752 # ifdef BN_COUNT
753         fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
754 # endif
755         n=n2/2;
756
757         /* Calculate (al-ah)*(bh-bl) */
758         neg=zero=0;
759         c1=bn_cmp_words(&(a[0]),&(a[n]),n);
760         c2=bn_cmp_words(&(b[n]),&(b[0]),n);
761         switch (c1*3+c2)
762                 {
763         case -4:
764                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
765                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
766                 break;
767         case -3:
768                 zero=1;
769                 break;
770         case -2:
771                 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
772                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
773                 neg=1;
774                 break;
775         case -1:
776         case 0:
777         case 1:
778                 zero=1;
779                 break;
780         case 2:
781                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
782                 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
783                 neg=1;
784                 break;
785         case 3:
786                 zero=1;
787                 break;
788         case 4:
789                 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
790                 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
791                 break;
792                 }
793                 
794         oneg=neg;
795         /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
796         /* r[10] = (a[1]*b[1]) */
797 # ifdef BN_MUL_COMBA
798         if (n == 8)
799                 {
800                 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
801                 bn_mul_comba8(r,&(a[n]),&(b[n]));
802                 }
803         else
804 # endif
805                 {
806                 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
807                 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
808                 }
809
810         /* s0 == low(al*bl)
811          * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
812          * We know s0 and s1 so the only unknown is high(al*bl)
813          * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
814          * high(al*bl) == s1 - (r[0]+l[0]+t[0])
815          */
816         if (l != NULL)
817                 {
818                 lp= &(t[n2+n]);
819                 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
820                 }
821         else
822                 {
823                 c1=0;
824                 lp= &(r[0]);
825                 }
826
827         if (neg)
828                 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
829         else
830                 {
831                 bn_add_words(&(t[n2]),lp,&(t[0]),n);
832                 neg=0;
833                 }
834
835         if (l != NULL)
836                 {
837                 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
838                 }
839         else
840                 {
841                 lp= &(t[n2+n]);
842                 mp= &(t[n2]);
843                 for (i=0; i<n; i++)
844                         lp[i]=((~mp[i])+1)&BN_MASK2;
845                 }
846
847         /* s[0] = low(al*bl)
848          * t[3] = high(al*bl)
849          * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
850          * r[10] = (a[1]*b[1])
851          */
852         /* R[10] = al*bl
853          * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
854          * R[32] = ah*bh
855          */
856         /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
857          * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
858          * R[3]=r[1]+(carry/borrow)
859          */
860         if (l != NULL)
861                 {
862                 lp= &(t[n2]);
863                 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
864                 }
865         else
866                 {
867                 lp= &(t[n2+n]);
868                 c1=0;
869                 }
870         c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
871         if (oneg)
872                 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
873         else
874                 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
875
876         c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
877         c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
878         if (oneg)
879                 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
880         else
881                 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
882         
883         if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
884                 {
885                 i=0;
886                 if (c1 > 0)
887                         {
888                         lc=c1;
889                         do      {
890                                 ll=(r[i]+lc)&BN_MASK2;
891                                 r[i++]=ll;
892                                 lc=(lc > ll);
893                                 } while (lc);
894                         }
895                 else
896                         {
897                         lc= -c1;
898                         do      {
899                                 ll=r[i];
900                                 r[i++]=(ll-lc)&BN_MASK2;
901                                 lc=(lc > ll);
902                                 } while (lc);
903                         }
904                 }
905         if (c2 != 0) /* Add starting at r[1] */
906                 {
907                 i=n;
908                 if (c2 > 0)
909                         {
910                         lc=c2;
911                         do      {
912                                 ll=(r[i]+lc)&BN_MASK2;
913                                 r[i++]=ll;
914                                 lc=(lc > ll);
915                                 } while (lc);
916                         }
917                 else
918                         {
919                         lc= -c2;
920                         do      {
921                                 ll=r[i];
922                                 r[i++]=(ll-lc)&BN_MASK2;
923                                 lc=(lc > ll);
924                                 } while (lc);
925                         }
926                 }
927         }
928 #endif /* BN_RECURSION */
929
930 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
931         {
932         int ret=0;
933         int top,al,bl;
934         BIGNUM *rr;
935 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
936         int i;
937 #endif
938 #ifdef BN_RECURSION
939         BIGNUM *t;
940         int j,k;
941 #endif
942
943 #ifdef BN_COUNT
944         fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
945 #endif
946
947         bn_check_top(a);
948         bn_check_top(b);
949         bn_check_top(r);
950
951         al=a->top;
952         bl=b->top;
953
954         if ((al == 0) || (bl == 0))
955                 {
956                 BN_zero(r);
957                 return(1);
958                 }
959         top=al+bl;
960
961         BN_CTX_start(ctx);
962         if ((r == a) || (r == b))
963                 {
964                 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
965                 }
966         else
967                 rr = r;
968         rr->neg=a->neg^b->neg;
969
970 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
971         i = al-bl;
972 #endif
973 #ifdef BN_MUL_COMBA
974         if (i == 0)
975                 {
976 # if 0
977                 if (al == 4)
978                         {
979                         if (bn_wexpand(rr,8) == NULL) goto err;
980                         rr->top=8;
981                         bn_mul_comba4(rr->d,a->d,b->d);
982                         goto end;
983                         }
984 # endif
985                 if (al == 8)
986                         {
987                         if (bn_wexpand(rr,16) == NULL) goto err;
988                         rr->top=16;
989                         bn_mul_comba8(rr->d,a->d,b->d);
990                         goto end;
991                         }
992                 }
993 #endif /* BN_MUL_COMBA */
994 #ifdef BN_RECURSION
995         if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
996                 {
997                 if (i >= -1 && i <= 1)
998                         {
999                         int sav_j =0;
1000                         /* Find out the power of two lower or equal
1001                            to the longest of the two numbers */
1002                         if (i >= 0)
1003                                 {
1004                                 j = BN_num_bits_word((BN_ULONG)al);
1005                                 }
1006                         if (i == -1)
1007                                 {
1008                                 j = BN_num_bits_word((BN_ULONG)bl);
1009                                 }
1010                         sav_j = j;
1011                         j = 1<<(j-1);
1012                         assert(j <= al || j <= bl);
1013                         k = j+j;
1014                         t = BN_CTX_get(ctx);
1015                         if (al > j || bl > j)
1016                                 {
1017                                 bn_wexpand(t,k*4);
1018                                 bn_wexpand(rr,k*4);
1019                                 bn_mul_part_recursive(rr->d,a->d,b->d,
1020                                         j,al-j,bl-j,t->d);
1021                                 }
1022                         else    /* al <= j || bl <= j */
1023                                 {
1024                                 bn_wexpand(t,k*2);
1025                                 bn_wexpand(rr,k*2);
1026                                 bn_mul_recursive(rr->d,a->d,b->d,
1027                                         j,al-j,bl-j,t->d);
1028                                 }
1029                         rr->top=top;
1030                         goto end;
1031                         }
1032 #if 0
1033                 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1034                         {
1035                         BIGNUM *tmp_bn = (BIGNUM *)b;
1036                         bn_wexpand(tmp_bn,al);
1037                         tmp_bn->d[bl]=0;
1038                         bl++;
1039                         i--;
1040                         }
1041                 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1042                         {
1043                         BIGNUM *tmp_bn = (BIGNUM *)a;
1044                         bn_wexpand(tmp_bn,bl);
1045                         tmp_bn->d[al]=0;
1046                         al++;
1047                         i++;
1048                         }
1049                 if (i == 0)
1050                         {
1051                         /* symmetric and > 4 */
1052                         /* 16 or larger */
1053                         j=BN_num_bits_word((BN_ULONG)al);
1054                         j=1<<(j-1);
1055                         k=j+j;
1056                         t = BN_CTX_get(ctx);
1057                         if (al == j) /* exact multiple */
1058                                 {
1059                                 bn_wexpand(t,k*2);
1060                                 bn_wexpand(rr,k*2);
1061                                 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1062                                 }
1063                         else
1064                                 {
1065                                 bn_wexpand(t,k*4);
1066                                 bn_wexpand(rr,k*4);
1067                                 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1068                                 }
1069                         rr->top=top;
1070                         goto end;
1071                         }
1072 #endif
1073                 }
1074 #endif /* BN_RECURSION */
1075         if (bn_wexpand(rr,top) == NULL) goto err;
1076         rr->top=top;
1077         bn_mul_normal(rr->d,a->d,al,b->d,bl);
1078
1079 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1080 end:
1081 #endif
1082         bn_fix_top(rr);
1083         if (r != rr) BN_copy(r,rr);
1084         ret=1;
1085 err:
1086         BN_CTX_end(ctx);
1087         return(ret);
1088         }
1089
1090 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1091         {
1092         BN_ULONG *rr;
1093
1094 #ifdef BN_COUNT
1095         fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1096 #endif
1097
1098         if (na < nb)
1099                 {
1100                 int itmp;
1101                 BN_ULONG *ltmp;
1102
1103                 itmp=na; na=nb; nb=itmp;
1104                 ltmp=a;   a=b;   b=ltmp;
1105
1106                 }
1107         rr= &(r[na]);
1108         rr[0]=bn_mul_words(r,a,na,b[0]);
1109
1110         for (;;)
1111                 {
1112                 if (--nb <= 0) return;
1113                 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1114                 if (--nb <= 0) return;
1115                 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1116                 if (--nb <= 0) return;
1117                 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1118                 if (--nb <= 0) return;
1119                 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1120                 rr+=4;
1121                 r+=4;
1122                 b+=4;
1123                 }
1124         }
1125
1126 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1127         {
1128 #ifdef BN_COUNT
1129         fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1130 #endif
1131         bn_mul_words(r,a,n,b[0]);
1132
1133         for (;;)
1134                 {
1135                 if (--n <= 0) return;
1136                 bn_mul_add_words(&(r[1]),a,n,b[1]);
1137                 if (--n <= 0) return;
1138                 bn_mul_add_words(&(r[2]),a,n,b[2]);
1139                 if (--n <= 0) return;
1140                 bn_mul_add_words(&(r[3]),a,n,b[3]);
1141                 if (--n <= 0) return;
1142                 bn_mul_add_words(&(r[4]),a,n,b[4]);
1143                 r+=4;
1144                 b+=4;
1145                 }
1146         }