4f4e9e329989d5b415bba73dd14eda0a245e7f83
[openssl.git] / crypto / bn / bn_exp2.c
1 #include <stdio.h>
2 #include "cryptlib.h"
3 #include "bn_lcl.h"
4
5 /* I've done some timing with different table sizes.
6  * The main hassle is that even with bits set at 3, this requires
7  * 63 BIGNUMs to store the pre-calculated values.
8  *          512   1024 
9  * bits=1  75.4%  79.4%
10  * bits=2  61.2%  62.4%
11  * bits=3  61.3%  59.3%
12  * The lack of speed improvement is also a function of the pre-calculation
13  * which could be removed.
14  */
15 #define EXP2_TABLE_BITS 2 /* 1  2  3  4  5  */
16 #define EXP2_TABLE_SIZE 4 /* 2  4  8 16 32  */
17
18 int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
19              BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
20         {
21         int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue;
22         int start=1,ts=0,x,y;
23         BIGNUM *d,*aa1,*aa2,*r;
24         BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE];
25         BN_MONT_CTX *mont=NULL;
26
27         bn_check_top(a1);
28         bn_check_top(p1);
29         bn_check_top(a2);
30         bn_check_top(p2);
31         bn_check_top(m);
32
33         if (!(m->d[0] & 1))
34                 {
35                 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
36                 return(0);
37                 }
38         bits1=BN_num_bits(p1);
39         bits2=BN_num_bits(p2);
40         if ((bits1 == 0) && (bits2 == 0))
41                 {
42                 BN_one(rr);
43                 return(1);
44                 }
45
46         BN_CTX_start(ctx);
47         d = BN_CTX_get(ctx);
48         r = BN_CTX_get(ctx);
49         if (d == NULL || r == NULL) goto err;
50
51         bits=(bits1 > bits2)?bits1:bits2;
52
53         /* If this is not done, things will break in the montgomery
54          * part */
55
56         if (in_mont != NULL)
57                 mont=in_mont;
58         else
59                 {
60                 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
61                 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
62                 }
63
64         BN_init(&(val[0][0]));
65         BN_init(&(val[1][1]));
66         BN_init(&(val[0][1]));
67         BN_init(&(val[1][0]));
68         ts=1;
69         if (BN_ucmp(a1,m) >= 0)
70                 {
71                 BN_mod(&(val[1][0]),a1,m,ctx);
72                 aa1= &(val[1][0]);
73                 }
74         else
75                 aa1=a1;
76         if (BN_ucmp(a2,m) >= 0)
77                 {
78                 BN_mod(&(val[0][1]),a2,m,ctx);
79                 aa2= &(val[0][1]);
80                 }
81         else
82                 aa2=a2;
83         if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err;
84         if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err;
85         if (!BN_mod_mul_montgomery(&(val[1][1]),
86                 &(val[1][0]),&(val[0][1]),mont,ctx))
87                 goto err;
88
89 #if 0
90         if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
91                 window=1;
92         else if (bits > 250)
93                 window=5;       /* max size of window */
94         else if (bits >= 120)
95                 window=4;
96         else
97                 window=3;
98 #else
99         window=EXP2_TABLE_BITS;
100 #endif
101
102         k=1<<window;
103         for (x=0; x<k; x++)
104                 {
105                 if (x >= 2)
106                         {
107                         BN_init(&(val[x][0]));
108                         BN_init(&(val[x][1]));
109                         if (!BN_mod_mul_montgomery(&(val[x][0]),
110                                 &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err;
111                         if (!BN_mod_mul_montgomery(&(val[x][1]),
112                                 &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err;
113                         }
114                 for (y=2; y<k; y++)
115                         {
116                         BN_init(&(val[x][y]));
117                         if (!BN_mod_mul_montgomery(&(val[x][y]),
118                                 &(val[x][y-1]),&(val[0][1]),mont,ctx))
119                                 goto err;
120                         }
121                 }
122         ts=k;
123
124         start=1;        /* This is used to avoid multiplication etc
125                          * when there is only the value '1' in the
126                          * buffer. */
127         xvalue=0;       /* The 'x value' of the window */
128         yvalue=0;       /* The 'y value' of the window */
129         wstart=bits-1;  /* The top bit of the window */
130         wend=0;         /* The bottom bit of the window */
131
132         if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
133         for (;;)
134                 {
135                 xvalue=BN_is_bit_set(p1,wstart);
136                 yvalue=BN_is_bit_set(p2,wstart);
137                 if (!(xvalue || yvalue))
138                         {
139                         if (!start)
140                                 {
141                                 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
142                                         goto err;
143                                 }
144                         wstart--;
145                         if (wstart < 0) break;
146                         continue;
147                         }
148                 /* We now have wstart on a 'set' bit, we now need to work out
149                  * how bit a window to do.  To do this we need to scan
150                  * forward until the last set bit before the end of the
151                  * window */
152                 j=wstart;
153                 /* xvalue=BN_is_bit_set(p1,wstart); already set */
154                 /* yvalue=BN_is_bit_set(p1,wstart); already set */
155                 wend=0;
156                 for (i=1; i<window; i++)
157                         {
158                         if (wstart-i < 0) break;
159                         xvalue+=xvalue;
160                         xvalue|=BN_is_bit_set(p1,wstart-i);
161                         yvalue+=yvalue;
162                         yvalue|=BN_is_bit_set(p2,wstart-i);
163                         }
164
165                 /* i is the size of the current window */
166                 /* add the 'bytes above' */
167                 if (!start)
168                         for (j=0; j<i; j++)
169                                 {
170                                 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
171                                         goto err;
172                                 }
173                 
174                 /* wvalue will be an odd number < 2^window */
175                 if (xvalue || yvalue)
176                         {
177                         if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]),
178                                 mont,ctx)) goto err;
179                         }
180
181                 /* move the 'window' down further */
182                 wstart-=i;
183                 start=0;
184                 if (wstart < 0) break;
185                 }
186         BN_from_montgomery(rr,r,mont,ctx);
187         ret=1;
188 err:
189         if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
190         BN_CTX_end(ctx);
191         for (i=0; i<ts; i++)
192                 {
193                 for (j=0; j<ts; j++)
194                         {
195                         BN_clear_free(&(val[i][j]));
196                         }
197                 }
198         return(ret);
199         }