Minor MIPS III/IV tune-up.
[openssl.git] / crypto / bn / asm / bn-586.pl
1 #!/usr/local/bin/perl
2
3 push(@INC,"perlasm","../../perlasm");
4 require "x86asm.pl";
5
# Driver.  asm_init/asm_finish come from x86asm.pl (not visible here);
# presumably they open and close the assembler output -- confirm there.
&asm_init($ARGV[0],$0);

# Every generator below is handed the symbol name it should emit, and in
# this file that name is always the generator's own name.  The generators
# are invoked in exactly this order.
foreach my $routine (qw(bn_mul_add_words bn_mul_words bn_sqr_words
                        bn_div_words bn_add_words bn_sub_words))
        {
        my $emit=\&{$routine};  # look the generator up by name
        &$emit($routine);
        }

&asm_finish();
16
# bn_mul_add_words(NAME)
#
# Emits, through the perlasm helpers loaded from x86asm.pl, an x86 routine
# called NAME.  Reading the emitted instructions, the routine fetches four
# word parameters with wparam(0..3) -- r, a, num, w -- and for each of the
# num words performs
#
#       t     = a[i]*w + carry + r[i]       (edx:eax)
#       r[i]  = low word of t
#       carry = high word of t
#
# and finally moves the carry into eax.  The main loop is unrolled eight
# times; an unrolled tail of up to seven rounds handles num % 8.
#
# The order of the helper calls is kept exactly as it has always been: the
# helpers live outside this file and may be order sensitive (for instance
# wparam() being used on both sides of the push).
sub bn_mul_add_words
        {
        my($name)=@_;

        # Registers of the generated routine.  Kept in lexicals so that no
        # package variable (in particular sort's $a) is overwritten.
        my $ap="ebx";           # pointer into a[]
        my $word="ebp";         # the multiplier w
        my $rp="edi";           # pointer into r[]
        my $carry="esi";        # carry from one word to the next

        &function_begin($name,"");

        &comment("");
        &xor($carry,$carry);            # carry = 0
        &mov($rp,&wparam(0));           # r
        &mov("ecx",&wparam(2));         # num
        &mov($ap,&wparam(1));           # a
        &and("ecx",0xfffffff8);         # num rounded down to a multiple of 8
        &mov($word,&wparam(3));         # w
        &push("ecx");                   # stack slot used as swtmp(0)
        &jz(&label("maw_finish"));      # acts on the result of the and above

        &set_label("maw_loop",0);
        &mov(&swtmp(0),"ecx");          # store the remaining count

        for (my $off=0; $off<32; $off+=4)
                {
                &comment("Round $off");
                &mov("eax",&DWP($off,$ap,"",0));        # eax = *a
                &mul($word);                            # edx:eax = *a * w
                &add("eax",$carry);                     # L(t) += carry
                &mov($carry,&DWP($off,$rp,"",0));       # load *r into the carry register
                &adc("edx",0);                          # H(t) += CF
                &add("eax",$carry);                     # L(t) += *r
                &adc("edx",0);                          # H(t) += CF
                &mov(&DWP($off,$rp,"",0),"eax");        # *r = L(t)
                &mov($carry,"edx");                     # carry = H(t)
                }

        &comment("");
        &mov("ecx",&swtmp(0));          # reload the remaining count
        &add($ap,32);                   # a += 8 words
        &add($rp,32);                   # r += 8 words
        &sub("ecx",8);
        &jnz(&label("maw_loop"));

        &set_label("maw_finish",0);
        &mov("ecx",&wparam(2));         # num again
        &and("ecx",7);                  # words left over (num % 8)
        &jnz(&label("maw_finish2"));    # helps branch prediction
        &jmp(&label("maw_end"));

        &set_label("maw_finish2",1);
        for (my $k=0; $k<7; $k++)
                {
                my $off=$k*4;
                my $more=($k != 7-1);   # final round: nothing left to count

                &comment("Tail Round $k");
                &mov("eax",&DWP($off,$ap,"",0));        # eax = *a
                &mul($word);                            # edx:eax = *a * w
                &add("eax",$carry);                     # L(t) += carry
                &mov($carry,&DWP($off,$rp,"",0));       # load *r
                &adc("edx",0);                          # H(t) += CF
                &add("eax",$carry);                     # L(t) += *r
                &adc("edx",0);                          # H(t) += CF
                &dec("ecx") if ($more);                 # ZF consumed by the jz below
                &mov(&DWP($off,$rp,"",0),"eax");        # *r = L(t)
                &mov($carry,"edx");                     # carry = H(t)
                &jz(&label("maw_end")) if ($more);
                }
        &set_label("maw_end",0);
        &mov("eax",$carry);             # result: the final carry

        &pop("ecx");                    # release the slot pushed above

        &function_end($name);
        }
99
# bn_mul_words(NAME)
#
# Emits, through the perlasm helpers loaded from x86asm.pl, an x86 routine
# called NAME.  Reading the emitted instructions, the routine fetches four
# word parameters with wparam(0..3) -- r, a, num, w -- and for each of the
# num words performs
#
#       t     = a[i]*w + carry              (edx:eax)
#       r[i]  = low word of t
#       carry = high word of t
#
# and finally moves the carry into eax.  The main loop is unrolled eight
# times; an unrolled tail of up to seven rounds handles num % 8.
#
# The helper calls are issued in the same order as before this rewrite.
sub bn_mul_words
        {
        my($name)=@_;

        # Registers of the generated routine, held in lexicals so that no
        # package variable (in particular sort's $a) is overwritten.
        my $ap="ebx";           # pointer into a[]
        my $word="ecx";         # the multiplier w
        my $rp="edi";           # pointer into r[]
        my $carry="esi";        # carry from one word to the next
        my $count="ebp";        # words still to do

        &function_begin($name,"");

        &comment("");
        &xor($carry,$carry);            # carry = 0
        &mov($rp,&wparam(0));           # r
        &mov($ap,&wparam(1));           # a
        &mov($count,&wparam(2));        # num
        &mov($word,&wparam(3));         # w

        &and($count,0xfffffff8);        # num rounded down to a multiple of 8
        &jz(&label("mw_finish"));

        &set_label("mw_loop",0);
        for (my $off=0; $off<32; $off+=4)
                {
                &comment("Round $off");
                &mov("eax",&DWP($off,$ap,"",0));        # eax = *a
                &mul($word);                            # edx:eax = *a * w
                &add("eax",$carry);                     # L(t) += carry
                &adc("edx",0);                          # H(t) += CF
                &mov(&DWP($off,$rp,"",0),"eax");        # *r = L(t)
                &mov($carry,"edx");                     # carry = H(t)
                }

        &comment("");
        &add($ap,32);                   # a += 8 words
        &add($rp,32);                   # r += 8 words
        &sub($count,8);
        # Two branches where a single jnz would do; left as they are so the
        # emitted code stays exactly what it was.
        &jz(&label("mw_finish"));
        &jmp(&label("mw_loop"));

        &set_label("mw_finish",0);
        &mov($count,&wparam(2));        # num again
        &and($count,7);                 # words left over (num % 8)
        &jnz(&label("mw_finish2"));
        &jmp(&label("mw_end"));

        &set_label("mw_finish2",1);
        for (my $k=0; $k<7; $k++)
                {
                my $off=$k*4;
                my $more=($k != 7-1);   # final round: nothing left to count

                &comment("Tail Round $k");
                &mov("eax",&DWP($off,$ap,"",0));        # eax = *a
                &mul($word);                            # edx:eax = *a * w
                &add("eax",$carry);                     # L(t) += carry
                &adc("edx",0);                          # H(t) += CF
                &mov(&DWP($off,$rp,"",0),"eax");        # *r = L(t)
                &mov($carry,"edx");                     # carry = H(t)
                &dec($count) if ($more);
                &jz(&label("mw_end")) if ($more);
                }
        &set_label("mw_end",0);
        &mov("eax",$carry);             # result: the final carry

        &function_end($name);
        }
172
# bn_sqr_words(NAME)
#
# Emits, through the perlasm helpers loaded from x86asm.pl, an x86 routine
# called NAME.  Reading the emitted instructions, the routine fetches three
# word parameters with wparam(0..2) -- r, a, num -- and for each of the num
# input words squares a[i] (mul eax by itself) and stores the two result
# words at r[2*i] (from eax) and r[2*i+1] (from edx).  The main loop is
# unrolled eight times; an unrolled tail of up to seven rounds handles
# num % 8.
#
# The helper calls are issued in the same order as before this rewrite.
sub bn_sqr_words
        {
        my($name)=@_;

        # Registers of the generated routine, held in lexicals so that no
        # package variable (in particular sort's $a) is overwritten.
        my $rp="esi";           # pointer into r[]
        my $ap="edi";           # pointer into a[]
        my $count="ebx";        # words still to do

        &function_begin($name,"");

        &comment("");
        &mov($rp,&wparam(0));           # r
        &mov($ap,&wparam(1));           # a
        &mov($count,&wparam(2));        # num

        &and($count,0xfffffff8);        # num rounded down to a multiple of 8
        &jz(&label("sw_finish"));

        &set_label("sw_loop",0);
        for (my $off=0; $off<32; $off+=4)
                {
                &comment("Round $off");
                &mov("eax",&DWP($off,$ap,"",0));        # eax = *a
                &mul("eax");                            # edx:eax = *a * *a
                &mov(&DWP($off*2,$rp,"",0),"eax");      # first result word
                &mov(&DWP($off*2+4,$rp,"",0),"edx");    # second result word
                }

        &comment("");
        &add($ap,32);                   # a += 8 words
        &add($rp,64);                   # r += 16 words
        &sub($count,8);
        &jnz(&label("sw_loop"));

        &set_label("sw_finish",0);
        &mov($count,&wparam(2));        # num again
        &and($count,7);                 # words left over (num % 8)
        &jz(&label("sw_end"));

        for (my $k=0; $k<7; $k++)
                {
                my $more=($k != 7-1);   # final round: nothing left to count

                &comment("Tail Round $k");
                &mov("eax",&DWP($k*4,$ap,"",0));        # eax = *a
                &mul("eax");                            # edx:eax = *a * *a
                &mov(&DWP($k*8,$rp,"",0),"eax");        # first result word
                &dec($count) if ($more);                # ZF consumed by the jz below
                &mov(&DWP($k*8+4,$rp,"",0),"edx");      # second result word
                &jz(&label("sw_end")) if ($more);
                }
        &set_label("sw_end",0);

        &function_end($name);
        }
228
# bn_div_words(NAME)
#
# Emits a routine called NAME that copies its first three word parameters
# into edx, eax and ebx respectively and then issues one div by ebx.
# Assuming &div emits the x86 div instruction, that divides edx:eax by ebx
# and leaves the quotient in eax.  The generated code contains no check on
# the divisor.
sub bn_div_words
        {
        local($name)=@_;

        # Destination register for parameter 0, 1 and 2, in that order.
        my @operand=("edx","eax","ebx");

        &function_begin($name,"");
        for (my $slot=0; $slot<@operand; $slot++)
                {
                &mov($operand[$slot],&wparam($slot));
                }
        &div($operand[2]);
        &function_end($name);
        }
240
# bn_add_words(NAME)
#
# Emits, through the perlasm helpers loaded from x86asm.pl, an x86 routine
# called NAME.  Reading the emitted instructions, the routine fetches four
# word parameters with wparam(0..3) -- r, a, b, num -- and for each of the
# num words stores r[i] = a[i] + b[i] + carry, keeping the carry in eax.
# Since eax is never moved elsewhere, the final carry is what is left in
# eax when the routine ends.  The main loop is unrolled eight times; an
# unrolled tail of up to seven rounds handles num % 8.
#
# The helper calls are issued in the same order as before this rewrite.
sub bn_add_words
        {
        my($name)=@_;

        # Registers of the generated routine.  Lexicals with their own names,
        # so that sort's package variables $a and $b are not overwritten.
        my $ap="esi";           # pointer into a[]
        my $bp="edi";           # pointer into b[]
        my $carry="eax";        # carry between words, and the result
        my $rp="ebx";           # pointer into r[]
        my $t1="ecx";           # scratch: a[i], then the sum
        my $t2="edx";           # scratch: b[i]
        my $count="ebp";        # words still to do

        &function_begin($name,"");

        &comment("");
        &mov($rp,&wparam(0));           # r
        &mov($ap,&wparam(1));           # a
        &mov($bp,&wparam(2));           # b
        &mov($count,&wparam(3));        # num
        &xor($carry,$carry);            # carry = 0
        &and($count,0xfffffff8);        # num rounded down to a multiple of 8

        &jz(&label("aw_finish"));

        &set_label("aw_loop",0);
        for (my $k=0; $k<8; $k++)
                {
                &comment("Round $k");
                &mov($t1,&DWP($k*4,$ap,"",0));          # *a
                &mov($t2,&DWP($k*4,$bp,"",0));          # *b
                &add($t1,$carry);                       # *a + carry
                &mov($carry,0);                         # mov, so CF survives
                &adc($carry,$carry);                    # carry = CF
                &add($t1,$t2);                          # ... + *b
                &adc($carry,0);                         # carry += CF
                &mov(&DWP($k*4,$rp,"",0),$t1);          # *r
                }

        &comment("");
        &add($ap,32);                   # a += 8 words
        &add($bp,32);                   # b += 8 words
        &add($rp,32);                   # r += 8 words
        &sub($count,8);
        &jnz(&label("aw_loop"));

        &set_label("aw_finish",0);
        &mov($count,&wparam(3));        # num again
        &and($count,7);                 # words left over (num % 8)
        &jz(&label("aw_end"));

        for (my $k=0; $k<7; $k++)
                {
                my $more=($k != 6);     # final round: nothing left to count

                &comment("Tail Round $k");
                &mov($t1,&DWP($k*4,$ap,"",0));          # *a
                &mov($t2,&DWP($k*4,$bp,"",0));          # *b
                &add($t1,$carry);                       # *a + carry
                &mov($carry,0);                         # mov, so CF survives
                &adc($carry,$carry);                    # carry = CF
                &add($t1,$t2);                          # ... + *b
                &adc($carry,0);                         # carry += CF
                &dec($count) if ($more);                # ZF consumed by the jz below
                &mov(&DWP($k*4,$rp,"",0),$t1);          # *r
                &jz(&label("aw_end")) if ($more);
                }
        &set_label("aw_end",0);

        # No closing mov into eax: the carry register already is eax.

        &function_end($name);
        }
312
# bn_sub_words(NAME)
#
# Emits, through the perlasm helpers loaded from x86asm.pl, an x86 routine
# called NAME.  Reading the emitted instructions, the routine fetches four
# word parameters with wparam(0..3) -- r, a, b, num -- and for each of the
# num words stores r[i] = a[i] - b[i] - borrow, keeping the borrow in eax.
# Since eax is never moved elsewhere, the final borrow is what is left in
# eax when the routine ends.  The main loop is unrolled eight times; an
# unrolled tail of up to seven rounds handles num % 8.
#
# Labels use their own "subw_" prefix.  They used to be copies of the
# "aw_" names from bn_add_words, which is only safe if the helper library
# keeps labels separate per function.  The instructions emitted are
# otherwise the same as before, in the same order.
sub bn_sub_words
        {
        my($name)=@_;

        # Registers of the generated routine.  Lexicals with their own names,
        # so that sort's package variables $a and $b are not overwritten.
        my $ap="esi";           # pointer into a[]
        my $bp="edi";           # pointer into b[]
        my $borrow="eax";       # borrow between words, and the result
        my $rp="ebx";           # pointer into r[]
        my $t1="ecx";           # scratch: a[i], then the difference
        my $t2="edx";           # scratch: b[i]
        my $count="ebp";        # words still to do

        &function_begin($name,"");

        &comment("");
        &mov($rp,&wparam(0));           # r
        &mov($ap,&wparam(1));           # a
        &mov($bp,&wparam(2));           # b
        &mov($count,&wparam(3));        # num
        &xor($borrow,$borrow);          # borrow = 0
        &and($count,0xfffffff8);        # num rounded down to a multiple of 8

        &jz(&label("subw_finish"));

        &set_label("subw_loop",0);
        for (my $k=0; $k<8; $k++)
                {
                &comment("Round $k");
                &mov($t1,&DWP($k*4,$ap,"",0));          # *a
                &mov($t2,&DWP($k*4,$bp,"",0));          # *b
                &sub($t1,$borrow);                      # *a - borrow
                &mov($borrow,0);                        # mov, so CF survives
                &adc($borrow,$borrow);                  # borrow = CF
                &sub($t1,$t2);                          # ... - *b
                &adc($borrow,0);                        # borrow += CF
                &mov(&DWP($k*4,$rp,"",0),$t1);          # *r
                }

        &comment("");
        &add($ap,32);                   # a += 8 words
        &add($bp,32);                   # b += 8 words
        &add($rp,32);                   # r += 8 words
        &sub($count,8);
        &jnz(&label("subw_loop"));

        &set_label("subw_finish",0);
        &mov($count,&wparam(3));        # num again
        &and($count,7);                 # words left over (num % 8)
        &jz(&label("subw_end"));

        for (my $k=0; $k<7; $k++)
                {
                my $more=($k != 6);     # final round: nothing left to count

                &comment("Tail Round $k");
                &mov($t1,&DWP($k*4,$ap,"",0));          # *a
                &mov($t2,&DWP($k*4,$bp,"",0));          # *b
                &sub($t1,$borrow);                      # *a - borrow
                &mov($borrow,0);                        # mov, so CF survives
                &adc($borrow,$borrow);                  # borrow = CF
                &sub($t1,$t2);                          # ... - *b
                &adc($borrow,0);                        # borrow += CF
                &dec($count) if ($more);                # ZF consumed by the jz below
                &mov(&DWP($k*4,$rp,"",0),$t1);          # *r
                &jz(&label("subw_end")) if ($more);
                }
        &set_label("subw_end",0);

        # No closing mov into eax: the borrow register already is eax.

        &function_end($name);
        }
384