remove useless instruction
[openssl.git] / crypto / bn / asm / bn-586.pl
1 #!/usr/local/bin/perl
2
3 push(@INC,"perlasm","../../perlasm");
4 require "x86asm.pl";
5
# Driver.  asm_init() (from x86asm.pl) is handed the first command-line
# argument and this script's own name before any routine is generated.
&asm_init($ARGV[0], $0);

# Generate every routine, in exactly this order.  Each generator sub defined
# below takes the symbol name it should emit, and that name is always the
# same as the name of the generator sub itself.
foreach my $routine (qw(
    bn_mul_add_words
    bn_mul_words
    bn_sqr_words
    bn_div_words
    bn_add_words
    bn_sub_words
    bn_sub_part_words
)) {
    __PACKAGE__->can($routine)->($routine);
}

# Called once, after all routines have been generated.
&asm_finish();
17
# bn_mul_add_words($name)
#
# Emits, under the symbol $name, an x86 routine that multiplies every word of
# a[] by w and adds the product into r[], carrying one word between
# positions:
#
#     (carry, r[i]) = a[i] * w + r[i] + carry
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = num (word count), wparam(3) = w.  The final carry word is moved
# into eax just before the routine ends.
#
# The main loop is unrolled eight words per pass; a tail of up to seven
# rounds handles num & 7.
#
# Note: many perlasm helpers share their names with Perl keywords or
# built-ins (and, xor, sub, push, pop), so every helper is called with the
# explicit "&name(...)" form.
sub bn_mul_add_words {
    my ($name) = @_;

    # Register roles.
    my ($lo, $hi) = ("eax", "edx");    # "mul" leaves its product in edx:eax
    my $src   = "ebx";                 # a
    my $mult  = "ebp";                 # w
    my $dst   = "edi";                 # r
    my $carry = "esi";                 # carry word (also used as scratch)
    my $count = "ecx";                 # words still to process

    &function_begin($name, "");

    &comment("");
    &xor($carry, $carry);              # carry = 0
    &mov($dst, &wparam(0));            # r
    &mov($count, &wparam(2));          # num
    &mov($src, &wparam(1));            # a
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &mov($mult, &wparam(3));           # w

    # One extra word on the stack for a temporary; it is read and written
    # through swtmp(0) in the main loop and popped again at the end.
    &push($count);

    # mov and push leave the flags alone, so this still tests the result of
    # the "and" above.
    &jz(&label("maw_finish"));

    &set_label("maw_loop", 0);
    &mov(&swtmp(0), $count);           # save the remaining count

    foreach my $off (map { $_ * 4 } 0 .. 7) {
        &comment("Round $off");
        &mov($lo, &DWP($off, $src, "", 0));      # a[i]
        &mul($mult);                             # edx:eax = a[i] * w
        &add($lo, $carry);                       # L(t) += carry
        &mov($carry, &DWP($off, $dst, "", 0));   # scratch = r[i]
        &adc($hi, 0);                            # H(t) += CF
        &add($lo, $carry);                       # L(t) += r[i]
        &adc($hi, 0);                            # H(t) += CF
        &mov(&DWP($off, $dst, "", 0), $lo);      # r[i] = L(t)
        &mov($carry, $hi);                       # carry = H(t)
    }

    &comment("");
    &mov($count, &swtmp(0));           # reload the remaining count
    &add($src, 32);
    &add($dst, 32);
    &sub($count, 8);
    &jnz(&label("maw_loop"));

    &set_label("maw_finish", 0);
    &mov($count, &wparam(2));          # num again
    &and($count, 7);                   # leftover words
    &jnz(&label("maw_finish2"));       # original author's note: helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2", 1);
    foreach my $word (0 .. 6) {
        my $off  = $word * 4;
        my $more = $word < 6;          # the last round needs no countdown

        &comment("Tail Round $word");
        &mov($lo, &DWP($off, $src, "", 0));      # a[i]
        &mul($mult);                             # edx:eax = a[i] * w
        &add($lo, $carry);                       # L(t) += carry
        &mov($carry, &DWP($off, $dst, "", 0));   # scratch = r[i]
        &adc($hi, 0);                            # H(t) += CF
        &add($lo, $carry);                       # L(t) += r[i]
        &adc($hi, 0);                            # H(t) += CF
        &dec($count) if $more;                   # sets ZF for the jz below
        &mov(&DWP($off, $dst, "", 0), $lo);      # r[i] = L(t)
        &mov($carry, $hi);                       # carry = H(t)
        &jz(&label("maw_end")) if $more;         # the two movs kept ZF intact
    }
    &set_label("maw_end", 0);
    &mov("eax", $carry);               # result: final carry

    &pop($count);                      # drop the temporary from the stack

    &function_end($name);
}
100
# bn_mul_words($name)
#
# Emits, under the symbol $name, an x86 routine that multiplies every word of
# a[] by w and stores the result in r[], carrying one word between positions:
#
#     (carry, r[i]) = a[i] * w + carry
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = num (word count), wparam(3) = w.  The final carry word is moved
# into eax just before the routine ends.
#
# The main loop is unrolled eight words per pass; a tail of up to seven
# rounds handles num & 7.
sub bn_mul_words {
    my ($name) = @_;

    # Register roles.
    my ($lo, $hi) = ("eax", "edx");    # "mul" leaves its product in edx:eax
    my $src   = "ebx";                 # a
    my $mult  = "ecx";                 # w
    my $dst   = "edi";                 # r
    my $carry = "esi";                 # carry word
    my $count = "ebp";                 # words still to process

    # Instructions common to a main-loop round and a tail round, for the word
    # at byte offset $off.
    my $emit_round = sub {
        my ($off) = @_;
        &mov($lo, &DWP($off, $src, "", 0));      # a[i]
        &mul($mult);                             # edx:eax = a[i] * w
        &add($lo, $carry);                       # L(t) += carry
        &adc($hi, 0);                            # H(t) += CF
        &mov(&DWP($off, $dst, "", 0), $lo);      # r[i] = L(t)
        &mov($carry, $hi);                       # carry = H(t)
    };

    &function_begin($name, "");

    &comment("");
    &xor($carry, $carry);              # carry = 0
    &mov($dst, &wparam(0));            # r
    &mov($src, &wparam(1));            # a
    &mov($count, &wparam(2));          # num
    &mov($mult, &wparam(3));           # w

    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop", 0);
    foreach my $off (map { $_ * 4 } 0 .. 7) {
        &comment("Round $off");
        $emit_round->($off);
    }

    &comment("");
    &add($src, 32);
    &add($dst, 32);
    &sub($count, 8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish", 0);
    &mov($count, &wparam(2));          # num again
    &and($count, 7);                   # leftover words
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2", 1);
    foreach my $word (0 .. 6) {
        &comment("Tail Round $word");
        $emit_round->($word * 4);
        next if $word == 6;            # the last round needs no countdown
        &dec($count);
        &jz(&label("mw_end"));
    }
    &set_label("mw_end", 0);
    &mov("eax", $carry);               # result: final carry

    &function_end($name);
}
173
# bn_sqr_words($name)
#
# Emits, under the symbol $name, an x86 routine that squares every word of
# a[] and stores each double-word result in r[]:
#
#     r[2*i]   = low  word of a[i] * a[i]
#     r[2*i+1] = high word of a[i] * a[i]
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = num (word count).
#
# The main loop is unrolled eight words per pass (32 input bytes, 64 output
# bytes); a tail of up to seven rounds handles num & 7.
sub bn_sqr_words {
    my ($name) = @_;

    # Register roles.
    my $out   = "esi";                 # r
    my $in    = "edi";                 # a
    my $count = "ebx";                 # words still to process

    &function_begin($name, "");

    &comment("");
    &mov($out, &wparam(0));            # r
    &mov($in, &wparam(1));             # a
    &mov($count, &wparam(2));          # num

    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop", 0);
    foreach my $in_off (map { $_ * 4 } 0 .. 7) {
        my $out_off = $in_off * 2;     # two output words per input word

        &comment("Round $in_off");
        &mov("eax", &DWP($in_off, $in, "", 0));          # a[i]
        &mul("eax");                                     # edx:eax = a[i] * a[i]
        &mov(&DWP($out_off, $out, "", 0), "eax");        # low word
        &mov(&DWP($out_off + 4, $out, "", 0), "edx");    # high word
    }

    &comment("");
    &add($in, 32);
    &add($out, 64);
    &sub($count, 8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish", 0);
    &mov($count, &wparam(2));          # num again
    &and($count, 7);                   # leftover words
    &jz(&label("sw_end"));

    foreach my $word (0 .. 6) {
        my $more = $word < 6;          # the last round needs no countdown

        &comment("Tail Round $word");
        &mov("eax", &DWP($word * 4, $in, "", 0));        # a[i]
        &mul("eax");                                     # edx:eax = a[i] * a[i]
        &mov(&DWP($word * 8, $out, "", 0), "eax");       # low word
        &dec($count) if $more;                           # sets ZF for the jz below
        &mov(&DWP($word * 8 + 4, $out, "", 0), "edx");   # high word (mov keeps ZF)
        &jz(&label("sw_end")) if $more;
    }
    &set_label("sw_end", 0);

    &function_end($name);
}
229
# bn_div_words($name)
#
# Emits, under the symbol $name, a routine built around a single x86 "div":
# the first stack argument is loaded into edx, the second into eax and the
# third into ebx, and then edx:eax is divided by ebx.  "div" leaves the
# quotient in eax and the remainder in edx; nothing is moved afterwards, so
# the quotient is still in eax when the routine ends.
sub bn_div_words {
    my ($name) = @_;

    # [ destination register, stack argument index ], in load order.
    my @loads = (
        [ "edx", 0 ],    # upper half of the dividend
        [ "eax", 1 ],    # lower half of the dividend
        [ "ebx", 2 ],    # divisor
    );

    &function_begin($name, "");
    foreach my $load (@loads) {
        my ($reg, $arg) = @$load;
        &mov($reg, &wparam($arg));
    }
    &div("ebx");
    &function_end($name);
}
241
# bn_add_words($name)
#
# Emits, under the symbol $name, an x86 routine that adds b[] to a[] word by
# word and stores the sums in r[], propagating the carry:
#
#     (carry, r[i]) = a[i] + b[i] + carry
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = b, wparam(3) = num (word count).  The carry is kept in eax, so
# it is already in place when the routine ends and no final move is needed.
#
# The main loop is unrolled eight words per pass; a tail of up to seven
# rounds handles num & 7.
sub bn_add_words {
    my ($name) = @_;

    # Register roles.
    my $ap     = "esi";                # a
    my $bp     = "edi";                # b
    my $carry  = "eax";                # running carry, 0 or 1
    my $rp     = "ebx";                # r
    my $sum    = "ecx";                # a[i], then the finished sum
    my $addend = "edx";                # b[i]
    my $count  = "ebp";                # words still to process

    # Emits one round for word index $word.  Tail rounds other than the last
    # additionally count down and leave through aw_end when the count hits 0.
    my $emit_round = sub {
        my ($word, $is_tail) = @_;
        my $off       = $word * 4;
        my $countdown = $is_tail && $word != 6;

        &comment(($is_tail ? "Tail Round " : "Round ") . $word);
        &mov($sum, &DWP($off, $ap, "", 0));      # a[i]
        &mov($addend, &DWP($off, $bp, "", 0));   # b[i]
        &add($sum, $carry);                      # + carry in
        &mov($carry, 0);                         # mov, not xor: CF must survive
        &adc($carry, $carry);                    # carry = CF of the first add
        &add($sum, $addend);                     # + b[i]
        &adc($carry, 0);                         # carry += CF of the second add
        &dec($count) if $countdown;              # sets ZF for the jz below
        &mov(&DWP($off, $rp, "", 0), $sum);      # r[i] (mov keeps ZF)
        &jz(&label("aw_end")) if $countdown;
    };

    &function_begin($name, "");

    &comment("");
    &mov($rp, &wparam(0));             # r
    &mov($ap, &wparam(1));             # a
    &mov($bp, &wparam(2));             # b
    &mov($count, &wparam(3));          # num
    &xor($carry, $carry);              # carry = 0
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $word (0 .. 7) {
        $emit_round->($word, 0);
    }

    &comment("");
    &add($ap, 32);
    &add($bp, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));          # num again
    &and($count, 7);                   # leftover words
    &jz(&label("aw_end"));

    foreach my $word (0 .. 6) {
        $emit_round->($word, 1);
    }
    &set_label("aw_end", 0);

    # No "mov eax, carry" here: the carry register is eax itself.

    &function_end($name);
}
313
# bn_sub_words($name)
#
# Emits, under the symbol $name, an x86 routine that subtracts b[] from a[]
# word by word and stores the differences in r[], propagating the borrow:
#
#     r[i] = a[i] - b[i] - borrow      (borrow becomes 1 on wrap-around)
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = b, wparam(3) = num (word count).  The borrow is kept in eax, so
# it is already in place when the routine ends and no final move is needed.
#
# The main loop is unrolled eight words per pass; a tail of up to seven
# rounds handles num & 7.  The label names (aw_*) are the same ones
# bn_add_words uses.
sub bn_sub_words {
    my ($name) = @_;

    # Register roles.
    my $ap     = "esi";                # a
    my $bp     = "edi";                # b
    my $borrow = "eax";                # running borrow, 0 or 1
    my $rp     = "ebx";                # r
    my $diff   = "ecx";                # a[i], then the finished difference
    my $subtr  = "edx";                # b[i]
    my $count  = "ebp";                # words still to process

    # diff = diff - borrow - subtr, leaving the new borrow (0 or 1) in
    # $borrow.
    my $borrow_step = sub {
        &sub($diff, $borrow);          # - borrow in
        &mov($borrow, 0);              # mov, not xor: CF must survive
        &adc($borrow, $borrow);        # borrow = CF of the first sub
        &sub($diff, $subtr);           # - b[i]
        &adc($borrow, 0);              # borrow += CF of the second sub
    };

    &function_begin($name, "");

    &comment("");
    &mov($rp, &wparam(0));             # r
    &mov($ap, &wparam(1));             # a
    &mov($bp, &wparam(2));             # b
    &mov($count, &wparam(3));          # num
    &xor($borrow, $borrow);            # borrow = 0
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $word (0 .. 7) {
        my $off = $word * 4;

        &comment("Round $word");
        &mov($diff, &DWP($off, $ap, "", 0));     # a[i]
        &mov($subtr, &DWP($off, $bp, "", 0));    # b[i]
        $borrow_step->();
        &mov(&DWP($off, $rp, "", 0), $diff);     # r[i]
    }

    &comment("");
    &add($ap, 32);
    &add($bp, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));          # num again
    &and($count, 7);                   # leftover words
    &jz(&label("aw_end"));

    foreach my $word (0 .. 6) {
        my $off  = $word * 4;
        my $more = $word < 6;          # the last round needs no countdown

        &comment("Tail Round $word");
        &mov($diff, &DWP($off, $ap, "", 0));     # a[i]
        &mov($subtr, &DWP($off, $bp, "", 0));    # b[i]
        $borrow_step->();
        &dec($count) if $more;                   # sets ZF for the jz below
        &mov(&DWP($off, $rp, "", 0), $diff);     # r[i] (mov keeps ZF)
        &jz(&label("aw_end")) if $more;
    }
    &set_label("aw_end", 0);

    # No "mov eax, borrow" here: the borrow register is eax itself.

    &function_end($name);
}
385
# bn_sub_part_words($name)
#
# Emits, under the symbol $name, an x86 routine that subtracts b[] from a[]
# into r[] in two parts.
#
# Stack arguments as loaded below: wparam(0) = r, wparam(1) = a,
# wparam(2) = b, wparam(3) = num (words both inputs have in common),
# wparam(4) = dl (signed count of extra words).
#
#   Part 1 (labels aw_*): r[i] = a[i] - b[i] - borrow for num words.
#     Unlike bn_sub_words, the tail rounds advance a, b and r by one word
#     each, so all three point just past the common part afterwards.
#   Part 2, dl == 0: nothing more to do.
#   Part 2, dl <  0 (labels pw_neg_*): r[i] = 0 - b[i] - borrow for -dl words.
#   Part 2, dl >  0 (labels pw_pos_*): r[i] = a[i] - borrow for dl words.
#     As soon as one of these subtractions produces no carry flag, control
#     jumps to the matching point of a plain copy loop (labels pw_nc*) that
#     copies the remaining words of a[] to r[] and finishes with borrow = 0.
#
# The borrow is kept in eax, so it is already in place when the routine ends.
sub bn_sub_part_words {
    my ($name) = @_;

    # Register roles.
    my $ap     = "esi";                # a
    my $bp     = "edi";                # b
    my $borrow = "eax";                # running borrow, 0 or 1
    my $rp     = "ebx";                # r
    my $x      = "ecx";                # minuend, then the finished difference
    my $y      = "edx";                # subtrahend / scratch
    my $count  = "ebp";                # words still to process

    # x = x - borrow - y, leaving the new borrow (0 or 1) in $borrow.
    my $borrow_step = sub {
        &sub($x, $borrow);             # - borrow in
        &mov($borrow, 0);              # mov, not xor: CF must survive
        &adc($borrow, $borrow);        # borrow = CF of the first sub
        &sub($x, $y);                  # - subtrahend
        &adc($borrow, 0);              # borrow += CF of the second sub
    };

    &function_begin($name, "");

    &comment("");
    &mov($rp, &wparam(0));             # r
    &mov($ap, &wparam(1));             # a
    &mov($bp, &wparam(2));             # b
    &mov($count, &wparam(3));          # num
    &xor($borrow, $borrow);            # borrow = 0
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    # ---- Part 1: the words a[] and b[] have in common --------------------

    &set_label("aw_loop", 0);
    foreach my $word (0 .. 7) {
        my $off = $word * 4;

        &comment("Round $word");
        &mov($x, &DWP($off, $ap, "", 0));        # a[i]
        &mov($y, &DWP($off, $bp, "", 0));        # b[i]
        $borrow_step->();
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i]
    }

    &comment("");
    &add($ap, 32);
    &add($bp, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));          # num again
    &and($count, 7);                   # leftover words
    &jz(&label("aw_end"));

    foreach my $word (0 .. 6) {
        my $more = $word < 6;          # the last round needs no countdown

        &comment("Tail Round $word");
        &mov($x, &DWP(0, $ap, "", 0));           # *a
        &mov($y, &DWP(0, $bp, "", 0));           # *b
        $borrow_step->();
        &mov(&DWP(0, $rp, "", 0), $x);           # *r
        &add($ap, 4);                            # step all three pointers so
        &add($bp, 4);                            # that part 2 can carry on
        &add($rp, 4);                            # from where part 1 stopped
        &dec($count) if $more;
        &jz(&label("aw_end")) if $more;
    }
    &set_label("aw_end", 0);

    # ---- Part 2: dispatch on the sign of dl -------------------------------

    &cmp(&wparam(4), 0);
    &je(&label("pw_end"));             # dl == 0

    &mov($count, &wparam(4));          # dl
    &cmp($count, 0);
    &je(&label("pw_end"));             # unnecessary: repeats the test above
    &jge(&label("pw_pos"));

    # ---- dl < 0: r[i] = 0 - b[i] - borrow for -dl words -------------------

    &comment("pw_neg");
    &mov($y, 0);
    &sub($y, $count);
    &mov($count, $y);                  # count = -dl
    &and($count, 0xfffffff8);          # rounded down to a multiple of 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop", 0);
    foreach my $word (0 .. 7) {
        my $off = $word * 4;

        &comment("dl<0 Round $word");
        &mov($x, 0);
        &mov($y, &DWP($off, $bp, "", 0));        # b[i]
        $borrow_step->();
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i]
    }

    &comment("");
    &add($bp, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish", 0);
    &mov($y, &wparam(4));              # dl
    &mov($count, 0);
    &sub($count, $y);                  # count = -dl
    &and($count, 7);                   # leftover words
    &jz(&label("pw_end"));

    foreach my $word (0 .. 6) {
        my $off  = $word * 4;
        my $more = $word < 6;

        &comment("dl<0 Tail Round $word");
        &mov($x, 0);
        &mov($y, &DWP($off, $bp, "", 0));        # b[i]
        $borrow_step->();
        &dec($count) if $more;                   # sets ZF for the jz below
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i] (mov keeps ZF)
        &jz(&label("pw_end")) if $more;
    }

    &jmp(&label("pw_end"));

    # ---- dl > 0: r[i] = a[i] - borrow for dl words ------------------------

    &set_label("pw_pos", 0);

    &and($count, 0xfffffff8);          # dl rounded down to a multiple of 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop", 0);

    foreach my $word (0 .. 7) {
        my $off = $word * 4;

        &comment("dl>0 Round $word");
        &mov($x, &DWP($off, $ap, "", 0));        # a[i]
        &sub($x, $borrow);
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i] (mov keeps CF)
        &jnc(&label("pw_nc$word"));              # no borrow left: just copy
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish", 0);
    &mov($count, &wparam(4));          # dl
    &and($count, 7);                   # leftover words
    &jz(&label("pw_end"));

    foreach my $word (0 .. 6) {
        my $off  = $word * 4;
        my $more = $word < 6;

        &comment("dl>0 Tail Round $word");
        &mov($x, &DWP($off, $ap, "", 0));        # a[i]
        &sub($x, $borrow);
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i] (mov keeps CF)
        &jnc(&label("pw_tail_nc$word"));         # no borrow left: just copy
        &dec($count) if $more;
        &jz(&label("pw_end")) if $more;
    }
    &mov($borrow, 1);                  # every round borrowed
    &jmp(&label("pw_end"));

    # ---- dl > 0, borrow gone: copy the rest of a[] to r[] -----------------
    # Label pw_nc<i> sits right after the copy of word i, so a jump from
    # round i above resumes with word i+1.

    &set_label("pw_nc_loop", 0);
    foreach my $word (0 .. 7) {
        my $off = $word * 4;

        &mov($x, &DWP($off, $ap, "", 0));        # a[i]
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i]
        &set_label("pw_nc$word", 0);
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 32);
    &sub($count, 8);
    &jnz(&label("pw_nc_loop"));

    &mov($count, &wparam(4));          # dl
    &and($count, 7);                   # leftover words
    &jz(&label("pw_nc_end"));

    foreach my $word (0 .. 6) {
        my $off  = $word * 4;
        my $more = $word < 6;

        &mov($x, &DWP($off, $ap, "", 0));        # a[i]
        &mov(&DWP($off, $rp, "", 0), $x);        # r[i]
        &set_label("pw_tail_nc$word", 0);
        &dec($count) if $more;
        &jz(&label("pw_nc_end")) if $more;
    }

    &set_label("pw_nc_end", 0);
    &mov($borrow, 0);                  # the copy path ends without a borrow

    &set_label("pw_end", 0);

    # No "mov eax, borrow" here: the borrow register is eax itself.

    &function_end($name);
}
593