332ef3e91d621d7f9f4bb20952795c4c04c0acd2
[openssl.git] / crypto / bn / asm / bn-586.pl
1 #!/usr/local/bin/perl
2
3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4 push(@INC,"${dir}","${dir}../../perlasm");
5 require "x86asm.pl";
6
# Start the perlasm back end; the first command-line argument and this
# script's own path are passed through unchanged.
7 &asm_init($ARGV[0],$0);
8
# SSE2 code paths are generated only when -DOPENSSL_IA32_SSE2 appears
# somewhere on the command line.
9 $sse2=0;
10 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
11
# The SSE2 paths consult OPENSSL_ia32cap_P at run time, so it is declared as
# an external label whenever those paths are emitted.
12 &external_label("OPENSSL_ia32cap_P") if ($sse2);
13
# Emit the routines one after another; the string is the name handed to
# function_begin/function_begin_B inside each generator sub.
14 &bn_mul_add_words("bn_mul_add_words");
15 &bn_mul_words("bn_mul_words");
16 &bn_sqr_words("bn_sqr_words");
17 &bn_div_words("bn_div_words");
18 &bn_add_words("bn_add_words");
19 &bn_sub_words("bn_sub_words");
20 &bn_sub_part_words("bn_sub_part_words");
21
22 &asm_finish();
23
# bn_mul_add_words($name)
# Emit the routine that performs r[i] += a[i]*w over num words, propagating a
# one-word carry, and leaves the final carry in eax.
# Stack parameters (per the comments below): 0 = r, 1 = a, 2 = num, 3 = w.
# When $sse2 is set an MMX/SSE2 path is emitted ahead of the integer path and
# selected at run time; otherwise only the integer path is emitted.
24 sub bn_mul_add_words
25         {
26         local($name)=@_;
27
28         &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
29
           # Register roles for the SSE2 path only; the integer path below
           # assigns its own.
30         $r="eax";
31         $a="edx";
32         $c="ecx";
33
34         if ($sse2) {
                   # Run-time dispatch: if bit 26 of the first dword of
                   # OPENSSL_ia32cap_P is clear, branch to the integer path.
                   # picmeup presumably leaves the address of that symbol in
                   # eax (it is dereferenced on the next line) - confirm in
                   # x86asm.pl.
35                 &picmeup("eax","OPENSSL_ia32cap_P");
36                 &bt(&DWP(0,"eax"),26);
37                 &jnc(&label("maw_non_sse2"));
38
39                 &mov($r,&wparam(0));
40                 &mov($a,&wparam(1));
41                 &mov($c,&wparam(2));
42                 &movd("mm0",&wparam(3));        # mm0 = w
43                 &pxor("mm1","mm1");             # mm1 = carry_in
44                 &jmp(&label("maw_sse2_entry"));
45                 
           # Eight words per pass.  mm1 holds the running 64-bit sum; after
           # each store of its low half it is shifted right by 32 so that the
           # high half becomes the carry into the next word.
46         &set_label("maw_sse2_unrolled",16);
47                 &movd("mm3",&DWP(0,$r,"",0));   # mm3 = r[0]
48                 &paddq("mm1","mm3");            # mm1 = carry_in + r[0]
49                 &movd("mm2",&DWP(0,$a,"",0));   # mm2 = a[0]
50                 &pmuludq("mm2","mm0");          # mm2 = w*a[0]
51                 &movd("mm4",&DWP(4,$a,"",0));   # mm4 = a[1]
52                 &pmuludq("mm4","mm0");          # mm4 = w*a[1]
53                 &movd("mm6",&DWP(8,$a,"",0));   # mm6 = a[2]
54                 &pmuludq("mm6","mm0");          # mm6 = w*a[2]
55                 &movd("mm7",&DWP(12,$a,"",0));  # mm7 = a[3]
56                 &pmuludq("mm7","mm0");          # mm7 = w*a[3]
57                 &paddq("mm1","mm2");            # mm1 = carry_in + r[0] + w*a[0]
58                 &movd("mm3",&DWP(4,$r,"",0));   # mm3 = r[1]
59                 &paddq("mm3","mm4");            # mm3 = r[1] + w*a[1]
60                 &movd("mm5",&DWP(8,$r,"",0));   # mm5 = r[2]
61                 &paddq("mm5","mm6");            # mm5 = r[2] + w*a[2]
62                 &movd("mm4",&DWP(12,$r,"",0));  # mm4 = r[3]
63                 &paddq("mm7","mm4");            # mm7 = r[3] + w*a[3]
64                 &movd(&DWP(0,$r,"",0),"mm1");   # r[0] = low half of the sum
65                 &movd("mm2",&DWP(16,$a,"",0));  # mm2 = a[4]
66                 &pmuludq("mm2","mm0");          # mm2 = w*a[4]
67                 &psrlq("mm1",32);               # mm1 = carry0
68                 &movd("mm4",&DWP(20,$a,"",0));  # mm4 = a[5]
69                 &pmuludq("mm4","mm0");          # mm4 = w*a[5]
70                 &paddq("mm1","mm3");            # mm1 = carry0 + r[1] + w*a[1]
71                 &movd("mm6",&DWP(24,$a,"",0));  # mm6 = a[6]
72                 &pmuludq("mm6","mm0");          # mm6 = w*a[6]
73                 &movd(&DWP(4,$r,"",0),"mm1");   # r[1] = low half of the sum
74                 &psrlq("mm1",32);               # mm1 = carry1
75                 &movd("mm3",&DWP(28,$a,"",0));  # mm3 = a[7]
76                 &add($a,32);                    # all eight a[] words are loaded; advance a
77                 &pmuludq("mm3","mm0");          # mm3 = w*a[7]
78                 &paddq("mm1","mm5");            # mm1 = carry1 + r[2] + w*a[2]
79                 &movd("mm5",&DWP(16,$r,"",0));  # mm5 = r[4]
80                 &paddq("mm2","mm5");            # mm2 = r[4] + w*a[4]
81                 &movd(&DWP(8,$r,"",0),"mm1");   # r[2] = low half of the sum
82                 &psrlq("mm1",32);               # mm1 = carry2
83                 &paddq("mm1","mm7");            # mm1 = carry2 + r[3] + w*a[3]
84                 &movd("mm5",&DWP(20,$r,"",0));  # mm5 = r[5]
85                 &paddq("mm4","mm5");            # mm4 = r[5] + w*a[5]
86                 &movd(&DWP(12,$r,"",0),"mm1");  # r[3] = low half of the sum
87                 &psrlq("mm1",32);               # mm1 = carry3
88                 &paddq("mm1","mm2");            # mm1 = carry3 + r[4] + w*a[4]
89                 &movd("mm5",&DWP(24,$r,"",0));  # mm5 = r[6]
90                 &paddq("mm6","mm5");            # mm6 = r[6] + w*a[6]
91                 &movd(&DWP(16,$r,"",0),"mm1");  # r[4] = low half of the sum
92                 &psrlq("mm1",32);               # mm1 = carry4
93                 &paddq("mm1","mm4");            # mm1 = carry4 + r[5] + w*a[5]
94                 &movd("mm5",&DWP(28,$r,"",0));  # mm5 = r[7]
95                 &paddq("mm3","mm5");            # mm3 = r[7] + w*a[7]
96                 &movd(&DWP(20,$r,"",0),"mm1");  # r[5] = low half of the sum
97                 &psrlq("mm1",32);               # mm1 = carry5
98                 &paddq("mm1","mm6");            # mm1 = carry5 + r[6] + w*a[6]
99                 &movd(&DWP(24,$r,"",0),"mm1");  # r[6] = low half of the sum
100                 &psrlq("mm1",32);               # mm1 = carry6
101                 &paddq("mm1","mm3");            # mm1 = carry6 + r[7] + w*a[7]
102                 &movd(&DWP(28,$r,"",0),"mm1");  # r[7] = low half of the sum
103                 &lea($r,&DWP(32,$r));
104                 &psrlq("mm1",32);               # mm1 = carry_out
105
106                 &sub($c,8);
107                 &jz(&label("maw_sse2_exit"));
            # Entry point: take the unrolled pass while the count has any bit
            # above the low three set (i.e. at least 8 words remain).
            # NOTE(review): otherwise control falls into the one-word loop
            # without a zero test; with num == 0 on entry that loop would
            # decrement past zero.  Presumably callers never pass num == 0
            # here - confirm.
108         &set_label("maw_sse2_entry");
109                 &test($c,0xfffffff8);
110                 &jnz(&label("maw_sse2_unrolled"));
111
            # One word per pass for the remaining (fewer than 8) words.
112         &set_label("maw_sse2_loop",4);
113                 &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
114                 &movd("mm3",&DWP(0,$r));        # mm3 = r[i]
115                 &pmuludq("mm2","mm0");          # a[i] *= w
116                 &lea($a,&DWP(4,$a));
117                 &paddq("mm1","mm3");            # carry += r[i]
118                 &paddq("mm1","mm2");            # carry += a[i]*w
119                 &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
120                 &sub($c,1);                     # sets ZF for the jnz below
121                 &psrlq("mm1",32);               # carry = carry_high
122                 &lea($r,&DWP(4,$r));            # lea leaves the flags from sub intact
123                 &jnz(&label("maw_sse2_loop"));
124         &set_label("maw_sse2_exit");
125                 &movd("eax","mm1");             # c = carry_out
126                 &emms();
127                 &ret();
128
129         &set_label("maw_non_sse2",16);
130         }
131
            # Integer path.  function_begin_B was used above, so the four
            # register pushes are issued by hand here.
132         # function_begin prologue
133         &push("ebp");
134         &push("ebx");
135         &push("esi");
136         &push("edi");
137
138         &comment("");
139         $Low="eax";
140         $High="edx";
141         $a="ebx";
142         $w="ebp";
143         $r="edi";
144         $c="esi";
145
146         &xor($c,$c);            # clear carry
147         &mov($r,&wparam(0));    # r
148
149         &mov("ecx",&wparam(2)); # num
150         &mov($a,&wparam(1));    # a
151
152         &and("ecx",0xfffffff8); # num / 8
153         &mov($w,&wparam(3));    # w (mov leaves the flags from the and intact)
154
            # The pushed slot is not read anywhere in this sub; it is simply
            # popped again before function_end.  push does not alter the
            # flags, so the jz below still tests the result of the and.
155         &push("ecx");           # Up the stack for a tmp variable
156
157         &jz(&label("maw_finish"));
158
159         &set_label("maw_loop",16);
160
            # Eight words per pass; $i is the byte offset of the word.
161         for ($i=0; $i<32; $i+=4)
162                 {
163                 &comment("Round $i");
164
165                  &mov("eax",&DWP($i,$a));       # *a
166                 &mul($w);                       # *a * w
167                 &add("eax",$c);                 # L(t)+= c
168                 &adc("edx",0);                  # H(t)+=carry
169                  &add("eax",&DWP($i,$r));       # L(t)+= *r
170                 &adc("edx",0);                  # H(t)+=carry
171                  &mov(&DWP($i,$r),"eax");       # *r= L(t);
172                 &mov($c,"edx");                 # c=  H(t);
173                 }
174
175         &comment("");
            # lea (not add) advances the pointers so the flags from the sub
            # survive until the jnz.
176         &sub("ecx",8);
177         &lea($a,&DWP(32,$a));
178         &lea($r,&DWP(32,$r));
179         &jnz(&label("maw_loop"));
180
181         &set_label("maw_finish",0);
182         &mov("ecx",&wparam(2)); # get num
183         &and("ecx",7);
184         &jnz(&label("maw_finish2"));    # helps branch prediction
185         &jmp(&label("maw_end"));
186
            # Up to seven leftover words.  The final round needs neither the
            # dec nor the jz because it falls straight into maw_end.
187         &set_label("maw_finish2",1);
188         for ($i=0; $i<7; $i++)
189                 {
190                 &comment("Tail Round $i");
191                  &mov("eax",&DWP($i*4,$a));     # *a
192                 &mul($w);                       # *a * w
193                 &add("eax",$c);                 # L(t)+=c
194                 &adc("edx",0);                  # H(t)+=carry
195                  &add("eax",&DWP($i*4,$r));     # L(t)+= *r
196                 &adc("edx",0);                  # H(t)+=carry
197                  &dec("ecx") if ($i != 7-1);    # sets ZF; the two movs below leave it intact
198                 &mov(&DWP($i*4,$r),"eax");      # *r= L(t);
199                  &mov($c,"edx");                # c=  H(t);
200                 &jz(&label("maw_end")) if ($i != 7-1);
201                 }
202         &set_label("maw_end",0);
203         &mov("eax",$c);         # return value = final carry
204
205         &pop("ecx");    # discard the tmp slot pushed above
206
207         &function_end($name);
208         }
209
# bn_mul_words($name)
# Emit the routine that stores the low word of a[i]*w + carry into r[i] for
# num words, carrying the high word forward, and leaves the final carry in
# eax.
# Stack parameters (per the comments below): 0 = r, 1 = a, 2 = num, 3 = w.
# When $sse2 is set an MMX/SSE2 path is emitted first and selected at run
# time by testing bit 26 of the first dword of OPENSSL_ia32cap_P.
210 sub bn_mul_words
211         {
212         local($name)=@_;
213
214         &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
215
            # Register roles for the SSE2 path only.
216         $r="eax";
217         $a="edx";
218         $c="ecx";
219
220         if ($sse2) {
221                 &picmeup("eax","OPENSSL_ia32cap_P");
222                 &bt(&DWP(0,"eax"),26);
223                 &jnc(&label("mw_non_sse2"));    # bit clear -> integer path
224
225                 &mov($r,&wparam(0));
226                 &mov($a,&wparam(1));
227                 &mov($c,&wparam(2));
228                 &movd("mm0",&wparam(3));        # mm0 = w
229                 &pxor("mm1","mm1");             # mm1 = carry = 0
230
            # One word per pass.
            # NOTE(review): the loop body runs before the count is tested, so
            # num == 0 on entry would decrement past zero.  Presumably
            # callers never pass num == 0 on this path - confirm.
231         &set_label("mw_sse2_loop",16);
232                 &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
233                 &pmuludq("mm2","mm0");          # a[i] *= w
234                 &lea($a,&DWP(4,$a));
235                 &paddq("mm1","mm2");            # carry += a[i]*w
236                 &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
237                 &sub($c,1);                     # sets ZF for the jnz below
238                 &psrlq("mm1",32);               # carry = carry_high
239                 &lea($r,&DWP(4,$r));            # lea leaves the flags from sub intact
240                 &jnz(&label("mw_sse2_loop"));
241
242                 &movd("eax","mm1");             # return carry
243                 &emms();
244                 &ret();
245         &set_label("mw_non_sse2",16);
246         }
247
            # Integer path.  function_begin_B was used above, so the four
            # register pushes are issued by hand here.
248         # function_begin prologue
249         &push("ebp");
250         &push("ebx");
251         &push("esi");
252         &push("edi");
253
254         &comment("");
255         $Low="eax";
256         $High="edx";
257         $a="ebx";
258         $w="ecx";
259         $r="edi";
260         $c="esi";
261         $num="ebp";
262
263         &xor($c,$c);            # clear carry
264         &mov($r,&wparam(0));    # r
265         &mov($a,&wparam(1));    # a
266         &mov($num,&wparam(2));  # num
267         &mov($w,&wparam(3));    # w
268
269         &and($num,0xfffffff8);  # num / 8
270         &jz(&label("mw_finish"));
271
            # Eight words per pass; $i is the byte offset of the word.
272         &set_label("mw_loop",0);
273         for ($i=0; $i<32; $i+=4)
274                 {
275                 &comment("Round $i");
276
277                  &mov("eax",&DWP($i,$a,"",0));  # *a
278                 &mul($w);                       # *a * w
279                 &add("eax",$c);                 # L(t)+=c
280                  # XXX
281
282                 &adc("edx",0);                  # H(t)+=carry
283                  &mov(&DWP($i,$r,"",0),"eax");  # *r= L(t);
284
285                 &mov($c,"edx");                 # c=  H(t);
286                 }
287
288         &comment("");
289         &add($a,32);
290         &add($r,32);
291         &sub($num,8);           # last flag-setting instruction before the jz
292         &jz(&label("mw_finish"));
293         &jmp(&label("mw_loop"));
294
295         &set_label("mw_finish",0);
296         &mov($num,&wparam(2));  # get num
297         &and($num,7);
298         &jnz(&label("mw_finish2"));
299         &jmp(&label("mw_end"));
300
            # Up to seven leftover words.  The final round needs neither the
            # dec nor the jz because it falls straight into mw_end.
301         &set_label("mw_finish2",1);
302         for ($i=0; $i<7; $i++)
303                 {
304                 &comment("Tail Round $i");
305                  &mov("eax",&DWP($i*4,$a,"",0));# *a
306                 &mul($w);                       # *a * w
307                 &add("eax",$c);                 # L(t)+=c
308                  # XXX
309                 &adc("edx",0);                  # H(t)+=carry
310                  &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
311                 &mov($c,"edx");                 # c=  H(t);
312                  &dec($num) if ($i != 7-1);
313                 &jz(&label("mw_end")) if ($i != 7-1);
314                 }
315         &set_label("mw_end",0);
316         &mov("eax",$c);         # return value = final carry
317
318         &function_end($name);
319         }
320
# bn_sqr_words($name)
# Emit the routine that squares each of num input words a[i] and stores the
# two-word product at r[2*i] (low) and r[2*i+1] (high).
# Stack parameters: 0 = r, 1 = a, 2 = num.
# The emitted code does not load a return value.
# When $sse2 is set an MMX/SSE2 path is emitted first and selected at run
# time by testing bit 26 of the first dword of OPENSSL_ia32cap_P.
321 sub bn_sqr_words
322         {
323         local($name)=@_;
324
325         &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
326
            # Register roles for the SSE2 path only.
327         $r="eax";
328         $a="edx";
329         $c="ecx";
330
331         if ($sse2) {
332                 &picmeup("eax","OPENSSL_ia32cap_P");
333                 &bt(&DWP(0,"eax"),26);
334                 &jnc(&label("sqr_non_sse2"));   # bit clear -> integer path
335
336                 &mov($r,&wparam(0));
337                 &mov($a,&wparam(1));
338                 &mov($c,&wparam(2));
339
            # One input word per pass; the full 64-bit product is stored with
            # a single movq.
            # NOTE(review): the loop body runs before the count is tested, so
            # num == 0 on entry would decrement past zero.  Presumably
            # callers never pass num == 0 on this path - confirm.
340         &set_label("sqr_sse2_loop",16);
341                 &movd("mm0",&DWP(0,$a));        # mm0 = a[i]
342                 &pmuludq("mm0","mm0");          # a[i] *= a[i]
343                 &lea($a,&DWP(4,$a));            # a++
344                 &movq(&QWP(0,$r),"mm0");        # r[i] = a[i]*a[i]
345                 &sub($c,1);                     # sets ZF for the jnz below
346                 &lea($r,&DWP(8,$r));            # r += 2
347                 &jnz(&label("sqr_sse2_loop"));
348
349                 &emms();
350                 &ret();
351         &set_label("sqr_non_sse2",16);
352         }
353
            # Integer path.  function_begin_B was used above, so the four
            # register pushes are issued by hand here.
354         # function_begin prologue
355         &push("ebp");
356         &push("ebx");
357         &push("esi");
358         &push("edi");
359
360         &comment("");
361         $r="esi";
362         $a="edi";
363         $num="ebx";
364
365         &mov($r,&wparam(0));    # r
366         &mov($a,&wparam(1));    # a
367         &mov($num,&wparam(2));  # num
368
369         &and($num,0xfffffff8);  # num / 8
370         &jz(&label("sw_finish"));
371
            # Eight input words per pass.  $i is the byte offset into a[];
            # the matching result pair sits at byte offsets $i*2 and $i*2+4.
372         &set_label("sw_loop",0);
373         for ($i=0; $i<32; $i+=4)
374                 {
375                 &comment("Round $i");
376                 &mov("eax",&DWP($i,$a,"",0));   # *a
377                  # XXX
378                 &mul("eax");                    # *a * *a
379                 &mov(&DWP($i*2,$r,"",0),"eax"); # low word of the square
380                  &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square
381                 }
382
383         &comment("");
384         &add($a,32);
385         &add($r,64);            # r advances twice as fast as a
386         &sub($num,8);
387         &jnz(&label("sw_loop"));
388
389         &set_label("sw_finish",0);
390         &mov($num,&wparam(2));  # get num
391         &and($num,7);
392         &jz(&label("sw_end"));
393
            # Up to seven leftover words ($i is a word index here).  The dec
            # sets ZF and the following mov leaves it intact for the jz.
394         for ($i=0; $i<7; $i++)
395                 {
396                 &comment("Tail Round $i");
397                 &mov("eax",&DWP($i*4,$a,"",0)); # *a
398                  # XXX
399                 &mul("eax");                    # *a * *a
400                 &mov(&DWP($i*8,$r,"",0),"eax"); # low word of the square
401                  &dec($num) if ($i != 7-1);
402                 &mov(&DWP($i*8+4,$r,"",0),"edx");
403                  &jz(&label("sw_end")) if ($i != 7-1);
404                 }
405         &set_label("sw_end",0);
406
407         &function_end($name);
408         }
409
# bn_div_words($name)
# Emit a three-parameter routine that loads parameter 0 into edx, parameter 1
# into eax and parameter 2 into ecx, executes a single "div ecx" and returns.
# On x86, div divides the 64-bit value edx:eax by the operand and leaves the
# quotient in eax (remainder in edx), so the quotient is what is returned.
# No registers are saved: function_begin_B/function_end_B are used and only
# eax, ecx and edx are touched.
# NOTE(review): div faults when the divisor is zero or the quotient does not
# fit in 32 bits; nothing here guards against that, so callers presumably
# guarantee parameter 0 < parameter 2 - confirm.
410 sub bn_div_words
411         {
412         local($name)=@_;
413
414         &function_begin_B($name,"");
415         &mov("edx",&wparam(0)); # high word of the dividend
416         &mov("eax",&wparam(1)); # low word of the dividend
417         &mov("ecx",&wparam(2)); # divisor
418         &div("ecx");
419         &ret();
420         &function_end_B($name);
421         }
422
# bn_add_words($name)
# Emit the routine that computes r[i] = a[i] + b[i] + carry for num words.
# Stack parameters: 0 = r, 1 = a, 2 = b, 3 = num.
# The carry lives in eax ($c) throughout, so the final carry is already in
# the return register when the routine ends.
423 sub bn_add_words
424         {
425         local($name)=@_;
426
427         &function_begin($name,"");
428
429         &comment("");
430         $a="esi";
431         $b="edi";
432         $c="eax";
433         $r="ebx";
434         $tmp1="ecx";
435         $tmp2="edx";
436         $num="ebp";
437
438         &mov($r,&wparam(0));    # get r
439          &mov($a,&wparam(1));   # get a
440         &mov($b,&wparam(2));    # get b
441          &mov($num,&wparam(3)); # get num
442         &xor($c,$c);            # clear carry
443          &and($num,0xfffffff8); # num / 8
444
445         &jz(&label("aw_finish"));
446
            # Eight words per pass.  Each round performs two additions and
            # collects the carry of each into $c:
            #   add tmp1,c ; mov c,0 ; adc c,c   -> c = carry of (a + c)
            #   add tmp1,tmp2 ; adc c,0          -> c += carry of (+ b)
            # "mov c,0" is used to zero $c because mov leaves CF intact.
447         &set_label("aw_loop",0);
448         for ($i=0; $i<8; $i++)
449                 {
450                 &comment("Round $i");
451
452                 &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
453                  &mov($tmp2,&DWP($i*4,$b,"",0));        # *b
454                 &add($tmp1,$c);
455                  &mov($c,0);
456                 &adc($c,$c);
457                  &add($tmp1,$tmp2);
458                 &adc($c,0);
459                  &mov(&DWP($i*4,$r,"",0),$tmp1);        # *r
460                 }
461
462         &comment("");
463         &add($a,32);
464          &add($b,32);
465         &add($r,32);
466          &sub($num,8);          # last flag-setting instruction before the jnz
467         &jnz(&label("aw_loop"));
468
469         &set_label("aw_finish",0);
470         &mov($num,&wparam(3));  # get num
471         &and($num,7);
472          &jz(&label("aw_end"));
473
            # Up to seven leftover words.  The carry is already captured in
            # $c when dec runs; the store that follows leaves ZF intact for
            # the jz.  The last round falls straight into aw_end.
474         for ($i=0; $i<7; $i++)
475                 {
476                 &comment("Tail Round $i");
477                 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
478                  &mov($tmp2,&DWP($i*4,$b,"",0));# *b
479                 &add($tmp1,$c);
480                  &mov($c,0);
481                 &adc($c,$c);
482                  &add($tmp1,$tmp2);
483                 &adc($c,0);
484                  &dec($num) if ($i != 6);
485                 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
486                  &jz(&label("aw_end")) if ($i != 6);
487                 }
488         &set_label("aw_end",0);
489
490 #       &mov("eax",$c);         # $c is "eax"
491
492         &function_end($name);
493         }
494
# bn_sub_words($name)
# Emit the routine that computes r[i] = a[i] - b[i] - borrow for num words.
# Stack parameters: 0 = r, 1 = a, 2 = b, 3 = num.
# The borrow lives in eax ($c) throughout, so the final borrow is already in
# the return register when the routine ends.
# The labels reuse the "aw_" names also used by bn_add_words; presumably
# &label scopes names per emitted function - confirm in x86asm.pl.
495 sub bn_sub_words
496         {
497         local($name)=@_;
498
499         &function_begin($name,"");
500
501         &comment("");
502         $a="esi";
503         $b="edi";
504         $c="eax";
505         $r="ebx";
506         $tmp1="ecx";
507         $tmp2="edx";
508         $num="ebp";
509
510         &mov($r,&wparam(0));    # get r
511          &mov($a,&wparam(1));   # get a
512         &mov($b,&wparam(2));    # get b
513          &mov($num,&wparam(3)); # get num
514         &xor($c,$c);            # clear carry
515          &and($num,0xfffffff8); # num / 8
516
517         &jz(&label("aw_finish"));
518
            # Eight words per pass.  Each round performs two subtractions and
            # collects the borrow (CF) of each into $c:
            #   sub tmp1,c ; mov c,0 ; adc c,c   -> c = borrow of (a - c)
            #   sub tmp1,tmp2 ; adc c,0          -> c += borrow of (- b)
            # "mov c,0" is used to zero $c because mov leaves CF intact.
519         &set_label("aw_loop",0);
520         for ($i=0; $i<8; $i++)
521                 {
522                 &comment("Round $i");
523
524                 &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
525                  &mov($tmp2,&DWP($i*4,$b,"",0));        # *b
526                 &sub($tmp1,$c);
527                  &mov($c,0);
528                 &adc($c,$c);
529                  &sub($tmp1,$tmp2);
530                 &adc($c,0);
531                  &mov(&DWP($i*4,$r,"",0),$tmp1);        # *r
532                 }
533
534         &comment("");
535         &add($a,32);
536          &add($b,32);
537         &add($r,32);
538          &sub($num,8);          # last flag-setting instruction before the jnz
539         &jnz(&label("aw_loop"));
540
541         &set_label("aw_finish",0);
542         &mov($num,&wparam(3));  # get num
543         &and($num,7);
544          &jz(&label("aw_end"));
545
            # Up to seven leftover words.  The borrow is already captured in
            # $c when dec runs; the store that follows leaves ZF intact for
            # the jz.  The last round falls straight into aw_end.
546         for ($i=0; $i<7; $i++)
547                 {
548                 &comment("Tail Round $i");
549                 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
550                  &mov($tmp2,&DWP($i*4,$b,"",0));# *b
551                 &sub($tmp1,$c);
552                  &mov($c,0);
553                 &adc($c,$c);
554                  &sub($tmp1,$tmp2);
555                 &adc($c,0);
556                  &dec($num) if ($i != 6);
557                 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
558                  &jz(&label("aw_end")) if ($i != 6);
559                 }
560         &set_label("aw_end",0);
561
562 #       &mov("eax",$c);         # $c is "eax"
563
564         &function_end($name);
565         }
566
# bn_sub_part_words($name)
# Emit a subtract routine taking five stack parameters: 0 = r, 1 = a, 2 = b,
# 3 = num and 4 = dl (names as used in the comments below).
# Phase 1: r[i] = a[i] - b[i] - borrow for num words, using the same
#          per-word sequence as the bn_sub_words generator.
# Phase 2: selected by the sign of dl, continuing from where phase 1 stopped:
#   dl == 0: nothing further;
#   dl <  0: r[i] = 0 - b[i] - borrow for -dl further words;
#   dl >  0: r[i] = a[i] - borrow for dl further words, switching to a plain
#            copy of a[] into r[] as soon as a word produces no borrow.
# The borrow lives in eax ($c) and is the value left there on return.
# The phase-1 labels reuse the "aw_" names from bn_add_words; presumably
# &label scopes names per emitted function - confirm in x86asm.pl.
567 sub bn_sub_part_words
568         {
569         local($name)=@_;
570
571         &function_begin($name,"");
572
573         &comment("");
574         $a="esi";
575         $b="edi";
576         $c="eax";
577         $r="ebx";
578         $tmp1="ecx";
579         $tmp2="edx";
580         $num="ebp";
581
582         &mov($r,&wparam(0));    # get r
583          &mov($a,&wparam(1));   # get a
584         &mov($b,&wparam(2));    # get b
585          &mov($num,&wparam(3)); # get num
586         &xor($c,$c);            # clear carry
587          &and($num,0xfffffff8); # num / 8
588
589         &jz(&label("aw_finish"));
590
            # Phase 1, eight words per pass.  "mov c,0" zeroes $c without
            # touching CF, so "adc c,c" then captures the borrow of the
            # preceding sub.
591         &set_label("aw_loop",0);
592         for ($i=0; $i<8; $i++)
593                 {
594                 &comment("Round $i");
595
596                 &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
597                  &mov($tmp2,&DWP($i*4,$b,"",0));        # *b
598                 &sub($tmp1,$c);
599                  &mov($c,0);
600                 &adc($c,$c);
601                  &sub($tmp1,$tmp2);
602                 &adc($c,0);
603                  &mov(&DWP($i*4,$r,"",0),$tmp1);        # *r
604                 }
605
606         &comment("");
607         &add($a,32);
608          &add($b,32);
609         &add($r,32);
610          &sub($num,8);
611         &jnz(&label("aw_loop"));
612
613         &set_label("aw_finish",0);
614         &mov($num,&wparam(3));  # get num
615         &and($num,7);
616          &jz(&label("aw_end"));
617
            # Phase 1 tail, up to seven words.  Unlike the other generators
            # in this file, every round addresses offset 0 and then advances
            # a, b and r by one word, so the pointers are correct for phase 2
            # wherever the tail exits.  The borrow is already in $c when the
            # adds and the dec rewrite the flags; the jz tests the dec.
618         for ($i=0; $i<7; $i++)
619                 {
620                 &comment("Tail Round $i");
621                 &mov($tmp1,&DWP(0,$a,"",0));    # *a
622                  &mov($tmp2,&DWP(0,$b,"",0));# *b
623                 &sub($tmp1,$c);
624                  &mov($c,0);
625                 &adc($c,$c);
626                  &sub($tmp1,$tmp2);
627                 &adc($c,0);
628                 &mov(&DWP(0,$r,"",0),$tmp1);    # *r
629                 &add($a, 4);
630                 &add($b, 4);
631                 &add($r, 4);
632                  &dec($num) if ($i != 6);
633                  &jz(&label("aw_end")) if ($i != 6);
634                 }
635         &set_label("aw_end",0);
636
            # Phase 2 dispatch on dl.  The zero test is made twice: once
            # directly on the stack slot and once after loading it into $num.
637         &cmp(&wparam(4),0);
638         &je(&label("pw_end"));
639
640         &mov($num,&wparam(4));  # get dl
641         &cmp($num,0);
642         &je(&label("pw_end"));
643         &jge(&label("pw_pos"));
644
            # dl < 0: negate dl to obtain the word count.
645         &comment("pw_neg");
646         &mov($tmp2,0);
647         &sub($tmp2,$num);
648         &mov($num,$tmp2);
649         &and($num,0xfffffff8);  # num / 8
650         &jz(&label("pw_neg_finish"));
651
            # Eight words per pass of r[i] = 0 - b[i] - borrow.
652         &set_label("pw_neg_loop",0);
653         for ($i=0; $i<8; $i++)
654         {
655             &comment("dl<0 Round $i");
656
657             &mov($tmp1,0);
658             &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
659             &sub($tmp1,$c);
660             &mov($c,0);
661             &adc($c,$c);
662             &sub($tmp1,$tmp2);
663             &adc($c,0);
664             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
665         }
666             
667         &comment("");
668         &add($b,32);
669         &add($r,32);
670         &sub($num,8);
671         &jnz(&label("pw_neg_loop"));
672             
            # Remaining (-dl) & 7 words.
673         &set_label("pw_neg_finish",0);
674         &mov($tmp2,&wparam(4)); # get dl
675         &mov($num,0);
676         &sub($num,$tmp2);
677         &and($num,7);
678         &jz(&label("pw_end"));
679             
680         for ($i=0; $i<7; $i++)
681         {
682             &comment("dl<0 Tail Round $i");
683             &mov($tmp1,0);
684             &mov($tmp2,&DWP($i*4,$b,"",0));# *b
685             &sub($tmp1,$c);
686             &mov($c,0);
687             &adc($c,$c);
688             &sub($tmp1,$tmp2);
689             &adc($c,0);
690             &dec($num) if ($i != 6);
691             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
692             &jz(&label("pw_end")) if ($i != 6);
693         }
694
695         &jmp(&label("pw_end"));
696         
            # dl > 0: $num still holds dl here.
697         &set_label("pw_pos",0);
698         
699         &and($num,0xfffffff8);  # num / 8
700         &jz(&label("pw_pos_finish"));
701
            # Eight words per pass of r[i] = a[i] - borrow.  $c is left
            # untouched while the borrow keeps propagating.  As soon as a
            # word does not borrow, jnc jumps into the matching position of
            # the copy loop below (pw_nc$i).  The store between sub and jnc
            # is a mov and leaves CF intact.
702         &set_label("pw_pos_loop",0);
703
704         for ($i=0; $i<8; $i++)
705         {
706             &comment("dl>0 Round $i");
707
708             &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
709             &sub($tmp1,$c);
710             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
711             &jnc(&label("pw_nc".$i));
712         }
713             
714         &comment("");
715         &add($a,32);
716         &add($r,32);
717         &sub($num,8);
718         &jnz(&label("pw_pos_loop"));
719             
720         &set_label("pw_pos_finish",0);
721         &mov($num,&wparam(4));  # get dl
722         &and($num,7);
723         &jz(&label("pw_end"));
724             
            # Remaining dl & 7 words, still subtracting the borrow; jnc
            # switches to the matching position of the copy tail below.
725         for ($i=0; $i<7; $i++)
726         {
727             &comment("dl>0 Tail Round $i");
728             &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
729             &sub($tmp1,$c);
730             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
731             &jnc(&label("pw_tail_nc".$i));
732             &dec($num) if ($i != 6);
733             &jz(&label("pw_end")) if ($i != 6);
734         }
            # All seven tail rounds borrowed: report a borrow of 1.
735         &mov($c,1);
736         &jmp(&label("pw_end"));
737
            # Copy loop, entered only through the pw_nc$i labels.  Each label
            # sits after the copy of word $i, so the word already stored by
            # the borrowing path is not copied again.
738         &set_label("pw_nc_loop",0);
739         for ($i=0; $i<8; $i++)
740         {
741             &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
742             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
743             &set_label("pw_nc".$i,0);
744         }
745             
746         &comment("");
747         &add($a,32);
748         &add($r,32);
749         &sub($num,8);
750         &jnz(&label("pw_nc_loop"));
751             
752         &mov($num,&wparam(4));  # get dl
753         &and($num,7);
754         &jz(&label("pw_nc_end"));
755             
            # Copy tail; the pw_tail_nc$i labels are the entry points used by
            # the borrowing tail above.
756         for ($i=0; $i<7; $i++)
757         {
758             &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
759             &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
760             &set_label("pw_tail_nc".$i,0);
761             &dec($num) if ($i != 6);
762             &jz(&label("pw_nc_end")) if ($i != 6);
763         }
764
            # Reached only via the copy path, i.e. the borrow was absorbed.
765         &set_label("pw_nc_end",0);
766         &mov($c,0);
767
768         &set_label("pw_end",0);
769
770 #       &mov("eax",$c);         # $c is "eax"
771
772         &function_end($name);
773         }
774