Support for IA-32 SSE2 instruction set.
[openssl.git] / crypto / perlasm / x86unix.pl
1 #!/usr/local/bin/perl
2
3 package x86unix;
4
# Generator state shared by every routine in this package.
$label="L000";		# prefix of the next local label; advanced with string ++
$const="";		# text queued for .rodata, flushed by main'file_end
$constl=0;		# counter used to number the Lstring constants
$stack=0;		# running stack depth in bytes, adjusted by push/pop helpers
$com_end="";		# comment terminator; only the cpp flavour sets a real one

# Flavour-dependent settings, selected by flags that live in package main.
$align=($main'aout)?"4":"16";
$under=($main'aout)?"_":"";
$com_start=($main'sol)?"/":"#";
12
# Accessors for the output buffer and the external-label list.

# Throw away everything generated so far.
sub main::asm_init_output {
    @out = ();
}

# Return the list of text fragments generated so far.
sub main::asm_get_output {
    return @out;
}

# Return every name recorded through main::external_label.
sub main::get_labels {
    return @labels;
}

# Append the given name(s) to the external-label list.
sub main::external_label {
    push @labels, @_;
}
17
# Overrides for the $main::cpp flavour: alignment is spelled as the word
# ALIGN, symbols get no underscore prefix and comments use C delimiters.
if ($main::cpp) {
    $align     = "ALIGN";
    $under     = "";
    $com_start = '/*';
    $com_end   = '*/';
}
25
# Low-byte register for each 32-bit and 16-bit name that has one.
%lb = (
    eax => '%al',  ax => '%al',
    ebx => '%bl',  bx => '%bl',
    ecx => '%cl',  cx => '%cl',
    edx => '%dl',  dx => '%dl',
);
35
# High-byte register for each 32-bit and 16-bit name that has one.
%hb = (
    eax => '%ah',  ax => '%ah',
    ebx => '%bh',  bx => '%bh',
    ecx => '%ch',  cx => '%ch',
    edx => '%dh',  dx => '%dh',
);
45
# Every register name the generator understands, mapped to its AT&T
# spelling, which is simply the name with a leading '%'.
%regs = map { ($_, "%$_") } (
    qw(eax ebx ecx edx esi edi ebp esp),            # 32-bit general purpose
    qw(mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7),            # mm registers
    qw(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7),    # xmm registers
);
73
# Register numbers that out1/out2 OR into hand-assembled opcodes.
# NOTE(review): no trailing "1;" is visible in this file, so this may be the
# last statement run at load time; keep it a plain (true) list assignment.
%reg_val = (
    eax => 0x00,
    ecx => 0x01,
    edx => 0x02,
    ebx => 0x03,
    esp => 0x04,
    ebp => 0x05,
    esi => 0x06,
    edi => 0x07,
);
84
# Return the low-byte register (e.g. "%al") for the register named in the
# first argument.  Dies if %lb has no entry for that name.
sub main::LB {
    my ($reg) = @_;
    my $byte_reg = $lb{$reg};
    defined($byte_reg) or die "$reg does not have a 'low byte'\n";
    return $byte_reg;
}
90
# Return the high-byte register (e.g. "%ah") for the register named in the
# first argument.  Dies if %hb has no entry for that name.
sub main::HB {
    my ($reg) = @_;
    my $byte_reg = $hb{$reg};
    defined($byte_reg) or die "$reg does not have a 'high byte'\n";
    return $byte_reg;
}
96
# Build an AT&T memory operand: disp(base), disp(base,index) or
# disp(base,index,scale).
#   $addr  - displacement; the first symbol-looking token in it gets the
#            $under prefix.  Omitted from the result when empty or "0".
#   $base  - base register; names found in %regs are translated.
#   $index - optional index register, translated the same way.
#   $scale - optional scale; only emitted when non-empty and non-zero.
# Returns the operand text.
sub main::DWP {
    my ($addr, $base, $index, $scale) = @_;

    $addr =~ s/(^|[+ \t])([A-Za-z_]+[A-Za-z0-9_]+)($|[+ \t])/$1$under$2$3/;
    $base  = "$regs{$base}"  if defined($regs{$base});
    $index = "$regs{$index}" if defined($regs{$index});

    # "ne 0" is a string comparison, so only a literal "0" is dropped.
    my $operand = (($addr ne "") && ($addr ne 0)) ? "$addr" : "";

    if ($index ne "") {
        $operand .= ($scale ne "" && $scale != 0)
            ? "($base,$index,$scale)"
            : "($base,$index)";
    }
    elsif ($base ne "") {
        $operand .= "($base)";
    }
    return $operand;
}
117
# Same operand text as main::DWP; this output flavour puts no size
# information into the memory operand itself.
sub main::QWP {
    return main::DWP(@_);
}
122
# Same operand text as main::DWP; this output flavour puts no size
# information into the memory operand itself.
sub main::BP {
    return main::DWP(@_);
}
127
# Pass-through: hands back its arguments unchanged.
# NOTE(review): the name presumably stands for "byte constant" - confirm.
sub main::BC {
    my @operand = @_;
    return @operand;
}
132
# Pass-through: hands back its arguments unchanged.
# NOTE(review): the name presumably stands for "dword constant" - confirm.
sub main::DWC {
    my @operand = @_;
    return @operand;
}
137
138 #sub main'BP
139 #       {
140 #       local($addr,$reg1,$reg2,$idx)=@_;
141 #
142 #       $ret="";
143 #
144 #       $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/;
145 #       $reg1="$regs{$reg1}" if defined($regs{$reg1});
146 #       $reg2="$regs{$reg2}" if defined($regs{$reg2});
147 #       $ret.=$addr if ($addr ne "") && ($addr ne 0);
148 #       if ($reg2 ne "")
149 #               { $ret.="($reg1,$reg2,$idx)"; }
150 #       else
151 #               { $ret.="($reg1)" }
152 #       return($ret);
153 #       }
154
# One wrapper per instruction.  Each appends the AT&T mnemonic to @out via
# out0 (no operand), out1 (one operand) or out2 (destination, source).

# Moves, logic and arithmetic taking (destination, source).
sub main::mov     { out2("movl", @_); }
sub main::movb    { out2("movb", @_); }
sub main::and     { out2("andl", @_); }
sub main::or      { out2("orl", @_); }
sub main::shl     { out2("sall", @_); }
sub main::shr     { out2("shrl", @_); }
sub main::xor     { out2("xorl", @_); }
sub main::xorb    { out2("xorb", @_); }
sub main::add     { out2("addl", @_); }
sub main::adc     { out2("adcl", @_); }
sub main::sub     { out2("subl", @_); }
sub main::rotl    { out2("roll", @_); }
sub main::rotr    { out2("rorl", @_); }
sub main::exch    { out2("xchg", @_); }
sub main::cmp     { out2("cmpl", @_); }
sub main::lea     { out2("leal", @_); }

# Single-operand multiply and divide.
sub main::mul     { out1("mull", @_); }
sub main::div     { out1("divl", @_); }

# Jumps; jmp_ptr emits the indirect form ("jmp *operand").
sub main::jmp     { out1("jmp", @_); }
sub main::jmp_ptr { out1p("jmp", @_); }
sub main::je      { out1("je", @_); }
sub main::jle     { out1("jle", @_); }
sub main::jne     { out1("jne", @_); }
sub main::jnz     { out1("jnz", @_); }
sub main::jz      { out1("jz", @_); }
sub main::jge     { out1("jge", @_); }
sub main::jl      { out1("jl", @_); }
sub main::ja      { out1("ja", @_); }
sub main::jae     { out1("jae", @_); }
sub main::jb      { out1("jb", @_); }
sub main::jbe     { out1("jbe", @_); }
sub main::jc      { out1("jc", @_); }
sub main::jnc     { out1("jnc", @_); }
sub main::jno     { out1("jno", @_); }

sub main::dec     { out1("decl", @_); }
sub main::inc     { out1("incl", @_); }

# Stack instructions also keep the $stack byte count up to date.
sub main::push    { out1("pushl", @_); $stack += 4; }
sub main::pop     { out1("popl", @_); $stack -= 4; }
sub main::pushf   { out0("pushf"); $stack += 4; }
sub main::popf    { out0("popf"); $stack -= 4; }

sub main::not     { out1("notl", @_); }

# Call targets get the $under prefix unless they are ".L" local labels.
sub main::call {
    my $prefix = ($_[0] =~ /^\.L/) ? '' : $under;
    out1("call", $prefix.$_[0]);
}

sub main::ret     { out0("ret"); }
sub main::nop     { out0("nop"); }
sub main::test    { out2("testl", @_); }
sub main::bt      { out2("btl", @_); }
sub main::leave   { out0("leave"); }
202
# MMX / SSE2 instructions.  Apart from emms they all take
# (destination, source) and go through out2 with the mnemonic unchanged.
sub main::emms    { out0("emms"); }
sub main::movd    { out2("movd", @_); }
sub main::movq    { out2("movq", @_); }
sub main::movdqu  { out2("movdqu", @_); }
sub main::movdqa  { out2("movdqa", @_); }
sub main::movdq2q { out2("movdq2q", @_); }
sub main::movq2dq { out2("movq2dq", @_); }
sub main::paddq   { out2("paddq", @_); }
sub main::pmuludq { out2("pmuludq", @_); }
sub main::psrlq   { out2("psrlq", @_); }
sub main::psllq   { out2("psllq", @_); }
sub main::pxor    { out2("pxor", @_); }
sub main::por     { out2("por", @_); }
sub main::pand    { out2("pand", @_); }
218
# Byte-swap a 32-bit register.  The bswapl instruction is new for the 486,
# so when $main::i386 is set it is emulated: exchange the two low bytes,
# rotate right by 16, exchange the two low bytes again.
sub main::bswap {
    return out1("bswapl", @_) unless $main::i386;

    &main::comment("bswapl @_");
    &main::exch(main::HB(@_), main::LB(@_));
    &main::rotr(@_, 16);
    &main::exch(main::HB(@_), main::LB(@_));
}
234
# Emit a two-operand instruction.
#   $name - AT&T mnemonic
#   $p1   - destination operand
#   $p2   - source operand (printed first, as AT&T syntax requires)
#
# Rotates and shifts of a 32-bit general register by exactly 1 are written
# as two raw .byte directives (the D1 /r one-bit forms) followed by a
# comment naming the instruction.  Everything else is emitted textually.
sub out2 {
    my ($name, $p1, $p2) = @_;
    my %special = (
        "roll", 0xD1C0, "rorl", 0xD1C8,
        "rcll", 0xD1D0, "rcrl", 0xD1D8,
        "shll", 0xD1E0, "shrl", 0xD1E8,
        "sarl", 0xD1F8,
    );

    # Only registers with an encoding in %reg_val may take the raw-byte
    # path.  Testing %regs instead would also accept mm/xmm names, whose
    # missing register number would silently encode %eax.
    if (defined($special{$name}) && defined($reg_val{$p1}) && ($p2 == 1)) {
        my $op = $special{$name} | $reg_val{$p1};
        push(@out, sprintf(".byte %d\n", ($op >> 8) & 0xff));
        # No newline here: main'comment finishes the line.
        push(@out, sprintf(".byte %d\t", $op & 0xff));
        &main'comment("$name ".&conv($p2)." ".&conv($p1));
        return;
    }

    push(@out, "\t$name\t");
    my $src = &conv($p2).",";
    push(@out, $src);

    # Pad with tabs so the destination lines up; the repeat count is
    # truncated to an integer and a negative count yields no padding.
    my $tabs = 4 - (length($src) + 9) / 8;
    push(@out, "\t" x $tabs);
    push(@out, &conv($p1)."\n");
}
267
# Emit a one-operand instruction.
#   $name - AT&T mnemonic
#   $p1   - the operand
#
# bswapl on a 32-bit general register is written as two raw .byte
# directives (0x0F, 0xC8+reg) followed by a comment naming the instruction.
# Everything else is emitted textually.
sub out1 {
    my ($name, $p1) = @_;
    my %special = ("bswapl", 0x0FC8);

    # Only registers with an encoding in %reg_val may take the raw-byte
    # path; mm/xmm names have none and must not be encoded as %eax.
    if (defined($special{$name}) && defined($reg_val{$p1})) {
        my $op = $special{$name} | $reg_val{$p1};
        push(@out, sprintf(".byte %d\n", ($op >> 8) & 0xff));
        # No newline here: main'comment finishes the line.
        push(@out, sprintf(".byte %d\t", $op & 0xff));
        # There is no second operand, so the comment names only $p1.
        &main'comment("$name ".&conv($p1));
        return;
    }

    push(@out, "\t$name\t".&conv($p1)."\n");
}
290
# Emit a one-operand instruction whose operand is used indirectly, i.e.
# written with a leading '*'.
sub out1p {
    my ($name, $p1) = @_;
    my $operand = conv($p1);
    push(@out, "\t$name\t*$operand\n");
}
298
# Emit an instruction that takes no operands.
sub out0 {
    my ($name) = @_;
    push(@out, "\t$name\n");
}
303
# Convert one operand to AT&T form.
#   - a name found in %regs becomes the '%'-prefixed register;
#   - a bare number (optional leading '-', or a 0x prefix) becomes an
#     immediate with a leading '$';
#   - anything else is returned untouched.
# NOTE(review): the first pattern accepts hex digits without a 0x prefix, so
# an operand made up only of the letters a-f would also be turned into an
# immediate.
sub conv {
    my ($operand) = @_;

    if (defined($regs{$operand})) {
        $operand = $regs{$operand};
    }

    $operand =~ s/^(-{0,1}[0-9A-Fa-f]+)$/\$$1/;
    $operand =~ s/^(0x[0-9A-Fa-f]+)$/\$$1/;
    return $operand;
}
316
# Emit the file prologue: a .file directive naming "<file>.s", a .version
# directive and the gcc2_compiled. marker label.
sub main::file {
    my ($file) = @_;

    push(@out, <<"EOF");
        .file   "$file.s"
        .version        "01.01"
gcc2_compiled.:
EOF
}
328
# Start a global function with the standard prologue.
# Records $func as an external label and emits the .text/.align/.globl
# header, a symbol-type line that depends on the output flavour, the
# function label and pushes of ebp, ebx, esi and edi.
# $stack is left at 20 (four saved registers plus, presumably, the return
# address), which is what main'wparam offsets are based on.
sub main::function_begin {
    my ($func) = @_;

    &main::external_label($func);
    $func = $under.$func;

    push(@out, <<"EOF");
.text
        .align $align
.globl $func
EOF

    my $type_line;
    if ($main::cpp) {
        $type_line = "\tTYPE($func,\@function)\n";
    }
    elsif ($main::gaswin) {
        $type_line = "\t.def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n";
    }
    else {
        $type_line = "\t.type\t$func,\@function\n";
    }
    push(@out, $type_line);
    push(@out, "$func:\n");

    push(@out, <<"EOF");
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi

EOF
    $stack = 20;
}
358
# Start a global function WITHOUT saving any registers.
# Emits the same header, symbol-type line and label as main'function_begin
# but no pushes, and leaves $stack at 4.
# A second argument is accepted but not used.
sub main::function_begin_B {
    my ($func, $extra) = @_;

    &main::external_label($func);
    $func = $under.$func;

    push(@out, <<"EOF");
.text
        .align $align
.globl $func
EOF

    my $type_line;
    if ($main::cpp) {
        $type_line = "\tTYPE($func,\@function)\n";
    }
    elsif ($main::gaswin) {
        $type_line = "\t.def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n";
    }
    else {
        $type_line = "\t.type    $func,\@function\n";
    }
    push(@out, $type_line);
    push(@out, "$func:\n");
    $stack = 4;
}
380
# Finish a function opened with main'function_begin.
# Pops edi, esi, ebx and ebp, returns, places the .L_<func>_end label and
# emits the flavour-specific size/alignment line plus an .ident line.
# Afterwards $stack is 0 and the per-function label map is empty; the
# $label counter itself is not reset.
sub main::function_end {
    my ($func) = @_;

    $func = $under.$func;

    push(@out, <<"EOF");
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.L_${func}_end:
EOF

    my $size_line;
    if ($main::cpp) {
        $size_line = "\tSIZE($func,.L_${func}_end-$func)\n";
    }
    elsif ($main::gaswin) {
        $size_line = "\t.align 4\n";
    }
    else {
        $size_line = "\t.size\t$func,.L_${func}_end-$func\n";
    }
    push(@out, $size_line);
    push(@out, ".ident       \"$func\"\n");

    $stack = 0;
    %label = ();
}
406
# Emit only the epilogue instructions: pop edi, esi, ebx, ebp and ret.
# No end label or size information is written and $stack is not touched.
# The function-name argument is accepted but not used.
sub main::function_end_A {
    my ($func) = @_;

    push(@out, <<"EOF");
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
EOF
}
420
# Emit only the bookkeeping that closes a function: the .L_<func>_end
# label, the flavour-specific size/alignment line and an .ident line.
# No instructions are written.  Afterwards $stack is 0 and the
# per-function label map is empty.
sub main::function_end_B {
    my ($func) = @_;

    $func = $under.$func;

    push(@out, ".L_${func}_end:\n");

    my $size_line;
    if ($main::cpp) {
        $size_line = "\tSIZE($func,.L_${func}_end-$func)\n";
    }
    elsif ($main::gaswin) {
        $size_line = "\t.align 4\n";
    }
    else {
        $size_line = "\t.size\t$func,.L_${func}_end-$func\n";
    }
    push(@out, $size_line);
    push(@out, ".ident       \"$func\"\n");

    $stack = 0;
    %label = ();
}
437
# Memory operand for word number $num counted from the current stack
# depth: ($stack + 4*$num)(%esp).
sub main::wparam {
    my ($num) = @_;
    my $offset = $stack + $num*4;
    return main::DWP($offset, "esp", "", 0);
}
444
# Reserve $num words on the stack: record the bytes in $stack, then emit
# the matching subtraction from esp.
sub main::stack_push {
    my ($num) = @_;
    my $bytes = $num*4;
    $stack += $bytes;
    &main::sub("esp", $bytes);
}
451
# Release $num words from the stack: remove the bytes from $stack, then
# emit the matching addition to esp.
sub main::stack_pop {
    my ($num) = @_;
    my $bytes = $num*4;
    $stack -= $bytes;
    &main::add("esp", $bytes);
}
458
# Memory operand for temporary word number $_[0] at a non-negative offset
# from esp: (4*n)(%esp).
sub main::swtmp {
    my ($slot) = @_;
    return main::DWP($slot*4, "esp", "", 0);
}
463
# Should use swtmp, which is above esp.  Linux can trash the stack above esp
465 #sub main'wtmp
466 #       {
467 #       local($num)=@_;
468 #
469 #       return(&main'DWP(-($num+1)*4,"esp","",0));
470 #       }
471
# Emit each argument as an assembler comment line, or a bare newline for
# an argument that is empty or all whitespace.
# When $main::elf is set nothing but a single newline is written, because
# GNU and SVR4 assemblers use different comment delimiters.
sub main::comment {
    if ($main::elf) {
        push(@out, "\n");
        return;
    }
    foreach my $text (@_) {
        my $line = ($text =~ /^\s*$/) ? "\n" : "\t$com_start $text $com_end\n";
        push(@out, $line);
    }
}
487
# Return the assembler label for the symbolic name in $_[0], creating it on
# first use as "." . $label . name.  Each creation advances $label with the
# string form of ++ (L000, L001, ...).
sub main::label {
    my ($name) = @_;

    unless (defined($label{$name})) {
        $label{$name} = ".${label}${name}";
        $label++;
    }
    return $label{$name};
}
497
# Place the label for the symbolic name in $_[0] at the current position,
# creating it first if it does not exist yet.  A non-zero second argument
# puts an ".align $align" line in front of the label.
sub main::set_label {
    my ($name, $want_align) = @_;

    unless (defined($label{$name})) {
        $label{$name} = ".${label}${name}";
        $label++;
    }
    if ($want_align != 0) {
        push(@out, ".align $align\n");
    }
    push(@out, "$label{$name}:\n");
}
508
# Emit the file epilogue.
#
# On ELF, if anything generated so far mentions an %mm or %xmm register,
# an OPENSSL_ia32cap common symbol is declared and a snippet is added to
# the .init section.  The snippet runs only while the first word of
# OPENSSL_ia32cap is still zero: it toggles bit 21 of the flags register to
# see whether cpuid is usable and, if so, stores the edx (with bit 0 set)
# and ecx results of cpuid leaf 1 into the two words of the symbol.
#
# Finally any constants queued in $const are written to .rodata and the
# queue is emptied.
sub main::file_end {
    # Try to detect whether SSE2 or MMX extensions were used.
    if ($main::elf && grep { /%[x]*mm[0-7]/i } @out) {
        push(@out, "\n.comm\t".$under."OPENSSL_ia32cap,8,4\n");
        push(@out, ".section\t.init\n");

        # Original author's note: crafting every SSE/MMX module with this
        # snippet may look wasteful, but it is small and only two modules
        # need it for the moment.
        &main::picmeup("edx", "OPENSSL_ia32cap");
        my $probe = <<"___";
                cmpl    \$0,(%edx)
                jne     1f
                movl    \$1,(%edx)
                pushf
                popl    %eax
                movl    %eax,%ecx
                xorl    \$1<<21,%eax
                pushl   %eax
                popf
                pushf
                popl    %eax
                xorl    %ecx,%eax
                bt      \$21,%eax
                jnc     1f
                pushl   %edi
                pushl   %ebx
                movl    %edx,%edi
                movl    \$1,%eax
                cpuid
                orl     \$1,%edx
                movl    %edx,0(%edi)
                movl    %ecx,4(%edi)
                popl    %ebx
                popl    %edi
        1:
___
        push(@out, $probe);
    }

    unless ($const eq "") {
        push(@out, ".section .rodata\n");
        push(@out, $const);
        $const = "";
    }
}
560
# Emit one .long directive holding all arguments, comma separated.
sub main::data_word {
    my $values = join(',', @_);
    push(@out, "\t.long\t$values\n");
}
565
# Emit an .align directive with the given value.
sub main::align {
    my ($boundary) = @_;
    push(@out, ".align $boundary\n");
}
570
571 # debug output functions: puts, putx, printf
572
# Debug helper: generate a call to puts() with the string in $_[0].
# The flags, edx, ecx and eax are saved around the call, and the string
# itself is queued in $const under a fresh Lstring<n> label.
sub main::puts {
    my ($text) = @_;

    pushvars();
    &main::push('$Lstring' . ++$constl);
    &main::call('puts');
    # Drop the single argument again.
    $stack -= 4;
    &main::add("esp", 4);
    popvars();

    $const .= "Lstring$constl:\n\t.string \"$text\"\n";
}
584
# Debug helper: generate a call to printf("%X", value) for the operand in
# $_[0].  The flags, edx, ecx and eax are saved around the call, and the
# format string is queued in $const under a fresh Lstring<n> label.
sub main::putx {
    my ($value) = @_;

    pushvars();
    &main::push($value);
    &main::push('$Lstring' . ++$constl);
    &main::call('printf');
    # Drop both arguments again.
    &main::add("esp", 8);
    $stack -= 8;
    popvars();

    $const .= "Lstring$constl:\n\t.string \"\%X\"\n";
}
597
# Debug helper: generate a call to printf().  $_[0] is the format string,
# the remaining arguments are operands.
# Arguments are pushed last to first.  The format string is queued in
# $const under a fresh Lstring<n> label.  An operand of the form N(%esp) is
# re-based by the number of bytes pushed since entry, so that it still
# addresses the same stack slot.
sub main::printf {
    my $entry_stack = $stack;

    pushvars();
    for my $i (reverse 0 .. $#_) {
        if ($i == 0) {
            # change this to support %s format strings
            &main::push('$Lstring' . ++$constl);
            $const .= "Lstring$constl:\n\t.string \"$_[$i]\"\n";
        }
        elsif ($_[$i] =~ /([0-9]*)\(%esp\)/) {
            &main::push(($1 + $stack - $entry_stack) . '(%esp)');
        }
        else {
            &main::push($_[$i]);
        }
    }
    &main::call('printf');

    # Drop every pushed argument again.
    my $arg_bytes = 4*@_;
    $stack -= $arg_bytes;
    &main::add("esp", $arg_bytes);
    popvars();
}
626
# Save the flags and then edx, ecx, eax on the stack; popvars restores them
# in the opposite order.
sub pushvars {
    &main::pushf();
    &main::push("edx");
    &main::push("ecx");
    &main::push("eax");
}
634
# Restore eax, ecx, edx and then the flags, i.e. undo pushvars.
sub popvars {
    &main::pop("eax");
    &main::pop("ecx");
    &main::pop("edx");
    &main::popf();
}
642
# Load the address of symbol $sym into register $dst.
#   - cpp flavour: emit both variants inside #if/#else, choosing the GOT
#     lookup when (ELF or SOL) and PIC are defined;
#   - $main::pic on elf/aout: call the next instruction, pop the return
#     address into $dst, add the _GLOBAL_OFFSET_TABLE_ displacement and
#     load the address through $sym@GOT;
#   - otherwise: a plain lea of the symbol.
sub main::picmeup {
    my ($dst, $sym) = @_;

    if ($main::cpp) {
        push(@out, <<"___");
#if (defined(ELF) || defined(SOL)) && defined(PIC)
        .align  4
        call    1f
1:      popl    $regs{$dst}
        addl    \$_GLOBAL_OFFSET_TABLE_+[.-1b],$regs{$dst}
        movl    $sym\@GOT($regs{$dst}),$regs{$dst}
#else
        leal    $sym,$regs{$dst}
#endif
___
    }
    elsif ($main::pic && ($main::elf || $main::aout)) {
        my $anchor = "PIC_me_up";

        push(@out, "\t.align\t4\n");
        &main::call(&main::label($anchor));
        &main::set_label($anchor);
        # blindpop: the matching "push" was done by the call instruction,
        # so $stack must not be adjusted.
        &main::blindpop($dst);
        &main::add($dst, "\$$under"."_GLOBAL_OFFSET_TABLE_+[.-".
                         &main::label($anchor) . "]");
        &main::mov($dst, &main::DWP($sym."\@GOT", $dst));
    }
    else {
        &main::lea($dst, &main::DWP($sym));
    }
}
676
# Emit popl without changing $stack (main'pop subtracts 4 from it).
sub main::blindpop {
    out1("popl", @_);
}