Throw in AES CBC assembler, up to +40% on aes-128-cbc benchmark.
[openssl.git] / crypto / perlasm / x86nasm.pl
#!/usr/local/bin/perl

package x86nasm;

# Running counter used to build unique local label names.  Perl's string
# increment turns "L000" into "L001", "L002", ... on each allocation.
$label="L000";
# Prefix put in front of global symbol names: empty when $main::netware is
# true, "_" otherwise.  ('::' replaces the deprecated "'" package separator.)
$under=($main::netware)?'':'_';
7
# Register name -> name of its low-byte register (looked up by LB).
%lb=(
        eax => 'al',  ebx => 'bl',  ecx => 'cl',  edx => 'dl',
        ax  => 'al',  bx  => 'bl',  cx  => 'cl',  dx  => 'dl',
        );
17
# Register name -> name of its high-byte register (looked up by HB).
%hb=(
        eax => 'ah',  ebx => 'bh',  ecx => 'ch',  edx => 'dh',
        ax  => 'ah',  bx  => 'bh',  cx  => 'ch',  dx  => 'dh',
        );
27
# Output buffer accessors.  Declared with '::' instead of the deprecated "'"
# package separator; the symbols defined are the same.

# Discard every output fragment accumulated so far.
sub main::asm_init_output { @out=(); }
# Return the accumulated output fragments as a list.
sub main::asm_get_output { return(@out); }
# Return the list of labels recorded in @labels.
sub main::get_labels { return(@labels); }
31
# Declare each argument as an external symbol.
# Every name is appended to @labels and an "extern <prefix><name>" line is
# emitted for it; when $main::mwerks is set the directive is preceded by ".".
sub main::external_label
{
        push(@labels,@_);
        foreach my $sym (@_) {
                push(@out,".") if ($main::mwerks);
                push(@out, "extern\t${under}${sym}\n");
        }
}
40
# Return the low-byte register name for the given register, as listed in %lb.
# Dies when %lb has no entry for it.
sub main::LB
        {
        my($reg)=@_;
        defined($lb{$reg}) or die "$reg does not have a 'low byte'\n";
        return($lb{$reg});
        }
46
# Return the high-byte register name for the given register, as listed in
# %hb.  Dies when %hb has no entry for it.
sub main::HB
        {
        my($reg)=@_;
        defined($hb{$reg}) or die "$reg does not have a 'high byte'\n";
        return($hb{$reg});
        }
52
# Build a memory operand with the "BYTE" size keyword; all arguments are
# forwarded unchanged to get_mem.
sub main::BP
        {
        return(&get_mem("BYTE",@_));
        }
57
# Build a memory operand with the "DWORD" size keyword; all arguments are
# forwarded unchanged to get_mem.
sub main::DWP
        {
        return(&get_mem("DWORD",@_));
        }
62
# Build a memory operand with an empty size keyword (get_mem then emits the
# bare "[...]" form); all arguments are forwarded unchanged to get_mem.
sub main::QWP
        {
        return(&get_mem("",@_));
        }
67
# Return the arguments joined with spaces, prefixed with "BYTE " unless
# $main::mwerks is set.
sub main::BC
        {
        my $prefix=($main::mwerks)?"":"BYTE ";
        return($prefix."@_");
        }
72
# Return the arguments joined with spaces, prefixed with "DWORD " unless
# $main::mwerks is set.
sub main::DWC
        {
        my $prefix=($main::mwerks)?"":"DWORD ";
        return($prefix."@_");
        }
77
# Reserve $num 4-byte stack slots: adds 4*$num to the tracked stack depth
# ($stack) and emits "sub esp,<4*$num>".
sub main::stack_push
        {
        my($num)=@_;
        my $bytes=$num*4;
        $stack+=$bytes;
        &main::sub("esp",$bytes);
        }
84
# Release $num 4-byte stack slots: subtracts 4*$num from the tracked stack
# depth ($stack) and emits "add esp,<4*$num>".
sub main::stack_pop
        {
        my($num)=@_;
        my $bytes=$num*4;
        $stack-=$bytes;
        &main::add("esp",$bytes);
        }
91
# Format a memory operand of the form "<size> [<addr>+<reg2>*<idx>+<reg1>]".
#
#   $size  size keyword ("BYTE", "DWORD", or "" for none).  A non-empty
#          keyword is followed by " PTR" when $main::mwerks is set.
#   $addr  displacement or symbol.  "X+Y" is split: X (after conv) replaces
#          $reg2 and Y, prefixed with $under, becomes the address.  A bare
#          identifier is prefixed with $under.  A value with '-' in the
#          middle is parenthesised; a value starting with '-' is appended
#          after the registers instead of placed first.  "" and 0 are omitted.
#   $reg1  base register (may be empty).
#   $reg2  index register (may be empty).
#   $idx   scale for $reg2; omitted when it compares numerically equal to 0.
#
# Returns the operand string.
sub get_mem
        {
        my($size,$addr,$reg1,$reg2,$idx)=@_;
        my($t,$post);
        my($ret)=$size;
        if ($ret ne "")
                {
                $ret .= " PTR" if ($main::mwerks);
                $ret .= " ";
                }
        $ret .= "[";
        $addr =~ s/^\s+//;
        if ($addr =~ /^(.+)\+(.+)$/)
                {
                $reg2=&conv($1);
                $addr="$under$2";
                }
        elsif ($addr =~ /^[_a-z][_a-z0-9]*$/i)
                {
                $addr="$under$addr";
                }

        if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; }

        # NOTE(review): %regs is not populated anywhere in the visible part of
        # this file, so these two lookups change nothing unless it is filled
        # in elsewhere.
        $reg1="$regs{$reg1}" if defined($regs{$reg1});
        $reg2="$regs{$reg2}" if defined($regs{$reg2});
        if (($addr ne "") && ($addr ne 0))
                {
                if ($addr !~ /^-/)
                        { $ret.="${addr}+"; }
                else    { $post=$addr; }
                }
        if ($reg2 ne "")
                {
                $t="";
                $t="*$idx" if ($idx != 0);
                $reg1="+".$reg1 if ("$reg1$post" ne "");
                $ret.="$reg2$t$reg1$post]";
                }
        else
                {
                $ret.="$reg1$post]"
                }
        $ret =~ s/\+\]/]/; # in case $addr was the only argument
        return($ret);
        }
138
# Instruction wrappers.  Each forwards its operands to out2 (two operands),
# out1 (one operand) or out1p (jmp_ptr) under the mnemonic shown.  movb and
# xorb emit the same mnemonic as mov and xor.  Declared with '::' instead of
# the deprecated "'" package separator.
sub main::mov    { &out2("mov",@_); }
sub main::movb   { &out2("mov",@_); }
sub main::and    { &out2("and",@_); }
sub main::or     { &out2("or",@_); }
sub main::shl    { &out2("shl",@_); }
sub main::shr    { &out2("shr",@_); }
sub main::xor    { &out2("xor",@_); }
sub main::xorb   { &out2("xor",@_); }
sub main::add    { &out2("add",@_); }
sub main::adc    { &out2("adc",@_); }
sub main::sub    { &out2("sub",@_); }
sub main::sbb    { &out2("sbb",@_); }
sub main::rotl   { &out2("rol",@_); }
sub main::rotr   { &out2("ror",@_); }
sub main::exch   { &out2("xchg",@_); }
sub main::cmp    { &out2("cmp",@_); }
sub main::lea    { &out2("lea",@_); }
sub main::mul    { &out1("mul",@_); }
sub main::div    { &out1("div",@_); }
sub main::dec    { &out1("dec",@_); }
sub main::inc    { &out1("inc",@_); }
sub main::jmp    { &out1("jmp",@_); }
sub main::jmp_ptr { &out1p("jmp",@_); }
162
# This is a bit of a kludge: declare all branches as NEAR.
# $near is empty when $main::mwerks is set and 'NEAR' otherwise; it is
# interpolated into the mnemonic each time a conditional jump is emitted.
$near=($main::mwerks)?'':'NEAR';
sub main::je     { &out1("je $near",@_); }
sub main::jle    { &out1("jle $near",@_); }
sub main::jz     { &out1("jz $near",@_); }
sub main::jge    { &out1("jge $near",@_); }
sub main::jl     { &out1("jl $near",@_); }
sub main::ja     { &out1("ja $near",@_); }
sub main::jae    { &out1("jae $near",@_); }
sub main::jb     { &out1("jb $near",@_); }
sub main::jbe    { &out1("jbe $near",@_); }
sub main::jc     { &out1("jc $near",@_); }
sub main::jnc    { &out1("jnc $near",@_); }
sub main::jnz    { &out1("jnz $near",@_); }
sub main::jne    { &out1("jne $near",@_); }
sub main::jno    { &out1("jno $near",@_); }
179
# Stack and miscellaneous instruction wrappers.
# push/pushf add 4 to the tracked stack depth ($stack) and pop/popf subtract
# 4, so that wparam offsets stay correct.  bswap also calls using486.
# call prefixes its target with $under unless the target starts with "@L".
sub main::push   { &out1("push",@_); $stack+=4; }
sub main::pop    { &out1("pop",@_); $stack-=4; }
sub main::pushf  { &out0("pushfd"); $stack+=4; }
sub main::popf   { &out0("popfd"); $stack-=4; }
sub main::bswap  { &out1("bswap",@_); &using486(); }
sub main::not    { &out1("not",@_); }
sub main::call   { &out1("call",($_[0]=~/^\@L/?'':$under).$_[0]); }
sub main::ret    { &out0("ret"); }
sub main::nop    { &out0("nop"); }
sub main::test   { &out2("test",@_); }
sub main::bt     { &out2("bt",@_); }
sub main::leave  { &out0("leave"); }
sub main::cpuid  { &out0("cpuid"); }
sub main::rdtsc  { &out0("rdtsc"); }
sub main::halt   { &out0("hlt"); }
sub main::movz   { &out2("movzx",@_); }
sub main::neg    { &out1("neg",@_); }
197
# SSE2
# Each wrapper emits the mnemonic of the same name through out0 (emms) or
# out2 (all others).
sub main::emms   { &out0("emms"); }
sub main::movd   { &out2("movd",@_); }
sub main::movq   { &out2("movq",@_); }
sub main::movdqu { &out2("movdqu",@_); }
sub main::movdqa { &out2("movdqa",@_); }
sub main::movdq2q{ &out2("movdq2q",@_); }
sub main::movq2dq{ &out2("movq2dq",@_); }
sub main::paddq  { &out2("paddq",@_); }
sub main::pmuludq{ &out2("pmuludq",@_); }
sub main::psrlq  { &out2("psrlq",@_); }
sub main::psllq  { &out2("psllq",@_); }
sub main::pxor   { &out2("pxor",@_); }
sub main::por    { &out2("por",@_); }
sub main::pand   { &out2("pand",@_); }
213
# Emit a two-operand instruction: "\t<name>\t<p1>,<tabs><p2>\n".
# Both operands are passed through conv.  For "lea" (unless $main::mwerks is
# set) everything in front of the first '[' of each operand is dropped, which
# removes any size keyword.
sub out2
        {
        my($name,$p1,$p2)=@_;

        push(@out,"\t$name\t");
        if (!$main::mwerks and $name eq "lea")
                {
                $p1 =~ s/^[^\[]*\[/\[/;
                $p2 =~ s/^[^\[]*\[/\[/;
                }
        my $first=&conv($p1).",";
        push(@out,$first);
        # Number of tabs between the operands; the repetition operator would
        # truncate a fractional count anyway, int() just makes that explicit.
        my $tabs=int(4-(length($first)+9)/8);
        push(@out,"\t" x $tabs);
        push(@out,&conv($p2));
        push(@out,"\n");
        }
233
# Emit an instruction that takes no operands: appends "\t<name>\n" to @out.
sub out0
        {
        my $mnemonic=$_[0];
        push(@out,"\t".$mnemonic."\n");
        }
240
# Emit a one-operand instruction: appends "\t<name>\t<operand>\n" to @out,
# with the operand passed through conv.
sub out1
        {
        my($name,$p1)=@_;
        push(@out,"\t$name\t".&conv($p1)."\n");
        }
247
# Rewrite C-style hex constants in an operand into the 0...h form: "0x1F"
# becomes "01Fh".  Every constant in the operand is converted (the original
# substitution lacked /g and stopped after the first one).
sub conv
        {
        my($p)=@_;
        $p =~ s/0x([0-9A-Fa-f]+)/0$1h/g;
        return $p;
        }
254
# Note that a 486 instruction has been used: on the first call only, rewrite
# the first ".386" in each already-emitted output fragment to ".486".
# Later calls do nothing.
sub using486
        {
        return if $using486;
        $using486++;
        # explicit loop instead of grep in void context; $frag aliases the
        # elements of @out, so the substitution edits them in place
        foreach my $frag (@out)
                {
                $frag =~ s/\.386/.486/;
                }
        }
261
# Emit the section header "section\t.text"; with $main::mwerks set the
# directive is preceded by ".".  Arguments are ignored.
sub main::file
        {
        push(@out,".") if ($main::mwerks);
        push(@out,"section\t.text\n");
        }
267
# Start a global function with the standard prologue.
# Records $func in @labels, emits the "global" directive, the function label
# and pushes of ebp, ebx, esi and edi, then sets the tracked stack depth to
# 20: 16 bytes for the four saved registers plus 4 more, presumably the
# return address.  $extra is accepted but not used.
sub main::function_begin
        {
        my($func,$extra)=@_;

        push(@labels,$func);
        my($tmp)=<<"EOF";
global  $under$func
$under$func:
        push    ebp
        push    ebx
        push    esi
        push    edi
EOF
        push(@out,$tmp);
        $stack=20;
        }
284
# Start a global function without any prologue: emits only the "global"
# directive and the function label, and sets the tracked stack depth to 4.
# $extra is accepted but not used.
sub main::function_begin_B
        {
        my($func,$extra)=@_;
        my($tmp)=<<"EOF";
global  $under$func
$under$func:
EOF
        push(@out,$tmp);
        $stack=4;
        }
295
# Finish a function started with function_begin: emits pops of edi, esi, ebx
# and ebp followed by "ret", then resets the tracked stack depth and forgets
# all labels in %label.  The $func argument is accepted but not used.
sub main::function_end
        {
        my($func)=@_;

        my($tmp)=<<"EOF";
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
EOF
        push(@out,$tmp);
        $stack=0;
        %label=();
        }
311
# Finish a function without emitting any code: only resets the tracked stack
# depth and forgets all labels in %label.
sub main::function_end_B
        {
        $stack=0;
        %label=();
        }
317
# Emit the standard epilogue (pops of edi, esi, ebx, ebp, then "ret") without
# touching the tracked stack depth or %label.  The $func argument is accepted
# but not used.
sub main::function_end_A
        {
        my($func)=@_;

        my($tmp)=<<"EOF";
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
EOF
        push(@out,$tmp);
        }
331
# End-of-file hook; this back-end emits nothing here.
sub main::file_end
        {
        }
335
# Return the DWORD memory operand for word parameter number $num, addressed
# from esp at offset $stack + 4*$num ($stack is the tracked stack depth).
sub main::wparam
        {
        my($num)=@_;

        return(&main::DWP($stack+$num*4,"esp","",0));
        }
342
# Return the DWORD memory operand for stack temporary number $_[0], addressed
# from esp at offset 4*$_[0].
sub main::swtmp
        {
        return(&main::DWP($_[0]*4,"esp","",0));
        }
347
348 # Should use swtmp, which is above esp.  Linux can trash the stack below esp
349 #sub main'wtmp
350 #       {
351 #       my($num)=@_;
352 #
353 #       return(&main'DWP(-(($num+1)*4),"esp","",0));
354 #       }
355
# Emit each argument as an assembler comment line: "\t; <text>\n".
sub main::comment
        {
        foreach my $text (@_)
                {
                push(@out,"\t; $text\n");
                }
        }
363
# Declare a label as global.  If the name has no entry in %label yet it is
# mapped to "<prefix><name>"; a "global" directive for the mapped name is
# then emitted.
sub main::public_label
        {
        my($name)=@_;
        $label{$name}="${under}${name}" if (!defined($label{$name}));
        push(@out,"global\t$label{$name}\n");
        }
369
# Return the assembler name of a local label, allocating it on first use.
# A new name is "@" followed by the current value of $label and the given
# name; $label is then string-incremented so the next allocation differs.
sub main::label
        {
        my($name)=@_;
        if (!defined($label{$name}))
                {
                $label{$name}="\@${label}${name}";
                $label++;
                }
        return($label{$name});
        }
379
# Define a local label at the current position.
#   $_[0]  label name; allocated exactly as main::label does if not yet known
#   $_[1]  optional alignment; an align directive is emitted first when it is
#          greater than 1
sub main::set_label
        {
        my($name,$align)=@_;
        if (!defined($label{$name}))
                {
                $label{$name}="\@${label}${name}";
                $label++;
                }
        # "> 1" already excludes 0, so the separate "!= 0" test was redundant
        if ($align > 1)
                {
                &main::align($align);
                }
        push(@out,"$label{$name}:\n");
        }
393
# Emit one data directive holding all arguments, comma-separated: "DD", or
# ".long" when $main::mwerks is set.
sub main::data_word
        {
        my $directive=($main::mwerks)?".long\t":"DD\t";
        push(@out,$directive.join(',',@_)."\n");
        }
398
# Emit "align\t<n>" for the given value; with $main::mwerks set the directive
# is preceded by ".".
sub main::align
        {
        push(@out,".") if ($main::mwerks);
        push(@out,"align\t$_[0]\n");
        }
404
# Like out1, but with an extra space after the tab that separates mnemonic
# and operand: appends "\t<name>\t <operand>\n" to @out, with the operand
# passed through conv.
sub out1p
        {
        my($name,$p1)=@_;

        push(@out,"\t$name\t ".&conv($p1)."\n");
        }
412
# Load the address of symbol $sym into $dst by emitting "lea" with a DWORD
# memory operand for the symbol.
sub main::picmeup
        {
        # lexicals instead of local(): nothing called from here reads $dst or
        # $sym, so package-global temporaries are not needed
        my($dst,$sym)=@_;
        &main::lea($dst,&main::DWP($sym));
        }
418
# Emit "pop" without adjusting the tracked stack depth ($stack), unlike
# main::pop.
sub main::blindpop { &out1("pop",@_); }
420
# Register function $f as an initializer.  Only when $main::win32 is set:
# emits a ".CRT$XIU" data segment holding an extern declaration of, and a DD
# entry for, the prefixed function name.  Otherwise emits nothing.
sub main::initseg
        {
        my($f)=@_;
        if ($main::win32)
                {
                my($tmp)=<<"___";
segment .CRT\$XIU data
extern  $under$f
DD      $under$f
___
                push(@out,$tmp);
                }
        }

# A module loaded with require must end with a true value.
1;