sub main'shr { &out2("shrl",@_); }
sub main'xor { &out2("xorl",@_); }
sub main'xorb { &out2("xorb",@_); }
-sub main'add { &out2("addl",@_); }
+sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); }  # addb when the first operand names an 8-bit register (%al,%ah,...,%dl,%dh), otherwise addl
sub main'adc { &out2("adcl",@_); }
sub main'sub { &out2("subl",@_); }
sub main'sbb { &out2("sbbl",@_); }
sub main'rotl { &out2("roll",@_); }
sub main'rotr { &out2("rorl",@_); }
-sub main'exch { &out2("xchg",@_); }
+sub main'exch { &out2($_[0]=~/%[a-d][lh]/?"xchgb":"xchgl",@_); }  # xchgb when the first operand names an 8-bit register (%al,%ah,...,%dl,%dh), otherwise xchgl
sub main'cmp { &out2("cmpl",@_); }
sub main'lea { &out2("leal",@_); }
sub main'mul { &out1("mull",@_); }
sub main'jnc { &out1("jnc",@_); }
sub main'jno { &out1("jno",@_); }
sub main'dec { &out1("decl",@_); }
-sub main'inc { &out1("incl",@_); }
+sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); }  # incb when the operand names an 8-bit register (%al,%ah,...,%dl,%dh), otherwise incl
sub main'push { &out1("pushl",@_); $stack+=4; }
sub main'pop { &out1("popl",@_); $stack-=4; }
sub main'pushf { &out0("pushfl"); $stack+=4; }
{ if ($label{$i} eq $_[0]) { $pre=''; last; } }
&out1("call",$pre.$_[0]);
}
+sub main'call_ptr { &out1p("call",@_); }  # "call" routed through out1p rather than out1; NOTE(review): out1p is defined elsewhere - presumably it formats an indirect (pointer) operand, confirm there
sub main'ret { &out0("ret"); }
sub main'nop { &out0("nop"); }
sub main'test { &out2("testl",@_); }
sub main'bt { &out2("btl",@_); }
sub main'leave { &out0("leave"); }
-sub main'cpuid { &out0(".word\t0xa20f"); }
-sub main'rdtsc { &out0(".word\t0x310f"); }
+sub main'cpuid { &out0(".byte\t0x0f,0xa2"); }  # CPUID written as its raw opcode bytes (0F A2) instead of a mnemonic
+sub main'rdtsc { &out0(".byte\t0x0f,0x31"); }  # RDTSC written as its raw opcode bytes (0F 31) instead of a mnemonic
sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzbl",@_); }  # always the byte-to-long form (movzbl): zero-extends an 8-bit source into a 32-bit destination
+sub main'neg { &out1("negl",@_); }  # 32-bit two's-complement negate (negl) of the single operand
+sub main'cld { &out0("cld"); }  # cld: clear the direction flag; takes no operands
# SSE2
sub main'emms { &out0("emms"); }
sub main'movd { &out2("movd",@_); }
-sub main'movq { &out2("movq",@_); }
sub main'movdqu { &out2("movdqu",@_); }
sub main'movdqa { &out2("movdqa",@_); }
sub main'movdq2q{ &out2("movdq2q",@_); }
sub main'pxor { &out2("pxor",@_); }
sub main'por { &out2("por",@_); }
sub main'pand { &out2("pand",@_); }
+sub main'movq {  # emit a movq; arguments: destination, source, optional optimize flag
+ local($p1,$p2,$optimize)=@_;
+ if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+ # movq between mmx registers can sink Intel CPUs; pshufw with immediate 0xe4 (word order 3,2,1,0) is an identity shuffle, i.e. the same copy
+ { push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n"); }
+ else { &out2("movq",@_); }  # @_ is forwarded whole, so out2 also receives $optimize after the two operands
+ }
+sub main'pshufw {  # emit pshufw; arguments: destination register, source register, shuffle immediate
+ local ($dst,$src,$magic)=@_;
+ push(@out,"\tpshufw\t\$$magic,%$src,%$dst\n");  # AT&T operand order: immediate, source, destination; the "%" and "$" prefixes are added here, so callers pass bare names/values
+ }
+sub main'punpckldq { &out2("punpckldq",@_); }  # punpckldq: interleave the low-order doublewords of the two operands
+sub main'pcmpgtb { &out2("pcmpgtb",@_); }  # pcmpgtb: packed signed-byte "greater than" compare
+sub main'paddb { &out2("paddb",@_); }  # paddb: packed byte addition
+sub main'psrld { &out2("psrld",@_); }  # psrld: packed doubleword logical shift right
+sub main'pslld { &out2("pslld",@_); }  # pslld: packed doubleword logical shift left
# The bswapl instruction is new for the 486. Emulate if i386.
sub main'bswap
}
}
+sub main'public_label  # export a label: register it if needed, then emit a ".globl" line for it
+ {
+ $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]}));  # first use only: store the assembler-level name, i.e. the given name prefixed with $under
+ push(@out,".globl\t$label{$_[0]}\n");  # emitted on every call, whether or not the label was already registered
+ }
+
sub main'label
{
if (!defined($label{$_[0]}))
if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
local($tmp);
- push (@out,"\n.comm\t${under}OPENSSL_ia32cap_P,4,4\n");
+ push (@out,"\n.section\t.bss\n");
+ push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
push (@out,".section\t.init\n");
# One can argue that it's wasteful to craft every
pushf
popl %eax
xorl %ecx,%eax
- bt \$21,%eax
+ btl \$21,%eax
jnc 1f
pushl %edi
pushl %ebx
movl %edx,%edi
movl \$1,%eax
- .word 0xa20f
+ .byte 0x0f,0xa2
orl \$1<<10,%edx
movl %edx,0(%edi)
popl %ebx
popl %edi
- .align 4
+ jmp 1f
+ .align $align
1:
___
push (@out,$tmp);
}
}
+sub main'data_byte  # append one ".byte" directive to @out holding every argument, comma-separated
+ {
+ push(@out,"\t.byte\t".join(',',@_)."\n");  # arguments are emitted verbatim; no conversion or validation is done here
+ }
+
sub main'data_word
{
push(@out,"\t.long\t".join(',',@_)."\n");
sub main'picmeup
{
- local($dst,$sym)=@_;
+ local($dst,$sym,$base,$reflabel)=@_;
+
if ($main'cpp)
{
- local($tmp)=<<___;
+ local($tmp);
+ if (!defined($base))
+ {
+ $tmp=<<___;
#if (defined(ELF) || defined(SOL)) && defined(PIC)
call 1f
1: popl $regs{$dst}
leal $sym,$regs{$dst}
#endif
___
+ }
+ else {
+ $tmp=<<___;
+#if (defined(ELF) || defined(SOL)) && defined(PIC)
+ leal _GLOBAL_OFFSET_TABLE_+[.-$reflabel]($regs{$base}),$regs{$dst}
+ movl $sym\@GOT($regs{$dst}),$regs{$dst}
+#else
+ leal $sym,$regs{$dst}
+#endif
+___
+ }
push(@out,$tmp);
}
elsif ($main'pic && ($main'elf || $main'aout))
{
- &main'call(&main'label("PIC_me_up"));
- &main'set_label("PIC_me_up");
- &main'blindpop($dst);
- &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
+ if (!defined($base))
+ {
+ &main'call(&main'label("PIC_me_up"));
+ &main'set_label("PIC_me_up");
+ &main'blindpop($dst);
+ &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
&main'label("PIC_me_up") . "]");
+ }
+ else {
+ &main'lea($dst,&main'DWP(
+ "${under}_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
+ $base));
+ }
&main'mov($dst,&main'DWP($under.$sym."\@GOT",$dst));
}
else
$tmp=<<___;
.section .init
call $under$f
+ jmp .Linitalign
+.align $align
+.Linitalign:
___
}
elsif ($main'coff)