Fix bug in x86unix.pl introduced in latest update.
[openssl.git] / crypto / perlasm / x86unix.pl
index 79c1abb99dad49f1e3e0b94f2384a79f4cd9b76d..72b2c7b1a841fdba1e31780964a08cfa10691637 100644 (file)
@@ -161,13 +161,13 @@ sub main'shl      { &out2("sall",@_); }
 sub main'shr   { &out2("shrl",@_); }
 sub main'xor   { &out2("xorl",@_); }
 sub main'xorb  { &out2("xorb",@_); }
-sub main'add   { &out2("addl",@_); }
+sub main'add   { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); }
 sub main'adc   { &out2("adcl",@_); }
 sub main'sub   { &out2("subl",@_); }
 sub main'sbb   { &out2("sbbl",@_); }
 sub main'rotl  { &out2("roll",@_); }
 sub main'rotr  { &out2("rorl",@_); }
-sub main'exch  { &out2("xchg",@_); }
+sub main'exch  { &out2($_[0]=~/%[a-d][lh]/?"xchgb":"xchgl",@_); }
 sub main'cmp   { &out2("cmpl",@_); }
 sub main'lea   { &out2("leal",@_); }
 sub main'mul   { &out1("mull",@_); }
@@ -189,7 +189,7 @@ sub main'jc { &out1("jc",@_); }
 sub main'jnc   { &out1("jnc",@_); }
 sub main'jno   { &out1("jno",@_); }
 sub main'dec   { &out1("decl",@_); }
-sub main'inc   { &out1("incl",@_); }
+sub main'inc   { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); }
 sub main'push  { &out1("pushl",@_); $stack+=4; }
 sub main'pop   { &out1("popl",@_); $stack-=4; }
 sub main'pushf { &out0("pushfl"); $stack+=4; }
@@ -200,19 +200,22 @@ sub main'call     {       my $pre=$under;
                        { if ($label{$i} eq $_[0]) { $pre=''; last; } }
                        &out1("call",$pre.$_[0]);
                }
+sub main'call_ptr { &out1p("call",@_); }
 sub main'ret   { &out0("ret"); }
 sub main'nop   { &out0("nop"); }
 sub main'test  { &out2("testl",@_); }
 sub main'bt    { &out2("btl",@_); }
 sub main'leave { &out0("leave"); }
-sub main'cpuid { &out0(".word\t0xa20f"); }
-sub main'rdtsc { &out0(".word\t0x310f"); }
+sub main'cpuid { &out0(".byte\t0x0f,0xa2"); }
+sub main'rdtsc { &out0(".byte\t0x0f,0x31"); }
 sub main'halt  { &out0("hlt"); }
+sub main'movz  { &out2("movzbl",@_); }
+sub main'neg   { &out1("negl",@_); }
+sub main'cld   { &out0("cld"); }
 
 # SSE2
 sub main'emms  { &out0("emms"); }
 sub main'movd  { &out2("movd",@_); }
-sub main'movq  { &out2("movq",@_); }
 sub main'movdqu        { &out2("movdqu",@_); }
 sub main'movdqa        { &out2("movdqa",@_); }
 sub main'movdq2q{ &out2("movdq2q",@_); }
@@ -224,6 +227,22 @@ sub main'psllq     { &out2("psllq",@_); }
 sub main'pxor  { &out2("pxor",@_); }
 sub main'por   { &out2("por",@_); }
 sub main'pand  { &out2("pand",@_); }
+sub main'movq  {
+       local($p1,$p2,$optimize)=@_;
+       if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+               # movq between MMX registers can hurt performance on Intel CPUs; pshufw $0xe4 (identity shuffle) is an equivalent register copy
+               {       push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n");      }
+       else    {       &out2("movq",@_);                               }
+       }
+sub main'pshufw        {
+       local ($dst,$src,$magic)=@_;
+       push(@out,"\tpshufw\t\$$magic,%$src,%$dst\n");
+       }
+sub main'punpckldq     { &out2("punpckldq",@_); }
+sub main'pcmpgtb       { &out2("pcmpgtb",@_);   }
+sub main'paddb         { &out2("paddb",@_);     }
+sub main'psrld         { &out2("psrld",@_);     }
+sub main'pslld         { &out2("pslld",@_);     }
 
 # The bswapl instruction is new for the 486. Emulate if i386.
 sub main'bswap
@@ -497,6 +516,12 @@ sub main'comment
                }
        }
 
+sub main'public_label
+       {
+       $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]}));
+       push(@out,".globl\t$label{$_[0]}\n");
+       }
+
 sub main'label
        {
        if (!defined($label{$_[0]}))
@@ -528,7 +553,8 @@ sub main'file_end
        if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
                local($tmp);
 
-               push (@out,"\n.comm\t${under}OPENSSL_ia32cap_P,4,4\n");
+               push (@out,"\n.section\t.bss\n");
+               push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
 
                push (@out,".section\t.init\n");
                # One can argue that it's wasteful to craft every
@@ -552,18 +578,19 @@ sub main'file_end
                pushf
                popl    %eax
                xorl    %ecx,%eax
-               bt      \$21,%eax
+               btl     \$21,%eax
                jnc     1f
                pushl   %edi
                pushl   %ebx
                movl    %edx,%edi
                movl    \$1,%eax
-               .word   0xa20f
+               .byte   0x0f,0xa2
                orl     \$1<<10,%edx
                movl    %edx,0(%edi)
                popl    %ebx
                popl    %edi
-       .align  4
+               jmp     1f
+       .align  $align
        1:
 ___
                push (@out,$tmp);
@@ -577,6 +604,11 @@ ___
                }
        }
 
+sub main'data_byte
+       {
+       push(@out,"\t.byte\t".join(',',@_)."\n");
+       }
+
 sub main'data_word
        {
        push(@out,"\t.long\t".join(',',@_)."\n");
@@ -667,10 +699,14 @@ sub popvars
 
 sub main'picmeup
        {
-       local($dst,$sym)=@_;
+       local($dst,$sym,$base,$reflabel)=@_;
+
        if ($main'cpp)
                {
-               local($tmp)=<<___;
+               local($tmp);
+               if (!defined($base))
+                       {
+                       $tmp=<<___;
 #if (defined(ELF) || defined(SOL)) && defined(PIC)
        call    1f
 1:     popl    $regs{$dst}
@@ -680,15 +716,34 @@ sub main'picmeup
        leal    $sym,$regs{$dst}
 #endif
 ___
+                       }
+               else    {
+                       $tmp=<<___;
+#if (defined(ELF) || defined(SOL)) && defined(PIC)
+       leal    _GLOBAL_OFFSET_TABLE_+[.-$reflabel]($regs{$base}),$regs{$dst}
+       movl    $sym\@GOT($regs{$dst}),$regs{$dst}
+#else
+       leal    $sym,$regs{$dst}
+#endif
+___
+                       }
                push(@out,$tmp);
                }
        elsif ($main'pic && ($main'elf || $main'aout))
                {
-               &main'call(&main'label("PIC_me_up"));
-               &main'set_label("PIC_me_up");
-               &main'blindpop($dst);
-               &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
+               if (!defined($base))
+                       {
+                       &main'call(&main'label("PIC_me_up"));
+                       &main'set_label("PIC_me_up");
+                       &main'blindpop($dst);
+                       &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
                                &main'label("PIC_me_up") . "]");
+                       }
+               else    {
+                       &main'lea($dst,&main'DWP(
+                               "${under}_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
+                               $base));
+                       }
                &main'mov($dst,&main'DWP($under.$sym."\@GOT",$dst));
                }
        else
@@ -708,6 +763,9 @@ sub main'initseg
                $tmp=<<___;
 .section       .init
        call    $under$f
+       jmp     .Linitalign
+.align $align
+.Linitalign:
 ___
                }
        elsif ($main'coff)