nasm fixes.
author Andy Polyakov <appro@openssl.org>
Tue, 20 Mar 2007 08:55:58 +0000 (08:55 +0000)
committer Andy Polyakov <appro@openssl.org>
Tue, 20 Mar 2007 08:55:58 +0000 (08:55 +0000)
crypto/bn/asm/bn-586.pl
crypto/bn/asm/x86-mont.pl
crypto/perlasm/x86nasm.pl

index 3651c0d..1468906 100644 (file)
@@ -340,7 +340,7 @@ sub bn_sqr_words
                &movd("mm0",&DWP(0,$a));        # mm0 = a[i]
                &pmuludq("mm0","mm0");          # a[i] *= a[i]
                &lea($a,&DWP(4,$a));            # a++
-               &movq(&DWP(0,$r),"mm0");        # r[i] = a[i]*a[i]
+               &movq(&QWP(0,$r),"mm0");        # r[i] = a[i]*a[i]
                &sub($c,1);
                &lea($r,&DWP(8,$r));            # r += 2
                &jnz(&label("sqr_sse2_loop"));
index 81ab77d..319c17d 100755 (executable)
@@ -51,7 +51,7 @@ $_rp=&DWP(4*1,"esp");
 $_ap=&DWP(4*2,"esp");
 $_bp=&DWP(4*3,"esp");
 $_np=&DWP(4*4,"esp");
-$_n0=&DWP(4*5,"esp");
+$_n0=&DWP(4*5,"esp");  $_n0q=&QWP(4*5,"esp");
 $_sp=&DWP(4*6,"esp");
 $_bpend=&DWP(4*7,"esp");
 $frame=32;                             # size of above frame rounded up to 16n
@@ -136,7 +136,7 @@ $mask="mm7";
        &movq   ($acc0,$mul1);                  # I wish movd worked for
        &pand   ($acc0,$mask);                  # inter-register transfers
 
-       &pmuludq($mul1,$_n0);                   # *=n0
+       &pmuludq($mul1,$_n0q);                  # *=n0
 
        &pmuludq($car1,$mul1);                  # "t[0]"*np[0]*n0
        &paddq  ($car1,$acc0);
@@ -181,7 +181,7 @@ $mask="mm7";
        &psrlq  ($car1,32);
 
        &paddq  ($car1,$car0);
-       &movq   (&DWP($frame,"esp",$num,4),$car1);      # tp[num].tp[num-1]
+       &movq   (&QWP($frame,"esp",$num,4),$car1);      # tp[num].tp[num-1]
 \f
        &inc    ($i);                           # i++
 &set_label("outer");
@@ -198,7 +198,7 @@ $mask="mm7";
        &movq   ($car0,$mul1);
        &pand   ($acc0,$mask);
 
-       &pmuludq($mul1,$_n0);                   # *=n0
+       &pmuludq($mul1,$_n0q);                  # *=n0
 
        &pmuludq($car1,$mul1);
        &paddq  ($car1,$acc0);
@@ -250,7 +250,7 @@ $mask="mm7";
        &movd   ($temp,&DWP($frame+4,"esp",$num,4));    # += tp[num]
        &paddq  ($car1,$car0);
        &paddq  ($car1,$temp);
-       &movq   (&DWP($frame,"esp",$num,4),$car1);      # tp[num].tp[num-1]
+       &movq   (&QWP($frame,"esp",$num,4),$car1);      # tp[num].tp[num-1]
 
        &lea    ($i,&DWP(1,$i));                # i++
        &cmp    ($i,$num);
index be439f4..9969012 100644 (file)
@@ -81,7 +81,7 @@ sub get_mem
 }
 sub ::BP       { &get_mem("BYTE",@_);  }
 sub ::DWP      { &get_mem("DWORD",@_); }
-sub ::QWP      { &get_mem("QWORD",@_); }
+sub ::QWP      { &get_mem("",@_);      }
 sub ::BC       { (($::mwerks)?"":"BYTE ")."@_";  }
 sub ::DWC      { (($::mwerks)?"":"DWORD ")."@_"; }
 
@@ -160,6 +160,7 @@ dd  ${lprfx}OPENSSL_ia32cap_init
 segment        .bss
 common ${under}OPENSSL_ia32cap_P 4
 ___
+       grep {s/(^extern\s+${under}OPENSSL_ia32cap_P)/\;$1/} @out;
        push (@out,$tmp);               
     }
 }