Update .cvsignore

[openssl.git] / crypto / whrlpool / asm / wp-mmx.pl
diff --git a/crypto/whrlpool/asm/wp-mmx.pl b/crypto/whrlpool/asm/wp-mmx.pl

index 7f49c778e80a10badfe8f4aff04f0bf4de35d643..d52439bca4b805f668134731e82411b4e19a2bb0 100644 (file)
--- a/crypto/whrlpool/asm/wp-mmx.pl
+++ b/crypto/whrlpool/asm/wp-mmx.pl
@@ -31,7 +31,7 @@
  #
  # (*)  I've sketched even non-MMX assembler, but for the record
  #      I've failed to beat the Intel compiler on P4, without using
-*      MMX that is...
+#      MMX that is...
  # (**) ... on AMD on the other hand non-MMX assembler was observed
  #      to perform significantly better, but I figured this MMX
  #      implementation is even faster anyway, so why bother? As for
@@ -45,7 +45,7 @@
  #      non-MMX implementation would actually pay off, but till
  #      opposite is proved "unlikely" is assumed.
  
-push(@INC,"../CVS/HEAD/openssl/crypto/perlasm","../../perlasm");
+push(@INC,"../../perlasm");
  require "x86asm.pl";
  
  &asm_init($ARGV[0],"wp-mmx.pl");
@@ -99,22 +99,22 @@ $tbl="ebp";
         &call   (&label("pic_point"));
  &set_label("pic_point");
         &blindpop($tbl);
-        &lea    ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
+       &lea    ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
  
         &xor    ("ecx","ecx");
         &xor    ("edx","edx");
  
         for($i=0;$i<8;$i++) { &movq(@mm[$i],&QWP($i*8,"esi")); }    # L=H
  &set_label("outerloop");
-       for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); }    # K=H
+       for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); }    # K=L
         for($i=0;$i<8;$i++) { &pxor(@mm[$i],&QWP($i*8,"edi")); }    # L^=inp
-       for($i=0;$i<8;$i++) { &movq(&QWP(64+$i*8,"esp"),@mm[$i]); } #S=L
+       for($i=0;$i<8;$i++) { &movq(&QWP(64+$i*8,"esp"),@mm[$i]); } # S=L
  
         &xor    ("esi","esi");
         &mov    (&DWP(12,"ebx"),"esi");         # zero round counter
  
  &set_label("round",16);
-       &movq   (@mm[0],&DWP(2048*$SCALE,$tbl,"esi",8));        # rc[r]
+       &movq   (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8));        # rc[r]
         &mov    ("eax",&DWP(0,"esp"));
         &mov    ("ebx",&DWP(4,"esp"));
  for($i=0;$i<8;$i++) {
@@ -488,5 +488,5 @@ for($i=0;$i<8;$i++) {
         &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
         &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
  
-&function_end_B("whrilpool_block_mmx");
+&function_end_B("whirlpool_block_mmx");
  &asm_finish();