e_padlock-x86.pl: make it work on VIA C3 (which doesn't support SSE2).
[openssl.git] / engines / asm / e_padlock-x86.pl
index 1ee622ff88cae54d99478991fac115c0419c9725..47f05dc9f10e6437ca1e38a19a6cc4a872214569 100644 (file)
@@ -177,7 +177,7 @@ my ($mode,$opcode) = @_;
        &lea    ($ctx,&DWP(16,$ctx));   # control word
        &xor    ("eax","eax");
                                        if ($mode eq "ctr16") {
        &lea    ($ctx,&DWP(16,$ctx));   # control word
        &xor    ("eax","eax");
                                        if ($mode eq "ctr16") {
-       &movdqa ("xmm0",&QWP(-16,$ctx));# load iv
+       &movq   ("xmm0",&QWP(-16,$ctx));# load [upper part of] counter
                                        } else {
        &xor    ("ebx","ebx");
        &test   (&DWP(0,$ctx),1<<5);    # align bit in control word
                                        } else {
        &xor    ("ebx","ebx");
        &test   (&DWP(0,$ctx),1<<5);    # align bit in control word
@@ -210,20 +210,21 @@ my ($mode,$opcode) = @_;
        &mov    ($len,$chunk);
        &mov    (&DWP(12,"ebp"),$chunk);        # chunk
                                                if ($mode eq "ctr16") {
        &mov    ($len,$chunk);
        &mov    (&DWP(12,"ebp"),$chunk);        # chunk
                                                if ($mode eq "ctr16") {
-       &pextrw ("ecx","xmm0",7);               # borrow $len
-       &mov    ($inp,1);
+       &mov    ("ecx",&DWP(-4,$ctx));
        &xor    ($out,$out);
        &xor    ($out,$out);
-       &xchg   ("ch","cl");
+       &mov    ("eax",&DWP(-8,$ctx));          # borrow $len
 &set_label("${mode}_prepare");
 &set_label("${mode}_prepare");
-       &movdqa (&QWP(0,"esp",$out),"xmm0");
-       &lea    ("eax",&DWP(0,"ecx",$inp));
-       &xchg   ("ah","al");
+       &mov    (&DWP(12,"esp",$out),"ecx");
+       &bswap  ("ecx");
+       &movq   (&QWP(0,"esp",$out),"xmm0");
+       &inc    ("ecx");
+       &mov    (&DWP(8,"esp",$out),"eax");
+       &bswap  ("ecx");
        &lea    ($out,&DWP(16,$out));
        &lea    ($out,&DWP(16,$out));
-       &pinsrw ("xmm0","eax",7);
-       &lea    ($inp,&DWP(1,$inp));
        &cmp    ($out,$chunk);
        &jb     (&label("${mode}_prepare"));
 
        &cmp    ($out,$chunk);
        &jb     (&label("${mode}_prepare"));
 
+       &mov    (&DWP(-4,$ctx),"ecx");
        &lea    ($inp,&DWP(0,"esp"));
        &lea    ($out,&DWP(0,"esp"));
        &mov    ($len,$chunk);
        &lea    ($inp,&DWP(0,"esp"));
        &lea    ($out,&DWP(0,"esp"));
        &mov    ($len,$chunk);
@@ -244,8 +245,8 @@ my ($mode,$opcode) = @_;
        &shr    ($len,4);                       # len/=AES_BLOCK_SIZE
        &data_byte(0xf3,0x0f,0xa7,$opcode);     # rep xcrypt*
                                                if ($mode !~ /ecb|ctr/) {
        &shr    ($len,4);                       # len/=AES_BLOCK_SIZE
        &data_byte(0xf3,0x0f,0xa7,$opcode);     # rep xcrypt*
                                                if ($mode !~ /ecb|ctr/) {
-       &movdqa ("xmm0",&QWP(0,"eax"));
-       &movdqa (&QWP(-16,$ctx),"xmm0");        # copy [or refresh] iv
+       &movaps ("xmm0",&QWP(0,"eax"));
+       &movaps (&QWP(-16,$ctx),"xmm0");        # copy [or refresh] iv
                                                }
        &mov    ($out,&DWP(0,"ebp"));           # restore parameters
        &mov    ($chunk,&DWP(12,"ebp"));
                                                }
        &mov    ($out,&DWP(0,"ebp"));           # restore parameters
        &mov    ($chunk,&DWP(12,"ebp"));
@@ -253,10 +254,10 @@ my ($mode,$opcode) = @_;
        &mov    ($inp,&DWP(4,"ebp"));
        &xor    ($len,$len);
 &set_label("${mode}_xor");
        &mov    ($inp,&DWP(4,"ebp"));
        &xor    ($len,$len);
 &set_label("${mode}_xor");
-       &movdqu ("xmm1",&QWP(0,$inp,$len));
+       &movups ("xmm1",&QWP(0,$inp,$len));
        &lea    ($len,&DWP(16,$len));
        &pxor   ("xmm1",&QWP(-16,"esp",$len));
        &lea    ($len,&DWP(16,$len));
        &pxor   ("xmm1",&QWP(-16,"esp",$len));
-       &movdqu (&QWP(-16,$out,$len),"xmm1");
+       &movups (&QWP(-16,$out,$len),"xmm1");
        &cmp    ($len,$chunk);
        &jb     (&label("${mode}_xor"));
                                                } else {
        &cmp    ($len,$chunk);
        &jb     (&label("${mode}_xor"));
                                                } else {
@@ -276,11 +277,7 @@ my ($mode,$opcode) = @_;
        &sub    ($len,$chunk);
        &mov    ($chunk,$PADLOCK_CHUNK);
        &jnz    (&label("${mode}_loop"));
        &sub    ($len,$chunk);
        &mov    ($chunk,$PADLOCK_CHUNK);
        &jnz    (&label("${mode}_loop"));
-                                               if ($mode eq "ctr16") {
-       &movdqa (&QWP(-16,$ctx),"xmm0");        # write out iv
-       &pxor   ("xmm0","xmm0");
-       &pxor   ("xmm1","xmm1");
-                                               } else {
+                                               if ($mode ne "ctr16") {
        &test   ($out,0x0f);                    # out_misaligned
        &jz     (&label("${mode}_done"));
                                                }
        &test   ($out,0x0f);                    # out_misaligned
        &jz     (&label("${mode}_done"));
                                                }
@@ -301,8 +298,8 @@ my ($mode,$opcode) = @_;
        &shr    ($len,4);                       # len/=AES_BLOCK_SIZE
        &data_byte(0xf3,0x0f,0xa7,$opcode);     # rep xcrypt*
                                                if ($mode ne "ecb") {
        &shr    ($len,4);                       # len/=AES_BLOCK_SIZE
        &data_byte(0xf3,0x0f,0xa7,$opcode);     # rep xcrypt*
                                                if ($mode ne "ecb") {
-       &movdqa ("xmm0",&QWP(0,"eax"));
-       &movdqa (&QWP(-16,$ctx),"xmm0");        # copy [or refresh] iv
+       &movaps ("xmm0",&QWP(0,"eax"));
+       &movaps (&QWP(-16,$ctx),"xmm0");        # copy [or refresh] iv
                                                }
 &set_label("${mode}_exit");                    }
        &mov    ("eax",1);
                                                }
 &set_label("${mode}_exit");                    }
        &mov    ("eax",1);