hkdf: when HMAC key is all zeros, still set a valid key length
[openssl.git] / engines / asm / e_padlock-x86.pl
index e211706ae1b8511e3db929c8ff59eaf6f1513757..3e9a22fca4bf09e8074e4374fe4e4e5d0bfe2279 100644 (file)
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2023 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -35,8 +42,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../crypto/perlasm");
 require "x86asm.pl";
 
-&asm_init($ARGV[0],$0);
+$output=pop and open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
 
+%PADLOCK_PREFETCH=(ecb=>128, cbc=>64); # prefetch errata
 $PADLOCK_CHUNK=512;    # Must be a power of 2 larger than 16
 
 $ctx="edx";
@@ -62,11 +72,20 @@ $chunk="ebx";
        &cpuid  ();
        &xor    ("eax","eax");
        &cmp    ("ebx","0x".unpack("H*",'tneC'));
-       &jne    (&label("noluck"));
+       &jne    (&label("zhaoxin"));
        &cmp    ("edx","0x".unpack("H*",'Hrua'));
        &jne    (&label("noluck"));
        &cmp    ("ecx","0x".unpack("H*",'slua'));
        &jne    (&label("noluck"));
+       &jmp    (&label("zhaoxinEnd"));
+&set_label("zhaoxin");
+       &cmp    ("ebx","0x".unpack("H*",'hS  '));
+       &jne    (&label("noluck"));
+       &cmp    ("edx","0x".unpack("H*",'hgna'));
+       &jne    (&label("noluck"));
+       &cmp    ("ecx","0x".unpack("H*",'  ia'));
+       &jne    (&label("noluck"));
+&set_label("zhaoxinEnd");
        &mov    ("eax",0xC0000000);
        &cpuid  ();
        &mov    ("edx","eax");
@@ -96,6 +115,8 @@ $chunk="ebx";
 &function_begin_B("padlock_key_bswap");
        &mov    ("edx",&wparam(0));
        &mov    ("ecx",&DWP(240,"edx"));
+       &inc    ("ecx");
+       &shl    ("ecx",2);
 &set_label("bswap_loop");
        &mov    ("eax",&DWP(0,"edx"));
        &bswap  ("eax");
@@ -207,7 +228,27 @@ my ($mode,$opcode) = @_;
        &neg    ("eax");
        &and    ($chunk,$PADLOCK_CHUNK-1);      # chunk=len%PADLOCK_CHUNK
        &lea    ("esp",&DWP(0,"eax","ebp"));    # alloca
+       &mov    ("eax",$PADLOCK_CHUNK);
+       &cmovz  ($chunk,"eax");                 # chunk=chunk?:PADLOCK_CHUNK
+       &mov    ("eax","ebp");
+       &and    ("ebp",-16);
        &and    ("esp",-16);
+       &mov    (&DWP(16,"ebp"),"eax");
+    if ($PADLOCK_PREFETCH{$mode}) {
+       &cmp    ($len,$chunk);
+       &ja     (&label("${mode}_loop"));
+       &mov    ("eax",$inp);           # check if prefetch crosses page
+       &cmp    ("ebp","esp");
+       &cmove  ("eax",$out);
+       &add    ("eax",$len);
+       &neg    ("eax");
+       &and    ("eax",0xfff);          # distance to page boundary
+       &cmp    ("eax",$PADLOCK_PREFETCH{$mode});
+       &mov    ("eax",-$PADLOCK_PREFETCH{$mode});
+       &cmovae ("eax",$chunk);         # mask=distance<prefetch?-prefetch:-1
+       &and    ($chunk,"eax");
+       &jz     (&label("${mode}_unaligned_tail"));
+    }
        &jmp    (&label("${mode}_loop"));
 
 &set_label("${mode}_loop",16);
@@ -271,8 +312,8 @@ my ($mode,$opcode) = @_;
        &test   ($out,0x0f);
        &jz     (&label("${mode}_out_aligned"));
        &mov    ($len,$chunk);
-       &shr    ($len,2);
        &lea    ($inp,&DWP(0,"esp"));
+       &shr    ($len,2);
        &data_byte(0xf3,0xa5);                  # rep movsl
        &sub    ($out,$chunk);
 &set_label("${mode}_out_aligned");
@@ -283,23 +324,61 @@ my ($mode,$opcode) = @_;
        &add    ($inp,$chunk);
        &sub    ($len,$chunk);
        &mov    ($chunk,$PADLOCK_CHUNK);
+    if (!$PADLOCK_PREFETCH{$mode}) {
        &jnz    (&label("${mode}_loop"));
-                                               if ($mode ne "ctr32") {
-       &test   ($out,0x0f);                    # out_misaligned
-       &jz     (&label("${mode}_done"));
-                                               }
-       &mov    ($len,"ebp");
-       &mov    ($out,"esp");
-       &sub    ($len,"esp");
+    } else {
+       &jz     (&label("${mode}_break"));
+       &cmp    ($len,$chunk);
+       &jae    (&label("${mode}_loop"));
+
+&set_label("${mode}_unaligned_tail");
        &xor    ("eax","eax");
+       &cmp    ("esp","ebp");
+       &cmove  ("eax",$len);
+       &sub    ("esp","eax");                  # alloca
+       &mov    ("eax", $out);                  # save parameters
+       &mov    ($chunk,$len);
        &shr    ($len,2);
-       &data_byte(0xf3,0xab);                  # rep stosl
+       &lea    ($out,&DWP(0,"esp"));
+       &data_byte(0xf3,0xa5);                  # rep movsl
+       &mov    ($inp,"esp");
+       &mov    ($out,"eax");                   # restore parameters
+       &mov    ($len,$chunk);
+       &jmp    (&label("${mode}_loop"));
+
+&set_label("${mode}_break",16);
+    }
+                                               if ($mode ne "ctr32") {
+       &cmp    ("esp","ebp");
+       &je     (&label("${mode}_done"));
+                                               }
+       &pxor   ("xmm0","xmm0");
+       &lea    ("eax",&DWP(0,"esp"));
+&set_label("${mode}_bzero");
+       &movaps (&QWP(0,"eax"),"xmm0");
+       &lea    ("eax",&DWP(16,"eax"));
+       &cmp    ("ebp","eax");
+       &ja     (&label("${mode}_bzero"));
+
 &set_label("${mode}_done");
+       &mov    ("ebp",&DWP(16,"ebp"));
        &lea    ("esp",&DWP(24,"ebp"));
                                                if ($mode ne "ctr32") {
        &jmp    (&label("${mode}_exit"));
 
 &set_label("${mode}_aligned",16);
+    if ($PADLOCK_PREFETCH{$mode}) {
+       &lea    ("ebp",&DWP(0,$inp,$len));
+       &neg    ("ebp");
+       &and    ("ebp",0xfff);                  # distance to page boundary
+       &xor    ("eax","eax");
+       &cmp    ("ebp",$PADLOCK_PREFETCH{$mode});
+       &mov    ("ebp",$PADLOCK_PREFETCH{$mode}-1);
+       &cmovae ("ebp","eax");
+       &and    ("ebp",$len);                   # remainder
+       &sub    ($len,"ebp");
+       &jz     (&label("${mode}_aligned_tail"));
+    }
        &lea    ("eax",&DWP(-16,$ctx));         # ivp
        &lea    ("ebx",&DWP(16,$ctx));          # key
        &shr    ($len,4);                       # len/=AES_BLOCK_SIZE
@@ -308,6 +387,29 @@ my ($mode,$opcode) = @_;
        &movaps ("xmm0",&QWP(0,"eax"));
        &movaps (&QWP(-16,$ctx),"xmm0");        # copy [or refresh] iv
                                                }
+    if ($PADLOCK_PREFETCH{$mode}) {
+       &test   ("ebp","ebp");
+       &jz     (&label("${mode}_exit"));
+
+&set_label("${mode}_aligned_tail");
+       &mov    ($len,"ebp");
+       &lea    ("ebp",&DWP(-24,"esp"));
+       &mov    ("esp","ebp");
+       &mov    ("eax","ebp");
+       &sub    ("esp",$len);
+       &and    ("ebp",-16);
+       &and    ("esp",-16);
+       &mov    (&DWP(16,"ebp"),"eax");
+       &mov    ("eax", $out);                  # save parameters
+       &mov    ($chunk,$len);
+       &shr    ($len,2);
+       &lea    ($out,&DWP(0,"esp"));
+       &data_byte(0xf3,0xa5);                  # rep movsl
+       &mov    ($inp,"esp");
+       &mov    ($out,"eax");                   # restore parameters
+       &mov    ($len,$chunk);
+       &jmp    (&label("${mode}_loop"));
+    }
 &set_label("${mode}_exit");                    }
        &mov    ("eax",1);
        &lea    ("esp",&DWP(4,"esp"));          # popf
@@ -522,3 +624,5 @@ my ($mode,$opcode) = @_;
 &data_word(0);
 
 &asm_finish();
+
+close STDOUT;