X-Git-Url: https://git.openssl.org/?a=blobdiff_plain;f=engines%2Fasm%2Fe_padlock-x86_64.pl;h=f8ba1e909f3d9420bcf81c27a96411c07f4fa301;hb=46bf83f07ae1ba7fda435c90af93960e77159f4b;hp=ad61974988155beed15c27a1c27851dc98311d4e;hpb=08d62e9f1a122d2a9029a8130b55525f44274d9f;p=openssl.git diff --git a/engines/asm/e_padlock-x86_64.pl b/engines/asm/e_padlock-x86_64.pl index ad61974988..f8ba1e909f 100644 --- a/engines/asm/e_padlock-x86_64.pl +++ b/engines/asm/e_padlock-x86_64.pl @@ -23,10 +23,12 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $code=".text\n"; +%PADLOCK_PREFETCH=(ecb=>128, cbc=>64, ctr32=>32); # prefetch errata $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20 $ctx="%rdx"; @@ -158,6 +160,7 @@ padlock_sha1_oneshot: .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax + add \$128+8,%rsp movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret @@ -179,6 +182,7 @@ padlock_sha1_blocks: .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax + add \$128+8,%rsp movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret @@ -200,6 +204,7 @@ padlock_sha256_oneshot: .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 + add \$128+8,%rsp movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret @@ -221,6 +226,7 @@ padlock_sha256_blocks: .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 + add \$128+8,%rsp movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret @@ -247,6 +253,7 @@ padlock_sha512_blocks: movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 movaps 48(%rsp),%xmm3 + add \$128+8,%rsp movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) movups %xmm2,32(%rdx) @@ -298,16 +305,36 @@ padlock_${mode}_encrypt: neg %rax and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK lea (%rax,%rbp),%rsp + mov \$$PADLOCK_CHUNK,%rax + cmovz %rax,$chunk # chunk=chunk?:PADLOCK_CHUNK ___ $code.=<<___ if ($mode eq "ctr32"); +.L${mode}_reenter: mov -4($ctx),%eax # pull 32-bit counter bswap %eax neg %eax and \$`$PADLOCK_CHUNK/16-1`,%eax - jz .L${mode}_loop + mov \$$PADLOCK_CHUNK,$chunk shl \$4,%eax + cmovz $chunk,%rax cmp %rax,$len cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK + cmovbe $len,$chunk +___ +$code.=<<___ if ($PADLOCK_PREFETCH{$mode}); + cmp $chunk,$len + ja .L${mode}_loop + mov $inp,%rax # check if prefetch crosses page + cmp %rsp,%rbp + cmove $out,%rax + add $len,%rax + neg %rax + and \$0xfff,%rax # distance to page boundary + cmp \$$PADLOCK_PREFETCH{$mode},%rax + mov \$-$PADLOCK_PREFETCH{$mode},%rax + cmovae $chunk,%rax # mask=distance