#! /usr/bin/env perl
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
#
-# Licensed under the OpenSSL license (the "License"). You may not use
+# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
# in real-life application are somewhat lower, e.g. for 2KB
# fragments they range from 30% to 100% (on Haswell);
# Work out the target flavour and the output file name from the command line.
# (The removed form took the first two arguments positionally and treated a
# first argument containing a dot as the output file.)
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
# Set $win64 when the flavour names nasm, masm or mingw64, or when the output
# file name ends in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
# Add ${dir} and ${dir}../../perlasm to the module search path, then load the
# support file. $dir is set outside this view (presumably the directory that
# contains this script -- TODO confirm).
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86_64-support.pl";
+
# Pointer size for the selected flavour. pointer_size() is not defined in the
# visible code; presumably it comes from x86_64-support.pl -- TODO confirm.
# The value is used further down to compute displacements into the input array.
+$ptr_size=&pointer_size($flavour);
+
$avx=0;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
$avx = ($1>=10) + ($1>=11);
}
# Only consulted when $avx is still false: parse the version printed by
# "$ENV{CC} -v". $avx becomes 1 when the captured version equals 3.0, 2 when it
# is greater, and stays 0 otherwise. The added pattern accepts a major version
# of one or more digits, whereas the removed one only matched a single leading
# digit in the range 3-9.
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
# Open a pipe to the translator script, run with the current perl interpreter
# ($^X) and given the flavour and output file name as arguments. The added
# form dies with the system error if the pipe cannot be opened; the removed
# form ignored the result of open.
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
+ or die "can't call $xlate: $!";
# Alias STDOUT to the pipe so that later prints to STDOUT go to the translator.
*STDOUT=*OUT;
# void sha1_multi_block (
# Register names substituted into the assembly templates below.
$num="%edx";
# @ptr holds %r8 through %r11.
@ptr=map("%r$_",(8..11));
$Tbl="%rbp";
# Stride of one element of the input array: two pointer-sized slots. The loads
# in the loops below read the input pointer at offset 0 of an element and the
# number of blocks at offset $ptr_size.
+$inp_elm_size=2*$ptr_size;
# $A..$E (also collected in @V) are %xmm0 through %xmm4.
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4));
# $t0..$t3 and $tx are %xmm5 through %xmm9.
($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9));
xor $num,$num
___
for($i=0;$i<4;$i++) {
+ $ptr_reg=&pointer_register($flavour,@ptr[$i]);
$code.=<<___;
- mov `16*$i+0`($inp),@ptr[$i] # input pointer
- mov `16*$i+8`($inp),%ecx # number of blocks
+ # input pointer
+ mov `$inp_elm_size*$i+0`($inp),$ptr_reg
+ # number of blocks
+ mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
cmp $num,%ecx
cmovg %ecx,$num # find maximum
test %ecx,%ecx
$code.=<<___;
movdqa (%rbx),@Xi[0] # pull counters
mov \$1,%ecx
- cmp 4*0(%rbx),%ecx # examinte counters
+ cmp 4*0(%rbx),%ecx # examine counters
pxor $t2,$t2
cmovge $Tbl,@ptr[0] # cancel input
cmp 4*1(%rbx),%ecx
mov `$REG_SZ*17+8`(%rsp),$num
lea $REG_SZ($ctx),$ctx
- lea `16*$REG_SZ/4`($inp),$inp
+ lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
dec $num
jnz .Loop_grande
xor $num,$num
___
for($i=0;$i<2;$i++) {
+ $ptr_reg=&pointer_register($flavour,@ptr[$i]);
$code.=<<___;
- mov `16*$i+0`($inp),@ptr[$i] # input pointer
- mov `16*$i+8`($inp),%ecx # number of blocks
+ # input pointer
+ mov `$inp_elm_size*$i+0`($inp),$ptr_reg
+ # number of blocks
+ mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
cmp $num,%ecx
cmovg %ecx,$num # find maximum
test %ecx,%ecx
movq $E0,0x80-0x40($ctx) # e1.e0
lea `$REG_SZ/2`($ctx),$ctx
- lea `16*2`($inp),$inp
+ lea `$inp_elm_size*2`($inp),$inp
dec $num
jnz .Loop_grande_shaext
xor $num,$num
___
for($i=0;$i<4;$i++) {
+ $ptr_reg=&pointer_register($flavour,@ptr[$i]);
$code.=<<___;
- mov `16*$i+0`($inp),@ptr[$i] # input pointer
- mov `16*$i+8`($inp),%ecx # number of blocks
+ # input pointer
+ mov `$inp_elm_size*$i+0`($inp),$ptr_reg
+ # number of blocks
+ mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
cmp $num,%ecx
cmovg %ecx,$num # find maximum
test %ecx,%ecx
mov `$REG_SZ*17+8`(%rsp),$num
lea $REG_SZ($ctx),$ctx
- lea `16*$REG_SZ/4`($inp),$inp
+ lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
dec $num
jnz .Loop_grande_avx
lea `$REG_SZ*16`(%rsp),%rbx
___
for($i=0;$i<8;$i++) {
+ $ptr_reg=&pointer_register($flavour,@ptr[$i]);
$code.=<<___;
- mov `16*$i+0`($inp),@ptr[$i] # input pointer
- mov `16*$i+8`($inp),%ecx # number of blocks
+ # input pointer
+ mov `$inp_elm_size*$i+0`($inp),$ptr_reg
+ # number of blocks
+ mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
cmp $num,%ecx
cmovg %ecx,$num # find maximum
test %ecx,%ecx
#mov `$REG_SZ*17+8`(%rsp),$num
#lea $REG_SZ($ctx),$ctx
- #lea `16*$REG_SZ/4`($inp),$inp
+ #lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
#dec $num
#jnz .Loop_grande_avx2
mov -48(%rax),%r15
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore cotnext->R12
- mov %r13,224($context) # restore cotnext->R13
- mov %r14,232($context) # restore cotnext->R14
- mov %r15,240($context) # restore cotnext->R15
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
lea -56-10*16(%rax),%rsi
lea 512($context),%rdi # &context.Xmm6
print $_,"\n";
}
# Check the result of closing STDOUT so that a failure is reported with the
# system error instead of being silently ignored (the removed form did not
# check it).
-close STDOUT;
+close STDOUT or die "error closing STDOUT: $!";