aes/asm/aesni-x86[_64].pl update.
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl
index a545e892aef00c6584c5bf31e1389ee3dc8bbeb1..e95593e35e0bd985abf9c5994b1811037e291b87 100755
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -19,9 +19,10 @@
 # Performance in number of cycles per processed byte for 128-bit key:
 #
 #		ECB encrypt	ECB decrypt	CBC large chunk
-# AMD64		33		41		13.0
-# EM64T		38		59		18.6(*)
-# Core 2	30		43		14.5(*)
+# AMD64		33		43		13.0
+# EM64T		38		56		18.6(*)
+# Core 2	30		42		14.5(*)
+# Atom		65		86		32.1(*)
 #
 # (*) with hyper-threading off
@@ -36,7 +37,8 @@
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
-open STDOUT,"| $^X $xlate $flavour $output";
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
 
 $verticalspin=1;	# unlike 32-bit version $verticalspin performs
 			# ~15% better on both AMD and Intel cores
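
The hunk above replaces a bare `open STDOUT` with a named handle plus a glob alias, and quotes `$^X` so a perl binary installed at a path containing spaces can still spawn the translator. A minimal sketch of the pattern, assuming the usual perlasm setup where everything printed is post-processed by x86_64-xlate.pl (the `or die` and the final `print` are illustrative additions, not lines from the patch):

	# pipe this script's own output through the perlasm translator
	open OUT, "| \"$^X\" $xlate $flavour $output" or die "can't spawn xlate: $!";
	*STDOUT = *OUT;		# alias STDOUT so every print feeds the pipe
	print $code;		# generated assembly goes through the translator
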
@@ -365,68 +367,66 @@ $code.=<<___;
 	movzb	`&lo("$s0")`,$t0
 	movzb	`&lo("$s1")`,$t1
 	movzb	`&lo("$s2")`,$t2
-	movzb	($sbox,$t0,1),$t0
-	movzb	($sbox,$t1,1),$t1
-	movzb	($sbox,$t2,1),$t2
-
 	movzb	`&lo("$s3")`,$t3
 	movzb	`&hi("$s1")`,$acc0
 	movzb	`&hi("$s2")`,$acc1
+	shr	\$16,$s2
+	movzb	`&hi("$s3")`,$acc2
+	movzb	($sbox,$t0,1),$t0
+	movzb	($sbox,$t1,1),$t1
+	movzb	($sbox,$t2,1),$t2
 	movzb	($sbox,$t3,1),$t3
-	movzb	($sbox,$acc0,1),$t4	#$t0
-	movzb	($sbox,$acc1,1),$t5	#$t1
-	movzb	`&hi("$s3")`,$acc2
+	movzb	($sbox,$acc0,1),$t4	#$t0
 	movzb	`&hi("$s0")`,$acc0
-	shr	\$16,$s2
+	movzb	($sbox,$acc1,1),$t5	#$t1
+	movzb	`&lo("$s2")`,$acc1
 	movzb	($sbox,$acc2,1),$acc2	#$t2
 	movzb	($sbox,$acc0,1),$acc0	#$t3
-	shr	\$16,$s3
-	movzb	`&lo("$s2")`,$acc1
 	shl	\$8,$t4
+	shr	\$16,$s3
 	shl	\$8,$t5
-	movzb	($sbox,$acc1,1),$acc1	#$t0
 	xor	$t4,$t0
-	xor	$t5,$t1
-
-	movzb	`&lo("$s3")`,$t4
 	shr	\$16,$s0
+	movzb	`&lo("$s3")`,$t4
 	shr	\$16,$s1
-	movzb	`&lo("$s0")`,$t5
+	xor	$t5,$t1
 	shl	\$8,$acc2
-	shl	\$8,$acc0
-	movzb	($sbox,$t4,1),$t4	#$t1
-	movzb	($sbox,$t5,1),$t5	#$t2
+	movzb	`&lo("$s0")`,$t5
+	movzb	($sbox,$acc1,1),$acc1	#$t0
 	xor	$acc2,$t2
-	xor	$acc0,$t3
+	shl	\$8,$acc0
 	movzb	`&lo("$s1")`,$acc2
-	movzb	`&hi("$s3")`,$acc0
 	shl	\$16,$acc1
-	movzb	($sbox,$acc2,1),$acc2	#$t3
-	movzb	($sbox,$acc0,1),$acc0	#$t0
+	xor	$acc0,$t3
+	movzb	($sbox,$t4,1),$t4	#$t1
+	movzb	`&hi("$s3")`,$acc0
+	movzb	($sbox,$t5,1),$t5	#$t2
 	xor	$acc1,$t0
-	movzb	`&hi("$s0")`,$acc1
 	shr	\$8,$s2
+	movzb	`&hi("$s0")`,$acc1
+	shl	\$16,$t4
 	shr	\$8,$s1
+	shl	\$16,$t5
+	xor	$t4,$t1
+	movzb	($sbox,$acc2,1),$acc2	#$t3
+	movzb	($sbox,$acc0,1),$acc0	#$t0
 	movzb	($sbox,$acc1,1),$acc1	#$t1
 	movzb	($sbox,$s2,1),$s3	#$t3
 	movzb	($sbox,$s1,1),$s2	#$t2
-	shl	\$16,$t4
-	shl	\$16,$t5
+
 	shl	\$16,$acc2
-	xor	$t4,$t1
 	xor	$t5,$t2
-	xor	$acc2,$t3
-
 	shl	\$24,$acc0
+	xor	$acc2,$t3
 	shl	\$24,$acc1
-	shl	\$24,$s3
 	xor	$acc0,$t0
-	shl	\$24,$s2
+	shl	\$24,$s3
 	xor	$acc1,$t1
+	shl	\$24,$s2
 	mov	$t0,$s0
 	mov	$t1,$s1
 	xor	$t2,$s2
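
The reorder above (and the matching decrypt-side hunk further down) changes no computation, only the schedule: the S-box table loads are interleaved with independent `shr`/`shl`/`xor` register ops instead of issuing back to back, which hides load latency on the in-order Atom newly listed in the performance table. The shape of the transformation, as an illustrative fragment in the file's heredoc style (registers are placeholders, not the exact lines of the patch):

	movzb	($sbox,$t0,1),$t0	# table load: several cycles of latency
	shr	\$16,$s2		# independent ALU op fills the load shadow
	movzb	($sbox,$t1,1),$t1	# next load issues while the shift retires
	shl	\$8,$t4			# another independent op
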
@@ -465,12 +465,12 @@ sub enctransform()
 { my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");
 $code.=<<___;
-	mov	$s0,$acc0
-	mov	$s1,$acc1
-	and	\$0x80808080,$acc0
-	and	\$0x80808080,$acc1
-	mov	$acc0,$t0
-	mov	$acc1,$t1
+	mov	\$0x80808080,$t0
+	mov	\$0x80808080,$t1
+	and	$s0,$t0
+	and	$s1,$t1
+	mov	$t0,$acc0
+	mov	$t1,$acc1
 	shr	\$7,$t0
 	lea	($s0,$s0),$r20
 	shr	\$7,$t1
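
What enctransform is computing here is a packed xtime: each of the four state bytes held in a 32-bit register is doubled in GF(2^8) in parallel, using the masks 0x80808080 (overflow bits), 0xfefefefe (shifted bytes) and 0x1b1b1b1b (the AES reduction polynomial; dectransform keeps them in $mask80/$maskfe/$mask1b). The rewrite loads the constant first, presumably so the first instruction of each pair has no input dependency and can issue early on an in-order pipeline. A standalone sketch of the arithmetic (xtime4 is my name for illustration, not one from the source):

	# double four packed bytes in GF(2^8): per byte, (x<<1)&0xfe,
	# xored with 0x1b wherever the byte's high bit was set
	# (0x80 - (0x80>>7) = 0x7f, and 0x7f & 0x1b = 0x1b)
	sub xtime4 {
		my ($x) = @_;
		my $hi = $x & 0x80808080;			# high bit of every byte
		my $rb = ($hi - ($hi >> 7)) & 0x1b1b1b1b;	# conditional reduction
		return ((($x << 1) & 0xfefefefe) ^ $rb) & 0xffffffff;
	}
	printf "%08x\n", xtime4(0x80010203);	# prints 1b020406
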
@@ -488,25 +488,25 @@ $code.=<<___;
 	xor	$r20,$s0
 	xor	$r21,$s1
-	mov	$s2,$acc0
-	mov	$s3,$acc1
+	mov	\$0x80808080,$t2
 	rol	\$24,$s0
+	mov	\$0x80808080,$t3
 	rol	\$24,$s1
-	and	\$0x80808080,$acc0
-	and	\$0x80808080,$acc1
+	and	$s2,$t2
+	and	$s3,$t3
 	xor	$r20,$s0
 	xor	$r21,$s1
-	mov	$acc0,$t2
-	mov	$acc1,$t3
+	mov	$t2,$acc0
 	ror	\$16,$t0
+	mov	$t3,$acc1
 	ror	\$16,$t1
-	shr	\$7,$t2
 	lea	($s2,$s2),$r20
+	shr	\$7,$t2
 	xor	$t0,$s0
-	xor	$t1,$s1
 	shr	\$7,$t3
-	lea	($s3,$s3),$r21
+	xor	$t1,$s1
 	ror	\$8,$t0
+	lea	($s3,$s3),$r21
 	ror	\$8,$t1
 	sub	$t2,$acc0
 	sub	$t3,$acc1
@@ -522,23 +522,23 @@ $code.=<<___;
 	xor	$acc0,$r20
 	xor	$acc1,$r21
+	ror	\$16,$t2
 	xor	$r20,$s2
+	ror	\$16,$t3
 	xor	$r21,$s3
 	rol	\$24,$s2
+	mov	0($sbox),$acc0		# prefetch Te4
 	rol	\$24,$s3
 	xor	$r20,$s2
-	xor	$r21,$s3
-	mov	0($sbox),$acc0		# prefetch Te4
-	ror	\$16,$t2
-	ror	\$16,$t3
 	mov	64($sbox),$acc1
-	xor	$t2,$s2
-	xor	$t3,$s3
+	xor	$r21,$s3
 	mov	128($sbox),$r20
+	xor	$t2,$s2
 	ror	\$8,$t2
+	xor	$t3,$s3
 	ror	\$8,$t3
-	mov	192($sbox),$r21
 	xor	$t2,$s2
+	mov	192($sbox),$r21
 	xor	$t3,$s3
 ___
 }
@@ -588,6 +588,9 @@ $code.=<<___;
 .globl	AES_encrypt
 .type	AES_encrypt,\@function,3
 .align	16
+.globl	asm_AES_encrypt
+.hidden	asm_AES_encrypt
+asm_AES_encrypt:
 AES_encrypt:
 	push	%rbx
 	push	%rbp
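
asm_AES_encrypt is a hidden alias for the same entry point: `.globl` makes the label visible to the rest of libcrypto while `.hidden` keeps it out of the shared object's exported symbols, so internal callers bind to it directly, without a PLT indirection and without risk of interposition by another library's AES_encrypt. The pattern, shown here as an illustrative assembly fragment (the same alias is added for AES_decrypt and AES_cbc_encrypt below):

	.globl	asm_AES_encrypt
	.hidden	asm_AES_encrypt		# not in the dynamic symbol table
	asm_AES_encrypt:		# falls through into the public label
	AES_encrypt:
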
@@ -932,70 +935,69 @@ $code.=<<___;
 	movzb	`&lo("$s0")`,$t0
 	movzb	`&lo("$s1")`,$t1
 	movzb	`&lo("$s2")`,$t2
-	movzb	($sbox,$t0,1),$t0
-	movzb	($sbox,$t1,1),$t1
-	movzb	($sbox,$t2,1),$t2
-
 	movzb	`&lo("$s3")`,$t3
 	movzb	`&hi("$s3")`,$acc0
 	movzb	`&hi("$s0")`,$acc1
+	shr	\$16,$s3
+	movzb	`&hi("$s1")`,$acc2
+	movzb	($sbox,$t0,1),$t0
+	movzb	($sbox,$t1,1),$t1
+	movzb	($sbox,$t2,1),$t2
 	movzb	($sbox,$t3,1),$t3
-	movzb	($sbox,$acc0,1),$t4	#$t0
-	movzb	($sbox,$acc1,1),$t5	#$t1
-	movzb	`&hi("$s1")`,$acc2
+	movzb	($sbox,$acc0,1),$t4	#$t0
 	movzb	`&hi("$s2")`,$acc0
-	shr	\$16,$s2
+	movzb	($sbox,$acc1,1),$t5	#$t1
 	movzb	($sbox,$acc2,1),$acc2	#$t2
 	movzb	($sbox,$acc0,1),$acc0	#$t3
-	shr	\$16,$s3
-	movzb	`&lo("$s2")`,$acc1
-	shl	\$8,$t4
+	shr	\$16,$s2
 	shl	\$8,$t5
-	movzb	($sbox,$acc1,1),$acc1	#$t0
-	xor	$t4,$t0
-	xor	$t5,$t1
-
-	movzb	`&lo("$s3")`,$t4
+	shl	\$8,$t4
+	movzb	`&lo("$s2")`,$acc1
 	shr	\$16,$s0
+	xor	$t4,$t0
 	shr	\$16,$s1
-	movzb	`&lo("$s0")`,$t5
+	movzb	`&lo("$s3")`,$t4
+
 	shl	\$8,$acc2
+	xor	$t5,$t1
 	shl	\$8,$acc0
-	movzb	($sbox,$t4,1),$t4	#$t1
-	movzb	($sbox,$t5,1),$t5	#$t2
+	movzb	`&lo("$s0")`,$t5
+	movzb	($sbox,$acc1,1),$acc1	#$t0
 	xor	$acc2,$t2
-	xor	$acc0,$t3
-
 	movzb	`&lo("$s1")`,$acc2
-	movzb	`&hi("$s1")`,$acc0
+
 	shl	\$16,$acc1
+	xor	$acc0,$t3
+	movzb	($sbox,$t4,1),$t4	#$t1
+	movzb	`&hi("$s1")`,$acc0
 	movzb	($sbox,$acc2,1),$acc2	#$t3
-	movzb	($sbox,$acc0,1),$acc0	#$t0
 	xor	$acc1,$t0
-
+	movzb	($sbox,$t5,1),$t5	#$t2
 	movzb	`&hi("$s2")`,$acc1
+
+	shl	\$16,$acc2
 	shl	\$16,$t4
 	shl	\$16,$t5
-	movzb	($sbox,$acc1,1),$s1	#$t1
+	xor	$acc2,$t3
+	movzb	`&hi("$s3")`,$acc2
 	xor	$t4,$t1
+	shr	\$8,$s0
 	xor	$t5,$t2
-	movzb	`&hi("$s3")`,$acc1
-	shr	\$8,$s0
-	shl	\$16,$acc2
-	movzb	($sbox,$acc1,1),$s2	#$t2
+	movzb	($sbox,$acc0,1),$acc0	#$t0
+	movzb	($sbox,$acc1,1),$s1	#$t1
+	movzb	($sbox,$acc2,1),$s2	#$t2
 	movzb	($sbox,$s0,1),$s3	#$t3
-	xor	$acc2,$t3
+	mov	$t0,$s0
 	shl	\$24,$acc0
 	shl	\$24,$s1
 	shl	\$24,$s2
-	xor	$acc0,$t0
+	xor	$acc0,$s0
 	shl	\$24,$s3
 	xor	$t1,$s1
-	mov	$t0,$s0
 	xor	$t2,$s2
 	xor	$t3,$s3
 ___
@@ -1010,12 +1012,12 @@ sub dectransform()
 my $prefetch = shift;
 $code.=<<___;
-	mov	$tp10,$acc0
-	mov	$tp18,$acc8
-	and	$mask80,$acc0
-	and	$mask80,$acc8
-	mov	$acc0,$tp40
-	mov	$acc8,$tp48
+	mov	$mask80,$tp40
+	mov	$mask80,$tp48
+	and	$tp10,$tp40
+	and	$tp18,$tp48
+	mov	$tp40,$acc0
+	mov	$tp48,$acc8
 	shr	\$7,$tp40
 	lea	($tp10,$tp10),$tp20
 	shr	\$7,$tp48
@@ -1026,15 +1028,15 @@ $code.=<<___;
 	and	$maskfe,$tp28
 	and	$mask1b,$acc0
 	and	$mask1b,$acc8
-	xor	$tp20,$acc0
-	xor	$tp28,$acc8
-	mov	$acc0,$tp20
-	mov	$acc8,$tp28
-
-	and	$mask80,$acc0
-	and	$mask80,$acc8
-	mov	$acc0,$tp80
-	mov	$acc8,$tp88
+	xor	$acc0,$tp20
+	xor	$acc8,$tp28
+	mov	$mask80,$tp80
+	mov	$mask80,$tp88
+
+	and	$tp20,$tp80
+	and	$tp28,$tp88
+	mov	$tp80,$acc0
+	mov	$tp88,$acc8
 	shr	\$7,$tp80
 	lea	($tp20,$tp20),$tp40
 	shr	\$7,$tp88
@@ -1045,15 +1047,15 @@ $code.=<<___;
 	and	$maskfe,$tp48
 	and	$mask1b,$acc0
 	and	$mask1b,$acc8
-	xor	$tp40,$acc0
-	xor	$tp48,$acc8
-	mov	$acc0,$tp40
-	mov	$acc8,$tp48
-
-	and	$mask80,$acc0
-	and	$mask80,$acc8
-	mov	$acc0,$tp80
-	mov	$acc8,$tp88
+	xor	$acc0,$tp40
+	xor	$acc8,$tp48
+	mov	$mask80,$tp80
+	mov	$mask80,$tp88
+
+	and	$tp40,$tp80
+	and	$tp48,$tp88
+	mov	$tp80,$acc0
+	mov	$tp88,$acc8
 	shr	\$7,$tp80
 	xor	$tp10,$tp20		# tp2^=tp1
 	shr	\$7,$tp88
@@ -1078,51 +1080,51 @@ $code.=<<___;
 	mov	$tp10,$acc0
 	mov	$tp18,$acc8
 	xor	$tp80,$tp40		# tp4^tp1^=tp8
-	xor	$tp88,$tp48		# tp4^tp1^=tp8
 	shr	\$32,$acc0
+	xor	$tp88,$tp48		# tp4^tp1^=tp8
 	shr	\$32,$acc8
 	xor	$tp20,$tp80		# tp8^=tp8^tp2^tp1=tp2^tp1
-	xor	$tp28,$tp88		# tp8^=tp8^tp2^tp1=tp2^tp1
 	rol	\$8,`&LO("$tp10")`	# ROTATE(tp1^tp8,8)
+	xor	$tp28,$tp88		# tp8^=tp8^tp2^tp1=tp2^tp1
 	rol	\$8,`&LO("$tp18")`	# ROTATE(tp1^tp8,8)
 	xor	$tp40,$tp80		# tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
+	rol	\$8,`&LO("$acc0")`	# ROTATE(tp1^tp8,8)
 	xor	$tp48,$tp88		# tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
-	rol	\$8,`&LO("$acc0")`	# ROTATE(tp1^tp8,8)
 	rol	\$8,`&LO("$acc8")`	# ROTATE(tp1^tp8,8)
 	xor	`&LO("$tp80")`,`&LO("$tp10")`
-	xor	`&LO("$tp88")`,`&LO("$tp18")`
 	shr	\$32,$tp80
+	xor	`&LO("$tp88")`,`&LO("$tp18")`
 	shr	\$32,$tp88
 	xor	`&LO("$tp80")`,`&LO("$acc0")`
 	xor	`&LO("$tp88")`,`&LO("$acc8")`
 	mov	$tp20,$tp80
-	mov	$tp28,$tp88
-	shr	\$32,$tp80
-	shr	\$32,$tp88
 	rol	\$24,`&LO("$tp20")`	# ROTATE(tp2^tp1^tp8,24)
+	mov	$tp28,$tp88
 	rol	\$24,`&LO("$tp28")`	# ROTATE(tp2^tp1^tp8,24)
-	rol	\$24,`&LO("$tp80")`	# ROTATE(tp2^tp1^tp8,24)
-	rol	\$24,`&LO("$tp88")`	# ROTATE(tp2^tp1^tp8,24)
+	shr	\$32,$tp80
 	xor	`&LO("$tp20")`,`&LO("$tp10")`
+	shr	\$32,$tp88
 	xor	`&LO("$tp28")`,`&LO("$tp18")`
+	rol	\$24,`&LO("$tp80")`	# ROTATE(tp2^tp1^tp8,24)
 	mov	$tp40,$tp20
+	rol	\$24,`&LO("$tp88")`	# ROTATE(tp2^tp1^tp8,24)
 	mov	$tp48,$tp28
+	shr	\$32,$tp20
 	xor	`&LO("$tp80")`,`&LO("$acc0")`
+	shr	\$32,$tp28
 	xor	`&LO("$tp88")`,`&LO("$acc8")`
 	`"mov	0($sbox),$mask80"	if ($prefetch)`
-	shr	\$32,$tp20
-	shr	\$32,$tp28
-	`"mov	64($sbox),$maskfe"	if ($prefetch)`
 	rol	\$16,`&LO("$tp40")`	# ROTATE(tp4^tp1^tp8,16)
+	`"mov	64($sbox),$maskfe"	if ($prefetch)`
 	rol	\$16,`&LO("$tp48")`	# ROTATE(tp4^tp1^tp8,16)
 	`"mov	128($sbox),$mask1b"	if ($prefetch)`
 	rol	\$16,`&LO("$tp20")`	# ROTATE(tp4^tp1^tp8,16)
-	rol	\$16,`&LO("$tp28")`	# ROTATE(tp4^tp1^tp8,16)
 	`"mov	192($sbox),$tp80"	if ($prefetch)`
 	xor	`&LO("$tp40")`,`&LO("$tp10")`
+	rol	\$16,`&LO("$tp28")`	# ROTATE(tp4^tp1^tp8,16)
 	xor	`&LO("$tp48")`,`&LO("$tp18")`
 	`"mov	256($sbox),$tp88"	if ($prefetch)`
 	xor	`&LO("$tp20")`,`&LO("$acc0")`
@@ -1184,6 +1186,9 @@ $code.=<<___;
 .globl	AES_decrypt
 .type	AES_decrypt,\@function,3
 .align	16
+.globl	asm_AES_decrypt
+.hidden	asm_AES_decrypt
+asm_AES_decrypt:
 AES_decrypt:
 	push	%rbx
 	push	%rbp
@@ -1295,10 +1300,6 @@ AES_set_encrypt_key:
 	call	_x86_64_AES_set_encrypt_key
-	mov	8(%rsp),%r15
-	mov	16(%rsp),%r14
-	mov	24(%rsp),%r13
-	mov	32(%rsp),%r12
 	mov	40(%rsp),%rbp
 	mov	48(%rsp),%rbx
 	add	\$56,%rsp
@@ -1648,6 +1649,9 @@ $code.=<<___;
 .type	AES_cbc_encrypt,\@function,6
 .align	16
 .extern	OPENSSL_ia32cap_P
+.globl	asm_AES_cbc_encrypt
+.hidden	asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
 AES_cbc_encrypt:
 	cmp	\$0,%rdx	# check length
 	je	.Lcbc_epilogue