#! /usr/bin/env perl
-# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2009-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# nothing one can do and the result appears optimal. CCM result is
# identical to CBC, because CBC-MAC is essentially CBC encrypt without
# saving output. CCM CTR "stays invisible," because it's neatly
-# interleaved wih CBC-MAC. This provides ~30% improvement over
-# "straghtforward" CCM implementation with CTR and CBC-MAC performed
+# interleaved with CBC-MAC. This provides ~30% improvement over
+# "straightforward" CCM implementation with CTR and CBC-MAC performed
# disjointly. Parallelizable modes practically achieve the theoretical
# limit.
#
# asymptotic, if it can be surpassed, isn't it? What happens there?
# Rewind to CBC paragraph for the answer. Yes, out-of-order execution
# magic is responsible for this. Processor overlaps not only the
-# additional instructions with AES ones, but even AES instuctions
+# additional instructions with AES ones, but even AES instructions
# processing adjacent triplets of independent blocks. In the 6x case
# additional instructions still claim disproportionally small amount
# of additional cycles, but in 8x case number of instructions must be
# a tad too high for out-of-order logic to cope with, and AES unit
# remains underutilized... As you can see 8x interleave is hardly
# justifiable, so there no need to feel bad that 32-bit aesni-x86.pl
-# utilizies 6x interleave because of limited register bank capacity.
+# utilizes 6x interleave because of limited register bank capacity.
#
# Higher interleave factors do have negative impact on Westmere
# performance. While for ECB mode it's negligible ~1.5%, other
.type ${PREFIX}_encrypt,\@abi-omnipotent
.align 16
${PREFIX}_encrypt:
+.cfi_startproc
movups ($inp),$inout0 # load input
mov 240($key),$rounds # key->rounds
___
movups $inout0,($out) # output
pxor $inout0,$inout0
ret
+.cfi_endproc
.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
.globl ${PREFIX}_decrypt
.type ${PREFIX}_decrypt,\@abi-omnipotent
.align 16
${PREFIX}_decrypt:
+.cfi_startproc
movups ($inp),$inout0 # load input
mov 240($key),$rounds # key->rounds
___
movups $inout0,($out) # output
pxor $inout0,$inout0
ret
+.cfi_endproc
.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt
___
}
.type _aesni_${dir}rypt2,\@abi-omnipotent
.align 16
_aesni_${dir}rypt2:
+.cfi_startproc
$movkey ($key),$rndkey0
shl \$4,$rounds
$movkey 16($key),$rndkey1
aes${dir}last $rndkey0,$inout0
aes${dir}last $rndkey0,$inout1
ret
+.cfi_endproc
.size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2
___
}
.type _aesni_${dir}rypt3,\@abi-omnipotent
.align 16
_aesni_${dir}rypt3:
+.cfi_startproc
$movkey ($key),$rndkey0
shl \$4,$rounds
$movkey 16($key),$rndkey1
aes${dir}last $rndkey0,$inout1
aes${dir}last $rndkey0,$inout2
ret
+.cfi_endproc
.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3
___
}
.type _aesni_${dir}rypt4,\@abi-omnipotent
.align 16
_aesni_${dir}rypt4:
+.cfi_startproc
$movkey ($key),$rndkey0
shl \$4,$rounds
$movkey 16($key),$rndkey1
aes${dir}last $rndkey0,$inout2
aes${dir}last $rndkey0,$inout3
ret
+.cfi_endproc
.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4
___
}
.type _aesni_${dir}rypt6,\@abi-omnipotent
.align 16
_aesni_${dir}rypt6:
+.cfi_startproc
$movkey ($key),$rndkey0
shl \$4,$rounds
$movkey 16($key),$rndkey1
aes${dir}last $rndkey0,$inout4
aes${dir}last $rndkey0,$inout5
ret
+.cfi_endproc
.size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6
___
}
.type _aesni_${dir}rypt8,\@abi-omnipotent
.align 16
_aesni_${dir}rypt8:
+.cfi_startproc
$movkey ($key),$rndkey0
shl \$4,$rounds
$movkey 16($key),$rndkey1
aes${dir}last $rndkey0,$inout6
aes${dir}last $rndkey0,$inout7
ret
+.cfi_endproc
.size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8
___
}
.type aesni_ecb_encrypt,\@function,5
.align 16
aesni_ecb_encrypt:
+.cfi_startproc
___
$code.=<<___ if ($win64);
lea -0x58(%rsp),%rsp
___
$code.=<<___;
ret
+.cfi_endproc
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
___
\f
sub \$8,$len
jnc .Lctr32_loop8 # loop if $len-=8 didn't borrow
- add \$8,$len # restore real remainig $len
+ add \$8,$len # restore real remaining $len
jz .Lctr32_done # done if ($len==0)
lea -0x80($key),$key
movups $inout2,0x20($out) # $len was 3, stop store
.Lctr32_done:
- xorps %xmm0,%xmm0 # clear regiser bank
+ xorps %xmm0,%xmm0 # clear register bank
xor $key0,$key0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
lea `16*6`($inp),$inp
pxor $twmask,$inout5
- pxor $twres,@tweak[0] # calclulate tweaks^round[last]
+ pxor $twres,@tweak[0] # calculate tweaks^round[last]
aesenc $rndkey1,$inout4
pxor $twres,@tweak[1]
movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^round[last]
lea `16*6`($inp),$inp
pxor $twmask,$inout5
- pxor $twres,@tweak[0] # calclulate tweaks^round[last]
+ pxor $twres,@tweak[0] # calculate tweaks^round[last]
aesdec $rndkey1,$inout4
pxor $twres,@tweak[1]
movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key
.align 16
.L14rounds:
- movups 16($inp),%xmm2 # remaning half of *userKey
+ movups 16($inp),%xmm2 # remaining half of *userKey
mov \$13,$bits # 14 rounds for 256
lea 16(%rax),%rax
cmp \$`1<<28`,%r10d # AVX, but no XOP