-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open STDOUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
$verticalspin=1; # unlike 32-bit version $verticalspin performs
# ~15% better on both AMD and Intel cores
.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
.align 16
_x86_64_AES_encrypt_compact:
+.cfi_startproc
lea 128($sbox),$inp # size optimization
mov 0-128($inp),$acc1 # prefetch Te4
mov 32-128($inp),$acc2
xor 8($key),$s2
xor 12($key),$s3
.byte 0xf3,0xc3 # rep ret
+.cfi_endproc
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
___
.hidden asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
+.cfi_startproc
+ mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
# allocate frame "above" key schedule
- mov %rsp,%r10
lea -63(%rdx),%rcx # %rdx is key argument
and \$-64,%rsp
sub %rsp,%rcx
sub \$32,%rsp
mov %rsi,16(%rsp) # save out
- mov %r10,24(%rsp) # save real stack pointer
+ mov %rax,24(%rsp) # save original stack pointer
+.cfi_cfa_expression %rsp+24,deref,+8
.Lenc_prologue:
mov %rdx,$key
mov 16(%rsp),$out # restore out
mov 24(%rsp),%rsi # restore saved stack pointer
+.cfi_def_cfa %rsi,8
mov $s0,0($out) # write output vector
mov $s1,4($out)
mov $s2,8($out)
mov $s3,12($out)
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+.cfi_restore %r15
+ mov -40(%rsi),%r14
+.cfi_restore %r14
+ mov -32(%rsi),%r13
+.cfi_restore %r13
+ mov -24(%rsi),%r12
+.cfi_restore %r12
+ mov -16(%rsi),%rbp
+.cfi_restore %rbp
+ mov -8(%rsi),%rbx
+.cfi_restore %rbx
+ lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lenc_epilogue:
ret
+.cfi_endproc
.size AES_encrypt,.-AES_encrypt
___
.type _x86_64_AES_decrypt_compact,\@abi-omnipotent
.align 16
_x86_64_AES_decrypt_compact:
+.cfi_startproc
lea 128($sbox),$inp # size optimization
mov 0-128($inp),$acc1 # prefetch Td4
mov 32-128($inp),$acc2
xor 8($key),$s2
xor 12($key),$s3
.byte 0xf3,0xc3 # rep ret
+.cfi_endproc
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
___
.hidden asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
+.cfi_startproc
+ mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
# allocate frame "above" key schedule
- mov %rsp,%r10
lea -63(%rdx),%rcx # %rdx is key argument
and \$-64,%rsp
sub %rsp,%rcx
sub \$32,%rsp
mov %rsi,16(%rsp) # save out
- mov %r10,24(%rsp) # save real stack pointer
+ mov %rax,24(%rsp) # save original stack pointer
+.cfi_cfa_expression %rsp+24,deref,+8
.Ldec_prologue:
mov %rdx,$key
mov 16(%rsp),$out # restore out
mov 24(%rsp),%rsi # restore saved stack pointer
+.cfi_def_cfa %rsi,8
mov $s0,0($out) # write output vector
mov $s1,4($out)
mov $s2,8($out)
mov $s3,12($out)
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+.cfi_restore %r15
+ mov -40(%rsi),%r14
+.cfi_restore %r14
+ mov -32(%rsi),%r13
+.cfi_restore %r13
+ mov -24(%rsi),%r12
+.cfi_restore %r12
+ mov -16(%rsi),%rbp
+.cfi_restore %rbp
+ mov -8(%rsi),%rbx
+.cfi_restore %rbx
+ lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Ldec_epilogue:
ret
+.cfi_endproc
.size AES_decrypt,.-AES_decrypt
___
#------------------------------------------------------------------#
.type AES_set_encrypt_key,\@function,3
.align 16
AES_set_encrypt_key:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
- push %r12 # redundant, but allows to share
+.cfi_push %rbp
+ push %r12 # redundant, but allows to share
+.cfi_push %r12
push %r13 # exception handler...
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$8,%rsp
+.cfi_adjust_cfa_offset 8
.Lenc_key_prologue:
call _x86_64_AES_set_encrypt_key
mov 40(%rsp),%rbp
+.cfi_restore %rbp
mov 48(%rsp),%rbx
+.cfi_restore %rbx
add \$56,%rsp
+.cfi_adjust_cfa_offset -56
.Lenc_key_epilogue:
ret
+.cfi_endproc
.size AES_set_encrypt_key,.-AES_set_encrypt_key
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
.align 16
_x86_64_AES_set_encrypt_key:
+.cfi_startproc
mov %esi,%ecx # %ecx=bits
mov %rdi,%rsi # %rsi=userKey
mov %rdx,%rdi # %rdi=key
xor %rax,%rax
jmp .Lexit
-.L14rounds:
+.L14rounds:
mov 0(%rsi),%rax # copy first 8 dwords
mov 8(%rsi),%rbx
mov 16(%rsi),%rcx
mov \$-1,%rax
.Lexit:
.byte 0xf3,0xc3 # rep ret
+.cfi_endproc
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
___
.type AES_set_decrypt_key,\@function,3
.align 16
AES_set_decrypt_key:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
push %rdx # save key schedule
+.cfi_adjust_cfa_offset 8
.Ldec_key_prologue:
call _x86_64_AES_set_encrypt_key
xor %rax,%rax
.Labort:
mov 8(%rsp),%r15
+.cfi_restore %r15
mov 16(%rsp),%r14
+.cfi_restore %r14
mov 24(%rsp),%r13
+.cfi_restore %r13
mov 32(%rsp),%r12
+.cfi_restore %r12
mov 40(%rsp),%rbp
+.cfi_restore %rbp
mov 48(%rsp),%rbx
+.cfi_restore %rbx
add \$56,%rsp
+.cfi_adjust_cfa_offset -56
.Ldec_key_epilogue:
ret
+.cfi_endproc
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
+.cfi_startproc
cmp \$0,%rdx # check length
je .Lcbc_epilogue
pushfq
+# This could be .cfi_push 49, but libunwind fails on registers it does not
+# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
+.cfi_adjust_cfa_offset 8
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lcbc_prologue:
cld
mov %r9d,%r9d # clear upper half of enc
lea .LAES_Te(%rip),$sbox
+ lea .LAES_Td(%rip),%r10
cmp \$0,%r9
- jne .Lcbc_picked_te
- lea .LAES_Td(%rip),$sbox
-.Lcbc_picked_te:
+ cmoveq %r10,$sbox
+.cfi_remember_state
mov OPENSSL_ia32cap_P(%rip),%r10d
cmp \$$speed_limit,%rdx
jb .Lcbc_slow_prologue
.Lcbc_te_ok:
xchg %rsp,$key
+.cfi_def_cfa_register $key
#add \$8,%rsp # reserve for return address!
mov $key,$_rsp # save %rsp
+.cfi_cfa_expression $_rsp,deref,+64
.Lcbc_fast_body:
mov %rdi,$_inp # save copy of inp
mov %rsi,$_out # save copy of out
#--------------------------- SLOW ROUTINE ---------------------------#
.align 16
.Lcbc_slow_prologue:
+.cfi_restore_state
# allocate aligned stack frame...
lea -88(%rsp),%rbp
and \$-64,%rbp
sub %r10,%rbp
xchg %rsp,%rbp
+.cfi_def_cfa_register %rbp
#add \$8,%rsp # reserve for return address!
mov %rbp,$_rsp # save %rsp
+.cfi_cfa_expression $_rsp,deref,+64
.Lcbc_slow_body:
#mov %rdi,$_inp # save copy of inp
#mov %rsi,$_out # save copy of out
lea ($key,%rax),%rax
mov %rax,$keyend
- # pick Te4 copy which can't "overlap" with stack frame or key scdedule
+ # pick Te4 copy which can't "overlap" with stack frame or key schedule
lea 2048($sbox),$sbox
lea 768-8(%rsp),%rax
sub $sbox,%rax
.align 16
.Lcbc_exit:
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,64
mov (%rsi),%r15
+.cfi_restore %r15
mov 8(%rsi),%r14
+.cfi_restore %r14
mov 16(%rsi),%r13
+.cfi_restore %r13
mov 24(%rsi),%r12
+.cfi_restore %r12
mov 32(%rsi),%rbp
+.cfi_restore %rbp
mov 40(%rsi),%rbx
+.cfi_restore %rbx
lea 48(%rsi),%rsp
+.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq
+# This could be .cfi_pop 49, but libunwind fails on registers it does not
+# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
+.cfi_adjust_cfa_offset -8
.Lcbc_epilogue:
ret
+.cfi_endproc
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
jae .Lin_block_prologue
mov 24(%rax),%rax # pull saved real stack pointer
- lea 48(%rax),%rax # adjust...
mov -8(%rax),%rbx
mov -16(%rax),%rbp