-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
######################################################################
## Constant-time SSSE3 AES core implementation.
# X-Gene 45.9(**) 45.8/57.7(**) [33.1/37.6(**) ]
# Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ]
# Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ]
+# Mongoose(***) 26.3(**) 21.0/25.0(**) [13.3/16.8 ]
#
-# (*) ECB denotes approximate result for parallelizeable modes
+# (*) ECB denotes approximate result for parallelizable modes
# such as CBC decrypt, CTR, etc.;
# (**) these results are worse than scalar compiler-generated
# code, but it's constant-time and therefore preferred;
# (***) presented for reference/comparison purposes;
$flavour = shift;
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
-.asciz "Vector Permutaion AES for ARMv8, Mike Hamburg (Stanford University)"
+.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
.size _vpaes_consts,.-_vpaes_consts
.align 6
___
.type vpaes_encrypt,%function
.align 4
vpaes_encrypt:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
st1 {v0.16b}, [$out]
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_encrypt,.-vpaes_encrypt
.type vpaes_decrypt,%function
.align 4
vpaes_decrypt:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
st1 {v0.16b}, [$out]
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_decrypt,.-vpaes_decrypt
.type _vpaes_schedule_core,%function
.align 4
_vpaes_schedule_core:
+ .inst 0xd503233f // paciasp
stp x29, x30, [sp,#-16]!
add x29,sp,#0
ld1 {v0.16b}, [$inp] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
bl _vpaes_schedule_transform // input transform
mov $inp, #7 // mov \$7, %esi
-
+
.Loop_schedule_256:
sub $inp, $inp, #1 // dec %esi
bl _vpaes_schedule_mangle // output low result
// high round
bl _vpaes_schedule_round
cbz $inp, .Lschedule_mangle_last
- bl _vpaes_schedule_mangle
+ bl _vpaes_schedule_mangle
// low round. swap xmm7 and xmm6
dup v0.4s, v0.s[3] // vpshufd \$0xFF, %xmm0, %xmm0
mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7
bl _vpaes_schedule_low_round
mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7
-
+
b .Loop_schedule_256
##
.Lschedule_mangle_last_dec:
ld1 {v20.2d-v21.2d}, [x11] // reload constants
- sub $out, $out, #16 // add \$-16, %rdx
+ sub $out, $out, #16 // add \$-16, %rdx
eor v0.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm0
bl _vpaes_schedule_transform // output transform
st1 {v0.2d}, [$out] // vmovdqu %xmm0, (%rdx) # save last key
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
.type vpaes_set_encrypt_key,%function
.align 4
vpaes_set_encrypt_key:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
.type vpaes_set_decrypt_key,%function
.align 4
vpaes_set_decrypt_key:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
___
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
st1 {v0.16b}, [$ivec] // write ivec
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
.Lcbc_abort:
ret
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
.type vpaes_cbc_decrypt,%function
.align 4
vpaes_cbc_decrypt:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
___
.type vpaes_ecb_encrypt,%function
.align 4
vpaes_ecb_encrypt:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_ecb_encrypt,.-vpaes_ecb_encrypt
.type vpaes_ecb_decrypt,%function
.align 4
vpaes_ecb_decrypt:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
+ .inst 0xd50323bf // autiasp
ret
.size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt
___