X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Faes%2Fasm%2Fvpaes-armv8.pl;h=5758133f64cdb77fbdaf41060e453f5762b2ece7;hp=bc90b9f3bb6ad692c3a4482415144a41f41f8da2;hb=2cf7fd698ec1375421f91338ff8a44e7da5238b6;hpb=35141544e2994f0f3b87be7d7c9a43ea3cd9840a diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl index bc90b9f3bb..5758133f64 100755 --- a/crypto/aes/asm/vpaes-armv8.pl +++ b/crypto/aes/asm/vpaes-armv8.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ###################################################################### ## Constant-time SSSE3 AES core implementation. @@ -22,15 +29,16 @@ # X-Gene 45.9(**) 45.8/57.7(**) [33.1/37.6(**) ] # Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ] # Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ] +# Mongoose(***) 26.3(**) 21.0/25.0(**) [13.3/16.8 ] # -# (*) ECB denotes approximate result for parallelizeable modes +# (*) ECB denotes approximate result for parallelizable modes # such as CBC decrypt, CTR, etc.; # (**) these results are worse than scalar compiler-generated # code, but it's constant-time and therefore preferred; # (***) presented for reference/comparison purposes; $flavour = shift; -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or @@ -129,7 +137,7 @@ _vpaes_consts: .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 -.asciz "Vector Permutaion AES for ARMv8, Mike Hamburg (Stanford University)" +.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)" .size _vpaes_consts,.-_vpaes_consts .align 6 ___ @@ -247,6 +255,7 @@ _vpaes_encrypt_core: .type vpaes_encrypt,%function .align 4 vpaes_encrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -256,6 +265,7 @@ vpaes_encrypt: st1 {v0.16b}, [$out] ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_encrypt,.-vpaes_encrypt @@ -478,6 +488,7 @@ _vpaes_decrypt_core: .type vpaes_decrypt,%function .align 4 vpaes_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -487,6 +498,7 @@ vpaes_decrypt: st1 {v0.16b}, [$out] ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_decrypt,.-vpaes_decrypt @@ -657,6 +669,7 @@ _vpaes_key_preheat: .type _vpaes_schedule_core,%function .align 4 _vpaes_schedule_core: + .inst 0xd503233f // paciasp stp x29, x30, [sp,#-16]! add x29,sp,#0 @@ -761,7 +774,7 @@ _vpaes_schedule_core: ld1 {v0.16b}, [$inp] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) bl _vpaes_schedule_transform // input transform mov $inp, #7 // mov \$7, %esi - + .Loop_schedule_256: sub $inp, $inp, #1 // dec %esi bl _vpaes_schedule_mangle // output low result @@ -770,7 +783,7 @@ _vpaes_schedule_core: // high round bl _vpaes_schedule_round cbz $inp, .Lschedule_mangle_last - bl _vpaes_schedule_mangle + bl _vpaes_schedule_mangle // low round. swap xmm7 and xmm6 dup v0.4s, v0.s[3] // vpshufd \$0xFF, %xmm0, %xmm0 @@ -779,7 +792,7 @@ _vpaes_schedule_core: mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 bl _vpaes_schedule_low_round mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 - + b .Loop_schedule_256 ## @@ -806,7 +819,7 @@ _vpaes_schedule_core: .Lschedule_mangle_last_dec: ld1 {v20.2d-v21.2d}, [x11] // reload constants - sub $out, $out, #16 // add \$-16, %rdx + sub $out, $out, #16 // add \$-16, %rdx eor v0.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm0 bl _vpaes_schedule_transform // output transform st1 {v0.2d}, [$out] // vmovdqu %xmm0, (%rdx) # save last key @@ -821,6 +834,7 @@ _vpaes_schedule_core: eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 ldp x29, x30, [sp],#16 + .inst 0xd50323bf // autiasp ret .size _vpaes_schedule_core,.-_vpaes_schedule_core @@ -1033,6 +1047,7 @@ _vpaes_schedule_mangle: .type vpaes_set_encrypt_key,%function .align 4 vpaes_set_encrypt_key: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1048,6 +1063,7 @@ vpaes_set_encrypt_key: ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key @@ -1055,6 +1071,7 @@ vpaes_set_encrypt_key: .type vpaes_set_decrypt_key,%function .align 4 vpaes_set_decrypt_key: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1074,6 +1091,7 @@ vpaes_set_decrypt_key: ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key ___ @@ -1090,6 +1108,7 @@ vpaes_cbc_encrypt: cmp w5, #0 // check direction b.eq vpaes_cbc_decrypt + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1112,6 +1131,7 @@ vpaes_cbc_encrypt: st1 {v0.16b}, [$ivec] // write ivec ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp .Lcbc_abort: ret .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt @@ -1119,6 +1139,7 @@ vpaes_cbc_encrypt: .type vpaes_cbc_decrypt,%function .align 4 vpaes_cbc_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1160,6 +1181,7 @@ vpaes_cbc_decrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt ___ @@ -1169,6 +1191,7 @@ $code.=<<___; .type vpaes_ecb_encrypt,%function .align 4 vpaes_ecb_encrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1202,6 +1225,7 @@ vpaes_ecb_encrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_ecb_encrypt,.-vpaes_ecb_encrypt @@ -1209,6 +1233,7 @@ vpaes_ecb_encrypt: .type vpaes_ecb_decrypt,%function .align 4 vpaes_ecb_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1242,6 +1267,7 @@ vpaes_ecb_decrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt ___