#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
-# Licensed under the OpenSSL license (the "License"). You may not use
+# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
# ====================================================================
#
# June 2015
-#
+#
# ChaCha20 for ARMv8.
#
# Performance in cycles per byte out of large buffer.
# Cortex-A57 8.06/+43% 4.90 4.43(**)
# Denver 4.50/+82% 2.63 2.67(*)
# X-Gene 9.50/+46% 8.82 8.89(*)
+# Mongoose 8.00/+44% 3.64 3.25
+# Kryo 8.17/+50% 4.83 4.65
#
# (*) it's expected that doubling interleave factor doesn't help
# all processors, only those with higher NEON latency and
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
.Lone:
.long 1,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
.asciz "ChaCha20 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.globl ChaCha20_ctr32
.align 5
ChaCha20_ctr32:
cbz $len,.Labort
- adr @x[0],.LOPENSSL_armcap_P
cmp $len,#192
b.lo .Lshort
-#ifdef __ILP32__
- ldrsw @x[1],[@x[0]]
-#else
- ldr @x[1],[@x[0]]
-#endif
- ldr w17,[@x[1],@x[0]]
+
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
tst w17,#ARMV7_NEON
- b.ne ChaCha20_neon
+ b.ne .LChaCha20_neon
.Lshort:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-96]!
add x29,sp,#0
mov $ctr,#10
subs $len,$len,#64
.Loop:
- sub $ctr,$ctr,#1
+ sub $ctr,$ctr,#1
___
foreach (&ROUND(0, 4, 8,12)) { eval; }
foreach (&ROUND(0, 5,10,15)) { eval; }
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+ .inst 0xd50323bf // autiasp
.Labort:
ret
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+ .inst 0xd50323bf // autiasp
ret
.size ChaCha20_ctr32,.-ChaCha20_ctr32
___
.type ChaCha20_neon,%function
.align 5
ChaCha20_neon:
+.LChaCha20_neon:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-96]!
add x29,sp,#0
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+ .inst 0xd50323bf // autiasp
ret
.Ltail_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+ .inst 0xd50323bf // autiasp
ret
.size ChaCha20_neon,.-ChaCha20_neon
___
.type ChaCha20_512_neon,%function
.align 5
ChaCha20_512_neon:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-96]!
add x29,sp,#0
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+ .inst 0xd50323bf // autiasp
ret
.size ChaCha20_512_neon,.-ChaCha20_512_neon
___