projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
poly1305/asm/poly1305-*.pl: flip horizontal add and reduction.
[openssl.git]
/
crypto
/
poly1305
/
asm
/
poly1305-armv8.pl
diff --git
a/crypto/poly1305/asm/poly1305-armv8.pl
b/crypto/poly1305/asm/poly1305-armv8.pl
index 79185d2bdd812dd4f172eb5d9e50beaf6e9f7550..f1359fd44a85f2a86852ff9cfb5b66869f03af06 100755
(executable)
--- a/
crypto/poly1305/asm/poly1305-armv8.pl
+++ b/
crypto/poly1305/asm/poly1305-armv8.pl
@@
-790,6
+790,19
@@
poly1305_blocks_neon:
umlal $ACC2,$IN01_4,${S3}
.Lshort_tail:
umlal $ACC2,$IN01_4,${S3}
.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp $ACC3,$ACC3,$ACC3
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp $ACC0,$ACC0,$ACC0
+ ldp d10,d11,[sp,#32]
+ addp $ACC4,$ACC4,$ACC4
+ ldp d12,d13,[sp,#48]
+ addp $ACC1,$ACC1,$ACC1
+ ldp d14,d15,[sp,#64]
+ addp $ACC2,$ACC2,$ACC2
+
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
@@
-821,19
+834,6
@@
poly1305_blocks_neon:
add $ACC1,$ACC1,$T0.2d // h0 -> h1
add $ACC4,$ACC4,$T1.2d // h3 -> h4
add $ACC1,$ACC1,$T0.2d // h0 -> h1
add $ACC4,$ACC4,$T1.2d // h3 -> h4
- ////////////////////////////////////////////////////////////////
- // horizontal add
-
- addp $ACC2,$ACC2,$ACC2
- ldp d8,d9,[sp,#16] // meet ABI requirements
- addp $ACC0,$ACC0,$ACC0
- ldp d10,d11,[sp,#32]
- addp $ACC1,$ACC1,$ACC1
- ldp d12,d13,[sp,#48]
- addp $ACC3,$ACC3,$ACC3
- ldp d14,d15,[sp,#64]
- addp $ACC4,$ACC4,$ACC4
-
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced