From 9a708bf982da1d2c9739339d16d7b021da955e00 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 20 May 2018 12:13:16 +0200 Subject: [PATCH] {arm64|x86_64}cpuid.pl: add special 16-byte case to CRYPTO_memcmp. CRYPTO_memcmp is a must in GCM decrypt, and the general-purpose byte-by-byte loop takes a significant portion of the execution time for short inputs: more than GHASH for few-byte inputs, according to the profiler. The special 16-byte case takes it off the top-five list in the profiler output. Reviewed-by: Rich Salz (Merged from https://github.com/openssl/openssl/pull/6312) --- crypto/arm64cpuid.pl | 13 +++++++++++++ crypto/x86_64cpuid.pl | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl index daa2b17ada..06c8add7a0 100755 --- a/crypto/arm64cpuid.pl +++ b/crypto/arm64cpuid.pl @@ -115,6 +115,19 @@ OPENSSL_cleanse: CRYPTO_memcmp: eor w3,w3,w3 cbz x2,.Lno_data // len==0? + cmp x2,#16 + b.ne .Loop_cmp + ldp x8,x9,[x0] + ldp x10,x11,[x1] + eor x8,x8,x10 + eor x9,x9,x11 + orr x8,x8,x9 + mov x0,#1 + cmp x8,#0 + csel x0,xzr,x0,eq + ret + +.align 4 .Loop_cmp: ldrb w4,[x0],#1 ldrb w5,[x1],#1 diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 513d00560c..6423e803b7 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -271,6 +271,18 @@ CRYPTO_memcmp: xor %r10,%r10 cmp \$0,$arg3 je .Lno_data + cmp \$16,$arg3 + jne .Loop_cmp + mov ($arg1),%r10 + mov 8($arg1),%r11 + mov \$1,$arg3 + xor ($arg2),%r10 + xor 8($arg2),%r11 + or %r11,%r10 + cmovnz $arg3,%rax + ret + +.align 16 .Loop_cmp: mov ($arg1),%r10b lea 1($arg1),$arg1 -- 2.34.1