From: Andy Polyakov
Date: Sun, 15 May 2016 15:01:15 +0000 (+0200)
Subject: Add assembly CRYPTO_memcmp.
X-Git-Tag: OpenSSL_1_1_0-pre6~763
X-Git-Url: https://git.openssl.org/?p=openssl.git;a=commitdiff_plain;h=e33826f01bd78af76e0135c8dfab3387927a82bb

Add assembly CRYPTO_memcmp.

GH: #102
Reviewed-by: Richard Levitte
---
Review notes (placed below the "---" marker, so they are not part of the
commit message):

 * Every CRYPTO_memcmp variant added here walks all len bytes, ORs together
   the XOR of each pair of bytes, and branches only on the byte counter,
   never on the data.  The accumulated value is then collapsed to 0 (all
   bytes equal, or len == 0) or 1 (at least one byte differs).
 * The portable C implementation in cryptlib.c is now compiled only when
   OPENSSL_CPUID_OBJ is not defined.
 * ia64cpuid.S gains an ADDP macro (addp4 when _HPUX_SOURCE is defined
   without _LP64, plain add otherwise) that replaces three open-coded
   #if blocks.
 * Corrected relative to the patch text this was reconstructed from:
   - pariscid.pl: "extru %r29,31,1" extracted the least significant bit of
     the negated accumulator, so only bit 0 of every byte took part in the
     comparison.  It now extracts bit 0 in PA-RISC numbering, i.e. the
     sign bit.
   - pariscid.pl: the third .EXPORT argument descriptor repeated ARGW1;
     it is now ARGW2.
   - ppccpuid.pl: the "globl" directive was missing its leading dot.

diff --git a/crypto/alphacpuid.pl b/crypto/alphacpuid.pl
index 6356b00f94..6c7fd4c9dd 100644
--- a/crypto/alphacpuid.pl
+++ b/crypto/alphacpuid.pl
@@ -134,6 +134,34 @@ OPENSSL_cleanse:
 	bne	$17,.Little
 .Ldone: ret	($26)
 .end	OPENSSL_cleanse
+
+.globl	CRYPTO_memcmp
+.ent	CRYPTO_memcmp
+CRYPTO_memcmp:
+	.frame	$30,0,$26
+	.prologue 0
+	xor	$0,$0,$0
+	beq	$18,.Lno_data
+
+	xor	$1,$1,$1
+	nop
+.Loop_cmp:
+	ldq_u	$2,0($16)
+	subq	$18,1,$18
+	ldq_u	$3,0($17)
+	extbl	$2,$16,$2
+	lda	$16,1($16)
+	extbl	$3,$17,$3
+	lda	$17,1($17)
+	xor	$3,$2,$2
+	or	$2,$0,$0
+	bne	$18,.Loop_cmp
+
+	subq	$31,$0,$0
+	srl	$0,63,$0
+.Lno_data:
+	ret	($26)
+.end	CRYPTO_memcmp
 ___
 {
 my ($out,$cnt,$max)=("\$16","\$17","\$18");
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index c0af608142..caa33875c9 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -100,6 +100,26 @@ OPENSSL_cleanse:
 	cbnz	x1,.Little		// len!=0?
 	ret
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
+
+.globl	CRYPTO_memcmp
+.type	CRYPTO_memcmp,%function
+.align	4
+CRYPTO_memcmp:
+	eor	w3,w3,w3
+	cbz	x2,.Lno_data	// len==0?
+.Loop_cmp:
+	ldrb	w4,[x0],#1
+	ldrb	w5,[x1],#1
+	eor	w4,w4,w5
+	orr	w3,w3,w4
+	subs	x2,x2,#1
+	b.ne	.Loop_cmp
+
+.Lno_data:
+	neg	w0,w3
+	lsr	w0,w0,#31
+	ret
+.size	CRYPTO_memcmp,.-CRYPTO_memcmp
 ___
 
 print $code;
diff --git a/crypto/armv4cpuid.pl b/crypto/armv4cpuid.pl
index 2c02f08c74..33c893d0e4 100644
--- a/crypto/armv4cpuid.pl
+++ b/crypto/armv4cpuid.pl
@@ -105,6 +105,36 @@ OPENSSL_cleanse:
 #endif
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
 
+.global	CRYPTO_memcmp
+.type	CRYPTO_memcmp,%function
+.align	4
+CRYPTO_memcmp:
+	eor	ip,ip,ip
+	cmp	r2,#0
+	beq	.Lno_data
+	stmdb	sp!,{r4,r5}
+
+.Loop_cmp:
+	ldrb	r4,[r0],#1
+	ldrb	r5,[r1],#1
+	eor	r4,r4,r5
+	orr	ip,ip,r4
+	subs	r2,r2,#1
+	bne	.Loop_cmp
+
+	ldmia	sp!,{r4,r5}
+.Lno_data:
+	neg	r0,ip
+	mov	r0,r0,lsr#31
+#if __ARM_ARCH__>=5
+	bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr
+	.word	0xe12fff1e	@ bx	lr
+#endif
+.size	CRYPTO_memcmp,.-CRYPTO_memcmp
+
 #if __ARM_MAX_ARCH__>=7
 .arch	armv7-a
 .fpu	neon
diff --git a/crypto/c64xpluscpuid.pl b/crypto/c64xpluscpuid.pl
index 3dcd691129..9efe1205ff 100644
--- a/crypto/c64xpluscpuid.pl
+++ b/crypto/c64xpluscpuid.pl
@@ -18,6 +18,7 @@ $code.=<<___;
 	.if	__TI_EABI__
 	.asg	OPENSSL_rdtsc,_OPENSSL_rdtsc
 	.asg	OPENSSL_cleanse,_OPENSSL_cleanse
+	.asg	CRYPTO_memcmp,_CRYPTO_memcmp
 	.asg	OPENSSL_atomic_add,_OPENSSL_atomic_add
 	.asg	OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
 	.asg	OPENSSL_instrument_bus,_OPENSSL_instrument_bus
@@ -87,6 +88,29 @@ _OPENSSL_cleanse:
  [A1]	STB	A2,*A4++[2]
 	.endasmfunc
 
+	.global	_CRYPTO_memcmp
+_CRYPTO_memcmp:
+	.asmfunc
+	MV	A6,B0
+  [!B0]	BNOP	RA
+||[!B0]	ZERO	A4
+  [B0]	MVC	B0,ILC
+|| [B0]	ZERO	A0
+	NOP	4
+
+	SPLOOP	1
+	LDBU	*A4++,A1
+||	LDBU	*B4++,B1
+	NOP	4
+	XOR.L	B1,A1,A2
+	SPKERNEL	1,0
+||	OR.S	A2,A0,A0
+
+	BNOP	RA,3
+	ZERO.L	A4
+  [A0]	MVK	1,A4
+	.endasmfunc
+
 	.global	_OPENSSL_atomic_add
 _OPENSSL_atomic_add:
 	.asmfunc
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index 92e323c609..8e189eca64 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -316,6 +316,7 @@ void OPENSSL_die(const char *message, const char *file, int line)
 #endif
 }
 
+#if !defined(OPENSSL_CPUID_OBJ)
 /* volatile unsigned char* pointers are there because
  * 1. Accessing a variable declared volatile via a pointer
  *    that lacks a volatile qualifier causes undefined behavior.
@@ -347,3 +348,4 @@ int CRYPTO_memcmp(const volatile void * volatile in_a,
 
     return x;
 }
+#endif
diff --git a/crypto/ia64cpuid.S b/crypto/ia64cpuid.S
index bf5abc3be3..f942648bae 100644
--- a/crypto/ia64cpuid.S
+++ b/crypto/ia64cpuid.S
@@ -2,6 +2,12 @@
 // On Win64i compile with ias.exe.
 .text
 
+#if defined(_HPUX_SOURCE) && !defined(_LP64)
+#define	ADDP	addp4
+#else
+#define	ADDP	add
+#endif
+
 .global	OPENSSL_cpuid_setup#
 .proc	OPENSSL_cpuid_setup#
 OPENSSL_cpuid_setup:
@@ -131,9 +137,7 @@ OPENSSL_wipe_cpu:
 .proc	OPENSSL_cleanse#
 OPENSSL_cleanse:
 { .mib;	cmp.eq		p6,p0=0,r33	    // len==0
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-	addp4		r32=0,r32
-#endif
+	ADDP		r32=0,r32
 (p6)	br.ret.spnt	b0		};;
 { .mib;	and		r2=7,r32
 	cmp.leu		p6,p0=15,r33	    // len>=15
@@ -166,14 +170,51 @@ OPENSSL_cleanse:
 (p6)	br.ret.sptk.many	b0	};;
 .endp	OPENSSL_cleanse#
 
+.global	CRYPTO_memcmp#
+.proc	CRYPTO_memcmp#
+.align	32
+.skip	16
+CRYPTO_memcmp:
+	.prologue
+{ .mib;	mov		r8=0
+	cmp.eq		p6,p0=0,r34	    // len==0?
+(p6)	br.ret.spnt	b0		};;
+	.save		ar.pfs,r2
+{ .mib;	alloc		r2=ar.pfs,3,5,0,8
+	.save		ar.lc,r3
+	mov		r3=ar.lc
+	brp.loop.imp	.Loop_cmp_ctop,.Loop_cmp_cend-16
+					}
+{ .mib;	sub		r10=r34,r0,1
+	.save		pr,r9
+	mov		r9=pr		};;
+{ .mii;	ADDP		r16=0,r32
+	mov		ar.lc=r10
+	mov		ar.ec=4		}
+{ .mib;	ADDP		r17=0,r33
+	mov		pr.rot=1<<16	};;
+
+.Loop_cmp_ctop:
+{ .mib;	(p16)	ld1	r32=[r16],1
+	(p18)	xor	r34=r34,r38	}
+{ .mib;	(p16)	ld1	r36=[r17],1
+	(p19)	or	r8=r8,r35
+	br.ctop.sptk	.Loop_cmp_ctop	};;
+.Loop_cmp_cend:
+
+{ .mib;	cmp.ne		p6,p0=0,r8
+	mov		ar.lc=r3	};;
+{ .mib;
+(p6)	mov		r8=1
+	mov		pr=r9,0x1ffff
+	br.ret.sptk.many	b0	};;
+.endp	CRYPTO_memcmp#
+
 .global	OPENSSL_instrument_bus#
 .proc	OPENSSL_instrument_bus#
 OPENSSL_instrument_bus:
 { .mmi;	mov		r2=r33
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-	addp4		r32=0,r32
-#endif
-					}
+	ADDP		r32=0,r32	}
 { .mmi;	mov		r8=ar.itc;;
 	mov		r10=r0
 	mov		r9=r8		};;
@@ -208,10 +249,7 @@ OPENSSL_instrument_bus:
 .proc	OPENSSL_instrument_bus2#
 OPENSSL_instrument_bus2:
 { .mmi;	mov		r2=r33			// put aside cnt
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-	addp4		r32=0,r32
-#endif
-					}
+	ADDP		r32=0,r32	}
 { .mmi;	mov		r8=ar.itc;;
 	mov		r10=r0
 	mov		r9=r8		};;
diff --git a/crypto/pariscid.pl b/crypto/pariscid.pl
index 03895d5cf3..f82e27ac4c 100644
--- a/crypto/pariscid.pl
+++ b/crypto/pariscid.pl
@@ -138,6 +138,37 @@ L\$done
 ___
 }
 {
+my ($in1,$in2,$len)=("%r26","%r25","%r24");
+
+$code.=<<___;
+	.EXPORT	CRYPTO_memcmp,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
+	.ALIGN	8
+CRYPTO_memcmp
+	.PROC
+	.CALLINFO	NO_CALLS
+	.ENTRY
+	cmpib,*=	0,$len,L\$no_data
+	xor		$rv,$rv,$rv
+
+L\$oop_cmp
+	ldb		0($in1),%r19
+	ldb		0($in2),%r20
+	ldo		1($in1),$in1
+	ldo		1($in2),$in2
+	xor		%r19,%r20,%r29
+	addib,*<>	-1,$len,L\$oop_cmp
+	or		%r29,$rv,$rv
+
+	sub		%r0,$rv,%r29
+	extru		%r29,0,1,$rv
+L\$no_data
+	bv		($rp)
+	.EXIT
+	nop
+	.PROCEND
+___
+}
+{
 my ($out,$cnt,$max)=("%r26","%r25","%r24");
 my ($tick,$lasttick)=("%r23","%r22");
 my ($diff,$lastdiff)=("%r21","%r20");
diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl
index 8f603e972f..9d1cada4dc 100755
--- a/crypto/ppccpuid.pl
+++ b/crypto/ppccpuid.pl
@@ -177,6 +177,32 @@ Laligned:
 	.byte	0,12,0x14,0,0,0,2,0
 	.long	0
 .size	.OPENSSL_cleanse,.-.OPENSSL_cleanse
+
+.globl	.CRYPTO_memcmp
+.align	4
+.CRYPTO_memcmp:
+	$CMPLI	r5,0
+	li	r0,0
+	beq	Lno_data
+	mtctr	r5
+Loop_cmp:
+	lbz	r6,0(r3)
+	addi	r3,r3,1
+	lbz	r7,0(r4)
+	addi	r4,r4,1
+	xor	r6,r6,r7
+	or	r0,r0,r6
+	bdnz	Loop_cmp
+
+Lno_data:
+	li	r3,0
+	sub	r3,r3,r0
+	extrwi	r3,r3,1,0
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,3,0
+	.long	0
+.size	.CRYPTO_memcmp,.-.CRYPTO_memcmp
 ___
 {
 my ($out,$cnt,$max)=("r3","r4","r5");
diff --git a/crypto/s390xcpuid.S b/crypto/s390xcpuid.S
index 3efad5506b..df7b35ad70 100644
--- a/crypto/s390xcpuid.S
+++ b/crypto/s390xcpuid.S
@@ -125,6 +125,33 @@ OPENSSL_cleanse:
 	br	%r14
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
 
+.globl	CRYPTO_memcmp
+.type	CRYPTO_memcmp,@function
+.align	16
+CRYPTO_memcmp:
+#if !defined(__s390x__) && !defined(__s390x)
+	llgfr	%r4,%r4
+#endif
+	lghi	%r5,0
+	clgr	%r4,%r5
+	je	.Lno_data
+
+.Loop_cmp:
+	llc	%r0,0(%r2)
+	la	%r2,1(%r2)
+	llc	%r1,0(%r3)
+	la	%r3,1(%r3)
+	xr	%r1,%r0
+	or	%r5,%r1
+	brctg	%r4,.Loop_cmp
+
+	lnr	%r5,%r5
+	srl	%r5,31
+.Lno_data:
+	lgr	%r2,%r5
+	br	%r14
+.size	CRYPTO_memcmp,.-CRYPTO_memcmp
+
 .globl	OPENSSL_instrument_bus
 .type	OPENSSL_instrument_bus,@function
 .align	16
diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S
index 6f1dded8e2..f48d860159 100644
--- a/crypto/sparccpuid.S
+++ b/crypto/sparccpuid.S
@@ -440,6 +440,40 @@ OPENSSL_cleanse:
 .type	OPENSSL_cleanse,#function
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
 
+.global	CRYPTO_memcmp
+.align	16
+CRYPTO_memcmp:
+	cmp	%o2,0
+#ifdef ABI64
+	beq,pn	%xcc,.Lno_data
+#else
+	beq	.Lno_data
+#endif
+	xor	%g1,%g1,%g1
+	nop
+
+.Loop_cmp:
+	ldub	[%o0],%o3
+	add	%o0,1,%o0
+	ldub	[%o1],%o4
+	add	%o1,1,%o1
+	subcc	%o2,1,%o2
+	xor	%o3,%o4,%o4
+#ifdef ABI64
+	bnz	%xcc,.Loop_cmp
+#else
+	bnz	.Loop_cmp
+#endif
+	or	%o4,%g1,%g1
+
+	sub	%g0,%g1,%g1
+	srl	%g1,31,%g1
+.Lno_data:
+	retl
+	mov	%g1,%o0
+.type	CRYPTO_memcmp,#function
+.size	CRYPTO_memcmp,.-CRYPTO_memcmp
+
 .global	_sparcv9_vis1_instrument_bus
 .align	8
 _sparcv9_vis1_instrument_bus:
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 450550bfb5..4946688d92 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -224,6 +224,28 @@ OPENSSL_cleanse:
 	jne	.Little
 	ret
 .size	OPENSSL_cleanse,.-OPENSSL_cleanse
+
+.globl	CRYPTO_memcmp
+.type	CRYPTO_memcmp,\@abi-omnipotent
+.align	16
+CRYPTO_memcmp:
+	xor	%rax,%rax
+	xor	%r10,%r10
+	cmp	\$0,$arg3
+	je	.Lno_data
+.Loop_cmp:
+	mov	($arg1),%r10b
+	lea	1($arg1),$arg1
+	xor	($arg2),%r10b
+	lea	1($arg2),$arg2
+	or	%r10b,%al
+	dec	$arg3
+	jnz	.Loop_cmp
+	neg	%rax
+	shr	\$63,%rax
+.Lno_data:
+	ret
+.size	CRYPTO_memcmp,.-CRYPTO_memcmp
 ___
 
 print<<___ if (!$win64);
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 2b110ba896..99ffa1d2fb 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -365,6 +365,31 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&ret	();
 &function_end_B("OPENSSL_cleanse");
 
+&function_begin_B("CRYPTO_memcmp");
+	&push	("esi");
+	&push	("edi");
+	&mov	("esi",&wparam(0));
+	&mov	("edi",&wparam(1));
+	&mov	("ecx",&wparam(2));
+	&xor	("eax","eax");
+	&xor	("edx","edx");
+	&cmp	("ecx",0);
+	&je	(&label("no_data"));
+&set_label("loop");
+	&mov	("dl",&BP(0,"esi"));
+	&lea	("esi",&DWP(1,"esi"));
+	&xor	("dl",&BP(0,"edi"));
+	&lea	("edi",&DWP(1,"edi"));
+	&or	("al","dl");
+	&dec	("ecx");
+	&jnz	(&label("loop"));
+	&neg	("eax");
+	&shr	("eax",31);
+&set_label("no_data");
+	&pop	("edi");
+	&pop	("esi");
+	&ret	();
+&function_end_B("CRYPTO_memcmp");
 {
 my $lasttick = "esi";
 my $lastdiff = "ebx";