Add assembly CRYPTO_memcmp.
author: Andy Polyakov <appro@openssl.org>
Sun, 15 May 2016 15:01:15 +0000 (17:01 +0200)
committer: Andy Polyakov <appro@openssl.org>
Thu, 19 May 2016 20:33:00 +0000 (22:33 +0200)
GH: #102

Reviewed-by: Richard Levitte <levitte@openssl.org>
12 files changed:
crypto/alphacpuid.pl
crypto/arm64cpuid.pl
crypto/armv4cpuid.pl
crypto/c64xpluscpuid.pl
crypto/cryptlib.c
crypto/ia64cpuid.S
crypto/pariscid.pl
crypto/ppccpuid.pl
crypto/s390xcpuid.S
crypto/sparccpuid.S
crypto/x86_64cpuid.pl
crypto/x86cpuid.pl

index 6356b00f94e4a0a4b3a3edd57185db76003048b3..6c7fd4c9dd3dd1dbd19a9c93141f44e6e6295716 100644 (file)
@@ -134,6 +134,34 @@ OPENSSL_cleanse:
        bne     $17,.Little
 .Ldone: ret    ($26)
 .end   OPENSSL_cleanse
        bne     $17,.Little
 .Ldone: ret    ($26)
 .end   OPENSSL_cleanse
+
+/*
+ * CRYPTO_memcmp: byte-wise comparison of the buffers addressed by $16
+ * and $17 over $18 bytes. Every byte is always visited; the XOR of each
+ * byte pair is OR-ed into $0, which is collapsed to 0 (all bytes equal,
+ * or zero length) or 1 (some byte differs) before returning.
+ */
+.globl CRYPTO_memcmp
+.ent   CRYPTO_memcmp
+CRYPTO_memcmp:
+       .frame  $30,0,$26
+       .prologue 0
+       xor     $0,$0,$0                /* difference accumulator = 0 */
+       beq     $18,.Lno_data           /* zero length: return 0 */
+
+       xor     $1,$1,$1                /* cleared, but not read again in this routine */
+       nop                             /* presumably scheduling/alignment padding */
+.Loop_cmp:
+       ldq_u   $2,0($16)               /* aligned quadword containing the current byte of the first buffer */
+       subq    $18,1,$18               /* one byte fewer to go */
+       ldq_u   $3,0($17)               /* same for the second buffer */
+       extbl   $2,$16,$2               /* pick the byte selected by the low address bits */
+       lda     $16,1($16)              /* advance first pointer */
+       extbl   $3,$17,$3
+       lda     $17,1($17)              /* advance second pointer */
+       xor     $3,$2,$2                /* non-zero when the two bytes differ */
+       or      $2,$0,$0                /* fold into the accumulator */
+       bne     $18,.Loop_cmp           /* loop until the count reaches zero */
+
+       subq    $31,$0,$0               /* 0 - accumulator ($31 reads as zero): negative iff any difference */
+       srl     $0,63,$0                /* keep only the sign bit: 0 or 1 */
+.Lno_data:
+       ret     ($26)
+.end   CRYPTO_memcmp
 ___
 {
 my ($out,$cnt,$max)=("\$16","\$17","\$18");
 ___
 {
 my ($out,$cnt,$max)=("\$16","\$17","\$18");
index c0af608142b972144283c1c2270ca23f057a714f..caa33875c93739e9f9645f1350078bb231bfeb69 100755 (executable)
@@ -100,6 +100,26 @@ OPENSSL_cleanse:
        cbnz    x1,.Little      // len!=0?
        ret
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
        cbnz    x1,.Little      // len!=0?
        ret
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
+
+// CRYPTO_memcmp: compares x2 bytes at the addresses in x0 and x1.
+// Every byte is always read; the XOR of each byte pair is OR-ed into w3.
+// Returns w0 = 0 when all bytes match (or the length is zero), 1 otherwise.
+.globl CRYPTO_memcmp
+.type  CRYPTO_memcmp,%function
+.align 4
+CRYPTO_memcmp:
+       eor     w3,w3,w3        // difference accumulator = 0
+       cbz     x2,.Lno_data    // len==0?
+.Loop_cmp:
+       ldrb    w4,[x0],#1      // next byte of first buffer, post-increment
+       ldrb    w5,[x1],#1      // next byte of second buffer, post-increment
+       eor     w4,w4,w5        // non-zero when the bytes differ
+       orr     w3,w3,w4        // fold into accumulator
+       subs    x2,x2,#1        // count down and set flags
+       b.ne    .Loop_cmp
+
+.Lno_data:
+       neg     w0,w3           // 0 - accumulator: sign bit set iff any difference
+       lsr     w0,w0,#31       // move the sign bit down: result is 0 or 1
+       ret
+.size  CRYPTO_memcmp,.-CRYPTO_memcmp
 ___
 
 print $code;
 ___
 
 print $code;
index 2c02f08c74cc6a77eb6d90ca1664b9988073a4f7..33c893d0e460f7e901240f68456712fbb968ecbd 100644 (file)
@@ -105,6 +105,36 @@ OPENSSL_cleanse:
 #endif
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
 #endif
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
+@ CRYPTO_memcmp: compares r2 bytes at the addresses in r0 and r1.
+@ Every byte is always read; the XOR of each byte pair is OR-ed into ip.
+@ Returns r0 = 0 when all bytes match (or the length is zero), 1 otherwise.
+.global        CRYPTO_memcmp
+.type  CRYPTO_memcmp,%function
+.align 4
+CRYPTO_memcmp:
+       eor     ip,ip,ip        @ difference accumulator = 0
+       cmp     r2,#0
+       beq     .Lno_data       @ zero length: nothing saved, nothing to restore
+       stmdb   sp!,{r4,r5}     @ r4 and r5 are used as scratch in the loop
+
+.Loop_cmp:
+       ldrb    r4,[r0],#1      @ next byte of first buffer, post-increment
+       ldrb    r5,[r1],#1      @ next byte of second buffer, post-increment
+       eor     r4,r4,r5        @ non-zero when the bytes differ
+       orr     ip,ip,r4        @ fold into accumulator
+       subs    r2,r2,#1        @ count down and set flags
+       bne     .Loop_cmp
+
+       ldmia   sp!,{r4,r5}     @ restore the scratch registers saved above
+.Lno_data:
+       neg     r0,ip           @ 0 - accumulator: sign bit set iff any difference
+       mov     r0,r0,lsr#31    @ move the sign bit down: result is 0 or 1
+#if __ARM_ARCH__>=5
+       bx      lr
+#else
+       tst     lr,#1           @ bit 0 of the return address clear?
+       moveq   pc,lr           @ then return with a plain move to pc
+       .word   0xe12fff1e      @ bx    lr
+#endif
+.size  CRYPTO_memcmp,.-CRYPTO_memcmp
+
 #if __ARM_MAX_ARCH__>=7
 .arch  armv7-a
 .fpu   neon
 #if __ARM_MAX_ARCH__>=7
 .arch  armv7-a
 .fpu   neon
index 3dcd691129ecbae5ee8847e902d4af6c979b5919..9efe1205fff4b5146fe0769fb19cd70903f6a704 100644 (file)
@@ -18,6 +18,7 @@ $code.=<<___;
        .if     __TI_EABI__
        .asg    OPENSSL_rdtsc,_OPENSSL_rdtsc
        .asg    OPENSSL_cleanse,_OPENSSL_cleanse
        .if     __TI_EABI__
        .asg    OPENSSL_rdtsc,_OPENSSL_rdtsc
        .asg    OPENSSL_cleanse,_OPENSSL_cleanse
+       .asg    CRYPTO_memcmp,_CRYPTO_memcmp
        .asg    OPENSSL_atomic_add,_OPENSSL_atomic_add
        .asg    OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
        .asg    OPENSSL_instrument_bus,_OPENSSL_instrument_bus
        .asg    OPENSSL_atomic_add,_OPENSSL_atomic_add
        .asg    OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
        .asg    OPENSSL_instrument_bus,_OPENSSL_instrument_bus
@@ -87,6 +88,29 @@ _OPENSSL_cleanse:
    [A1]        STB     A2,*A4++[2]
        .endasmfunc
 
    [A1]        STB     A2,*A4++[2]
        .endasmfunc
 
+       ;; _CRYPTO_memcmp: compares A6 bytes read through A4 and B4.
+       ;; The XOR of each byte pair is OR-ed into A0 inside an SPLOOP, so
+       ;; every byte is always processed. Returns A4 = 0 when all bytes
+       ;; match (or the length is zero) and A4 = 1 otherwise.
+       .global _CRYPTO_memcmp
+_CRYPTO_memcmp:
+       .asmfunc
+       MV      A6,B0                   ; B0 = length, used as predicate below
+  [!B0]        BNOP    RA              ; zero length: return straight away...
+||[!B0]        ZERO    A4              ; ...with result 0
+   [B0]        MVC     B0,ILC          ; otherwise length becomes the loop count
+|| [B0]        ZERO    A0              ; difference accumulator = 0
+       NOP     4
+
+       SPLOOP  1
+       LDBU    *A4++,A1                ; next byte of first buffer
+||     LDBU    *B4++,B1                ; next byte of second buffer
+       NOP     4                       ; presumably covers the load latency
+       XOR.L   B1,A1,A2                ; non-zero when the bytes differ
+       SPKERNEL 1,0
+||     OR.S    A2,A0,A0                ; fold into accumulator
+
+       BNOP    RA,3                    ; return; the two moves below still execute
+       ZERO.L  A4                      ; result = 0...
+  [A0] MVK     1,A4                    ; ...or 1 when any difference was seen
+       .endasmfunc
+
        .global _OPENSSL_atomic_add
 _OPENSSL_atomic_add:
        .asmfunc
        .global _OPENSSL_atomic_add
 _OPENSSL_atomic_add:
        .asmfunc
index 92e323c6094a5529a29d057f6ec5bab1f8efa952..8e189eca64fea5b386649fb44dcf328a313bbd45 100644 (file)
@@ -316,6 +316,7 @@ void OPENSSL_die(const char *message, const char *file, int line)
 #endif
 }
 
 #endif
 }
 
+#if !defined(OPENSSL_CPUID_OBJ)
 /* volatile unsigned char* pointers are there because
  * 1. Accessing a variable declared volatile via a pointer
  *    that lacks a volatile qualifier causes undefined behavior.
 /* volatile unsigned char* pointers are there because
  * 1. Accessing a variable declared volatile via a pointer
  *    that lacks a volatile qualifier causes undefined behavior.
@@ -347,3 +348,4 @@ int CRYPTO_memcmp(const volatile void * volatile in_a,
 
     return x;
 }
 
     return x;
 }
+#endif
index bf5abc3be3796816dc1379f73a8f09217e0ee004..f942648bae35f77faa5488a6be7d0e78d5fde305 100644 (file)
@@ -2,6 +2,12 @@
 // On Win64i compile with ias.exe.
 .text
 
 // On Win64i compile with ias.exe.
 .text
 
+// ADDP: add used when copying pointer arguments. Expands to addp4 on
+// HP-UX builds without _LP64 and to a plain add everywhere else.
+#if defined(_HPUX_SOURCE) && !defined(_LP64)
+#define        ADDP    addp4
+#else
+#define        ADDP    add
+#endif
+
 .global        OPENSSL_cpuid_setup#
 .proc  OPENSSL_cpuid_setup#
 OPENSSL_cpuid_setup:
 .global        OPENSSL_cpuid_setup#
 .proc  OPENSSL_cpuid_setup#
 OPENSSL_cpuid_setup:
@@ -131,9 +137,7 @@ OPENSSL_wipe_cpu:
 .proc  OPENSSL_cleanse#
 OPENSSL_cleanse:
 { .mib;        cmp.eq          p6,p0=0,r33         // len==0
 .proc  OPENSSL_cleanse#
 OPENSSL_cleanse:
 { .mib;        cmp.eq          p6,p0=0,r33         // len==0
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-       addp4           r32=0,r32
-#endif
+       ADDP            r32=0,r32
 (p6)   br.ret.spnt     b0              };;
 { .mib;        and             r2=7,r32
        cmp.leu         p6,p0=15,r33        // len>=15
 (p6)   br.ret.spnt     b0              };;
 { .mib;        and             r2=7,r32
        cmp.leu         p6,p0=15,r33        // len>=15
@@ -166,14 +170,51 @@ OPENSSL_cleanse:
 (p6)   br.ret.sptk.many        b0      };;
 .endp  OPENSSL_cleanse#
 
 (p6)   br.ret.sptk.many        b0      };;
 .endp  OPENSSL_cleanse#
 
+// CRYPTO_memcmp: compares r34 bytes at the addresses in r32 and r33.
+// Returns r8 = 0 when every byte matches (or the length is zero) and
+// r8 = 1 otherwise. The comparison runs as a br.ctop loop over rotating
+// registers: loads, xor and or of successive bytes overlap in stages,
+// and every byte is always processed.
+.global        CRYPTO_memcmp#
+.proc  CRYPTO_memcmp#
+.align 32
+.skip  16
+CRYPTO_memcmp:
+       .prologue
+{ .mib;        mov             r8=0                // difference accumulator / result = 0
+       cmp.eq          p6,p0=0,r34         // len==0?
+(p6)   br.ret.spnt     b0              };;
+       .save           ar.pfs,r2
+{ .mib;        alloc           r2=ar.pfs,3,5,0,8   // 3 inputs, 5 locals, 0 outputs, 8 rotating
+       .save           ar.lc,r3
+       mov             r3=ar.lc            // keep the caller's loop counter
+       brp.loop.imp    .Loop_cmp_ctop,.Loop_cmp_cend-16
+                                       }
+{ .mib;        sub             r10=r34,r0,1        // r10 = len-1
+       .save           pr,r9
+       mov             r9=pr           };; // keep the caller's predicates
+{ .mii;        ADDP            r16=0,r32           // r16 = first buffer pointer
+       mov             ar.lc=r10           // loop counter = len-1
+       mov             ar.ec=4         }   // four stages to drain in the epilogue
+{ .mib;        ADDP            r17=0,r33           // r17 = second buffer pointer
+       mov             pr.rot=1<<16    };; // only stage predicate p16 set initially
+
+.Loop_cmp_ctop:
+{ .mib;        (p16)   ld1     r32=[r16],1         // stage 0: byte from first buffer
+       (p18)   xor     r34=r34,r38     }   // stage 2: both bytes, rotated twice since loading
+{ .mib;        (p16)   ld1     r36=[r17],1         // stage 0: byte from second buffer
+       (p19)   or      r8=r8,r35           // stage 3: fold the xor result, rotated once more
+       br.ctop.sptk    .Loop_cmp_ctop  };;
+.Loop_cmp_cend:
+
+{ .mib;        cmp.ne          p6,p0=0,r8          // any difference accumulated?
+       mov             ar.lc=r3        };; // restore the caller's loop counter
+{ .mib;
+(p6)   mov             r8=1                // normalise a non-zero accumulator to 1
+       mov             pr=r9,0x1ffff       // restore the caller's predicates
+       br.ret.sptk.many        b0      };;
+.endp  CRYPTO_memcmp#
+
 .global        OPENSSL_instrument_bus#
 .proc  OPENSSL_instrument_bus#
 OPENSSL_instrument_bus:
 { .mmi;        mov             r2=r33
 .global        OPENSSL_instrument_bus#
 .proc  OPENSSL_instrument_bus#
 OPENSSL_instrument_bus:
 { .mmi;        mov             r2=r33
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-       addp4           r32=0,r32
-#endif
-                                       }
+       ADDP            r32=0,r32       }
 { .mmi;        mov             r8=ar.itc;;
        mov             r10=r0
        mov             r9=r8           };;
 { .mmi;        mov             r8=ar.itc;;
        mov             r10=r0
        mov             r9=r8           };;
@@ -208,10 +249,7 @@ OPENSSL_instrument_bus:
 .proc  OPENSSL_instrument_bus2#
 OPENSSL_instrument_bus2:
 { .mmi;        mov             r2=r33                  // put aside cnt
 .proc  OPENSSL_instrument_bus2#
 OPENSSL_instrument_bus2:
 { .mmi;        mov             r2=r33                  // put aside cnt
-#if defined(_HPUX_SOURCE) && !defined(_LP64)
-       addp4           r32=0,r32
-#endif
-                                       }
+       ADDP            r32=0,r32       }
 { .mmi;        mov             r8=ar.itc;;
        mov             r10=r0
        mov             r9=r8           };;
 { .mmi;        mov             r8=ar.itc;;
        mov             r10=r0
        mov             r9=r8           };;
index 03895d5cf355663351cb6a162edbba0ddd1434c4..f82e27ac4c34e8bebd75245f9f097382aecb6aa8 100644 (file)
@@ -138,6 +138,37 @@ L\$done
 ___
 }
 {
 ___
 }
 {
+my ($in1,$in2,$len)=("%r26","%r25","%r24");
+
+# CRYPTO_memcmp(in1, in2, len): compares len bytes of the two buffers.
+# Every byte is always read; the XOR of each byte pair is OR-ed into the
+# return-value register, which is finally collapsed to 0 (all bytes equal,
+# or len is zero) or 1 (some byte differs).
+#
+# The collapse negates the accumulator and extracts bit 0, which in
+# PA-RISC bit numbering is the most significant (sign) bit. Extracting
+# bit 31 instead would return only the least significant bit of the
+# negated accumulator and miss differences confined to the upper bits.
+$code.=<<___;
+       .EXPORT CRYPTO_memcmp,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
+       .ALIGN  8
+CRYPTO_memcmp
+       .PROC
+       .CALLINFO       NO_CALLS
+       .ENTRY
+       cmpib,*=        0,$len,L\$no_data
+       xor             $rv,$rv,$rv             ; delay slot: accumulator = 0
+
+L\$oop_cmp
+       ldb             0($in1),%r19            ; next byte of first buffer
+       ldb             0($in2),%r20            ; next byte of second buffer
+       ldo             1($in1),$in1
+       ldo             1($in2),$in2
+       xor             %r19,%r20,%r29          ; non-zero when the bytes differ
+       addib,*<>       -1,$len,L\$oop_cmp
+       or              %r29,$rv,$rv            ; delay slot: fold into accumulator
+
+       sub             %r0,$rv,%r29            ; 0 - accumulator: sign bit set iff any difference
+       extru           %r29,0,1,$rv            ; sign bit (bit 0) becomes the 0/1 result
+L\$no_data
+       bv              ($rp)
+       .EXIT
+       nop
+       .PROCEND
+___
+}
+{
 my ($out,$cnt,$max)=("%r26","%r25","%r24");
 my ($tick,$lasttick)=("%r23","%r22");
 my ($diff,$lastdiff)=("%r21","%r20");
 my ($out,$cnt,$max)=("%r26","%r25","%r24");
 my ($tick,$lasttick)=("%r23","%r22");
 my ($diff,$lastdiff)=("%r21","%r20");
index 8f603e972f1275ec18f0371d3644a2420db74a17..9d1cada4dc4c9177552f1abdd2bd4151c1e73296 100755 (executable)
@@ -177,6 +177,32 @@ Laligned:
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
 .size  .OPENSSL_cleanse,.-.OPENSSL_cleanse
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
 .size  .OPENSSL_cleanse,.-.OPENSSL_cleanse
+
+# .CRYPTO_memcmp: compares r5 bytes at the addresses in r3 and r4.
+# Every byte is always read; the XOR of each byte pair is OR-ed into r0.
+# Returns r3 = 0 when all bytes match (or the length is zero), 1 otherwise.
+.globl .CRYPTO_memcmp
+.align 4
+.CRYPTO_memcmp:
+       $CMPLI  r5,0
+       li      r0,0                    # difference accumulator = 0
+       beq     Lno_data                # zero length: fall through to return 0
+       mtctr   r5                      # length becomes the loop counter
+Loop_cmp:
+       lbz     r6,0(r3)                # next byte of first buffer
+       addi    r3,r3,1
+       lbz     r7,0(r4)                # next byte of second buffer
+       addi    r4,r4,1
+       xor     r6,r6,r7                # non-zero when the bytes differ
+       or      r0,r0,r6                # fold into accumulator
+       bdnz    Loop_cmp                # decrement counter, loop while non-zero
+
+Lno_data:
+       li      r3,0
+       sub     r3,r3,r0                # 0 - accumulator: sign bit set iff any difference
+       extrwi  r3,r3,1,0               # top bit of the word becomes the 0/1 result
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,3,0     # same trailer layout as the preceding routine; presumably a traceback tag
+       .long   0
+.size  .CRYPTO_memcmp,.-.CRYPTO_memcmp
 ___
 {
 my ($out,$cnt,$max)=("r3","r4","r5");
 ___
 {
 my ($out,$cnt,$max)=("r3","r4","r5");
index 3efad5506bed687c0b9b9447e8a3b4d9bd544488..df7b35ad7039468369b2efff3085002e993ae522 100644 (file)
@@ -125,6 +125,33 @@ OPENSSL_cleanse:
        br      %r14
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
        br      %r14
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
+/*
+ * CRYPTO_memcmp: compares %r4 bytes at the addresses in %r2 and %r3.
+ * Every byte is always read; the XOR of each byte pair is OR-ed into
+ * %r5. Returns %r2 = 0 when all bytes match (or the length is zero)
+ * and 1 otherwise.
+ */
+.globl CRYPTO_memcmp
+.type  CRYPTO_memcmp,@function
+.align 16
+CRYPTO_memcmp:
+#if !defined(__s390x__) && !defined(__s390x)
+       llgfr   %r4,%r4                 /* zero-extend the 32-bit length to 64 bits */
+#endif
+       lghi    %r5,0                   /* difference accumulator = 0 */
+       clgr    %r4,%r5                 /* length == 0? */
+       je      .Lno_data
+
+.Loop_cmp:
+       llc     %r0,0(%r2)              /* next byte of first buffer */
+       la      %r2,1(%r2)
+       llc     %r1,0(%r3)              /* next byte of second buffer */
+       la      %r3,1(%r3)
+       xr      %r1,%r0                 /* non-zero when the bytes differ */
+       or      %r5,%r1                 /* fold into accumulator */
+       brctg   %r4,.Loop_cmp           /* decrement count, loop while non-zero */
+
+       lnr     %r5,%r5                 /* load negative: sign bit set iff accumulator is non-zero */
+       srl     %r5,31                  /* move the sign bit down: 0 or 1 */
+.Lno_data:
+       lgr     %r2,%r5                 /* result */
+       br      %r14
+.size  CRYPTO_memcmp,.-CRYPTO_memcmp
+
 .globl OPENSSL_instrument_bus
 .type  OPENSSL_instrument_bus,@function
 .align 16
 .globl OPENSSL_instrument_bus
 .type  OPENSSL_instrument_bus,@function
 .align 16
index 6f1dded8e2166b442206ecb1e0e13bf825f28003..f48d860159e51d78e5ba3c698d1a510a17867a13 100644 (file)
@@ -440,6 +440,40 @@ OPENSSL_cleanse:
 .type  OPENSSL_cleanse,#function
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
 .type  OPENSSL_cleanse,#function
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
+! CRYPTO_memcmp: compares %o2 bytes at the addresses in %o0 and %o1.
+! Every byte is always read; the XOR of each byte pair is OR-ed into %g1.
+! Returns %o0 = 0 when all bytes match (or the length is zero), 1 otherwise.
+.global        CRYPTO_memcmp
+.align 16
+CRYPTO_memcmp:
+       cmp     %o2,0
+#ifdef ABI64
+       beq,pn  %xcc,.Lno_data
+#else
+       beq     .Lno_data
+#endif
+       xor     %g1,%g1,%g1             ! delay slot: accumulator = 0 on both paths
+       nop
+
+.Loop_cmp:
+       ldub    [%o0],%o3               ! next byte of first buffer
+       add     %o0,1,%o0
+       ldub    [%o1],%o4               ! next byte of second buffer
+       add     %o1,1,%o1
+       subcc   %o2,1,%o2               ! count down and set condition codes
+       xor     %o3,%o4,%o4             ! non-zero when the bytes differ
+#ifdef ABI64
+       bnz     %xcc,.Loop_cmp
+#else
+       bnz     .Loop_cmp
+#endif
+       or      %o4,%g1,%g1             ! delay slot: fold into accumulator
+
+       sub     %g0,%g1,%g1             ! 0 - accumulator: bit 31 set iff any difference
+       srl     %g1,31,%g1              ! move bit 31 down: 0 or 1
+.Lno_data:
+       retl
+       mov     %g1,%o0                 ! delay slot: result
+.type  CRYPTO_memcmp,#function
+.size  CRYPTO_memcmp,.-CRYPTO_memcmp
+
 .global        _sparcv9_vis1_instrument_bus
 .align 8
 _sparcv9_vis1_instrument_bus:
 .global        _sparcv9_vis1_instrument_bus
 .align 8
 _sparcv9_vis1_instrument_bus:
index 450550bfb5f610e19518b21cd831e2af0181712b..4946688d922e9f2572d53f929840900897be457d 100644 (file)
@@ -224,6 +224,28 @@ OPENSSL_cleanse:
        jne     .Little
        ret
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
        jne     .Little
        ret
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
+
+# CRYPTO_memcmp: compares the number of bytes given by the third argument
+# at the addresses given by the first two. Every byte is always read;
+# the XOR of each byte pair is OR-ed into the low byte of rax.
+# Returns rax = 0 when all bytes match (or the length is zero), 1 otherwise.
+.globl  CRYPTO_memcmp
+.type   CRYPTO_memcmp,\@abi-omnipotent
+.align  16
+CRYPTO_memcmp:
+       xor     %rax,%rax               # difference accumulator = 0
+       xor     %r10,%r10               # scratch for the current byte
+       cmp     \$0,$arg3
+       je      .Lno_data               # zero length: return 0
+.Loop_cmp:
+       mov     ($arg1),%r10b           # next byte of first buffer
+       lea     1($arg1),$arg1
+       xor     ($arg2),%r10b           # non-zero when the bytes differ
+       lea     1($arg2),$arg2
+       or      %r10b,%al               # fold into accumulator
+       dec     $arg3
+       jnz     .Loop_cmp
+       neg     %rax                    # 0 - accumulator: sign bit set iff any difference
+       shr     \$63,%rax               # move the sign bit down: 0 or 1
+.Lno_data:
+       ret
+.size  CRYPTO_memcmp,.-CRYPTO_memcmp
 ___
 
 print<<___ if (!$win64);
 ___
 
 print<<___ if (!$win64);
index 2b110ba896d590bbb18fc64a65180a6804c052a2..99ffa1d2fbbd44cef024a9bac437978f803b0870 100644 (file)
@@ -365,6 +365,31 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &ret    ();
 &function_end_B("OPENSSL_cleanse");
 
        &ret    ();
 &function_end_B("OPENSSL_cleanse");
 
+# CRYPTO_memcmp(a, b, len): compares len bytes of the two buffers.
+# Every byte is always read; the XOR of each byte pair is OR-ed into al.
+# Returns eax = 0 when all bytes match (or len is zero), 1 otherwise.
+# NOTE(review): wparam(n) is a perlasm helper defined outside this view;
+# presumably it addresses the n-th stack argument - confirm it accounts
+# for the two pushes below.
+&function_begin_B("CRYPTO_memcmp");
+       &push   ("esi");                        # esi and edi hold the two pointers
+       &push   ("edi");
+       &mov    ("esi",&wparam(0));             # first buffer
+       &mov    ("edi",&wparam(1));             # second buffer
+       &mov    ("ecx",&wparam(2));             # byte count
+       &xor    ("eax","eax");                  # difference accumulator = 0
+       &xor    ("edx","edx");                  # scratch for the current byte
+       &cmp    ("ecx",0);
+       &je     (&label("no_data"));            # zero length: return 0
+&set_label("loop");
+       &mov    ("dl",&BP(0,"esi"));            # next byte of first buffer
+       &lea    ("esi",&DWP(1,"esi"));
+       &xor    ("dl",&BP(0,"edi"));            # non-zero when the bytes differ
+       &lea    ("edi",&DWP(1,"edi"));
+       &or     ("al","dl");                    # fold into accumulator
+       &dec    ("ecx");
+       &jnz    (&label("loop"));
+       &neg    ("eax");                        # 0 - accumulator: sign bit set iff any difference
+       &shr    ("eax",31);                     # move the sign bit down: 0 or 1
+&set_label("no_data");
+       &pop    ("edi");
+       &pop    ("esi");
+       &ret    ();
+&function_end_B("CRYPTO_memcmp");
 {
 my $lasttick = "esi";
 my $lastdiff = "ebx";
 {
 my $lasttick = "esi";
 my $lastdiff = "ebx";