From 5fabb88a7816f19090384e45bb8f2a22c7f290fb Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 17 Apr 2011 12:46:00 +0000 Subject: [PATCH] Multiple assembler packs: add experimental memory bus instrumentation. --- crypto/alphacpuid.pl | 90 +++++++++++++++++++++++ crypto/ia64cpuid.S | 88 +++++++++++++++++++++- crypto/pariscid.pl | 94 +++++++++++++++++++++++- crypto/ppccpuid.pl | 87 +++++++++++++++++++++- crypto/s390xcpuid.S | 16 ++++ crypto/sparccpuid.S | 96 ++++++++++++++++++++++++ crypto/sparcv9cap.c | 26 ++++++- crypto/x86_64cpuid.pl | 95 +++++++++++++++++++++++- crypto/x86cpuid.pl | 102 ++++++++++++++++++++++++++ doc/crypto/OPENSSL_instrument_bus.pod | 42 +++++++++++ 10 files changed, 724 insertions(+), 12 deletions(-) create mode 100644 doc/crypto/OPENSSL_instrument_bus.pod diff --git a/crypto/alphacpuid.pl b/crypto/alphacpuid.pl index c9474ff497..11f2e30ce0 100644 --- a/crypto/alphacpuid.pl +++ b/crypto/alphacpuid.pl @@ -126,3 +126,93 @@ OPENSSL_cleanse: .Ldone: ret ($26) .end OPENSSL_cleanse ___ +{ +my ($out,$cnt,$max)=("\$16","\$17","\$18"); +my ($tick,$lasttick)=("\$19","\$20"); +my ($diff,$lastdiff)=("\$21","\$22"); +my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31"); + +print <<___; +.globl OPENSSL_instrument_bus +.ent OPENSSL_instrument_bus +OPENSSL_instrument_bus: + .frame $sp,0,$ra + .prologue 0 + mov $cnt,$v0 + + rpcc $lasttick + mov 0,$diff + + ecb ($out) + ldl_l $tick,0($out) + addl $diff,$tick,$tick + mov $tick,$diff + stl_c $tick,0($out) + stl $diff,0($out) + +.Loop: rpcc $tick + subq $tick,$lasttick,$diff + mov $tick,$lasttick + + ecb ($out) + ldl_l $tick,0($out) + addl $diff,$tick,$tick + mov $tick,$diff + stl_c $tick,0($out) + stl $diff,0($out) + + subl $cnt,1,$cnt + lda $out,4($out) + bne $cnt,.Loop + + ret ($ra) +.end OPENSSL_instrument_bus + +.globl OPENSSL_instrument_bus2 +.ent OPENSSL_instrument_bus2 +OPENSSL_instrument_bus2: + .frame $sp,0,$ra + .prologue 0 + mov $cnt,$v0 + + rpcc $lasttick + mov 0,$diff + + ecb ($out) + ldl_l $tick,0($out) + addl $diff,$tick,$tick + mov $tick,$diff + stl_c $tick,0($out) + stl $diff,0($out) + + rpcc $tick + subq $tick,$lasttick,$diff + mov $tick,$lasttick + mov $diff,$lastdiff +.Loop2: + ecb ($out) + ldl_l $tick,0($out) + addl $diff,$tick,$tick + mov $tick,$diff + stl_c $tick,0($out) + stl $diff,0($out) + + subl $max,1,$max + beq $max,.Ldone2 + + rpcc $tick + subq $tick,$lasttick,$diff + mov $tick,$lasttick + subq $lastdiff,$diff,$tick + mov $diff,$lastdiff + cmovne $tick,1,$tick + subl $cnt,$tick,$cnt + s4addq $tick,$out,$out + bne $cnt,.Loop2 + +.Ldone2: + subl $v0,$cnt,$v0 + ret ($ra) +.end OPENSSL_instrument_bus2 +___ +} diff --git a/crypto/ia64cpuid.S b/crypto/ia64cpuid.S index d705fff7ee..dd27e163ce 100644 --- a/crypto/ia64cpuid.S +++ b/crypto/ia64cpuid.S @@ -26,7 +26,7 @@ OPENSSL_atomic_add: { .mii; mov ar.ccv=r2 add r8=r2,r33 mov r3=r2 };; -{ .mmi; mf +{ .mmi; mf;; cmpxchg4.acq r2=[r32],r8,ar.ccv nop.i 0 };; { .mib; cmp.ne p6,p0=r2,r3 @@ -165,3 +165,89 @@ OPENSSL_cleanse: (p7) br.cond.dpnt .Little (p6) br.ret.sptk.many b0 };; .endp OPENSSL_cleanse# + +.global OPENSSL_instrument_bus# +.proc OPENSSL_instrument_bus# +OPENSSL_instrument_cache: +{ .mmi; mov r2=r33 +#if defined(_HPUX_SOURCE) && !defined(_LP64) + addp4 r32=0,r32 +#endif + } +{ .mmi; mov r8=ar.itc;; + mov r10=r0 + mov r9=r8 };; + +{ .mmi; fc r32;; + ld4 r8=[r32] };; +{ .mmi; mf + mov ar.ccv=r8 + add r8=r8,r10 };; +{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv + };; +.Loop: +{ .mmi; mov r8=ar.itc;; + sub r10=r8,r9 // diff=tick-lasttick + mov r9=r8 };; // lasttick=tick +{ .mmi; fc r32;; + ld4 r8=[r32] };; +{ .mmi; mf + mov ar.ccv=r8 + add r8=r8,r10 };; +{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv + add r33=-1,r33 + add r32=4,r32 };; +{ .mib; cmp4.ne p6,p0=0,r33 +(p6) br.cond.dptk .Loop };; + +{ .mib; sub r8=r2,r33 + br.ret.sptk.many b0 };; +.endp OPENSSL_instrument_bus# + +.global OPENSSL_instrument_bus2# +.proc OPENSSL_instrument_bus2# +OPENSSL_instrument_cache2: +{ .mmi; mov r2=r33 // put aside cnt +#if defined(_HPUX_SOURCE) && !defined(_LP64) + addp4 r32=0,r32 +#endif + } +{ .mmi; mov r8=ar.itc;; + mov r10=r0 + mov r9=r8 };; + +{ .mmi; fc r32;; + ld4 r8=[r32] };; +{ .mmi; mf + mov ar.ccv=r8 + add r8=r8,r10 };; +{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv + };; + +{ .mmi; mov r8=ar.itc;; + sub r10=r8,r9 + mov r9=r8 };; +.Loop2: +{ .mmi; mov r11=r10 // lastdiff=diff + add r34=-1,r34 };; // --max +{ .mmi; fc r32;; + ld4 r8=[r32] + cmp4.eq p6,p0=0,r34 };; +{ .mmi; mf + mov ar.ccv=r8 + add r8=r8,r10 };; +{ .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv +(p6) br.cond.spnt .Ldone2 };; + +{ .mmi; mov r8=ar.itc;; + sub r10=r8,r9 // diff=tick-lasttick + mov r9=r8 };; // lasttick=tick +{ .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff +(p6) add r33=-1,r33 };; // conditional --cnt +{ .mib; cmp4.ne p7,p0=0,r33 +(p6) add r32=4,r32 // conditional ++out +(p7) br.cond.dptk .Loop2 };; +.Ldone2: +{ .mib; sub r8=r2,r33 + br.ret.sptk.many b0 };; +.endp OPENSSL_instrument_bus2# diff --git a/crypto/pariscid.pl b/crypto/pariscid.pl index 1ed5381957..477ec9b87d 100644 --- a/crypto/pariscid.pl +++ b/crypto/pariscid.pl @@ -87,8 +87,8 @@ OPENSSL_wipe_cpu .PROCEND ___ { -$inp="%r26"; -$len="%r25"; +my $inp="%r26"; +my $len="%r25"; $code.=<<___; .EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR @@ -112,9 +112,9 @@ Lalign Laligned andcm $len,%r1,%r28 -Loop +Lot $ST %r0,0($inp) - addib,*<> -$SIZE_T,%r28,Loop + addib,*<> -$SIZE_T,%r28,Lot ldo $SIZE_T($inp),$inp and,*<> $len,%r1,$len @@ -130,7 +130,93 @@ Ldone .PROCEND ___ } +{ +my ($out,$cnt,$max)=("%r26","%r25","%r24"); +my ($tick,$lasttick)=("%r23","%r22"); +my ($diff,$lastdiff)=("%r21","%r20"); + +$code.=<<___; + .EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 8 +OPENSSL_instrument_bus + .PROC + .CALLINFO NO_CALLS + .ENTRY + copy $cnt,$rv + mfctl %cr16,$tick + copy $tick,$lasttick + ldi 0,$diff + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) +Loop + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + addib,<> -1,$cnt,Loop + addi 4,$out,$out + + bv ($rp) + .EXIT + sub $rv,$cnt,$rv + .PROCEND + .EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 8 +OPENSSL_instrument_bus2 + .PROC + .CALLINFO NO_CALLS + .ENTRY + copy $cnt,$rv + sub %r0,$cnt,$cnt + + mfctl %cr16,$tick + copy $tick,$lasttick + ldi 0,$diff + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick +Loop2 + copy $diff,$lastdiff + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + addib,= -1,$max,Ldone2 + nop + + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick + cmpclr,<> $lastdiff,$diff,$tick + ldi 1,$tick + + ldi 1,%r1 + xor %r1,$tick,$tick + addb,<> $tick,$cnt,Loop2 + shladd,l $tick,2,$out,$out +Ldone2 + bv ($rp) + .EXIT + add $rv,$cnt,$rv + .PROCEND +___ +} $code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); $code =~ s/,\*/,/gm if ($SIZE_T==4); print $code; diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl index 2131d30c1d..d6220e747d 100755 --- a/crypto/ppccpuid.pl +++ b/crypto/ppccpuid.pl @@ -69,10 +69,10 @@ $code=<<___; .globl .OPENSSL_atomic_add .align 4 .OPENSSL_atomic_add: -Loop: lwarx r5,0,r3 +Ladd: lwarx r5,0,r3 add r0,r4,r5 stwcx. r0,0,r3 - bne- Loop + bne- Ladd $SIGNX r3,r0 blr @@ -112,6 +112,89 @@ Laligned: bne Little blr ___ +{ +my ($out,$cnt,$max)=("r3","r4","r5"); +my ($tick,$lasttick)=("r6","r7"); +my ($diff,$lastdiff)=("r8","r9"); + +$code.=<<___; +.globl .OPENSSL_instrument_bus +.align 4 +.OPENSSL_instrument_bus: + mtctr $cnt + + mftb $lasttick # collect 1st tick + li $diff,0 + + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + +Loop: mftb $tick + sub $diff,$tick,$lasttick + mr $lasttick,$tick + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + addi $out,$out,4 # ++$out + bdnz Loop + + mr r3,$cnt + blr + +.globl .OPENSSL_instrument_bus2 +.align 4 +.OPENSSL_instrument_bus2: + mr r0,$cnt + slwi $cnt,$cnt,2 + + mftb $lasttick # collect 1st tick + li $diff,0 + + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + + mftb $tick # collect 1st diff + sub $diff,$tick,$lasttick + mr $lasttick,$tick + mr $lastdiff,$diff +Loop2: + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + + addic. $max,$max,-1 + beq Ldone2 + + mftb $tick + sub $diff,$tick,$lasttick + mr $lasttick,$tick + cmplw 7,$diff,$lastdiff + mr $lastdiff,$diff + + mfcr $tick # pull cr + not $tick,$tick # flip bits + rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale + + sub. $cnt,$cnt,$tick # conditional --$cnt + add $out,$out,$tick # conditional ++$out + bne Loop2 + +Ldone2: + srwi $cnt,$cnt,2 + sub r3,r0,$cnt + blr +___ +} $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; diff --git a/crypto/s390xcpuid.S b/crypto/s390xcpuid.S index 06815347e6..3402a2404b 100644 --- a/crypto/s390xcpuid.S +++ b/crypto/s390xcpuid.S @@ -93,6 +93,22 @@ OPENSSL_cleanse: br %r14 .size OPENSSL_cleanse,.-OPENSSL_cleanse +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,@function +.align 16 +OPENSSL_instrument_bus: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.globl OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,@function +.align 16 +OPENSSL_instrument_bus2: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 + .section .init brasl %r14,OPENSSL_cpuid_setup diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S index ae61f7f5ce..329efcdd3d 100644 --- a/crypto/sparccpuid.S +++ b/crypto/sparccpuid.S @@ -397,6 +397,102 @@ OPENSSL_cleanse: .type OPENSSL_cleanse,#function .size OPENSSL_cleanse,.-OPENSSL_cleanse +.global _sparcv9_vis1_instrument_bus +.align 8 +_sparcv9_vis1_instrument_bus: + mov %o1,%o3 ! save cnt + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + +.Loop: .word 0x99410000 !rd %tick,%o4 + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + subcc %o1,1,%o1 ! --$cnt + bnz .Loop + add %o0,4,%o0 ! ++$out + + retl + mov %o3,%o0 +.type _sparcv9_vis1_instrument_bus,#function +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus + +.global _sparcv9_vis1_instrument_bus2 +.align 8 +_sparcv9_vis1_instrument_bus2: + mov %o1,%o3 ! save cnt + sll %o1,2,%o1 ! cnt*=4 + + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + mov %g4,%g5 ! lastdiff=diff +.Loop2: + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + subcc %o2,1,%o2 ! --max + bz .Ldone2 + nop + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + cmp %g4,%g5 + mov %g4,%g5 ! lastdiff=diff + + .word 0x83408000 !rd %ccr,%g1 + and %g1,4,%g1 ! isolate zero flag + xor %g1,4,%g1 ! flip zero flag + + subcc %o1,%g1,%o1 ! conditional --$cnt + bnz .Loop2 + add %o0,%g1,%o0 ! conditional ++$out + +.Ldone2: + srl %o1,2,%o1 + retl + sub %o3,%o1,%o0 +.type _sparcv9_vis1_instrument_bus2,#function +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 + .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup nop diff --git a/crypto/sparcv9cap.c b/crypto/sparcv9cap.c index ed195ab402..ad4b3be718 100644 --- a/crypto/sparcv9cap.c +++ b/crypto/sparcv9cap.c @@ -11,6 +11,7 @@ #define SPARCV9_VIS1 (1<<2) #define SPARCV9_VIS2 (1<<3) /* reserved */ #define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; @@ -31,6 +32,8 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); +size_t _sparcv8_vis1_instrument_bus2(unsigned int *,size_t,size_t); unsigned long OPENSSL_rdtsc(void) { @@ -44,6 +47,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) + { + if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus(out,cnt); + else + return 0; + } + +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) + { + if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus2(out,cnt,max); + else + return 0; + } + #if 0 && defined(__sun) && defined(__SVR4) /* This code path is disabled, because of incompatibility of * libdevinfo.so.1 and libmalloc.so.1 (see below for details) @@ -112,7 +133,7 @@ void OPENSSL_cpuid_setup(void) if (sysinfo(SI_ISALIST,si,sizeof(si))>0) { if (strstr(si,"+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK; if (strstr(si,"+vis2")) { OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; @@ -169,7 +190,6 @@ void OPENSSL_cpuid_setup(void) char *e; struct sigaction common_act,ill_oact,bus_oact; sigset_t all_masked,oset; - int sig; static int trigger=0; if (trigger) return; @@ -211,7 +231,7 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis1_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpud.S for details... */ if (_sparcv9_vis1_instrument() >= 12) OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index c96821a3c8..ecfcfc763c 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -9,8 +9,9 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; -if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } -else { $arg1="%rdi"; $arg2="%rsi"; } +($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order + print<<___; .extern OPENSSL_cpuid_setup .section .init @@ -228,5 +229,95 @@ OPENSSL_wipe_cpu: ret .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu ___ +{ +my $out="%r10"; +my $cnt="%rcx"; +my $max="%r11"; +my $lasttick="%r8d"; +my $lastdiff="%r9d"; +my $redzone=win64?8:-8; + +print<<___; +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,\@abi-omnipotent +.align 16 +OPENSSL_instrument_bus: + mov $arg1,$out # tribute to Win64 + mov $arg2,$cnt + mov $arg2,$max + + rdtsc # collect 1st tick + mov %eax,$lasttick # lasttick = tick + mov \$0,$lastdiff # lastdiff = 0 + clflush ($out) + lock + add $lastdiff,($out) + jmp .Loop +.align 16 +.Loop: rdtsc + mov %eax,%edx + sub $lasttick,%eax + mov %edx,$lasttick + mov %eax,$lastdiff + clflush ($out) + lock + add %eax,($out) + lea 4($out),$out + sub \$1,$cnt + jnz .Loop + + mov $max,%rax + ret +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.globl OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,\@abi-omnipotent +.align 16 +OPENSSL_instrument_bus2: + mov $arg1,$out # tribute to Win64 + mov $arg2,$cnt + mov $arg3,$max + mov $cnt,$redzone(%rsp) + + rdtsc # collect 1st tick + mov %eax,$lasttick # lasttick = tick + mov \$0,$lastdiff # lastdiff = 0 + + clflush ($out) + lock + add $lastdiff,($out) + + rdtsc # collect 1st diff + mov %eax,%edx + sub $lasttick,%eax # diff + mov %edx,$lasttick # lasttick = tick + mov %eax,$lastdiff # lastdiff = diff +.Loop2: + clflush ($out) + lock + add %eax,($out) # accumulate diff + + sub \$1,$max + jz .Ldone2 + + rdtsc + mov %eax,%edx + sub $lasttick,%eax # diff + mov %edx,$lasttick # lasttick = tick + cmp $lastdiff,%eax + mov %eax,$lastdiff # lastdiff = diff + mov \$0,%edx + setne %dl + sub %rdx,$cnt # conditional --$cnt + lea ($out,%rdx,4),$out # conditional ++$out + jnz .Loop2 + +.Ldone2: + mov $redzone(%rsp),%rax + sub $cnt,%rax + ret +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 +___ +} close STDOUT; # flush diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index a7464af19b..0513398739 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -307,6 +307,108 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &ret (); &function_end_B("OPENSSL_cleanse"); +{ +my $lasttick = "esi"; +my $lastdiff = "ebx"; +my $out = "edi"; +my $cnt = "ecx"; +my $max = "ebp"; + +&function_begin("OPENSSL_instrument_bus"); + &mov ("eax",0); + if ($sse2) { + &picmeup("edx","OPENSSL_ia32cap_P"); + &bt (&DWP(0,"edx"),4); + &jnc (&label("nogo")); # no TSC + &bt (&DWP(0,"edx"),19); + &jnc (&label("nogo")); # no CLFLUSH + + &mov ($out,&wparam(0)); # load arguments + &mov ($cnt,&wparam(1)); + + # collect 1st tick + &rdtsc (); + &mov ($lasttick,"eax"); # lasttick = tick + &mov ($lastdiff,0); # lastdiff = 0 + &clflush(&DWP(0,$out)); + &lock (); + &add (&DWP(0,$out),$lastdiff); + &jmp (&label("loop")); + +&set_label("loop",16); + &rdtsc (); + &mov ("edx","eax"); # put aside tick (yes, I neglect edx) + &sub ("eax",$lasttick); # diff + &mov ($lasttick,"edx"); # lasttick = tick + &mov ($lastdiff,"eax"); # lastdiff = diff + &clflush(&DWP(0,$out)); + &lock (); + &add (&DWP(0,$out),"eax"); # accumulate diff + &lea ($out,&DWP(4,$out)); # ++$out + &sub ($cnt,1); # --$cnt + &jnz (&label("loop")); + + &mov ("eax",&wparam(1)); +&set_label("nogo"); + } +&function_end("OPENSSL_instrument_bus"); + +&function_begin("OPENSSL_instrument_bus2"); + &mov ("eax",0); + if ($sse2) { + &picmeup("edx","OPENSSL_ia32cap_P"); + &bt (&DWP(0,"edx"),4); + &jnc (&label("nogo")); # no TSC + &bt (&DWP(0,"edx"),19); + &jnc (&label("nogo")); # no CLFLUSH + + &mov ($out,&wparam(0)); # load arguments + &mov ($cnt,&wparam(1)); + &mov ($max,&wparam(2)); + + &rdtsc (); # collect 1st tick + &mov ($lasttick,"eax"); # lasttick = tick + &mov ($lastdiff,0); # lastdiff = 0 + + &clflush(&DWP(0,$out)); + &lock (); + &add (&DWP(0,$out),$lastdiff); + + &rdtsc (); # collect 1st diff + &mov ("edx","eax"); # put aside tick (yes, I neglect edx) + &sub ("eax",$lasttick); # diff + &mov ($lasttick,"edx"); # lasttick = tick + &mov ($lastdiff,"eax"); # lastdiff = diff + &jmp (&label("loop2")); + +&set_label("loop2",16); + &clflush(&DWP(0,$out)); + &lock (); + &add (&DWP(0,$out),"eax"); # accumulate diff + + &sub ($max,1); + &jz (&label("done2")); + + &rdtsc (); + &mov ("edx","eax"); # put aside tick (yes, I neglect edx) + &sub ("eax",$lasttick); # diff + &mov ($lasttick,"edx"); # lasttick = tick + &cmp ("eax",$lastdiff); + &mov ($lastdiff,"eax"); # lastdiff = diff + &mov ("edx",0); + &setne ("dl"); + &sub ($cnt,"edx"); # conditional --$cnt + &lea ($out,&DWP(0,$out,"edx",4)); # conditional ++$out + &jnz (&label("loop2")); + +&set_label("done2"); + &mov ("eax",&wparam(1)); + &sub ("eax",$cnt); +&set_label("nogo"); + } +&function_end("OPENSSL_instrument_bus2"); +} + &initseg("OPENSSL_cpuid_setup"); &asm_finish(); diff --git a/doc/crypto/OPENSSL_instrument_bus.pod b/doc/crypto/OPENSSL_instrument_bus.pod new file mode 100644 index 0000000000..539957b0bf --- /dev/null +++ b/doc/crypto/OPENSSL_instrument_bus.pod @@ -0,0 +1,42 @@ +=pod + +=head1 NAME + +OPENSSL_instrument_bus[2] - instrument references to memory bus + +=head1 SYNOPSIS + + #ifdef OPENSSL_CPUID_OBJ + size_t OPENSSL_instrument_bus (int *vector,size_t num); + size_t OPENSSL_instrument_bus2(int *vector,size_t num,size_t max); + #endif + +=head1 DESCRIPTION + +It was empirically found that timings of references to primary memory +are subject to irregular, apparently non-deterministic variations. The +subroutines in question instrument these references for purposes of +gathering entropy for random number generator. In order to make it +bus-bound a 'flush cache line' instruction is used between probes. In +addition probes are added to B elements in atomic or +interlocked manner, which should contribute additional noise on +multi-processor systems. This also means that B should be +zeroed upon invocation (if you want to retrieve actual probe values). + +OPENSSL_instrument_bus performs B probes and records the number of +oscillator cycles every probe took. + +OPENSSL_instrument_bus2 on the other hand B consecutive +probes with the same value, i.e. in a way it records duration of +periods when probe values appeared deterministic. The subroutine +performs at most B probes in attempt to fill the B, +with B value of 0 meaning "as many as it takes." + +=head1 RETURN VALUE + +Return value of 0 indicates that CPU is not capable of performing the +benchmark, either because oscillator counter or 'flush cache line' is +not available on current platform. For reference, on x86 'flush cache +line' was introduced with the SSE2 extensions. + +Otherwise number of recorded values is returned. -- 2.34.1