crypto/x86[_64]cpuid.pl: add OPENSSL_ia32_rd[rand|seed]_bytes.
author Andy Polyakov <appro@openssl.org>
Sun, 10 Jul 2016 10:05:43 +0000 (12:05 +0200)
committer Andy Polyakov <appro@openssl.org>
Fri, 15 Jul 2016 11:20:52 +0000 (13:20 +0200)
Reviewed-by: Richard Levitte <levitte@openssl.org>
crypto/perlasm/x86_64-xlate.pl
crypto/x86_64cpuid.pl
crypto/x86cpuid.pl

index 60b98d7..617adf9 100755 (executable)
@@ -805,7 +805,7 @@ my $rdrand = sub {
       my @opcode=();
       my $dst=$1;
        if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
-       rex(\@opcode,0,$1,8);
+       rex(\@opcode,0,$dst,8);
        push @opcode,0x0f,0xc7,0xf0|($dst&7);
        @opcode;
     } else {
@@ -818,7 +818,7 @@ my $rdseed = sub {
       my @opcode=();
       my $dst=$1;
        if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
-       rex(\@opcode,0,$1,8);
+       rex(\@opcode,0,$dst,8);
        push @opcode,0x0f,0xc7,0xf8|($dst&7);
        @opcode;
     } else {
index 285ad1b..6cb1521 100644 (file)
@@ -393,36 +393,67 @@ OPENSSL_instrument_bus2:
 ___
 }
 
+sub gen_random {        # emits OPENSSL_ia32_<rdop> and OPENSSL_ia32_<rdop>_bytes for one instruction
+my $rdop = shift;       # mnemonic to wrap; invoked below with "rdrand" and "rdseed"
 print<<___;
-.globl OPENSSL_ia32_rdrand
-.type  OPENSSL_ia32_rdrand,\@abi-omnipotent
+.globl OPENSSL_ia32_${rdop}
+.type  OPENSSL_ia32_${rdop},\@abi-omnipotent
 .align 16
-OPENSSL_ia32_rdrand:
+OPENSSL_ia32_${rdop}:
        mov     \$8,%ecx
-.Loop_rdrand:
-       rdrand  %rax
-       jc      .Lbreak_rdrand
-       loop    .Loop_rdrand
-.Lbreak_rdrand:
+.Loop_${rdop}:
+       ${rdop} %rax                    # try to fetch a 64-bit value
+       jc      .Lbreak_${rdop}         # carry set: stop retrying
+       loop    .Loop_${rdop}           # otherwise retry while the count in ecx lasts
+.Lbreak_${rdop}:
        cmp     \$0,%rax
        cmove   %rcx,%rax
        ret
-.size  OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
+.size  OPENSSL_ia32_${rdop},.-OPENSSL_ia32_${rdop}
 
-.globl OPENSSL_ia32_rdseed
-.type  OPENSSL_ia32_rdseed,\@abi-omnipotent
+.globl OPENSSL_ia32_${rdop}_bytes
+.type  OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
 .align 16
-OPENSSL_ia32_rdseed:
-       mov     \$8,%ecx
-.Loop_rdseed:
-       rdseed  %rax
-       jc      .Lbreak_rdseed
-       loop    .Loop_rdseed
-.Lbreak_rdseed:
-       cmp     \$0,%rax
-       cmove   %rcx,%rax
+OPENSSL_ia32_${rdop}_bytes:
+       xor     %rax, %rax      # return value: count of bytes stored so far
+       cmp     \$0,$arg2                # second argument is the number of bytes still wanted
+       je      .Ldone_${rdop}_bytes    # nothing requested: return 0
+
+       mov     \$8,%r11                 # retry budget for one 64-bit fetch
+.Loop_${rdop}_bytes:
+       ${rdop} %r10                    # fetch the next 64-bit value into r10
+       jc      .Lbreak_${rdop}_bytes   # carry set: go store it
+       dec     %r11
+       jnz     .Loop_${rdop}_bytes     # carry clear: retry until the budget is spent
+       jmp     .Ldone_${rdop}_bytes    # budget spent: return the short count in rax
+
+.align 16
+.Lbreak_${rdop}_bytes:
+       cmp     \$8,$arg2
+       jb      .Ltail_${rdop}_bytes    # fewer than 8 bytes left: store them one at a time
+       mov     %r10,($arg1)            # store all 8 bytes at the output pointer
+       lea     8($arg1),$arg1          # advance the output pointer
+       add     \$8,%rax
+       sub     \$8,$arg2
+       jz      .Ldone_${rdop}_bytes    # request satisfied exactly
+       mov     \$8,%r11                 # fresh retry budget for the next fetch
+       jmp     .Loop_${rdop}_bytes
+
+.align 16
+.Ltail_${rdop}_bytes:
+       mov     %r10b,($arg1)           # store the current low byte of the fetched value
+       lea     1($arg1),$arg1
+       inc     %rax
+       shr     \$8,%r10                 # bring the next byte of that value down into r10b
+       dec     $arg2
+       jnz     .Ltail_${rdop}_bytes    # repeat until no bytes remain
+
+.Ldone_${rdop}_bytes:
        ret
-.size  OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
+.size  OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
 ___
+}
+gen_random("rdrand");
+gen_random("rdseed");
 
 close STDOUT;  # flush
index 99ffa1d..c45b183 100644 (file)
@@ -492,29 +492,64 @@ my $max = "ebp";
 &function_end("OPENSSL_instrument_bus2");
 }
 
-&function_begin_B("OPENSSL_ia32_rdrand");
+sub gen_random {        # emits OPENSSL_ia32_<rdop> and OPENSSL_ia32_<rdop>_bytes for one instruction
+my $rdop = shift;       # mnemonic to wrap; invoked below with "rdrand" and "rdseed"
+&function_begin_B("OPENSSL_ia32_${rdop}");
        &mov    ("ecx",8);
 &set_label("loop");
-       &rdrand ("eax");
+       &${rdop}("eax");                # symbolic call: invokes the sub named by $rdop
        &jc     (&label("break"));
        &loop   (&label("loop"));
 &set_label("break");
        &cmp    ("eax",0);
        &cmove  ("eax","ecx");
        &ret    ();
-&function_end_B("OPENSSL_ia32_rdrand");
+&function_end_B("OPENSSL_ia32_${rdop}");
+
+&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
+       &push   ("edi");                # edi and ebx are used below and restored at "done"
+       &push   ("ebx");
+       &xor    ("eax","eax");          # return value: count of bytes stored so far
+       &mov    ("edi",&wparam(0));     # output pointer (presumably the first stack argument; confirm wparam in x86asm.pl)
+       &mov    ("ebx",&wparam(1));     # number of bytes still wanted
+
+       &cmp    ("ebx",0);
+       &je     (&label("done"));       # nothing requested: return 0
 
-&function_begin_B("OPENSSL_ia32_rdseed");
        &mov    ("ecx",8);
 &set_label("loop");
-       &rdseed ("eax");
+       &${rdop}("edx");                # fetch the next 32-bit value into edx
        &jc     (&label("break"));
        &loop   (&label("loop"));
-&set_label("break");
-       &cmp    ("eax",0);
-       &cmove  ("eax","ecx");
+       &jmp    (&label("done"));       # retry count in ecx exhausted: return the short count in eax
+
+&set_label("break",16);                # NOTE(review): 16 is presumably the label alignment; confirm in x86asm.pl
+       &cmp    ("ebx",4);
+       &jb     (&label("tail"));       # fewer than 4 bytes left: store them one at a time
+       &mov    (&DWP(0,"edi"),"edx");  # store all 4 bytes at the output pointer
+       &lea    ("edi",&DWP(4,"edi"));  # advance the output pointer
+       &add    ("eax",4);
+       &sub    ("ebx",4);
+       &jz     (&label("done"));       # request satisfied exactly
+       &mov    ("ecx",8);              # fresh retry count for the next fetch
+       &jmp    (&label("loop"));
+
+&set_label("tail",16);
+       &mov    (&BP(0,"edi"),"dl");    # store the current low byte of the fetched value
+       &lea    ("edi",&DWP(1,"edi"));
+       &inc    ("eax");
+       &shr    ("edx",8);              # bring the next byte of that value down into dl
+       &dec    ("ebx");
+       &jnz    (&label("tail"));       # repeat until no bytes remain
+
+&set_label("done");
+       &pop    ("ebx");                # restore in reverse order of the pushes above
+       &pop    ("edi");
        &ret    ();
-&function_end_B("OPENSSL_ia32_rdseed");
+&function_end_B("OPENSSL_ia32_${rdop}_bytes");
+}
+&gen_random("rdrand");
+&gen_random("rdseed");
 
 &initseg("OPENSSL_cpuid_setup");