RC4_set_key for x86_64 and Core2 optimization.
[openssl.git] / crypto / x86_64cpuid.pl
#!/usr/bin/env perl
#
# Emits the x86_64 CPU helper routines (OPENSSL_rdtsc, OPENSSL_atomic_add,
# OPENSSL_wipe_cpu, OPENSSL_ia32_cpuid) as assembler source.
#
# Usage: perl x86_64cpuid.pl <output-file>
#
# An output name containing "win64a.s" or "win64a.asm" selects the
# Intel-syntax (PROC/ENDP) flavour; any other name gets the AT&T-syntax one.

$output=shift;
# Alternation needs a group: the former [s|asm] was a character class that
# matched any single one of 's', '|', 'a' or 'm'.
$win64a=1 if ($output =~ /win64a\.(?:s|asm)/);
# Low-precedence 'or' is required: with '||' the die was bound to the
# always-true ">$output" string, so a failed open went unreported.
open STDOUT,">$output" or die "can't open $output: $!";

print<<___ if(defined($win64a));
_TEXT   SEGMENT
; OPENSSL_rdtsc: return the time-stamp counter as a single 64-bit value.
; Out: rax = (rdx << 32) | rax as left by RDTSC.  Clobbers rdx and flags.
PUBLIC  OPENSSL_rdtsc
ALIGN   16
OPENSSL_rdtsc   PROC
        rdtsc                           ; counter halves arrive in edx:eax
        shl     rdx,32                  ; move the high half into bits 63..32
        or      rax,rdx                 ; merge both halves into rax
        ret
OPENSSL_rdtsc   ENDP
17
; OPENSSL_atomic_add: atomically add a 32-bit amount to a 32-bit memory cell.
; In:  rcx = address of the cell, edx = amount to add
; Out: rax = new value of the cell, sign-extended from 32 bits
; Clobbers r8 and flags.
PUBLIC  OPENSSL_atomic_add
ALIGN   16
OPENSSL_atomic_add      PROC
        mov     eax,DWORD PTR[rcx]      ; eax = value currently in the cell
\$Lspin:
        lea     r8,DWORD PTR[rdx+rax]   ; r8d = candidate result
        lock    cmpxchg DWORD PTR[rcx],r8d      ; store only if cell still equals eax
        jne     \$Lspin                 ; cell changed: eax was reloaded, retry
        mov     eax,r8d
        cdqe                            ; sign-extend eax into rax
        ret
OPENSSL_atomic_add      ENDP
29
; OPENSSL_wipe_cpu: zero the scratch registers.
; Clears rcx, rdx, r8-r11 and xmm0-xmm5.  xmm6 and above are not touched
; (they are callee-saved under the Microsoft x64 convention).
; Out: rax = rsp+8, i.e. the address just above the return-address slot.
PUBLIC  OPENSSL_wipe_cpu
ALIGN   16
OPENSSL_wipe_cpu        PROC
        ; integer scratch registers
        xor     rcx,rcx
        xor     rdx,rdx
        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
        xor     r11,r11

        ; SSE scratch registers
        pxor    xmm0,xmm0
        pxor    xmm1,xmm1
        pxor    xmm2,xmm2
        pxor    xmm3,xmm3
        pxor    xmm4,xmm4
        pxor    xmm5,xmm5

        lea     rax,QWORD PTR[rsp+8]
        ret
OPENSSL_wipe_cpu        ENDP
48
; OPENSSL_ia32_cpuid: query CPUID and return the leaf-1 feature words.
; Out: rax = (ecx << 32) | edx from CPUID leaf 1, with edx adjusted below:
;        bit 20 set  - GenuineIntel part with the hyper-threading bit set
;        bit 30 set  - as above, and Family ID field is not 15
;        bit 28 cleared - hyper-threading bit set but ebx[23:16] is 0 or 1
; rbx is preserved through r8; r9 is used as scratch.
PUBLIC  OPENSSL_ia32_cpuid
ALIGN   16
OPENSSL_ia32_cpuid      PROC
        mov     r8,rbx                  ; cpuid overwrites rbx, keep a copy

        xor     eax,eax
        cpuid                           ; leaf 0: vendor string in ebx, edx, ecx
        xor     eax,eax
        cmp     ebx,0756e6547h          ; "Genu"
        setne   al
        mov     r9d,eax
        cmp     edx,049656e69h          ; "ineI"
        setne   al
        or      r9d,eax
        cmp     ecx,06c65746eh          ; "ntel"
        setne   al
        or      r9d,eax                 ; r9d stays 0 only for "GenuineIntel"

        mov     eax,1
        cpuid                           ; leaf 1
        bt      edx,28                  ; test hyper-threading bit
        jnc     \$Ldone
        cmp     r9,0
        jne     \$Lnotintel
        or      edx,000100000h          ; 1<<20: use reserved bit to engage RC4_CHAR
        and     ah,15
        cmp     ah,15                   ; examine Family ID
        je      \$Lnotintel
        or      edx,040000000h          ; 1<<30: use reserved bit to skip unrolled loop
\$Lnotintel:
        shr     ebx,16
        cmp     bl,1                    ; see if cache is shared
        ja      \$Ldone
        and     edx,0efffffffh          ; clear bit 28
\$Ldone:
        shl     rcx,32                  ; leaf-1 ecx goes to the high half
        mov     eax,edx                 ; (adjusted) edx goes to the low half
        mov     rbx,r8                  ; restore the caller's rbx
        or      rax,rcx
        ret
OPENSSL_ia32_cpuid      ENDP
_TEXT   ENDS

; Emit the address of OPENSSL_cpuid_setup into the CRT XIU segment
; (presumably the C runtime initializer table, so the routine is run at
; start-up - confirm against the toolchain documentation).
CRT\$XIU        SEGMENT
EXTRN   OPENSSL_cpuid_setup:PROC
DQ      OPENSSL_cpuid_setup
CRT\$XIU        ENDS
END
___
# Every target other than win64a gets the AT&T-syntax flavour below.
print<<___ if(!defined($win64a));
.text
.globl  OPENSSL_rdtsc
.type   OPENSSL_rdtsc,\@function        # type the symbol as a function, as for OPENSSL_atomic_add
.align  16
OPENSSL_rdtsc:                          # out: %rax = 64-bit time-stamp counter
        rdtsc                           # counter halves arrive in %edx:%eax
        shlq    \$32,%rdx               # move the high half into bits 63..32
        orq     %rdx,%rax               # merge both halves into %rax
        ret
.size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
106
.globl  OPENSSL_atomic_add
.type   OPENSSL_atomic_add,\@function
.align  16
OPENSSL_atomic_add:                     # in: %rdi = cell address, %esi = amount; out: %rax = new value
        movl    (%rdi),%eax             # %eax = value currently in the cell
.Lspin:
        leaq    (%rsi,%rax),%r8         # %r8d = candidate result
lock;   cmpxchgl        %r8d,(%rdi)     # store only if the cell still equals %eax
        jne     .Lspin                  # cell changed: %eax was reloaded, retry
        movl    %r8d,%eax
        .byte   0x48,0x98               # cdqe: sign-extend %eax into %rax
        ret
.size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
119
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@function
.align  16
OPENSSL_wipe_cpu:                       # zero scratch registers; out: %rax = %rsp+8
        xorq    %rcx,%rcx               # integer scratch registers
        xorq    %rdx,%rdx
        xorq    %rsi,%rsi
        xorq    %rdi,%rdi
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11

        pxor    %xmm0,%xmm0             # all sixteen SSE registers
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        pxor    %xmm6,%xmm6
        pxor    %xmm7,%xmm7
        pxor    %xmm8,%xmm8
        pxor    %xmm9,%xmm9
        pxor    %xmm10,%xmm10
        pxor    %xmm11,%xmm11
        pxor    %xmm12,%xmm12
        pxor    %xmm13,%xmm13
        pxor    %xmm14,%xmm14
        pxor    %xmm15,%xmm15

        leaq    8(%rsp),%rax            # address just above the return-address slot
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
151
.globl  OPENSSL_ia32_cpuid
.type   OPENSSL_ia32_cpuid,\@function   # type the symbol as a function, as for OPENSSL_atomic_add
.align  16
OPENSSL_ia32_cpuid:                     # out: %rax = (leaf-1 %ecx << 32) | adjusted leaf-1 %edx
        movq    %rbx,%r8                # cpuid overwrites %rbx, keep a copy

        xor     %eax,%eax
        cpuid                           # leaf 0: vendor string in %ebx, %edx, %ecx
        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
        mov     %eax,%r9d
        cmp     \$0x49656e69,%edx       # "ineI"
        setne   %al
        or      %eax,%r9d
        cmp     \$0x6c65746e,%ecx       # "ntel"
        setne   %al
        or      %eax,%r9d               # %r9d stays 0 only for "GenuineIntel"

        movl    \$1,%eax
        cpuid                           # leaf 1
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        cmp     \$0,%r9
        jne     .Lnotintel
        or      \$1<<20,%edx            # use reserved bit to engage RC4_CHAR
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
        je      .Lnotintel
        or      \$1<<30,%edx            # use reserved bit to skip unrolled loop
.Lnotintel:
        shr     \$16,%ebx
        cmp     \$1,%bl                 # see if cache is shared
        ja      .Ldone
        and     \$~(1<<28),%edx         # not shared: drop the hyper-threading bit
.Ldone:
        shlq    \$32,%rcx               # leaf-1 %ecx goes to the high half
        movl    %edx,%eax               # (adjusted) %edx goes to the low half
        movq    %r8,%rbx                # restore the caller's %rbx
        orq     %rcx,%rax
        ret
.size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
193
.section        .init
        call    OPENSSL_cpuid_setup     # hook the CPU probe into the .init code
___