7b76522bd881f8eac8a53939a07941d19f957185
[openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
# Command line: [flavour] output.  A first argument containing a dot is
# taken to be the output file name, in which case no flavour was given.
3 $flavour = shift;
4 $output  = shift;
5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
6
# Win64 register conventions are selected by an nasm/masm/mingw64 flavour
# or by an output file name ending in .asm.
7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
8
# Everything printed from here on is piped through the perlasm translator
# that lives next to this script; $dir is the directory part of $0.
# Fail loudly if the pipe cannot be set up instead of silently printing
# untranslated text to the original STDOUT.
9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
10 open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output" or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";
11
# First four integer argument registers for the selected calling convention.
12 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
13                                  ("%rdi","%rsi","%rdx","%rcx"); # Unix order
14
# Start of the main assembly text: startup hook, capability word, then
# the switch to the code section for the routines that follow.
15 print<<___;
16 .extern         OPENSSL_cpuid_setup
17 .hidden         OPENSSL_cpuid_setup
18 .section        .init
19         call    OPENSSL_cpuid_setup     # emitted into .init rather than into a function body; NOTE(review): presumably relies on the platform executing .init at load time - confirm
20
21 .hidden OPENSSL_ia32cap_P
22 .comm   OPENSSL_ia32cap_P,8             # 8-byte common symbol, hidden from other modules
23
24 .text
25
# OPENSSL_atomic_add: atomically add the second argument to the 32-bit
# integer addressed by the first argument, using a lock cmpxchg retry loop.
# Returns the updated value in %rax, sign-extended from 32 to 64 bits.
# Clobbers %r8 and flags.
# NOTE(review): C prototype is presumably int OPENSSL_atomic_add(int *, int) - confirm against the header.
26 .globl  OPENSSL_atomic_add
27 .type   OPENSSL_atomic_add,\@abi-omnipotent
28 .align  16
29 OPENSSL_atomic_add:
30         movl    ($arg1),%eax            # eax = value currently in memory (cmpxchg comparand)
31 .Lspin: leaq    ($arg2,%rax),%r8        # r8 = current value + addend
32         .byte   0xf0            # lock
33         cmpxchgl        %r8d,($arg1)    # store r8d if memory still equals eax, else reload eax from memory
34         jne     .Lspin                  # memory changed underneath us: retry with the refreshed eax
35         movl    %r8d,%eax               # return the value that was stored
36         .byte   0x48,0x98       # cltq/cdqe
37         ret
38 .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
39
# OPENSSL_rdtsc: return the 64-bit time-stamp counter in %rax.
# rdtsc delivers the counter in two halves, high in %edx and low in %eax;
# the halves are merged here.  Takes no arguments; clobbers %rdx and flags.
40 .globl  OPENSSL_rdtsc
41 .type   OPENSSL_rdtsc,\@abi-omnipotent
42 .align  16
43 OPENSSL_rdtsc:
44         rdtsc
45         shl     \$32,%rdx               # move the high half into bits 63..32
46         or      %rdx,%rax               # rax = high:low
47         ret
48 .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
49
# OPENSSL_ia32_cpuid: run CPUID and return a 64-bit capability word in %rax:
#   bits 31..0  - leaf 1 %edx with the adjustments listed below
#   bits 63..32 - leaf 1 %ecx with the adjustments listed below
# Adjustments:
#   - on the path through .Lnocacheinfo, %edx bits 30 and 20 are first
#     forced to 0; bit 30 is then set for "GenuineIntel" parts and bit 20
#     for Intel parts whose family field (leaf 1 %eax bits 11..8) is 15.
#     The AMD path that jumps straight to .Lgeneric leaves both bits as
#     reported by cpuid;
#   - %edx bit 28 (hyper-threading) is cleared when the sharing checks
#     decide that logical processors do not share a core or L1D cache;
#   - %ecx bit 11 is replaced by the AMD XOP flag taken from leaf 0x80000001;
#   - %ecx bits 28, 12 and 11 (AVX, FMA, XOP) are cleared unless OSXSAVE is
#     set and XCR0 reports both XMM and YMM state enabled.
# Register roles: %r8 = saved %rbx, %r9d = non-Intel marker and later the
# %ecx image, %r10d = core/sharing count and later the %edx image,
# %r11d = highest standard cpuid leaf.
# Takes no arguments.  %rbx is preserved; %rcx, %rdx, %r8-%r11 and flags
# are clobbered.
50 .globl  OPENSSL_ia32_cpuid
51 .type   OPENSSL_ia32_cpuid,\@abi-omnipotent
52 .align  16
53 OPENSSL_ia32_cpuid:
54         mov     %rbx,%r8                # save %rbx
55
56         xor     %eax,%eax               # leaf 0: vendor string and highest standard leaf
57         cpuid
58         mov     %eax,%r11d              # max value for standard query level
59
60         xor     %eax,%eax               # clear eax so each setne below yields a clean 0/1
61         cmp     \$0x756e6547,%ebx       # "Genu"
62         setne   %al
63         mov     %eax,%r9d               # r9d collects mismatches against "GenuineIntel"
64         cmp     \$0x49656e69,%edx       # "ineI"
65         setne   %al
66         or      %eax,%r9d
67         cmp     \$0x6c65746e,%ecx       # "ntel"
68         setne   %al
69         or      %eax,%r9d               # 0 indicates Intel CPU
70         jz      .Lintel                 # flags come from the or above
71
72         cmp     \$0x68747541,%ebx       # "Auth"
73         setne   %al
74         mov     %eax,%r10d              # r10d collects mismatches against "AuthenticAMD"
75         cmp     \$0x69746E65,%edx       # "enti"
76         setne   %al
77         or      %eax,%r10d
78         cmp     \$0x444D4163,%ecx       # "cAMD"
79         setne   %al
80         or      %eax,%r10d              # 0 indicates AMD CPU
81         jnz     .Lintel                 # neither Intel nor AMD: take the common path, r9d stays non-zero
82
83         # AMD specific
84         mov     \$0x80000000,%eax       # query highest extended leaf
85         cpuid
86         cmp     \$0x80000001,%eax
87         jb      .Lintel                 # no extended feature leaf: fall back to the common path
88         mov     %eax,%r10d              # r10d = highest extended leaf
89         mov     \$0x80000001,%eax
90         cpuid
91         or      %ecx,%r9d
92         and     \$0x00000801,%r9d       # keep AMD XOP bit, 1<<11, plus bit 0, which is the non-Intel marker
93
94         cmp     \$0x80000008,%r10d
95         jb      .Lintel                 # core-count leaf unavailable: fall back to the common path
96
97         mov     \$0x80000008,%eax
98         cpuid
99         movzb   %cl,%r10                # number of cores - 1
100         inc     %r10                    # number of cores
101
102         mov     \$1,%eax
103         cpuid
104         bt      \$28,%edx               # test hyper-threading bit
105         jnc     .Lgeneric
106         shr     \$16,%ebx               # number of logical processors
107         cmp     %r10b,%bl
108         ja      .Lgeneric               # more logical processors than cores: keep the bit
109         and     \$0xefffffff,%edx       # ~(1<<28)
110         jmp     .Lgeneric
111
112 .Lintel:
113         cmp     \$4,%r11d
114         mov     \$-1,%r10d              # default when leaf 4 is unavailable; mov leaves the cmp flags intact
115         jb      .Lnocacheinfo
116
117         mov     \$4,%eax
118         mov     \$0,%ecx                # query L1D
119         cpuid
120         mov     %eax,%r10d
121         shr     \$14,%r10d
122         and     \$0xfff,%r10d           # number of cores -1 per L1D
123
124 .Lnocacheinfo:
125         mov     \$1,%eax
126         cpuid
127         and     \$0xbfefffff,%edx       # force reserved bits to 0
128         cmp     \$0,%r9d
129         jne     .Lnotintel
130         or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
131         and     \$15,%ah                # keep eax bits 11..8
132         cmp     \$15,%ah                # examine Family ID
133         jne     .Lnotintel
134         or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
135 .Lnotintel:
136         bt      \$28,%edx               # test hyper-threading bit
137         jnc     .Lgeneric
138         and     \$0xefffffff,%edx       # ~(1<<28)
139         cmp     \$0,%r10d
140         je      .Lgeneric               # L1D count says nothing is shared: leave the bit cleared
141
142         or      \$0x10000000,%edx       # 1<<28
143         shr     \$16,%ebx               # bl = logical processor count from leaf 1
144         cmp     \$1,%bl                 # see if cache is shared
145         ja      .Lgeneric
146         and     \$0xefffffff,%edx       # ~(1<<28)
147 .Lgeneric:
148         and     \$0x00000800,%r9d       # isolate AMD XOP flag
149         and     \$0xfffff7ff,%ecx       # drop bit 11 from the leaf 1 value
150         or      %ecx,%r9d               # merge AMD XOP flag
151
152         mov     %edx,%r10d              # %r9d:%r10d is copy of %ecx:%edx
153         bt      \$27,%r9d               # check OSXSAVE bit
154         jnc     .Lclear_avx
155         xor     %ecx,%ecx               # XCR0
156         .byte   0x0f,0x01,0xd0          # xgetbv
157         and     \$6,%eax                # isolate XMM and YMM state support
158         cmp     \$6,%eax
159         je      .Ldone                  # both states enabled: AVX-class bits may stay
160 .Lclear_avx:
161         mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
162         and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
163 .Ldone:
164         shl     \$32,%r9                # ecx image goes to the upper half of the result
165         mov     %r10d,%eax              # edx image in the lower half; the 32-bit move zeroes the upper bits of rax
166         mov     %r8,%rbx                # restore %rbx
167         or      %r9,%rax
168         ret
169 .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
170
# OPENSSL_cleanse: overwrite a memory region with zero bytes.
# First argument = start address, second argument = length in bytes.
# Lengths below 15 are cleared one byte at a time.  Longer regions are
# byte-cleared up to an 8-byte boundary, then cleared in 8-byte stores,
# and any remaining tail goes back through the byte loop.
# Both argument registers are consumed; %rax is zero on return.
# NOTE(review): C prototype is presumably void OPENSSL_cleanse(void *, size_t) - confirm against the header.
171 .globl  OPENSSL_cleanse
172 .type   OPENSSL_cleanse,\@abi-omnipotent
173 .align  16
174 OPENSSL_cleanse:
175         xor     %rax,%rax               # rax = 0, the fill value
176         cmp     \$15,$arg2
177         jae     .Lot                    # 15 bytes or more: take the aligned path
178         cmp     \$0,$arg2
179         je      .Lret                   # nothing to do for a zero length
180 .Little:
181         mov     %al,($arg1)             # clear one byte
182         sub     \$1,$arg2
183         lea     1($arg1),$arg1          # advance with lea so the flags from sub survive
184         jnz     .Little
185 .Lret:
186         ret
187 .align  16
188 .Lot:
189         test    \$7,$arg1               # byte-clear until the pointer is 8-byte aligned
190         jz      .Laligned
191         mov     %al,($arg1)
192         lea     -1($arg2),$arg2
193         lea     1($arg1),$arg1
194         jmp     .Lot
195 .Laligned:
196         mov     %rax,($arg1)            # clear eight bytes; at least 8 remain on first entry since length was 15 or more
197         lea     -8($arg2),$arg2
198         test    \$-8,$arg2              # non-zero while 8 or more bytes remain
199         lea     8($arg1),$arg1          # lea keeps the flags from test for the jnz
200         jnz     .Laligned
201         cmp     \$0,$arg2
202         jne     .Little                 # finish a tail of 1..7 bytes
203         ret
204 .size   OPENSSL_cleanse,.-OPENSSL_cleanse
205 ___
206
# OPENSSL_wipe_cpu for every target except Win64: zero xmm0-xmm15 and the
# general-purpose registers rcx, rdx, rsi, rdi and r8-r11, then return in
# rax the address 8 bytes above the stack pointer at entry.  The clears
# are independent of one another, so their order does not matter.
207 print<<___ unless ($win64);
208 .globl  OPENSSL_wipe_cpu
209 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
210 .align  16
211 OPENSSL_wipe_cpu:
212         xorq    %r11,%r11               # general-purpose registers first
213         xorq    %r10,%r10
214         xorq    %r9,%r9
215         xorq    %r8,%r8
216         xorq    %rdi,%rdi
217         xorq    %rsi,%rsi
218         xorq    %rdx,%rdx
219         xorq    %rcx,%rcx
220         pxor    %xmm15,%xmm15           # then all sixteen SSE registers
221         pxor    %xmm14,%xmm14
222         pxor    %xmm13,%xmm13
223         pxor    %xmm12,%xmm12
224         pxor    %xmm11,%xmm11
225         pxor    %xmm10,%xmm10
226         pxor    %xmm9,%xmm9
227         pxor    %xmm8,%xmm8
228         pxor    %xmm7,%xmm7
229         pxor    %xmm6,%xmm6
230         pxor    %xmm5,%xmm5
231         pxor    %xmm4,%xmm4
232         pxor    %xmm3,%xmm3
233         pxor    %xmm2,%xmm2
234         pxor    %xmm1,%xmm1
235         pxor    %xmm0,%xmm0
236         leaq    8(%rsp),%rax            # result: address just above the return address
237         ret
238 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
239 ___
# OPENSSL_wipe_cpu for Win64: zero xmm0-xmm5 and the general-purpose
# registers rcx, rdx and r8-r11, then return in rax the address 8 bytes
# above the stack pointer at entry.  xmm6-xmm15, rsi and rdi are left
# alone here, unlike in the non-Win64 variant; they are callee-saved under
# the Microsoft x64 convention.
240 print<<___ if ($win64);
241 .globl  OPENSSL_wipe_cpu
242 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
243 .align  16
244 OPENSSL_wipe_cpu:
245         xorq    %r11,%r11               # general-purpose registers first
246         xorq    %r10,%r10
247         xorq    %r9,%r9
248         xorq    %r8,%r8
249         xorq    %rdx,%rdx
250         xorq    %rcx,%rcx
251         pxor    %xmm5,%xmm5             # then the six volatile SSE registers
252         pxor    %xmm4,%xmm4
253         pxor    %xmm3,%xmm3
254         pxor    %xmm2,%xmm2
255         pxor    %xmm1,%xmm1
256         pxor    %xmm0,%xmm0
257         leaq    8(%rsp),%rax            # result: address just above the return address
258         ret
259 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
260 ___
261 {
# Register assignments shared by OPENSSL_instrument_bus and
# OPENSSL_instrument_bus2.
262 my $out="%r10";
263 my $cnt="%rcx";
264 my $max="%r11";
265 my $lasttick="%r8d";
266 my $lastdiff="%r9d";
# Offset from %rsp of an 8-byte scratch slot: the caller-provided home
# space just above the return address on Win64, the red zone just below
# %rsp everywhere else.  This has to test the variable $win64; a bareword
# win64 is a non-empty string, hence always true, which would select
# offset 8 on every platform and make non-Win64 builds write into the
# caller's frame.
267 my $redzone=$win64?8:-8;
268
269 print<<___;
# OPENSSL_instrument_bus: fill an array of 32-bit slots with time-stamp
# counter deltas.  First argument = array, second argument = slot count.
# Each iteration reads the counter, flushes the cache line of the current
# slot, adds the delta since the previous reading to the slot with a
# locked add, and advances to the next slot.  Returns the slot count.
# NOTE(review): a count of 0 is not special-cased; the sub/jnz pair would
# wrap around - confirm that callers never pass 0.
270 .globl  OPENSSL_instrument_bus
271 .type   OPENSSL_instrument_bus,\@abi-omnipotent
272 .align  16
273 OPENSSL_instrument_bus:
274         mov     $arg1,$out      # tribute to Win64
275         mov     $arg2,$cnt
276         mov     $arg2,$max
277
278         rdtsc                   # collect 1st tick
279         mov     %eax,$lasttick  # lasttick = tick
280         mov     \$0,$lastdiff   # lastdiff = 0
281         clflush ($out)
282         .byte   0xf0            # lock
283         add     $lastdiff,($out)
284         jmp     .Loop
285 .align  16
286 .Loop:  rdtsc
287         mov     %eax,%edx       # keep the raw tick
288         sub     $lasttick,%eax  # diff = tick - lasttick
289         mov     %edx,$lasttick  # lasttick = tick
290         mov     %eax,$lastdiff  # lastdiff = diff
291         clflush ($out)
292         .byte   0xf0            # lock
293         add     %eax,($out)     # accumulate diff into the current slot
294         lea     4($out),$out    # next slot; lea leaves flags alone
295         sub     \$1,$cnt
296         jnz     .Loop
297
298         mov     $max,%rax       # return the slot count
299         ret
300 .size   OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
301
# OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the slot
# pointer only advances when the new delta differs from the previous one.
# First argument = array, second argument = slot count, third argument =
# maximum number of probes.  Stops when either limit is exhausted and
# returns the number of slots consumed.  The initial slot count is parked
# in the scratch stack slot because every scratch register is in use.
302 .globl  OPENSSL_instrument_bus2
303 .type   OPENSSL_instrument_bus2,\@abi-omnipotent
304 .align  16
305 OPENSSL_instrument_bus2:
306         mov     $arg1,$out      # tribute to Win64
307         mov     $arg2,$cnt
308         mov     $arg3,$max
309         mov     $cnt,$redzone(%rsp)
310
311         rdtsc                   # collect 1st tick
312         mov     %eax,$lasttick  # lasttick = tick
313         mov     \$0,$lastdiff   # lastdiff = 0
314
315         clflush ($out)
316         .byte   0xf0            # lock
317         add     $lastdiff,($out)
318
319         rdtsc                   # collect 1st diff
320         mov     %eax,%edx
321         sub     $lasttick,%eax  # diff
322         mov     %edx,$lasttick  # lasttick = tick
323         mov     %eax,$lastdiff  # lastdiff = diff
324 .Loop2:
325         clflush ($out)
326         .byte   0xf0            # lock
327         add     %eax,($out)     # accumulate diff
328
329         sub     \$1,$max
330         jz      .Ldone2         # probe budget exhausted
331
332         rdtsc
333         mov     %eax,%edx
334         sub     $lasttick,%eax  # diff
335         mov     %edx,$lasttick  # lasttick = tick
336         cmp     $lastdiff,%eax
337         mov     %eax,$lastdiff  # lastdiff = diff
338         mov     \$0,%edx        # mov, not xor: the cmp flags are still needed
339         setne   %dl             # rdx = 1 if diff changed, else 0
340         sub     %rdx,$cnt       # conditional --$cnt
341         lea     ($out,%rdx,4),$out      # conditional ++$out
342         jnz     .Loop2          # flags are from the sub above: loop while slots remain
343
344 .Ldone2:
345         mov     $redzone(%rsp),%rax     # initial slot count
346         sub     $cnt,%rax               # minus slots left = slots consumed
347         ret
348 .size   OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
349 ___
350 }
351
# STDOUT is a pipe to the translator at this point.  Closing it flushes the
# remaining output; report a failed close instead of exiting successfully
# with a possibly incomplete output file.
352 close STDOUT or die "error closing STDOUT: $!";   # flush