Add new APIs EC_curve_nist2nid and EC_curve_nid2nist which convert
[openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
3 $flavour = shift;
4 $output  = shift;
5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
6
7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
8
9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
10 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
11 ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
12 die "can't locate x86_64-xlate.pl";
13
14 open STDOUT,"| $^X $xlate $flavour $output";
15
16 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
17                                  ("%rdi","%rsi","%rdx","%rcx"); # Unix order
18
19 print<<___;
20 .extern         OPENSSL_cpuid_setup
21 .hidden         OPENSSL_cpuid_setup
22 .section        .init
23         call    OPENSSL_cpuid_setup
24
25 .hidden OPENSSL_ia32cap_P
26 .comm   OPENSSL_ia32cap_P,8,4
27
28 .text
29
30 .globl  OPENSSL_atomic_add
31 .type   OPENSSL_atomic_add,\@abi-omnipotent
32 .align  16
33 OPENSSL_atomic_add:
34         movl    ($arg1),%eax
35 .Lspin: leaq    ($arg2,%rax),%r8
36         .byte   0xf0            # lock
37         cmpxchgl        %r8d,($arg1)
38         jne     .Lspin
39         movl    %r8d,%eax
40         .byte   0x48,0x98       # cltq/cdqe
41         ret
42 .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
43
44 .globl  OPENSSL_rdtsc
45 .type   OPENSSL_rdtsc,\@abi-omnipotent
46 .align  16
47 OPENSSL_rdtsc:
48         rdtsc
49         shl     \$32,%rdx
50         or      %rdx,%rax
51         ret
52 .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
53
54 .globl  OPENSSL_ia32_cpuid
55 .type   OPENSSL_ia32_cpuid,\@abi-omnipotent
56 .align  16
57 OPENSSL_ia32_cpuid:
58         mov     %rbx,%r8                # save %rbx
59
60         xor     %eax,%eax
61         cpuid
62         mov     %eax,%r11d              # max value for standard query level
63
64         xor     %eax,%eax
65         cmp     \$0x756e6547,%ebx       # "Genu"
66         setne   %al
67         mov     %eax,%r9d
68         cmp     \$0x49656e69,%edx       # "ineI"
69         setne   %al
70         or      %eax,%r9d
71         cmp     \$0x6c65746e,%ecx       # "ntel"
72         setne   %al
73         or      %eax,%r9d               # 0 indicates Intel CPU
74         jz      .Lintel
75
76         cmp     \$0x68747541,%ebx       # "Auth"
77         setne   %al
78         mov     %eax,%r10d
79         cmp     \$0x69746E65,%edx       # "enti"
80         setne   %al
81         or      %eax,%r10d
82         cmp     \$0x444D4163,%ecx       # "cAMD"
83         setne   %al
84         or      %eax,%r10d              # 0 indicates AMD CPU
85         jnz     .Lintel
86
87         # AMD specific
88         mov     \$0x80000000,%eax
89         cpuid
90         cmp     \$0x80000001,%eax
91         jb      .Lintel
92         mov     %eax,%r10d
93         mov     \$0x80000001,%eax
94         cpuid
95         or      %ecx,%r9d
96         and     \$0x00000801,%r9d       # isolate AMD XOP bit, 1<<11
97
98         cmp     \$0x80000008,%r10d
99         jb      .Lintel
100
101         mov     \$0x80000008,%eax
102         cpuid
103         movzb   %cl,%r10                # number of cores - 1
104         inc     %r10                    # number of cores
105
106         mov     \$1,%eax
107         cpuid
108         bt      \$28,%edx               # test hyper-threading bit
109         jnc     .Lgeneric
110         shr     \$16,%ebx               # number of logical processors
111         cmp     %r10b,%bl
112         ja      .Lgeneric
113         and     \$0xefffffff,%edx       # ~(1<<28)
114         jmp     .Lgeneric
115
116 .Lintel:
117         cmp     \$4,%r11d
118         mov     \$-1,%r10d
119         jb      .Lnocacheinfo
120
121         mov     \$4,%eax
122         mov     \$0,%ecx                # query L1D
123         cpuid
124         mov     %eax,%r10d
125         shr     \$14,%r10d
126         and     \$0xfff,%r10d           # number of cores -1 per L1D
127
128 .Lnocacheinfo:
129         mov     \$1,%eax
130         cpuid
131         and     \$0xbfefffff,%edx       # force reserved bits to 0
132         cmp     \$0,%r9d
133         jne     .Lnotintel
134         or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
135         and     \$15,%ah
136         cmp     \$15,%ah                # examine Family ID
137         jne     .Lnotintel
138         or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
139 .Lnotintel:
140         bt      \$28,%edx               # test hyper-threading bit
141         jnc     .Lgeneric
142         and     \$0xefffffff,%edx       # ~(1<<28)
143         cmp     \$0,%r10d
144         je      .Lgeneric
145
146         or      \$0x10000000,%edx       # 1<<28
147         shr     \$16,%ebx
148         cmp     \$1,%bl                 # see if cache is shared
149         ja      .Lgeneric
150         and     \$0xefffffff,%edx       # ~(1<<28)
151 .Lgeneric:
152         and     \$0x00000800,%r9d       # isolate AMD XOP flag
153         and     \$0xfffff7ff,%ecx
154         or      %ecx,%r9d               # merge AMD XOP flag
155
156         mov     %edx,%r10d              # %r9d:%r10d is copy of %ecx:%edx
157         bt      \$27,%r9d               # check OSXSAVE bit
158         jnc     .Lclear_avx
159         xor     %ecx,%ecx               # XCR0
160         .byte   0x0f,0x01,0xd0          # xgetbv
161         and     \$6,%eax                # isolate XMM and YMM state support
162         cmp     \$6,%eax
163         je      .Ldone
164 .Lclear_avx:
165         mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
166         and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
167 .Ldone:
168         shl     \$32,%r9
169         mov     %r10d,%eax
170         mov     %r8,%rbx                # restore %rbx
171         or      %r9,%rax
172         ret
173 .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
174
175 .globl  OPENSSL_cleanse
176 .type   OPENSSL_cleanse,\@abi-omnipotent
177 .align  16
178 OPENSSL_cleanse:
179         xor     %rax,%rax
180         cmp     \$15,$arg2
181         jae     .Lot
182         cmp     \$0,$arg2
183         je      .Lret
184 .Little:
185         mov     %al,($arg1)
186         sub     \$1,$arg2
187         lea     1($arg1),$arg1
188         jnz     .Little
189 .Lret:
190         ret
191 .align  16
192 .Lot:
193         test    \$7,$arg1
194         jz      .Laligned
195         mov     %al,($arg1)
196         lea     -1($arg2),$arg2
197         lea     1($arg1),$arg1
198         jmp     .Lot
199 .Laligned:
200         mov     %rax,($arg1)
201         lea     -8($arg2),$arg2
202         test    \$-8,$arg2
203         lea     8($arg1),$arg1
204         jnz     .Laligned
205         cmp     \$0,$arg2
206         jne     .Little
207         ret
208 .size   OPENSSL_cleanse,.-OPENSSL_cleanse
209 ___
210
211 print<<___ if (!$win64);
212 .globl  OPENSSL_wipe_cpu
213 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
214 .align  16
215 OPENSSL_wipe_cpu:
216         pxor    %xmm0,%xmm0
217         pxor    %xmm1,%xmm1
218         pxor    %xmm2,%xmm2
219         pxor    %xmm3,%xmm3
220         pxor    %xmm4,%xmm4
221         pxor    %xmm5,%xmm5
222         pxor    %xmm6,%xmm6
223         pxor    %xmm7,%xmm7
224         pxor    %xmm8,%xmm8
225         pxor    %xmm9,%xmm9
226         pxor    %xmm10,%xmm10
227         pxor    %xmm11,%xmm11
228         pxor    %xmm12,%xmm12
229         pxor    %xmm13,%xmm13
230         pxor    %xmm14,%xmm14
231         pxor    %xmm15,%xmm15
232         xorq    %rcx,%rcx
233         xorq    %rdx,%rdx
234         xorq    %rsi,%rsi
235         xorq    %rdi,%rdi
236         xorq    %r8,%r8
237         xorq    %r9,%r9
238         xorq    %r10,%r10
239         xorq    %r11,%r11
240         leaq    8(%rsp),%rax
241         ret
242 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
243 ___
244 print<<___ if ($win64);
245 .globl  OPENSSL_wipe_cpu
246 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
247 .align  16
248 OPENSSL_wipe_cpu:
249         pxor    %xmm0,%xmm0
250         pxor    %xmm1,%xmm1
251         pxor    %xmm2,%xmm2
252         pxor    %xmm3,%xmm3
253         pxor    %xmm4,%xmm4
254         pxor    %xmm5,%xmm5
255         xorq    %rcx,%rcx
256         xorq    %rdx,%rdx
257         xorq    %r8,%r8
258         xorq    %r9,%r9
259         xorq    %r10,%r10
260         xorq    %r11,%r11
261         leaq    8(%rsp),%rax
262         ret
263 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
264 ___
265 {
266 my $out="%r10";
267 my $cnt="%rcx";
268 my $max="%r11";
269 my $lasttick="%r8d";
270 my $lastdiff="%r9d";
271 my $redzone=win64?8:-8;
272
273 print<<___;
274 .globl  OPENSSL_instrument_bus
275 .type   OPENSSL_instrument_bus,\@abi-omnipotent
276 .align  16
277 OPENSSL_instrument_bus:
278         mov     $arg1,$out      # tribute to Win64
279         mov     $arg2,$cnt
280         mov     $arg2,$max
281
282         rdtsc                   # collect 1st tick
283         mov     %eax,$lasttick  # lasttick = tick
284         mov     \$0,$lastdiff   # lastdiff = 0
285         clflush ($out)
286         .byte   0xf0            # lock
287         add     $lastdiff,($out)
288         jmp     .Loop
289 .align  16
290 .Loop:  rdtsc
291         mov     %eax,%edx
292         sub     $lasttick,%eax
293         mov     %edx,$lasttick
294         mov     %eax,$lastdiff
295         clflush ($out)
296         .byte   0xf0            # lock
297         add     %eax,($out)
298         lea     4($out),$out
299         sub     \$1,$cnt
300         jnz     .Loop
301
302         mov     $max,%rax
303         ret
304 .size   OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
305
306 .globl  OPENSSL_instrument_bus2
307 .type   OPENSSL_instrument_bus2,\@abi-omnipotent
308 .align  16
309 OPENSSL_instrument_bus2:
310         mov     $arg1,$out      # tribute to Win64
311         mov     $arg2,$cnt
312         mov     $arg3,$max
313         mov     $cnt,$redzone(%rsp)
314
315         rdtsc                   # collect 1st tick
316         mov     %eax,$lasttick  # lasttick = tick
317         mov     \$0,$lastdiff   # lastdiff = 0
318
319         clflush ($out)
320         .byte   0xf0            # lock
321         add     $lastdiff,($out)
322
323         rdtsc                   # collect 1st diff
324         mov     %eax,%edx
325         sub     $lasttick,%eax  # diff
326         mov     %edx,$lasttick  # lasttick = tick
327         mov     %eax,$lastdiff  # lastdiff = diff
328 .Loop2:
329         clflush ($out)
330         .byte   0xf0            # lock
331         add     %eax,($out)     # accumulate diff
332
333         sub     \$1,$max
334         jz      .Ldone2
335
336         rdtsc
337         mov     %eax,%edx
338         sub     $lasttick,%eax  # diff
339         mov     %edx,$lasttick  # lasttick = tick
340         cmp     $lastdiff,%eax
341         mov     %eax,$lastdiff  # lastdiff = diff
342         mov     \$0,%edx
343         setne   %dl
344         sub     %rdx,$cnt       # conditional --$cnt
345         lea     ($out,%rdx,4),$out      # conditional ++$out
346         jnz     .Loop2
347
348 .Ldone2:
349         mov     $redzone(%rsp),%rax
350         sub     $cnt,%rax
351         ret
352 .size   OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
353 ___
354 }
355
356 print<<___;
357 .globl  OPENSSL_ia32_rdrand
358 .type   OPENSSL_ia32_rdrand,\@abi-omnipotent
359 .align  16
360 OPENSSL_ia32_rdrand:
361         mov     \$8,%ecx
362 .Loop_rdrand:
363         rdrand  %rax
364         jc      .Lbreak_rdrand
365         loop    .Loop_rdrand
366 .Lbreak_rdrand:
367         cmp     \$0,%rax
368         cmove   %rcx,%rax
369         ret
370 .size   OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
371 ___
372
373 close STDOUT;   # flush