Initial do_fips.bat build script for WIN32 fipscanister.
[openssl.git] / crypto / x86_64cpuid.pl
#!/usr/bin/env perl
#
# Emits perlasm source for a handful of x86_64 helper routines
# (OPENSSL_atomic_add, OPENSSL_rdtsc, OPENSSL_ia32_cpuid, ...) and pipes
# it to perlasm/x86_64-xlate.pl.
#
# Arguments: [flavour] [output]. A first argument containing a dot is
# taken to be the output file name, and the flavour is left undefined.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 calling convention is selected by a nasm/masm/mingw64 flavour
# or by an output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of the path this script was invoked with, including the
# trailing separator; empty when $0 has no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Everything printed from here on is fed to the translator.  Stop right
# away if the pipe cannot be created instead of printing into a dead handle.
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"
    or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";

# Registers that carry the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
                                 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
14
# Start-up stub: places a call to OPENSSL_cpuid_setup in the .init section,
# then switches to .text for the routines that follow.
# NOTE(review): presumably this makes the loader run the setup routine at
# start-up -- confirm for the targeted platforms.
print <<___;
.extern         OPENSSL_cpuid_setup
.section        .init
        call    OPENSSL_cpuid_setup

.text

___

# OPENSSL_atomic_add($arg1 = pointer to a 32-bit value, $arg2 = amount)
# Reads the current value, computes value+amount and tries to store it
# with a locked cmpxchg, retrying until the store succeeds.  Returns the
# new value sign-extended to 64 bits in %rax.
print <<___;
.globl  OPENSSL_atomic_add
.type   OPENSSL_atomic_add,\@abi-omnipotent
.align  16
OPENSSL_atomic_add:
        movl    ($arg1),%eax
.Lspin: leaq    ($arg2,%rax),%r8
        .byte   0xf0            # lock
        cmpxchgl        %r8d,($arg1)
        jne     .Lspin
        movl    %r8d,%eax
        .byte   0x48,0x98       # cltq/cdqe
        ret
.size   OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc()
# Executes rdtsc and returns the %edx:%eax pair merged into %rax.
print <<___;
.globl  OPENSSL_rdtsc
.type   OPENSSL_rdtsc,\@abi-omnipotent
.align  16
OPENSSL_rdtsc:
        rdtsc
        shl     \$32,%rdx
        or      %rdx,%rax
        ret
.size   OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid()
# Returns the cpuid leaf-1 feature words in %rax: %ecx in the upper half,
# %edx (after the adjustments below) in the lower half.  %rbx is parked in
# %r8 across the cpuid calls and restored before returning.
#   - the vendor string from leaf 0 is compared against "GenuineIntel"
#     (%r9d == 0 on match) and "AuthenticAMD" (%r10d == 0 on match);
#   - on AMD, leaf 0x80000008 supplies the core count, which is compared
#     with the logical processor count from leaf 1 to decide whether
#     bit 28 (hyper-threading) of %edx is cleared;
#   - otherwise leaf 4 (when available) supplies the sharing count used to
#     decide whether bit 28 stays set; on Intel, bit 20 is set, and bit 30
#     is set too unless the family id is 15.
print <<___;
.globl  OPENSSL_ia32_cpuid
.type   OPENSSL_ia32_cpuid,\@abi-omnipotent
.align  16
OPENSSL_ia32_cpuid:
        mov     %rbx,%r8

        xor     %eax,%eax
        cpuid
        mov     %eax,%r11d              # max value for standard query level

        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
        mov     %eax,%r9d
        cmp     \$0x49656e69,%edx       # "ineI"
        setne   %al
        or      %eax,%r9d
        cmp     \$0x6c65746e,%ecx       # "ntel"
        setne   %al
        or      %eax,%r9d               # 0 indicates Intel CPU
        jz      .Lintel

        cmp     \$0x68747541,%ebx       # "Auth"
        setne   %al
        mov     %eax,%r10d
        cmp     \$0x69746E65,%edx       # "enti"
        setne   %al
        or      %eax,%r10d
        cmp     \$0x444D4163,%ecx       # "cAMD"
        setne   %al
        or      %eax,%r10d              # 0 indicates AMD CPU
        jnz     .Lintel

        # AMD specific
        mov     \$0x80000000,%eax
        cpuid
        cmp     \$0x80000008,%eax
        jb      .Lintel

        mov     \$0x80000008,%eax
        cpuid
        movzb   %cl,%r10                # number of cores - 1
        inc     %r10                    # number of cores

        mov     \$1,%eax
        cpuid
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        shr     \$16,%ebx               # number of logical processors
        cmp     %r10b,%bl
        ja      .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
        jmp     .Ldone

.Lintel:
        cmp     \$4,%r11d
        mov     \$-1,%r10d
        jb      .Lnocacheinfo

        mov     \$4,%eax
        mov     \$0,%ecx                # query L1D
        cpuid
        mov     %eax,%r10d
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D

.Lnocacheinfo:
        mov     \$1,%eax
        cpuid
        cmp     \$0,%r9d
        jne     .Lnotintel
        or      \$0x00100000,%edx       # use reserved 20th bit to engage RC4_CHAR
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
        je      .Lnotintel
        or      \$0x40000000,%edx       # use reserved bit to skip unrolled loop
.Lnotintel:
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
        cmp     \$0,%r10d
        je      .Ldone

        or      \$0x10000000,%edx       # 1<<28
        shr     \$16,%ebx
        cmp     \$1,%bl                 # see if cache is shared
        ja      .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
.Ldone:
        shl     \$32,%rcx
        mov     %edx,%eax
        mov     %r8,%rbx
        or      %rcx,%rax
        ret
.size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse($arg1 = buffer, $arg2 = length in bytes)
# Overwrites the buffer with zeros.  Lengths below 15 are cleared one byte
# at a time (.Little).  Longer buffers are first advanced byte-wise until
# the pointer is 8-byte aligned (.Lot), then cleared 8 bytes per store
# (.Laligned), and any remaining tail goes through the byte loop again.
print <<___;
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
        xor     %rax,%rax
        cmp     \$15,$arg2
        jae     .Lot
        cmp     \$0,$arg2
        je      .Lret
.Little:
        mov     %al,($arg1)
        sub     \$1,$arg2
        lea     1($arg1),$arg1
        jnz     .Little
.Lret:
        ret
.align  16
.Lot:
        test    \$7,$arg1
        jz      .Laligned
        mov     %al,($arg1)
        lea     -1($arg2),$arg2
        lea     1($arg1),$arg1
        jmp     .Lot
.Laligned:
        mov     %rax,($arg1)
        lea     -8($arg2),$arg2
        test    \$-8,$arg2
        lea     8($arg1),$arg1
        jnz     .Laligned
        cmp     \$0,$arg2
        jne     .Little
        ret
.size   OPENSSL_cleanse,.-OPENSSL_cleanse
___
177
# OPENSSL_wipe_cpu()
# Zeroes a set of SSE and general-purpose registers and returns %rsp+8 in
# %rax.  Exactly one of the two variants is emitted:
#   - Win64:  %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11;
#   - others: %xmm0-%xmm15 and %rcx, %rdx, %rsi, %rdi, %r8-%r11.
# NOTE(review): the shorter Win64 list presumably reflects registers that
# calling convention requires a callee to preserve -- confirm against the ABI.
if ($win64) {
print <<___;
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
else {
print <<___;
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        pxor    %xmm6,%xmm6
        pxor    %xmm7,%xmm7
        pxor    %xmm8,%xmm8
        pxor    %xmm9,%xmm9
        pxor    %xmm10,%xmm10
        pxor    %xmm11,%xmm11
        pxor    %xmm12,%xmm12
        pxor    %xmm13,%xmm13
        pxor    %xmm14,%xmm14
        pxor    %xmm15,%xmm15
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %rsi,%rsi
        xorq    %rdi,%rdi
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# OPENSSL_instrument_bus($arg1 = buffer of 32-bit words, $arg2 = count)
#   Runs `count` iterations.  Each iteration reads the time-stamp counter,
#   takes the difference from the previous reading, flushes the cache line
#   of the current word and adds the difference to it with a locked add,
#   then steps to the next word.  Returns the original count.
#
# OPENSSL_instrument_bus2($arg1 = buffer, $arg2 = count, $arg3 = max)
#   Same probe, but the output pointer only advances (and the remaining
#   count only drops) when the new difference differs from the previous
#   one.  Stops when the count reaches zero or after `max` probes.
#   Returns the initial count minus whatever is left of it.
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Scratch slot, relative to %rsp on entry, where bus2 keeps the initial
# count: 8(%rsp) on Win64, -8(%rsp) elsewhere.  This has to test $win64;
# a bareword `win64` is an always-true string, which made every platform
# use 8(%rsp) and so overwrite the caller's stack on Unix.
# NOTE(review): 8(%rsp) is understood to be the caller-provided home space
# on Win64 and -8(%rsp) the red zone on Unix -- confirm against the ABIs.
my $redzone = $win64 ? 8 : -8;

print<<___;
.globl  OPENSSL_instrument_bus
.type   OPENSSL_instrument_bus,\@abi-omnipotent
.align  16
OPENSSL_instrument_bus:
        mov     $arg1,$out      # tribute to Win64
        mov     $arg2,$cnt
        mov     $arg2,$max

        rdtsc                   # collect 1st tick
        mov     %eax,$lasttick  # lasttick = tick
        mov     \$0,$lastdiff   # lastdiff = 0
        clflush ($out)
        lock
        add     $lastdiff,($out)
        jmp     .Loop
.align  16
.Loop:  rdtsc
        mov     %eax,%edx
        sub     $lasttick,%eax
        mov     %edx,$lasttick
        mov     %eax,$lastdiff
        clflush ($out)
        lock
        add     %eax,($out)
        lea     4($out),$out
        sub     \$1,$cnt
        jnz     .Loop

        mov     $max,%rax
        ret
.size   OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl  OPENSSL_instrument_bus2
.type   OPENSSL_instrument_bus2,\@abi-omnipotent
.align  16
OPENSSL_instrument_bus2:
        mov     $arg1,$out      # tribute to Win64
        mov     $arg2,$cnt
        mov     $arg3,$max
        mov     $cnt,$redzone(%rsp)

        rdtsc                   # collect 1st tick
        mov     %eax,$lasttick  # lasttick = tick
        mov     \$0,$lastdiff   # lastdiff = 0

        clflush ($out)
        lock
        add     $lastdiff,($out)

        rdtsc                   # collect 1st diff
        mov     %eax,%edx
        sub     $lasttick,%eax  # diff
        mov     %edx,$lasttick  # lasttick = tick
        mov     %eax,$lastdiff  # lastdiff = diff
.Loop2:
        clflush ($out)
        lock
        add     %eax,($out)     # accumulate diff

        sub     \$1,$max
        jz      .Ldone2

        rdtsc
        mov     %eax,%edx
        sub     $lasttick,%eax  # diff
        mov     %edx,$lasttick  # lasttick = tick
        cmp     $lastdiff,%eax
        mov     %eax,$lastdiff  # lastdiff = diff
        mov     \$0,%edx
        setne   %dl
        sub     %rdx,$cnt       # conditional --$cnt
        lea     ($out,%rdx,4),$out      # conditional ++$out
        jnz     .Loop2

.Ldone2:
        mov     $redzone(%rsp),%rax
        sub     $cnt,%rax
        ret
.size   OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
322
# Closing the pipe flushes the generated text and waits for the translator.
# A false return means the flush/close failed or the translator exited
# with a non-zero status (in which case $! is 0 and $? holds the status).
close STDOUT
    or die "error closing STDOUT: " . ($! ? "$!" : "translator exit status $?");