EVP: Implement support for key downgrading in backends
[openssl.git] / crypto / x86cpuid.pl
1 #! /usr/bin/env perl
2 # Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
# Script preamble: make the perlasm helper directory (next to this script,
# or ./perlasm) loadable and pull in the x86 perlasm front end.
9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
10 push(@INC, "${dir}perlasm", "perlasm");
11 require "x86asm.pl";
12
# The last command-line argument is taken as the output file and STDOUT is
# redirected to it.  The three-argument open keeps characters in the file
# name from being read as an open mode, and a failed open is now fatal
# instead of silently sending the generated assembly to the inherited STDOUT.
13 $output = pop and open(STDOUT, '>', $output) || die "can't open $output: $!";
14
# NOTE(review): the first remaining argument is presumably the assembler
# flavour understood by x86asm.pl -- confirm against asm_init().
15 &asm_init($ARGV[0]);
16
# Any argument containing -DOPENSSL_IA32_SSE2 enables the SSE2/CLFLUSH
# dependent code paths generated further down.
17 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
18
# OPENSSL_ia32_cpuid(ptr): probe the processor with CPUID and return the
# adjusted leaf-1 feature words, eax taken from EDX and edx taken from ECX.
# The dword at offset 8 of the buffer passed as first argument is cleared up
# front and, when leaf 7 is available, receives that leaf's EBX value.
# If CPUID turns out to be unavailable, eax and edx are both left at zero.
19 &function_begin("OPENSSL_ia32_cpuid");
           # Try to flip EFLAGS bit 21; ecx ends up holding the bits that
           # actually changed.  If bit 21 did not change, skip to "nocpuid".
20         &xor    ("edx","edx");
21         &pushf  ();
22         &pop    ("eax");
23         &mov    ("ecx","eax");
24         &xor    ("eax",1<<21);
25         &push   ("eax");
26         &popf   ();
27         &pushf  ();
28         &pop    ("eax");
29         &xor    ("ecx","eax");
30         &xor    ("eax","eax");
31         &mov    ("esi",&wparam(0));
32         &mov    (&DWP(8,"esi"),"eax");  # clear extended feature flags
33         &bt     ("ecx",21);
34         &jnc    (&label("nocpuid"));
35         &cpuid  ();                     # eax is 0 here: vendor string query
36         &mov    ("edi","eax");          # max value for standard query level
37
           # ebp accumulates "not equal" results for the three vendor dwords.
38         &xor    ("eax","eax");
39         &cmp    ("ebx",0x756e6547);     # "Genu"
40         &setne  (&LB("eax"));
41         &mov    ("ebp","eax");
42         &cmp    ("edx",0x49656e69);     # "ineI"
43         &setne  (&LB("eax"));
44         &or     ("ebp","eax");
45         &cmp    ("ecx",0x6c65746e);     # "ntel"
46         &setne  (&LB("eax"));
47         &or     ("ebp","eax");          # 0 indicates Intel CPU
48         &jz     (&label("intel"));
49
           # Same comparison against the AMD vendor string; esi is reused as
           # scratch from here on (the buffer pointer is reloaded later).
50         &cmp    ("ebx",0x68747541);     # "Auth"
51         &setne  (&LB("eax"));
52         &mov    ("esi","eax");
53         &cmp    ("edx",0x69746E65);     # "enti"
54         &setne  (&LB("eax"));
55         &or     ("esi","eax");
56         &cmp    ("ecx",0x444D4163);     # "cAMD"
57         &setne  (&LB("eax"));
58         &or     ("esi","eax");          # 0 indicates AMD CPU
59         &jnz    (&label("intel"));      # neither vendor: take the common path
60
61         # AMD specific
62         &mov    ("eax",0x80000000);
63         &cpuid  ();
64         &cmp    ("eax",0x80000001);
65         &jb     (&label("intel"));
66         &mov    ("esi","eax");          # remember max extended query level
67         &mov    ("eax",0x80000001);
68         &cpuid  ();
69         &or     ("ebp","ecx");
70         &and    ("ebp",1<<11|1);        # isolate XOP bit
71         &cmp    ("esi",0x80000008);
72         &jb     (&label("intel"));
73
74         &mov    ("eax",0x80000008);
75         &cpuid  ();
76         &movz   ("esi",&LB("ecx"));     # number of cores - 1
77         &inc    ("esi");                # number of cores
78
           # Leaf 1: keep bit 28 of edx only when the count taken from
           # ebx[23:16] exceeds the number of cores computed above.
79         &mov    ("eax",1);
80         &xor    ("ecx","ecx");
81         &cpuid  ();
82         &bt     ("edx",28);
83         &jnc    (&label("generic"));
84         &shr    ("ebx",16);
85         &and    ("ebx",0xff);
86         &cmp    ("ebx","esi");
87         &ja     (&label("generic"));
88         &and    ("edx",0xefffffff);     # clear hyper-threading bit
89         &jmp    (&label("generic"));
90
91 &set_label("intel");
92         &cmp    ("edi",4);              # is query level 4 supported?
93         &mov    ("esi",-1);             # mov leaves the flags of the cmp intact
94         &jb     (&label("nocacheinfo"));
95
96         &mov    ("eax",4);
97         &mov    ("ecx",0);              # query L1D
98         &cpuid  ();
99         &mov    ("esi","eax");
100         &shr    ("esi",14);
101         &and    ("esi",0xfff);          # number of cores -1 per L1D
102
103 &set_label("nocacheinfo");
104         &mov    ("eax",1);
105         &xor    ("ecx","ecx");
106         &cpuid  ();
107         &and    ("edx",0xbfefffff);     # force reserved bits #20, #30 to 0
108         &cmp    ("ebp",0);              # ebp is 0 only for an Intel vendor string
109         &jne    (&label("notintel"));
110         &or     ("edx",1<<30);          # set reserved bit#30 on Intel CPUs
111         &and    (&HB("eax"),15);        # family ID
112         &cmp    (&HB("eax"),15);        # P4?
113         &jne    (&label("notintel"));
114         &or     ("edx",1<<20);          # set reserved bit#20 to engage RC4_CHAR
115 &set_label("notintel");
116         &bt     ("edx",28);             # test hyper-threading bit
117         &jnc    (&label("generic"));
118         &and    ("edx",0xefffffff);
119         &cmp    ("esi",0);              # esi == 0: leave the bit cleared
120         &je     (&label("generic"));
121
            # Otherwise restore the bit and keep it only if the low byte of
            # ebx>>16 is greater than 1.
122         &or     ("edx",0x10000000);
123         &shr    ("ebx",16);
124         &cmp    (&LB("ebx"),1);
125         &ja     (&label("generic"));
126         &and    ("edx",0xefffffff);     # clear hyper-threading bit if not
127
128 &set_label("generic");
129         &and    ("ebp",1<<11);          # isolate AMD XOP flag
130         &and    ("ecx",0xfffff7ff);     # force 11th bit to 0
131         &mov    ("esi","edx");          # %ebp:%esi is copy of %ecx:%edx
132         &or     ("ebp","ecx");          # merge AMD XOP flag
133
134         &cmp    ("edi",7);              # is query level 7 supported?
135         &mov    ("edi",&wparam(0));     # reload buffer pointer; flags untouched
136         &jb     (&label("no_extended_info"));
137         &mov    ("eax",7);
138         &xor    ("ecx","ecx");
139         &cpuid  ();
140         &mov    (&DWP(8,"edi"),"ebx");  # save extended feature flag
141 &set_label("no_extended_info");
142
            # With OSXSAVE set, read the xgetbv result for ecx=0 and look at
            # bits 1 and 2: both set -> keep everything; only bit 1 set ->
            # drop the AVX-class flags; otherwise also drop the flags handled
            # under "clear_xmm", which then falls through into "clear_avx".
143         &bt     ("ebp",27);             # check OSXSAVE bit
144         &jnc    (&label("clear_avx"));
145         &xor    ("ecx","ecx");
146         &data_byte(0x0f,0x01,0xd0);     # xgetbv
147         &and    ("eax",6);
148         &cmp    ("eax",6);
149         &je     (&label("done"));
150         &cmp    ("eax",2);
151         &je     (&label("clear_avx"));
152 &set_label("clear_xmm");
153         &and    ("ebp",0xfdfffffd);     # clear AESNI and PCLMULQDQ bits
154         &and    ("esi",0xfeffffff);     # clear FXSR
155 &set_label("clear_avx");
156         &and    ("ebp",0xefffe7ff);     # clear AVX, FMA and AMD XOP bits
157         &and    (&DWP(8,"edi"),0xffffffdf);     # clear AVX2
158 &set_label("done");
159         &mov    ("eax","esi");          # return value, low half
160         &mov    ("edx","ebp");          # return value, high half
161 &set_label("nocpuid");
162 &function_end("OPENSSL_ia32_cpuid");
163
# OPENSSL_ia32cap_P is referenced by several routines below but declared
# as an external label here rather than defined in this file.
164 &external_label("OPENSSL_ia32cap_P");
165
# OPENSSL_rdtsc: return the time-stamp counter in edx:eax, or zero in both
# registers when bit 4 of the first OPENSSL_ia32cap_P word is clear.
166 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
167         &xor    ("eax","eax");          # default result: 0
168         &xor    ("edx","edx");
169         &picmeup("ecx","OPENSSL_ia32cap_P");
170         &bt     (&DWP(0,"ecx"),4);      # bit 4 gates the use of rdtsc
171         &jnc    (&label("notsc"));
172         &rdtsc  ();
173 &set_label("notsc");
174         &ret    ();
175 &function_end_B("OPENSSL_rdtsc");
176
177 # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
178 # but it's safe to call it on any [supported] 32-bit platform...
179 # Just check for [non-]zero return value...
# OPENSSL_instrument_halt: return in edx:eax the time-stamp counter delta
# measured across a single halt instruction.  Returns zero in both registers
# when any of the three precondition checks below fails.
180 &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
181         &picmeup("ecx","OPENSSL_ia32cap_P");
182         &bt     (&DWP(0,"ecx"),4);
183         &jnc    (&label("nohalt"));     # no TSC
184
185         &data_word(0x9058900e);         # push %cs; pop %eax
186         &and    ("eax",3);              # low two bits of the %cs selector
187         &jnz    (&label("nohalt"));     # not enough privileges
188
189         &pushf  ();
190         &pop    ("eax");
191         &bt     ("eax",9);              # EFLAGS bit 9
192         &jnc    (&label("nohalt"));     # interrupts are disabled
193
194         &rdtsc  ();
195         &push   ("edx");                # save starting tick on the stack
196         &push   ("eax");
197         &halt   ();
198         &rdtsc  ();
199
200         &sub    ("eax",&DWP(0,"esp"));  # 64-bit subtract: end - start
201         &sbb    ("edx",&DWP(4,"esp"));
202         &add    ("esp",8);              # drop the saved starting tick
203         &ret    ();
204
205 &set_label("nohalt");
206         &xor    ("eax","eax");
207         &xor    ("edx","edx");
208         &ret    ();
209 &function_end_B("OPENSSL_instrument_halt");
210
211 # Essentially there is only one use for this function. Under DJGPP:
212 #
213 #       #include <go32.h>
214 #       ...
215 #       i=OPENSSL_far_spin(_dos_ds,0x46c);
216 #       ...
217 # to obtain the number of spins till closest timer interrupt.
218
# OPENSSL_far_spin(selector, offset): load the first argument into %ds, then
# count in eax how many loop iterations pass until the dword at the address
# given by the second argument changes.  Returns zero in eax and edx without
# spinning when EFLAGS bit 9 is clear.
219 &function_begin_B("OPENSSL_far_spin");
220         &pushf  ();
221         &pop    ("eax");
222         &bt     ("eax",9);
223         &jnc    (&label("nospin"));     # interrupts are disabled
224
225         &mov    ("eax",&DWP(4,"esp"));  # 1st argument, goes into %ds below
226         &mov    ("ecx",&DWP(8,"esp"));  # 2nd argument, address to watch
227         &data_word (0x90d88e1e);        # push %ds, mov %eax,%ds
228         &xor    ("eax","eax");          # spin counter
229         &mov    ("edx",&DWP(0,"ecx"));  # initial value to compare against
230         &jmp    (&label("spin"));
231
232         &align  (16);
233 &set_label("spin");
234         &inc    ("eax");
235         &cmp    ("edx",&DWP(0,"ecx"));
236         &je     (&label("spin"));       # keep spinning while unchanged
237
238         &data_word (0x1f909090);        # pop   %ds
239         &ret    ();
240
241 &set_label("nospin");
242         &xor    ("eax","eax");
243         &xor    ("edx","edx");
244         &ret    ();
245 &function_end_B("OPENSSL_far_spin");
246
# OPENSSL_wipe_cpu: clear eax and edx, and, depending on the capability bits
# in the first OPENSSL_ia32cap_P word, zero xmm0-xmm7 (only when generated
# with SSE2 support) and reset the x87/MMX register bank.  Returns the
# address esp+4 in eax.
247 &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
248         &xor    ("eax","eax");
249         &xor    ("edx","edx");
250         &picmeup("ecx","OPENSSL_ia32cap_P");
251         &mov    ("ecx",&DWP(0,"ecx"));  # ecx now holds the capability bits themselves
            # Test the register, not memory: ecx is a value at this point, so
            # using it as an address would read an arbitrary location.  Bit 0
            # is the x87 FPU flag of the CPUID leaf-1 EDX word stored here.
252         &bt     ("ecx",0);
253         &jnc    (&label("no_x87"));
254         if ($sse2) {
255                 &and    ("ecx",1<<26|1<<24);    # check SSE2 and FXSR bits
256                 &cmp    ("ecx",1<<26|1<<24);
257                 &jne    (&label("no_sse2"));
258                 &pxor   ("xmm0","xmm0");
259                 &pxor   ("xmm1","xmm1");
260                 &pxor   ("xmm2","xmm2");
261                 &pxor   ("xmm3","xmm3");
262                 &pxor   ("xmm4","xmm4");
263                 &pxor   ("xmm5","xmm5");
264                 &pxor   ("xmm6","xmm6");
265                 &pxor   ("xmm7","xmm7");
266         &set_label("no_sse2");
267         }
268         # just a bunch of fldz to zap the fp/mm bank followed by finit...
269         &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
270 &set_label("no_x87");
271         &lea    ("eax",&DWP(4,"esp"));  # return value: esp+4
272         &ret    ();
273 &function_end_B("OPENSSL_wipe_cpu");
274
# OPENSSL_atomic_add(ptr, amount): add the second argument to the dword the
# first argument points at, retrying a lock cmpxchg until it succeeds, and
# return the resulting sum in eax.
275 &function_begin_B("OPENSSL_atomic_add");
276         &mov    ("edx",&DWP(4,"esp"));  # fetch the pointer, 1st arg
277         &mov    ("ecx",&DWP(8,"esp"));  # fetch the increment, 2nd arg
278         &push   ("ebx");
279         &nop    ();
280         &mov    ("eax",&DWP(0,"edx"));  # current value, used as the compare operand
281 &set_label("spin");
282         &lea    ("ebx",&DWP(0,"eax","ecx"));    # ebx = eax + ecx, the candidate new value
283         &nop    ();
284         &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx)     # %eax is involved and is always reloaded
285         &jne    (&label("spin"));       # exchange failed: retry with the reloaded eax
286         &mov    ("eax","ebx");  # OpenSSL expects the new value
287         &pop    ("ebx");
288         &ret    ();
289 &function_end_B("OPENSSL_atomic_add");
290
# OPENSSL_cleanse(ptr, len): store zero bytes over len bytes starting at ptr.
# Lengths below 7 are handled one byte at a time.  Longer buffers are first
# advanced byte-wise until the pointer is a multiple of 4, then cleared one
# dword at a time, and any remaining bytes go through the byte loop.
291 &function_begin_B("OPENSSL_cleanse");
292         &mov    ("edx",&wparam(0));     # destination pointer
293         &mov    ("ecx",&wparam(1));     # byte count
294         &xor    ("eax","eax");          # the zero that gets stored
295         &cmp    ("ecx",7);
296         &jae    (&label("lot"));
297         &cmp    ("ecx",0);
298         &je     (&label("ret"));        # nothing to do for a zero length
299 &set_label("little");
300         &mov    (&BP(0,"edx"),"al");
301         &sub    ("ecx",1);
302         &lea    ("edx",&DWP(1,"edx"));  # lea advances without touching the flags of sub
303         &jnz    (&label("little"));
304 &set_label("ret");
305         &ret    ();
306
307 &set_label("lot",16);
308         &test   ("edx",3);              # pointer already a multiple of 4?
309         &jz     (&label("aligned"));
310         &mov    (&BP(0,"edx"),"al");
311         &lea    ("ecx",&DWP(-1,"ecx"));
312         &lea    ("edx",&DWP(1,"edx"));
313         &jmp    (&label("lot"));
314 &set_label("aligned");
315         &mov    (&DWP(0,"edx"),"eax");
316         &lea    ("ecx",&DWP(-4,"ecx"));
317         &test   ("ecx",-4);             # non-zero while at least 4 bytes remain
318         &lea    ("edx",&DWP(4,"edx"));
319         &jnz    (&label("aligned"));
320         &cmp    ("ecx",0);
321         &jne    (&label("little"));     # 1..3 bytes left over
322         &ret    ();
323 &function_end_B("OPENSSL_cleanse");
324
# CRYPTO_memcmp(a, b, len): return 0 in eax when the two len-byte buffers
# are identical (or len is 0) and 1 otherwise.  The loop has no early exit
# on a mismatch: every byte pair is XORed and OR-accumulated.
325 &function_begin_B("CRYPTO_memcmp");
326         &push   ("esi");
327         &push   ("edi");
328         &mov    ("esi",&wparam(0));
329         &mov    ("edi",&wparam(1));
330         &mov    ("ecx",&wparam(2));
331         &xor    ("eax","eax");          # accumulator of differences
332         &xor    ("edx","edx");
333         &cmp    ("ecx",0);
334         &je     (&label("no_data"));
335 &set_label("loop");
336         &mov    ("dl",&BP(0,"esi"));
337         &lea    ("esi",&DWP(1,"esi"));
338         &xor    ("dl",&BP(0,"edi"));    # non-zero iff the two bytes differ
339         &lea    ("edi",&DWP(1,"edi"));
340         &or     ("al","dl");
341         &dec    ("ecx");
342         &jnz    (&label("loop"));
343         &neg    ("eax");                # sign bit set iff the accumulator was non-zero
344         &shr    ("eax",31);             # reduce to 0 or 1
345 &set_label("no_data");
346         &pop    ("edi");
347         &pop    ("esi");
348         &ret    ();
349 &function_end_B("CRYPTO_memcmp");
350 {
# Register assignments shared by OPENSSL_instrument_bus and
# OPENSSL_instrument_bus2 below.
351 my $lasttick = "esi";   # low 32 bits of the previous rdtsc reading
352 my $lastdiff = "ebx";   # previous tick difference
353 my $out = "edi";        # current output slot (1st argument)
354 my $cnt = "ecx";        # remaining slot count (2nd argument)
355 my $max = "ebp";        # remaining probe budget (3rd argument, bus2 only)
356
# OPENSSL_instrument_bus(out, cnt): for each of cnt dword slots starting at
# out, flush the slot's cache line and lock-add the low 32 bits of the tick
# difference since the previous rdtsc reading into it.  Returns cnt, or 0
# when capability bit 4 or 19 is clear.  Without SSE2 support at generation
# time the whole function reduces to "return 0".
357 &function_begin("OPENSSL_instrument_bus");
358     &mov        ("eax",0);
359     if ($sse2) {
360         &picmeup("edx","OPENSSL_ia32cap_P");
361         &bt     (&DWP(0,"edx"),4);
362         &jnc    (&label("nogo"));       # no TSC
363         &bt     (&DWP(0,"edx"),19);
364         &jnc    (&label("nogo"));       # no CLFLUSH
365
366         &mov    ($out,&wparam(0));      # load arguments
367         &mov    ($cnt,&wparam(1));
368
369         # collect 1st tick
370         &rdtsc  ();
371         &mov    ($lasttick,"eax");      # lasttick = tick
372         &mov    ($lastdiff,0);          # lastdiff = 0
373         &clflush(&DWP(0,$out));
374         &data_byte(0xf0);               # lock
375         &add    (&DWP(0,$out),$lastdiff);
376         &jmp    (&label("loop"));
377
378 &set_label("loop",16);
379         &rdtsc  ();
380         &mov    ("edx","eax");          # put aside tick (yes, I neglect edx)
381         &sub    ("eax",$lasttick);      # diff
382         &mov    ($lasttick,"edx");      # lasttick = tick
383         &mov    ($lastdiff,"eax");      # lastdiff = diff
384         &clflush(&DWP(0,$out));
385         &data_byte(0xf0);               # lock
386         &add    (&DWP(0,$out),"eax");   # accumulate diff
387         &lea    ($out,&DWP(4,$out));    # ++$out
388         &sub    ($cnt,1);               # --$cnt
389         &jnz    (&label("loop"));
390
391         &mov    ("eax",&wparam(1));     # return the slot count that was requested
392 &set_label("nogo");
393     }
394 &function_end("OPENSSL_instrument_bus");
395
# OPENSSL_instrument_bus2(out, cnt, max): like OPENSSL_instrument_bus, but
# the output pointer advances (and cnt decreases) only when a tick
# difference differs from the previous one, and at most max probes are made.
# Returns the number of slots consumed (original cnt minus what is left), or
# 0 when capability bit 4 or 19 is clear.  Without SSE2 support at
# generation time the whole function reduces to "return 0".
396 &function_begin("OPENSSL_instrument_bus2");
397     &mov        ("eax",0);
398     if ($sse2) {
399         &picmeup("edx","OPENSSL_ia32cap_P");
400         &bt     (&DWP(0,"edx"),4);
401         &jnc    (&label("nogo"));       # no TSC
402         &bt     (&DWP(0,"edx"),19);
403         &jnc    (&label("nogo"));       # no CLFLUSH
404
405         &mov    ($out,&wparam(0));      # load arguments
406         &mov    ($cnt,&wparam(1));
407         &mov    ($max,&wparam(2));
408
409         &rdtsc  ();                     # collect 1st tick
410         &mov    ($lasttick,"eax");      # lasttick = tick
411         &mov    ($lastdiff,0);          # lastdiff = 0
412
413         &clflush(&DWP(0,$out));
414         &data_byte(0xf0);               # lock
415         &add    (&DWP(0,$out),$lastdiff);
416
417         &rdtsc  ();                     # collect 1st diff
418         &mov    ("edx","eax");          # put aside tick (yes, I neglect edx)
419         &sub    ("eax",$lasttick);      # diff
420         &mov    ($lasttick,"edx");      # lasttick = tick
421         &mov    ($lastdiff,"eax");      # lastdiff = diff
422         &jmp    (&label("loop2"));
423
424 &set_label("loop2",16);
425         &clflush(&DWP(0,$out));
426         &data_byte(0xf0);               # lock
427         &add    (&DWP(0,$out),"eax");   # accumulate diff
428
429         &sub    ($max,1);
430         &jz     (&label("done2"));      # probe budget exhausted
431
432         &rdtsc  ();
433         &mov    ("edx","eax");          # put aside tick (yes, I neglect edx)
434         &sub    ("eax",$lasttick);      # diff
435         &mov    ($lasttick,"edx");      # lasttick = tick
436         &cmp    ("eax",$lastdiff);
437         &mov    ($lastdiff,"eax");      # lastdiff = diff
438         &mov    ("edx",0);              # mov keeps the flags of the cmp above
439         &setne  ("dl");                 # edx = 1 if the diff changed, else 0
440         &sub    ($cnt,"edx");           # conditional --$cnt
441         &lea    ($out,&DWP(0,$out,"edx",4));    # conditional ++$out
442         &jnz    (&label("loop2"));      # flags still from the sub: loop until $cnt hits 0
443
444 &set_label("done2");
445         &mov    ("eax",&wparam(1));
446         &sub    ("eax",$cnt);           # return original cnt minus remaining cnt
447 &set_label("nogo");
448     }
449 &function_end("OPENSSL_instrument_bus2");
450 }
451
# gen_random($rdop): emit the function OPENSSL_ia32_${rdop}_bytes(buf, len),
# where $rdop names the instruction used to obtain each 32-bit value.  The
# generated function fills buf with up to len bytes and returns in eax the
# number of bytes actually stored.  Each value is attempted up to 8 times;
# if none of the attempts sets the carry flag the function stops early.
452 sub gen_random {
453 my $rdop = shift;
454 &function_begin_B("OPENSSL_ia32_${rdop}_bytes");
455         &push   ("edi");
456         &push   ("ebx");
457         &xor    ("eax","eax");          # return value
458         &mov    ("edi",&wparam(0));     # output pointer
459         &mov    ("ebx",&wparam(1));     # bytes still wanted
460
461         &cmp    ("ebx",0);
462         &je     (&label("done"));
463
464         &mov    ("ecx",8);              # retry budget for one value
465 &set_label("loop");
466         &${rdop}("edx");
467         &jc     (&label("break"));      # carry set: edx holds a usable value
468         &loop   (&label("loop"));
469         &jmp    (&label("done"));       # all 8 attempts failed
470
471 &set_label("break",16);
472         &cmp    ("ebx",4);
473         &jb     (&label("tail"));       # fewer than 4 bytes wanted: store bytewise
474         &mov    (&DWP(0,"edi"),"edx");
475         &lea    ("edi",&DWP(4,"edi"));
476         &add    ("eax",4);
477         &sub    ("ebx",4);
478         &jz     (&label("done"));
479         &mov    ("ecx",8);              # fresh retry budget for the next value
480         &jmp    (&label("loop"));
481
482 &set_label("tail",16);
483         &mov    (&BP(0,"edi"),"dl");
484         &lea    ("edi",&DWP(1,"edi"));
485         &inc    ("eax");
486         &shr    ("edx",8);              # move the next byte of the value into dl
487         &dec    ("ebx");
488         &jnz    (&label("tail"));
489
490 &set_label("done");
491         &xor    ("edx","edx");          # Clear random value from registers
492         &pop    ("ebx");
493         &pop    ("edi");
494         &ret    ();
495 &function_end_B("OPENSSL_ia32_${rdop}_bytes");
496 }
# Emit OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes.
497 &gen_random("rdrand");
498 &gen_random("rdseed");
499
# NOTE(review): initseg/hidden are implemented in x86asm.pl; presumably they
# register OPENSSL_cpuid_setup as an initializer and mark both symbols as
# hidden -- confirm against the perlasm module.
500 &initseg("OPENSSL_cpuid_setup");
501
502 &hidden("OPENSSL_cpuid_setup");
503 &hidden("OPENSSL_ia32cap_P");
504
505 &asm_finish();
506
# Fail loudly if the generated output could not be completely written out.
507 close STDOUT or die "error closing STDOUT: $!";