Update x86cpuid.pl to correctly detect shared cache and to support new
[openssl.git] / crypto / x86cpuid.pl
1 #!/usr/bin/env perl
2
3 push(@INC,"perlasm");
4 require "x86asm.pl";
5
# Start the perlasm back end: the first command-line argument is handed
# to asm_init together with the name of this module.
&asm_init($ARGV[0],"x86cpuid");

# The SSE2 register wipe in OPENSSL_wipe_cpu is generated only when
# -DOPENSSL_IA32_SSE2 appears somewhere on the command line.
$sse2=1 if (grep(/-DOPENSSL_IA32_SSE2/,@ARGV));
9
# OPENSSL_ia32_cpuid
#
# Probes the processor and hands back two words: %eax receives the
# (possibly adjusted) %edx value and %edx receives whatever %ecx holds
# once the "nocpuid" label is reached.  When bit 21 of EFLAGS cannot be
# toggled, no CPUID query is issued and %eax comes back as zero.
#
# (*)   on Core2 the value examined after the "notintel" label is set
#       to 2, denoting the fact that L2 cache is shared between cores.
&function_begin("OPENSSL_ia32_cpuid");
        &xor    ("edx","edx");          # result defaults to "nothing detected"

        # Flip bit 21 in a copy of EFLAGS, write it back, read it again;
        # ecx ends up holding the bits that actually changed.
        &pushf  ();
        &pop    ("eax");
        &mov    ("ecx","eax");
        &xor    ("eax",1<<21);
        &push   ("eax");
        &popf   ();
        &pushf  ();
        &pop    ("eax");
        &xor    ("ecx","eax");
        &bt     ("ecx",21);
        &jnc    (&label("nocpuid"));    # the bit did not toggle

        # Leaf 0: compare the vendor string with "GenuineIntel".  ebp
        # collects the setne results, so it is zero only on a full match.
        &xor    ("eax","eax");
        &cpuid  ();
        &xor    ("eax","eax");
        {
        my $seen=0;
        foreach my $part (["ebx",0x756e6547],   # "Genu"
                          ["edx",0x49656e69],   # "ineI"
                          ["ecx",0x6c65746e])   # "ntel"
                {
                &cmp    ($part->[0],$part->[1]);
                &setne  (&LB("eax"));
                # first result initialises ebp, later ones are or-ed in
                if ($seen++)    { &or   ("ebp","eax"); }
                else            { &mov  ("ebp","eax"); }
                }
        }

        # Leaf 1: feature words.
        &mov    ("eax",1);
        &cpuid  ();
        &bt     ("edx",28);             # test hyper-threading bit
        &jnc    (&label("nocpuid"));
        &cmp    ("ebp",0);
        &jne    (&label("notintel"));
        &or     ("edx",1<<20);          # use reserved bit to engage RC4_CHAR
&set_label("notintel");
        &shr    ("ebx",16);
        &cmp    (&LB("ebx"),1);         # see if cache is shared(*)
        &ja     (&label("nocpuid"));
        &and    ("edx",~(1<<28));       # value is 0 or 1: clear the
                                        # hyper-threading bit
&set_label("nocpuid");
        &mov    ("eax","edx");
        &mov    ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
53
# The routines below consult the capability word kept in
# OPENSSL_ia32cap_P, so declare it as an external symbol first.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc
#
# Returns the time-stamp counter in %edx:%eax.  Both registers are
# zeroed up front, so the result is 0 when bit 4 of OPENSSL_ia32cap_P
# is clear and the rdtsc instruction is skipped.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
        foreach my $reg ("eax","edx")
                {
                &xor    ($reg,$reg);
                }
        &picmeup("ecx","OPENSSL_ia32cap_P");
        &bt     (&DWP(0,"ecx"),4);
        &jnc    (&label("notsc"));      # bit 4 clear: keep the zero result
        &rdtsc  ();
&set_label("notsc");
        &ret    ();
&function_end_B("OPENSSL_rdtsc");
66
# OPENSSL_instrument_halt
#
# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Returns in %edx:%eax the difference between the time-stamp counter
# read after and before a halt instruction.  Zero is returned when
# bit 4 of OPENSSL_ia32cap_P is clear (no TSC), when the low two bits
# of %cs are non-zero (not enough privileges), or when bit 9 of EFLAGS
# is clear (interrupts are disabled).
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
        &picmeup("ecx","OPENSSL_ia32cap_P");
        &bt     (&DWP(0,"ecx"),4);
        &jnc    (&label("nohalt"));     # no TSC

        &data_word(0x9058900e);         # push %cs; pop %eax
        &and    ("eax",3);
        &jnz    (&label("nohalt"));     # not enough privileges

        &pushf  ();
        &pop    ("eax");                # the terminating ';' matters: without
                                        # it this line and the next one parse
                                        # as a single bitwise-and expression
        &bt     ("eax",9);
        &jnc    (&label("nohalt"));     # interrupts are disabled

        &rdtsc  ();
        &push   ("edx");                # park the starting count on the stack
        &push   ("eax");
        &halt   ();
        &rdtsc  ();

        &sub    ("eax",&DWP(0,"esp"));  # 64-bit subtract: end - start
        &sbb    ("edx",&DWP(4,"esp"));
        &add    ("esp",8);              # drop the parked count
        &ret    ();

&set_label("nohalt");
        &xor    ("eax","eax");
        &xor    ("edx","edx");
        &ret    ();
&function_end_B("OPENSSL_instrument_halt");
100
# OPENSSL_far_spin(selector, offset)
#
# Essentially there is only one use for this function. Under DJGPP:
#
#       #include <go32.h>
#       ...
#       i=OPENSSL_far_spin(_dos_ds,0x46c);
#       ...
# to obtain the number of spins till closest timer interrupt.
#
# The first argument is loaded into %ds, the second is used as the
# address of the word being watched.  %eax counts loop iterations until
# that word changes and is the return value.  Zero is returned in
# %eax (with %edx cleared) when bit 9 of EFLAGS is clear, i.e. when
# interrupts are disabled.

&function_begin_B("OPENSSL_far_spin");
        &pushf  ();
        &pop    ("eax");                # the terminating ';' matters: without
                                        # it this line and the next one parse
                                        # as a single bitwise-and expression
        &bt     ("eax",9);
        &jnc    (&label("nospin"));     # interrupts are disabled

        &mov    ("eax",&DWP(4,"esp"));  # 1st arg, goes into %ds below
        &mov    ("ecx",&DWP(8,"esp"));  # 2nd arg, address to watch
        &data_word (0x90d88e1e);        # push %ds, mov %eax,%ds
        &xor    ("eax","eax");          # spin counter
        &mov    ("edx",&DWP(0,"ecx"));  # value to wait on
        &jmp    (&label("spin"));

        &align  (16);
&set_label("spin");
        &inc    ("eax");
        &cmp    ("edx",&DWP(0,"ecx"));
        &je     (&label("spin"));       # unchanged, keep spinning

        &data_word (0x1f909090);        # pop   %ds
        &ret    ();

&set_label("nospin");
        &xor    ("eax","eax");
        &xor    ("edx","edx");
        &ret    ();
&function_end_B("OPENSSL_far_spin");
136
# OPENSSL_wipe_cpu
#
# Clears %eax, %edx and, depending on the capability word in
# OPENSSL_ia32cap_P, the x87/MMX register bank and (when this file is
# generated with -DOPENSSL_IA32_SSE2) %xmm0-%xmm7.  On return %eax holds
# %esp+4 as seen inside the routine; %edx is zero.
#
# The capability word is loaded into %ecx once and the bit tests are
# made against that register.  Testing &DWP(0,"ecx") after the load
# would treat the capability value itself as an address.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
        &xor    ("eax","eax");
        &xor    ("edx","edx");
        &picmeup("ecx","OPENSSL_ia32cap_P");
        &mov    ("ecx",&DWP(0,"ecx"));  # ecx = capability word from here on
        # NOTE(review): bit 1 is kept from the original; if the word mirrors
        # CPUID.1:EDX, x87 presence is normally reported in bit 0 -- confirm.
        &bt     ("ecx",1);
        &jnc    (&label("no_x87"));
        if ($sse2) {
                &bt     ("ecx",26);
                &jnc    (&label("no_sse2"));
                foreach my $n (0..7)
                        {
                        &pxor   ("xmm$n","xmm$n");
                        }
        &set_label("no_sse2");
        }
        # just a bunch of fldz to zap the fp/mm bank followed by finit...
        &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
        &lea    ("eax",&DWP(4,"esp"));
        &ret    ();
&function_end_B("OPENSSL_wipe_cpu");
163
# OPENSSL_atomic_add(pointer, increment)
#
# Adds the increment to the 32-bit word at pointer using a
# lock-prefixed cmpxchg retry loop and returns the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
        {
        # The hand-encoded cmpxchg below hard-wires %ebx and %edx, so
        # $sum and $ptr have to stay bound to exactly those registers.
        my ($ptr,$delta,$cur,$sum)=("edx","ecx","eax","ebx");

        &mov    ($ptr,&DWP(4,"esp"));   # fetch the pointer, 1st arg
        &mov    ($delta,&DWP(8,"esp")); # fetch the increment, 2nd arg
        &push   ($sum);
        &nop    ();
        &mov    ($cur,&DWP(0,$ptr));    # current value, first guess
&set_label("spin");
        &lea    ($sum,&DWP(0,$cur,$delta));     # candidate = current + increment
        &nop    ();
        &data_word(0x1ab10ff0);         # lock; cmpxchg %ebx,(%edx)
                                        # %eax is involved and is always reloaded
        &jne    (&label("spin"));       # somebody else got there first, retry
        &mov    ($cur,$sum);            # OpenSSL expects the new value
        &pop    ($sum);
        &ret    ();
        }
&function_end_B("OPENSSL_atomic_add");
179
# OPENSSL_indirect_call(f, ...)
#
# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#       type OPENSSL_indirect_call(void *f,...);
#       ...
#       OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)   it's designed to work even for __fastcall if number of
#       arguments is 1 or 2!
#
# The generated code copies $max argument slots to a fresh stack area
# (the first two are also left in %ecx and %edx), calls f and then
# restores %esp from %ebp, so it does not matter whether the callee
# popped its arguments or not.
&function_begin_B("OPENSSL_indirect_call");
        {
        # Both variables are real lexicals here; "my $i,$max=7" would
        # declare only $i and assign to the package global $max.
        my $max=7;              # $max has to be chosen as 4*n-1
                                # in order to preserve eventual
                                # stack alignment
        &push   ("ebp");
        &mov    ("ebp","esp");
        &sub    ("esp",$max*4);
        &mov    ("ecx",&DWP(12,"ebp"));         # 1st callee argument
        &mov    (&DWP(0,"esp"),"ecx");
        &mov    ("edx",&DWP(16,"ebp"));         # 2nd callee argument
        &mov    (&DWP(4,"esp"),"edx");
        for(my $i=2;$i<$max;$i++)
                {
                # Some copies will be redundant/bogus...
                &mov    ("eax",&DWP(12+$i*4,"ebp"));
                &mov    (&DWP(0+$i*4,"esp"),"eax");
                }
        &call_ptr       (&DWP(8,"ebp"));# make the call...
        &mov    ("esp","ebp");  # ... and just restore the stack pointer
                                # without paying attention to what we called,
                                # (__cdecl *func) or (__stdcall *one).
        &pop    ("ebp");
        &ret    ();
        }
&function_end_B("OPENSSL_indirect_call");
218
# Hand the name OPENSSL_cpuid_setup to initseg -- presumably so that the
# routine is run from the module's initialisation segment; confirm
# against x86asm.pl.
&initseg("OPENSSL_cpuid_setup");

# Nothing more to generate: let the perlasm back end finish the module.
&asm_finish();