Leave the decision to call/implement bn_sqr_mont to assembler developer.
[openssl.git] / crypto / x86cpuid.pl
1 #!/usr/bin/env perl
2
3 push(@INC,"perlasm");                 # let the require below find modules in the perlasm directory
4 require "x86asm.pl";                  # presumably supplies the &asm_init/&function_begin/&mov... helpers used below
5
6 &asm_init($ARGV[0],"x86cpuid");       # hand the first command-line argument and the module name to the assembler back end
7
8 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }   # $sse2 gates the SSE2 register wipe in OPENSSL_wipe_cpu
9
10 &function_begin("OPENSSL_ia32_cpuid");       # probe for CPUID; returns eax = edx result of CPUID leaf 1 (0 if CPUID is unavailable)
11         &xor    ("edx","edx");       # default result when CPUID cannot be executed
12         &pushf  ();
13         &pop    ("eax");             # eax = current EFLAGS
14         &mov    ("ecx","eax");       # keep a copy of the original flags
15         &xor    ("eax",1<<21);       # flip bit 21 of the flags image
16         &push   ("eax");
17         &popf   ();                  # try to write the modified image back
18         &pushf  ();
19         &pop    ("eax");             # re-read the flags
20         &xor    ("ecx","eax");       # ecx = bits that actually changed
21         &bt     ("ecx",21);
22         &jnc    (&label("nocpuid")); # bit 21 could not be toggled, skip CPUID
23         &mov    ("eax",1);           # request leaf 1
24         &cpuid  ();
25 &set_label("nocpuid");
26         &mov    ("eax","edx");       # first return word: edx
27         &mov    ("edx","ecx");       # second return word: ecx
28 &function_end("OPENSSL_ia32_cpuid");
29
30 &external_label("OPENSSL_ia32cap_P");        # capability word referenced by the functions below; not defined in this file
31
32 &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");        # returns the time-stamp counter in edx:eax, or 0 when bit 4 of the capability word is clear
33         &xor    ("eax","eax");       # preset a zero result
34         &xor    ("edx","edx");
35         &picmeup("ecx","OPENSSL_ia32cap_P");    # ecx = address of the capability word
36         &bt     (&DWP(0,"ecx"),4);   # test bit 4 of the dword at [ecx]
37         &jnc    (&label("notsc"));   # bit clear: keep the zero result
38         &rdtsc  ();
39 &set_label("notsc");
40         &ret    ();
41 &function_end_B("OPENSSL_rdtsc");
42
43 # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
44 # but it's safe to call it on any [supported] 32-bit platform...
45 # Just check for [non-]zero return value...
46 &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");      # returns in edx:eax the TSC ticks spent in one halt, or 0 when it cannot be measured
47         &picmeup("ecx","OPENSSL_ia32cap_P");    # ecx = address of the capability word
48         &bt     (&DWP(0,"ecx"),4);
49         &jnc    (&label("nohalt"));     # no TSC
50
51         &data_word(0x9058900e);         # push %cs; pop %eax
52         &and    ("eax",3);              # low two bits of the code segment selector
53         &jnz    (&label("nohalt"));     # not enough privileges
54
55         &pushf  ();
56         &pop    ("eax");                # eax = EFLAGS (statement now terminated explicitly instead of being AND-ed with the next call)
57         &bt     ("eax",9);
58         &jnc    (&label("nohalt"));     # interrupts are disabled
59
60         &rdtsc  ();
61         &push   ("edx");                # save the starting counter, high half at 4(esp)
62         &push   ("eax");                # ... low half at 0(esp)
63         &halt   ();
64         &rdtsc  ();
65
66         &sub    ("eax",&DWP(0,"esp"));  # edx:eax = end - start (64-bit subtract with borrow)
67         &sbb    ("edx",&DWP(4,"esp"));
68         &add    ("esp",8);              # drop the saved start value
69         &ret    ();
70
71 &set_label("nohalt");
72         &xor    ("eax","eax");          # report 0: measurement not possible
73         &xor    ("edx","edx");
74         &ret    ();
75 &function_end_B("OPENSSL_instrument_halt");
76
77 # Essentially there is only one use for this function. Under DJGPP:
78 #
79 #       #include <go32.h>
80 #       ...
81 #       i=OPENSSL_far_spin(_dos_ds,0x46c);
82 #       ...
83 # to obtain the number of spins till closest timer interrupt.
84
85 &function_begin_B("OPENSSL_far_spin");       # args: 4(esp) = segment selector, 8(esp) = offset; returns in eax the loop count until the dword there changes (0 if interrupts are off)
86         &pushf  ();
87         &pop    ("eax");                # eax = EFLAGS (statement now terminated explicitly instead of being AND-ed with the next call)
88         &bt     ("eax",9);
89         &jnc    (&label("nospin"));     # interrupts are disabled
90
91         &mov    ("eax",&DWP(4,"esp"));  # selector, 1st arg
92         &mov    ("ecx",&DWP(8,"esp"));  # offset, 2nd arg
93         &data_word (0x90d88e1e);        # push %ds, mov %eax,%ds
94         &xor    ("eax","eax");          # spin counter starts at 0
95         &mov    ("edx",&DWP(0,"ecx"));  # snapshot of the watched dword
96         &jmp    (&label("spin"));
97
98         &align  (16);
99 &set_label("spin");
100         &inc    ("eax");
101         &cmp    ("edx",&DWP(0,"ecx"));  # has the watched dword changed yet?
102         &je     (&label("spin"));
103
104         &data_word (0x1f909090);        # pop   %ds
105         &ret    ();
106
107 &set_label("nospin");
108         &xor    ("eax","eax");          # report 0: nothing would ever update the watched dword
109         &xor    ("edx","edx");
110         &ret    ();
111 &function_end_B("OPENSSL_far_spin");
112
113 &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");    # zero eax/edx, the x87/MMX bank and (when built with SSE2) xmm0-7; returns esp+4 in eax
114         &xor    ("eax","eax");
115         &xor    ("edx","edx");
116         &picmeup("ecx","OPENSSL_ia32cap_P");    # ecx = address of the capability word
117         &mov    ("ecx",&DWP(0,"ecx"));          # ecx = the capability bits themselves
118         &bt     ("ecx",1);                      # test the loaded value; the old memory operand dereferenced the bits as a pointer. NOTE(review): CPUID.1:EDX reports the FPU in bit 0 - confirm bit 1 is intended
119         &jnc    (&label("no_x87"));
120         if ($sse2) {
121                 &bt     ("ecx",26);             # same fix: test the register, not memory at [ecx]
122                 &jnc    (&label("no_sse2"));
123                 &pxor   ("xmm0","xmm0");
124                 &pxor   ("xmm1","xmm1");
125                 &pxor   ("xmm2","xmm2");
126                 &pxor   ("xmm3","xmm3");
127                 &pxor   ("xmm4","xmm4");
128                 &pxor   ("xmm5","xmm5");
129                 &pxor   ("xmm6","xmm6");
130                 &pxor   ("xmm7","xmm7");
131         &set_label("no_sse2");
132         }
133         # just a bunch of fldz to zap the fp/mm bank followed by finit...
134         &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
135 &set_label("no_x87");
136         &lea    ("eax",&DWP(4,"esp"));          # return value: address just above the return address
137         &ret    ();
138 &function_end_B("OPENSSL_wipe_cpu");
139
140 &function_begin_B("OPENSSL_atomic_add");    # atomically adds the 2nd arg to the dword the 1st arg points at; returns the new value in eax
141         &mov    ("edx",&DWP(4,"esp"));  # fetch the pointer, 1st arg
142         &mov    ("ecx",&DWP(8,"esp"));  # fetch the increment, 2nd arg
143         &push   ("ebx");                # ebx is used as scratch and restored before return
144         &nop    ();
145         &mov    ("eax",&DWP(0,"edx"));  # initial guess of the current value
146 &set_label("spin");
147         &lea    ("ebx",&DWP(0,"eax","ecx"));    # ebx = current value + increment
148         &nop    ();
149         &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx)     # %eax is involved and is always reloaded
150         &jne    (&label("spin"));       # the value changed underneath us, retry with the reloaded %eax
151         &mov    ("eax","ebx");  # OpenSSL expects the new value
152         &pop    ("ebx");
153         &ret    ();
154 &function_end_B("OPENSSL_atomic_add");
155
156 &initseg("OPENSSL_cpuid_setup");    # presumably registers OPENSSL_cpuid_setup (defined outside this file) as an initializer - confirm against x86asm.pl
157
158 &asm_finish();                      # last call into the assembler back end; presumably flushes the generated output