72010c9cbe020c018ce1795158210184ca3f6e36
[openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
# Command line: [flavour] output
#   flavour - assembler flavour, forwarded to the perlasm translator and
#             matched in this script against nasm/masm/mingw64 and elf.
#   output  - output file name, forwarded to the translator.
# A single argument containing a dot is taken to be the output file name.
$flavour = shift;
$output  = shift;
if (defined($flavour) && $flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are used for the nasm/masm/mingw64 flavours, or
# when the output file name ends in ".asm".
$win64 = 0;
$win64 = 1 if ((defined($flavour) && $flavour =~ /[nm]asm|mingw64/) ||
               (defined($output)  && $output  =~ /\.asm$/));

# Directory part of this script's own path (with its trailing separator).
# Only trust the capture when the match succeeded; otherwise fall back to
# an empty prefix, i.e. a path relative to the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Everything printed from here on is piped through the perlasm translator.
{
    my $xlate = "${dir}perlasm/x86_64-xlate.pl";

    # Fail early with a clear message instead of letting the child
    # interpreter complain while this script carries on printing.
    -f $xlate or die "can't locate $xlate\n";

    # Quote the interpreter, translator and output paths so that paths
    # containing whitespace survive the shell.  Absent arguments are left
    # out, so the translator sees the same argument list as before.
    my $cmd = "\"$^X\" \"$xlate\"";
    $cmd .= " $flavour"    if (defined($flavour) && length($flavour));
    $cmd .= " \"$output\"" if (defined($output)  && length($output));

    open STDOUT, "| $cmd" or die "can't call $xlate: $!\n";
}

# Registers holding the first and second integer arguments; interpolated
# into the assembly templates below as $arg1 and $arg2.
if ($win64)     { $arg1="%rcx"; $arg2="%rdx"; }
else            { $arg1="%rdi"; $arg2="%rsi"; }
# Place a call to OPENSSL_cpuid_setup in the .init section.
print <<___;
.extern         OPENSSL_cpuid_setup
.section        .init
        call    OPENSSL_cpuid_setup

___

# ELF flavours additionally get an empty .note.GNU-stack section.
print qq{.section .note.GNU-stack,"",\@progbits\n} if ($flavour =~ /elf/);
24
# OPENSSL_atomic_add: arg1 points to a 32-bit value, arg2 is the amount to
# add.  Retries a locked compare-and-exchange until the sum has been stored
# and returns the new value sign-extended to 64 bits.
print <<___;
.text

.globl  OPENSSL_atomic_add
.type   OPENSSL_atomic_add,\@abi-omnipotent
.align  16
OPENSSL_atomic_add:
        movl    ($arg1),%eax
.Lspin: leaq    ($arg2,%rax),%r8
        .byte   0xf0            # lock
        cmpxchgl        %r8d,($arg1)
        jne     .Lspin
        movl    %r8d,%eax
        .byte   0x48,0x98       # cltq/cdqe
        ret
.size   OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc: executes rdtsc and returns %edx:%eax combined in %rax.
print <<___;
.globl  OPENSSL_rdtsc
.type   OPENSSL_rdtsc,\@abi-omnipotent
.align  16
OPENSSL_rdtsc:
        rdtsc
        shl     \$32,%rdx
        or      %rdx,%rax
        ret
.size   OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid: returns the CPUID leaf 1 feature words in %rax, %ecx
# in the upper and %edx in the lower 32 bits, after adjusting %edx:
#   - Intel ("GenuineIntel") gets reserved bit 20 set, plus bit 30 unless
#     the family ID is 15;
#   - the hyper-threading bit (28) is cleared when the core count derived
#     from leaf 0x80000008 (AMD) or leaf 4 (others), together with the
#     logical processor count, says it should be.
# %rbx is parked in %r8 around the cpuid instructions and restored on exit.
print <<___;
.globl  OPENSSL_ia32_cpuid
.type   OPENSSL_ia32_cpuid,\@abi-omnipotent
.align  16
OPENSSL_ia32_cpuid:
        mov     %rbx,%r8

        xor     %eax,%eax
        cpuid
        mov     %eax,%r11d              # max value for standard query level

        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
        mov     %eax,%r9d
        cmp     \$0x49656e69,%edx       # "ineI"
        setne   %al
        or      %eax,%r9d
        cmp     \$0x6c65746e,%ecx       # "ntel"
        setne   %al
        or      %eax,%r9d               # 0 indicates Intel CPU
        jz      .Lintel

        cmp     \$0x68747541,%ebx       # "Auth"
        setne   %al
        mov     %eax,%r10d
        cmp     \$0x69746E65,%edx       # "enti"
        setne   %al
        or      %eax,%r10d
        cmp     \$0x444D4163,%ecx       # "cAMD"
        setne   %al
        or      %eax,%r10d              # 0 indicates AMD CPU
        jnz     .Lintel

        # AMD specific
        mov     \$0x80000000,%eax
        cpuid
        cmp     \$0x80000008,%eax
        jb      .Lintel

        mov     \$0x80000008,%eax
        cpuid
        movzb   %cl,%r10                # number of cores - 1
        inc     %r10                    # number of cores

        mov     \$1,%eax
        cpuid
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        shr     \$16,%ebx               # number of logical processors
        cmp     %r10b,%bl
        ja      .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
        jmp     .Ldone

.Lintel:
        cmp     \$4,%r11d
        mov     \$-1,%r10d
        jb      .Lnocacheinfo

        mov     \$4,%eax
        mov     \$0,%ecx                # query L1D
        cpuid
        mov     %eax,%r10d
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D

.Lnocacheinfo:
        mov     \$1,%eax
        cpuid
        cmp     \$0,%r9d
        jne     .Lnotintel
        or      \$0x00100000,%edx       # use reserved 20th bit to engage RC4_CHAR
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
        je      .Lnotintel
        or      \$0x40000000,%edx       # use reserved bit to skip unrolled loop
.Lnotintel:
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
        cmp     \$0,%r10d
        je      .Ldone

        or      \$0x10000000,%edx       # 1<<28
        shr     \$16,%ebx
        cmp     \$1,%bl                 # see if cache is shared
        ja      .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
.Ldone:
        shl     \$32,%rcx
        mov     %edx,%eax
        mov     %r8,%rbx
        or      %rcx,%rax
        ret
.size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse: stores zeros over arg2 bytes starting at arg1.  Fewer
# than 15 bytes are cleared one byte at a time; longer buffers are first
# advanced byte-wise to an 8-byte boundary, then cleared 8 bytes per store,
# and any remaining tail goes through the byte loop.
print <<___;
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
        xor     %rax,%rax
        cmp     \$15,$arg2
        jae     .Lot
        cmp     \$0,$arg2
        je      .Lret
.Little:
        mov     %al,($arg1)
        sub     \$1,$arg2
        lea     1($arg1),$arg1
        jnz     .Little
.Lret:
        ret
.align  16
.Lot:
        test    \$7,$arg1
        jz      .Laligned
        mov     %al,($arg1)
        lea     -1($arg2),$arg2
        lea     1($arg1),$arg1
        jmp     .Lot
.Laligned:
        mov     %rax,($arg1)
        lea     -8($arg2),$arg2
        test    \$-8,$arg2
        lea     8($arg1),$arg1
        jnz     .Laligned
        cmp     \$0,$arg2
        jne     .Little
        ret
.size   OPENSSL_cleanse,.-OPENSSL_cleanse
___
183
# OPENSSL_wipe_cpu: zeroes a set of XMM and general-purpose registers and
# returns the address 8 bytes above the stack pointer in %rax.  The two
# variants differ only in which registers are cleared: %xmm0-%xmm5 and six
# general-purpose registers when $win64 is set, %xmm0-%xmm15 and eight
# general-purpose registers (adding %rsi and %rdi) otherwise.
{
    my @xmm_regs = $win64 ? (0 .. 5) : (0 .. 15);
    my @gp_regs  = $win64 ? qw(rcx rdx r8 r9 r10 r11)
                          : qw(rcx rdx rsi rdi r8 r9 r10 r11);

    print <<___;
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
___
    print "        pxor    %xmm$_,%xmm$_\n" foreach (@xmm_regs);
    print "        xorq    %$_,%$_\n"       foreach (@gp_regs);
    print <<___;
        leaq    8(%rsp),%rax
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
238
# STDOUT is a pipe to the perlasm translator: closing it flushes the
# buffered assembly and waits for the child.  close() returns false when
# the flush fails or the child exits non-zero (in which case $! is 0), so
# report either condition instead of exiting successfully with broken or
# missing output.
close STDOUT
    or die "error closing pipe to x86_64-xlate.pl: "
         . ($! ? "$!" : "exit status " . ($? >> 8)) . "\n";