x86_64cpuid.pl: ml64 is allergic to db on label line.
[openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
# Command line: [flavour] output
# A lone argument that contains a dot is taken to be the output file name,
# in which case the flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 register convention is selected for nasm/masm/mingw64 flavours,
# or whenever the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory this script was started from (with trailing separator), or the
# empty string when $0 carries no directory part.  The capture is only used
# when the match actually succeeded.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Everything printed from here on is piped through the perlasm translator.
# Fail loudly if the pipe cannot be set up instead of printing into nowhere.
open(STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output")
    or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";

# Registers carrying the first two integer arguments under the chosen ABI.
if ($win64)     { $arg1="%rcx"; $arg2="%rdx"; }
else            { $arg1="%rdi"; $arg2="%rsi"; }
14 print<<___;
# Plant a call to OPENSSL_cpuid_setup in the .init section.  The routine is
# defined elsewhere, hence the .extern declaration.
# NOTE(review): presumably this makes the capability probe run at load time,
# before any other code in the library - confirm for each output flavour.
.extern         OPENSSL_cpuid_setup
.section        .init
        call    OPENSSL_cpuid_setup

# Regular code follows.
.text
20
# OPENSSL_atomic_add
#   first argument  - address of a 32-bit counter
#   second argument - amount to add
#   returns         - the updated counter value, sign-extended into rax
# The addition is retried with a locked compare-and-exchange until the
# counter was not modified between the read and the write-back.
.globl  OPENSSL_atomic_add
.type   OPENSSL_atomic_add,\@abi-omnipotent
.align  16
OPENSSL_atomic_add:
        movl    ($arg1),%eax            # eax = value currently in memory
.Lretry:
        leaq    ($arg2,%rax),%r8        # r8d = observed value + addend
        .byte   0xf0                    # lock prefix for the next instruction
        cmpxchgl        %r8d,($arg1)    # store r8d if memory still equals eax
        jne     .Lretry                 # otherwise eax was refreshed, go again
        movl    %r8d,%eax               # result is the value just stored ...
        .byte   0x48,0x98               # ... widened to 64 bits (cltq/cdqe)
        ret
.size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
34
# OPENSSL_rdtsc
#   takes no arguments
#   returns the time-stamp counter as one 64-bit quantity in rax
#   modifies rdx and the flags
.globl  OPENSSL_rdtsc
.type   OPENSSL_rdtsc,\@abi-omnipotent
.align  16
OPENSSL_rdtsc:
        rdtsc                           # counter is delivered split in edx:eax
        shl     \$32,%rdx               # high half goes to bits 63..32
        or      %rdx,%rax               # rax = (high << 32) | low
        ret
.size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
44
# OPENSSL_ia32_cpuid
#   takes no arguments
#   returns rax = (ecx << 32) | edx, where ecx/edx are the CPUID leaf 1
#   feature words after the following adjustments to edx:
#     bit 28  hyper-threading flag, cleared when the core/cache topology
#             queries below suggest it is not meaningful
#     bit 20  reserved by hardware; set here on Intel CPUs to engage
#             RC4_CHAR
#     bit 30  reserved by hardware; set here on Intel CPUs whose Family ID
#             is not 15, to skip the unrolled loop
#   Bits 20 and 30 are forced to zero right after every leaf 1 query, so
#   that only this routine decides whether they end up set.
#
# Register roles:
#   r8   saved copy of rbx (cpuid overwrites rbx)
#   r9d  0 if and only if the vendor string is "GenuineIntel"
#   r10d vendor check scratch, then core count information
#   r11d highest supported standard CPUID leaf
# Modifies rax, rcx, rdx, r8-r11 and the flags; rbx is restored.
.globl  OPENSSL_ia32_cpuid
.type   OPENSSL_ia32_cpuid,\@abi-omnipotent
.align  16
OPENSSL_ia32_cpuid:
        mov     %rbx,%r8                # keep rbx, it is restored before ret

        xor     %eax,%eax
        cpuid                           # leaf 0: vendor string in ebx, edx, ecx
        mov     %eax,%r11d              # max value for standard query level

        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
        mov     %eax,%r9d
        cmp     \$0x49656e69,%edx       # "ineI"
        setne   %al
        or      %eax,%r9d
        cmp     \$0x6c65746e,%ecx       # "ntel"
        setne   %al
        or      %eax,%r9d               # 0 indicates Intel CPU
        jz      .Lintel

        cmp     \$0x68747541,%ebx       # "Auth"
        setne   %al
        mov     %eax,%r10d
        cmp     \$0x69746E65,%edx       # "enti"
        setne   %al
        or      %eax,%r10d
        cmp     \$0x444D4163,%ecx       # "cAMD"
        setne   %al
        or      %eax,%r10d              # 0 indicates AMD CPU
        jnz     .Lintel                 # neither vendor: use the generic path

        # AMD specific
        mov     \$0x80000000,%eax
        cpuid                           # eax = highest extended leaf
        cmp     \$0x80000008,%eax
        jb      .Lintel                 # no core count leaf: generic path

        mov     \$0x80000008,%eax
        cpuid
        movzb   %cl,%r10                # number of cores - 1
        inc     %r10                    # number of cores

        mov     \$1,%eax
        cpuid
        and     \$0xbfefffff,%edx       # force reserved bits 20 and 30 to 0
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        shr     \$16,%ebx               # number of logical processors
        cmp     %r10b,%bl
        ja      .Ldone                  # more logical processors than cores
        and     \$0xefffffff,%edx       # ~(1<<28)
        jmp     .Ldone

.Lintel:
        cmp     \$4,%r11d
        mov     \$-1,%r10d              # default; mov keeps the flags from cmp
        jb      .Lnocacheinfo           # leaf 4 is not available

        mov     \$4,%eax
        mov     \$0,%ecx                # query L1D
        cpuid
        mov     %eax,%r10d
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D

.Lnocacheinfo:
        mov     \$1,%eax
        cpuid
        and     \$0xbfefffff,%edx       # force reserved bits 20 and 30 to 0
        cmp     \$0,%r9d
        jne     .Lnotintel
        or      \$0x00100000,%edx       # use reserved 20th bit to engage RC4_CHAR
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
        je      .Lnotintel
        or      \$0x40000000,%edx       # use reserved bit to skip unrolled loop
.Lnotintel:
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
        cmp     \$0,%r10d
        je      .Ldone                  # r10d is 0: leave the bit cleared

        or      \$0x10000000,%edx       # 1<<28
        shr     \$16,%ebx
        cmp     \$1,%bl                 # see if cache is shared
        ja      .Ldone
        and     \$0xefffffff,%edx       # ~(1<<28)
.Ldone:
        shl     \$32,%rcx               # leaf 1 ecx becomes the upper half
        mov     %edx,%eax               # adjusted edx becomes the lower half
        mov     %r8,%rbx                # restore rbx
        or      %rcx,%rax
        ret
.size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
140
# OPENSSL_cleanse
#   first argument  - start of the buffer
#   second argument - number of bytes to overwrite
# Fills the buffer with zero bytes.  Lengths below 15 are handled one byte
# at a time.  Longer buffers are byte-filled until the pointer is a multiple
# of 8, then cleared with 8-byte stores, and whatever is left (fewer than 8
# bytes) is passed back to the byte loop.
# Modifies rax, both argument registers and the flags.
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
        xor     %rax,%rax               # rax = 0 is the fill value
        cmp     \$15,$arg2
        jae     .Lhead                  # 15 bytes or more: wide path
        cmp     \$0,$arg2
        je      .Lreturn                # empty buffer, nothing to do
.Ltail:
        mov     %al,($arg1)
        lea     1($arg1),$arg1          # advance the pointer
        sub     \$1,$arg2               # count down; sets ZF for the branch
        jnz     .Ltail
.Lreturn:
        ret
.align  16
.Lhead:
        test    \$7,$arg1
        jz      .Lwords                 # pointer is now 8-byte aligned
        mov     %al,($arg1)
        lea     1($arg1),$arg1
        lea     -1($arg2),$arg2
        jmp     .Lhead
.Lwords:
        mov     %rax,($arg1)
        lea     8($arg1),$arg1
        lea     -8($arg2),$arg2
        test    \$-8,$arg2              # any bit above the low three set?
        jnz     .Lwords                 # yes: at least 8 bytes remain
        cmp     \$0,$arg2
        jne     .Ltail                  # finish the last 1..7 bytes
        ret
.size   OPENSSL_cleanse,.-OPENSSL_cleanse
175 ___
176
# OPENSSL_wipe_cpu for every flavour except Win64.
if (!$win64) {
print <<___;
# OPENSSL_wipe_cpu
#   takes no arguments
#   zeroes xmm0-xmm15 and rcx, rdx, rsi, rdi, r8-r11
#   returns in rax the address 8 bytes above the stack pointer at entry
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        # vector registers
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        pxor    %xmm6,%xmm6
        pxor    %xmm7,%xmm7
        pxor    %xmm8,%xmm8
        pxor    %xmm9,%xmm9
        pxor    %xmm10,%xmm10
        pxor    %xmm11,%xmm11
        pxor    %xmm12,%xmm12
        pxor    %xmm13,%xmm13
        pxor    %xmm14,%xmm14
        pxor    %xmm15,%xmm15
        # general-purpose registers
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %rsi,%rsi
        xorq    %rdi,%rdi
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax            # lea leaves the flags untouched
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# OPENSSL_wipe_cpu for Win64.  This variant touches neither rsi/rdi nor
# xmm6 and above.
if ($win64) {
print <<___;
# OPENSSL_wipe_cpu
#   takes no arguments
#   zeroes xmm0-xmm5 and rcx, rdx, r8-r11
#   returns in rax the address 8 bytes above the stack pointer at entry
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        # vector registers
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        # general-purpose registers
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax            # lea leaves the flags untouched
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
231
# STDOUT is a pipe to x86_64-xlate.pl.  Closing it flushes the buffered
# assembly and waits for the translator to finish; a false return means the
# final write failed or the translator exited with a non-zero status, so
# stop with an error rather than leave a truncated output file behind.
close STDOUT
    or die "error closing pipe to x86_64-xlate.pl: ",
           ($! ? $! : "exit status $?"), "\n";