Use correct extension and OSX detection.
[openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
# Command line: [flavour] [output].  When the first argument contains a dot
# it is taken to be the output file name and the flavour is left undefined.
3 $flavour = shift;
4 $output  = shift;
5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
6
# Windows x64 register usage is selected by an nasm/masm/mingw64 flavour or by
# an output file whose name ends in .asm.
7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
8
# Directory part of this script's path, used to find the translator script
# relative to it.
9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Everything printed from here on is piped through perlasm/x86_64-xlate.pl.
# Stop immediately if the pipe cannot be set up instead of silently printing
# into a dead handle.
10 open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"
        or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";
11
# Registers holding the first two integer arguments for the selected target;
# the assembly text below refers to them as $arg1 and $arg2.
12 if ($win64)     { $arg1="%rcx"; $arg2="%rdx"; }
13 else            { $arg1="%rdi"; $arg2="%rsi"; }
# Everything from here to the closing ___ line is assembly text written to
# STDOUT.  The here-document interpolates Perl variables, which is why literal
# dollar and at signs inside the assembly are backslash-escaped.
14 print<<___;
# Emit a call to OPENSSL_cpuid_setup into the .init section, then switch to
# .text for the routines that follow.
15 .extern         OPENSSL_cpuid_setup
16 .section        .init
17         call    OPENSSL_cpuid_setup
18
19 .text
20
# OPENSSL_atomic_add: add the second argument to the 32-bit word addressed by
# the first argument, using a locked compare-exchange retry loop.
# The arguments arrive in the registers chosen by the Perl preamble
# (rcx/rdx for win64, rdi/rsi otherwise).
# Returns the updated 32-bit value in rax, sign-extended to 64 bits.
21 .globl  OPENSSL_atomic_add
22 .type   OPENSSL_atomic_add,\@abi-omnipotent
23 .align  16
24 OPENSSL_atomic_add:
        # eax = value currently in memory; it is the expected value for cmpxchg
25         movl    ($arg1),%eax
        # r8 = expected value + addend (lea leaves the flags untouched)
26 .Lspin: leaq    ($arg2,%rax),%r8
27         .byte   0xf0            # lock
        # Store r8d only if memory still equals eax.  On a mismatch cmpxchg
        # loads the current memory value into eax, so the retry recomputes
        # the sum from fresh data.
28         cmpxchgl        %r8d,($arg1)
29         jne     .Lspin
        # Return the value that was stored.
30         movl    %r8d,%eax
31         .byte   0x48,0x98       # cltq/cdqe
32         ret
33 .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
34
# OPENSSL_rdtsc: return the processor time-stamp counter as one 64-bit value
# in rax.  Takes no arguments.
35 .globl  OPENSSL_rdtsc
36 .type   OPENSSL_rdtsc,\@abi-omnipotent
37 .align  16
38 OPENSSL_rdtsc:
        # rdtsc delivers the counter split across edx (high half) and eax
        # (low half); merge the two halves into rax.
39         rdtsc
40         shl     \$32,%rdx
41         or      %rdx,%rax
42         ret
43 .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
44
# OPENSSL_ia32_cpuid: query CPUID and return an adjusted copy of the leaf 1
# feature words in rax: ecx in the upper 32 bits, edx in the lower 32 bits.
# Takes no arguments.
# Adjustments made to the edx word before it is returned:
#   bit 28 (hyper-threading) is cleared when the core/thread counts examined
#          below indicate that no logical processors share a core or L1D cache;
#   bit 20 is set on GenuineIntel processors;
#   bit 30 is set on GenuineIntel processors whose Family ID field is not 15.
# Register roles inside the routine:
#   r8   saved copy of rbx (cpuid overwrites ebx); restored before returning
#   r9d  0 only when the vendor string is GenuineIntel
#   r10  AMD path: number of cores; generic path: cache sharing field of
#        leaf 4, or -1 when leaf 4 is not available
#   r11d highest supported standard CPUID leaf
45 .globl  OPENSSL_ia32_cpuid
46 .type   OPENSSL_ia32_cpuid,\@abi-omnipotent
47 .align  16
48 OPENSSL_ia32_cpuid:
49         mov     %rbx,%r8
50
        # Leaf 0: maximum standard leaf in eax, vendor string in ebx:edx:ecx.
51         xor     %eax,%eax
52         cpuid
53         mov     %eax,%r11d              # max value for standard query level
54
        # Compare the vendor string against GenuineIntel, accumulating any
        # mismatch in r9d.  eax is zeroed first so each setne yields 0 or 1.
55         xor     %eax,%eax
56         cmp     \$0x756e6547,%ebx       # "Genu"
57         setne   %al
58         mov     %eax,%r9d
59         cmp     \$0x49656e69,%edx       # "ineI"
60         setne   %al
61         or      %eax,%r9d
62         cmp     \$0x6c65746e,%ecx       # "ntel"
63         setne   %al
64         or      %eax,%r9d               # 0 indicates Intel CPU
65         jz      .Lintel
66
        # Same comparison against AuthenticAMD, accumulated in r10d.  Any
        # vendor that is neither Intel nor AMD also takes the .Lintel path.
67         cmp     \$0x68747541,%ebx       # "Auth"
68         setne   %al
69         mov     %eax,%r10d
70         cmp     \$0x69746E65,%edx       # "enti"
71         setne   %al
72         or      %eax,%r10d
73         cmp     \$0x444D4163,%ecx       # "cAMD"
74         setne   %al
75         or      %eax,%r10d              # 0 indicates AMD CPU
76         jnz     .Lintel
77
78         # AMD specific
        # Leaf 0x80000000 reports the highest extended leaf; without leaf
        # 0x80000008 fall back to the generic path.
79         mov     \$0x80000000,%eax
80         cpuid
81         cmp     \$0x80000008,%eax
82         jb      .Lintel
83
84         mov     \$0x80000008,%eax
85         cpuid
86         movzb   %cl,%r10                # number of cores - 1
87         inc     %r10                    # number of cores
88
        # Leaf 1: keep the hyper-threading bit only if the logical processor
        # count (bits 16-23 of ebx) exceeds the number of cores.
89         mov     \$1,%eax
90         cpuid
91         bt      \$28,%edx               # test hyper-threading bit
92         jnc     .Ldone
93         shr     \$16,%ebx               # number of logical processors
94         cmp     %r10b,%bl
95         ja      .Ldone
96         and     \$0xefffffff,%edx       # ~(1<<28)
97         jmp     .Ldone
98
99 .Lintel:
        # r10d defaults to -1 when leaf 4 is unavailable.  The mov sits between
        # the cmp and the jb on purpose: mov does not modify the flags.
100         cmp     \$4,%r11d
101         mov     \$-1,%r10d
102         jb      .Lnocacheinfo
103
        # Leaf 4, sub-leaf 0: extract bits 14-25 of eax into r10d.
104         mov     \$4,%eax
105         mov     \$0,%ecx                # query L1D
106         cpuid
107         mov     %eax,%r10d
108         shr     \$14,%r10d
109         and     \$0xfff,%r10d           # number of cores -1 per L1D
110
111 .Lnocacheinfo:
        # Leaf 1 supplies the feature words that are returned.  The two
        # Intel-only adjustments are skipped when r9d is non-zero.
112         mov     \$1,%eax
113         cpuid
114         cmp     \$0,%r9d
115         jne     .Lnotintel
116         or      \$0x00100000,%edx       # use reserved 20th bit to engage RC4_CHAR
        # ah holds bits 8-15 of eax; its low four bits are the Family ID.
117         and     \$15,%ah
118         cmp     \$15,%ah                # examine Family ID
119         je      .Lnotintel
120         or      \$0x40000000,%edx       # use reserved bit to skip unrolled loop
121 .Lnotintel:
        # Re-evaluate the hyper-threading bit.  It is cleared first and stays
        # cleared when r10d is 0.  Otherwise it is set again and then cleared
        # once more unless the logical processor count in bits 16-23 of ebx
        # is greater than 1.
122         bt      \$28,%edx               # test hyper-threading bit
123         jnc     .Ldone
124         and     \$0xefffffff,%edx       # ~(1<<28)
125         cmp     \$0,%r10d
126         je      .Ldone
127
128         or      \$0x10000000,%edx       # 1<<28
129         shr     \$16,%ebx
130         cmp     \$1,%bl                 # see if cache is shared
131         ja      .Ldone
132         and     \$0xefffffff,%edx       # ~(1<<28)
133 .Ldone:
        # rax = (ecx << 32) | edx; restore the caller's rbx from r8.
134         shl     \$32,%rcx
135         mov     %edx,%eax
136         mov     %r8,%rbx
137         or      %rcx,%rax
138         ret
139 .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
140
# OPENSSL_cleanse: overwrite a memory region with zero bytes.
# In:  first argument  = start address of the region
#      second argument = length of the region in bytes (may be 0)
#      (rcx/rdx for win64, rdi/rsi otherwise, as chosen by the Perl preamble)
# Out: nothing meaningful; rax is left as 0.
# Regions shorter than 15 bytes are cleared one byte at a time.  Longer ones
# are byte-cleared up to an 8-byte boundary, then cleared 8 bytes per store,
# and any remaining 1-7 bytes are finished by the byte loop.
141 .globl  OPENSSL_cleanse
142 .type   OPENSSL_cleanse,\@abi-omnipotent
143 .align  16
144 OPENSSL_cleanse:
145         xor     %rax,%rax
146         cmp     \$15,$arg2
147         jae     .Lot
        # A zero length must return before the byte loop: the loop stores
        # first and tests afterwards, so entering it with a count of 0 would
        # write one byte and then wrap the counter around to 2^64-1.
        cmp     \$0,$arg2
        je      .Lret
        # Byte loop; always entered with a non-zero count.  The lea between
        # the sub and the jnz leaves the flags from the sub intact.
148 .Little:
149         mov     %al,($arg1)
150         sub     \$1,$arg2
151         lea     1($arg1),$arg1
152         jnz     .Little
.Lret:
153         ret
154 .align  16
        # Long path (count >= 15): clear single bytes until the address is
        # 8-byte aligned.  At most 7 bytes are consumed here, so at least 8
        # remain when .Laligned is reached.
155 .Lot:
156         test    \$7,$arg1
157         jz      .Laligned
158         mov     %al,($arg1)
159         lea     -1($arg2),$arg2
160         lea     1($arg1),$arg1
161         jmp     .Lot
        # Clear 8 bytes per iteration.  The test against -8 is non-zero as
        # long as 8 or more bytes remain; the following lea does not disturb
        # the flags it produced.
162 .Laligned:
163         mov     %rax,($arg1)
164         lea     -8($arg2),$arg2
165         test    \$-8,$arg2
166         lea     8($arg1),$arg1
167         jnz     .Laligned
        # Between 0 and 7 bytes are left; finish a non-empty tail bytewise.
168         cmp     \$0,$arg2
169         jne     .Little
170         ret
171 .size   OPENSSL_cleanse,.-OPENSSL_cleanse
172 ___
173
# OPENSSL_wipe_cpu for every target except win64.
174 print<<___ if (!$win64);
# OPENSSL_wipe_cpu: zero xmm0-xmm15 and the general-purpose registers rcx,
# rdx, rsi, rdi and r8-r11.  Takes no arguments.
# Returns rsp+8 in rax, i.e. the address just above the return address.
175 .globl  OPENSSL_wipe_cpu
176 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
177 .align  16
178 OPENSSL_wipe_cpu:
179         pxor    %xmm0,%xmm0
180         pxor    %xmm1,%xmm1
181         pxor    %xmm2,%xmm2
182         pxor    %xmm3,%xmm3
183         pxor    %xmm4,%xmm4
184         pxor    %xmm5,%xmm5
185         pxor    %xmm6,%xmm6
186         pxor    %xmm7,%xmm7
187         pxor    %xmm8,%xmm8
188         pxor    %xmm9,%xmm9
189         pxor    %xmm10,%xmm10
190         pxor    %xmm11,%xmm11
191         pxor    %xmm12,%xmm12
192         pxor    %xmm13,%xmm13
193         pxor    %xmm14,%xmm14
194         pxor    %xmm15,%xmm15
195         xorq    %rcx,%rcx
196         xorq    %rdx,%rdx
197         xorq    %rsi,%rsi
198         xorq    %rdi,%rdi
199         xorq    %r8,%r8
200         xorq    %r9,%r9
201         xorq    %r10,%r10
202         xorq    %r11,%r11
        # rax is not zeroed; it carries the return value instead.
203         leaq    8(%rsp),%rax
204         ret
205 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
206 ___
# OPENSSL_wipe_cpu for win64 targets.
207 print<<___ if ($win64);
# OPENSSL_wipe_cpu: zero xmm0-xmm5 and the general-purpose registers rcx, rdx
# and r8-r11.  Takes no arguments.
# Unlike the non-win64 variant this one leaves rsi, rdi and xmm6-xmm15 alone;
# those registers are callee-saved in the Microsoft x64 calling convention.
# Returns rsp+8 in rax, i.e. the address just above the return address.
208 .globl  OPENSSL_wipe_cpu
209 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
210 .align  16
211 OPENSSL_wipe_cpu:
212         pxor    %xmm0,%xmm0
213         pxor    %xmm1,%xmm1
214         pxor    %xmm2,%xmm2
215         pxor    %xmm3,%xmm3
216         pxor    %xmm4,%xmm4
217         pxor    %xmm5,%xmm5
218         xorq    %rcx,%rcx
219         xorq    %rdx,%rdx
220         xorq    %r8,%r8
221         xorq    %r9,%r9
222         xorq    %r10,%r10
223         xorq    %r11,%r11
        # rax is not zeroed; it carries the return value instead.
224         leaq    8(%rsp),%rax
225         ret
226 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
227 ___
228
# STDOUT is the pipe to the translator.  Closing it flushes the remaining
# output and waits for the translator to finish; a false result means the
# flush failed or the translator exited with an error, so report it instead
# of ending with a success status.
229 close STDOUT or die "error closing STDOUT: $! (status $?)";   # flush