PPC: Try out if mftb works before using it
[openssl.git] / crypto / ppccpuid.pl
#! /usr/bin/env perl
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Command line: <flavour> <output>.  The flavour string selects the word
# size below; both arguments are passed on to ppc-xlate.pl.
$flavour = shift;
$output = shift;

# Locate the ppc-xlate.pl translator, either next to this script or in a
# perlasm/ subdirectory beside it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator.
# Low-precedence "or" is required: with "||" the die was attached to the
# shifted argument instead of to open(), so a failed open went unnoticed.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Mnemonics that differ between 32- and 64-bit flavours.  They are
# interpolated into the assembly text below:
#   $CMPLI - unsigned compare with immediate
#   $SHRLI - logical shift right by immediate
#   $SIGNX - move used for the return value (sign-extending on 64-bit)
if ($flavour =~ /64/) {
    ($CMPLI, $SHRLI, $SIGNX) = ("cmpldi", "srdi", "extsw");
} else {
    ($CMPLI, $SHRLI, $SIGNX) = ("cmplwi", "srwi", "mr");
}

# Start of the generated assembly.  The heredoc interpolates, so any
# dollar-prefixed name in the text is replaced by the Perl variable's value.
$code=<<___;
.machine        "any"
.text

# OPENSSL_fpu_probe: execute one floating-point register move and return.
# Nothing is computed; presumably the caller catches the fault raised when
# the instruction is unavailable (confirm against the caller).
.globl  .OPENSSL_fpu_probe
.align  4
.OPENSSL_fpu_probe:
        fmr     f0,f0                   # FP move of f0 onto itself
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_fpu_probe,.-.OPENSSL_fpu_probe
# OPENSSL_ppc64_probe: execute fcfid and extrdi, then return.  No result is
# produced; going by the routine name these instructions are expected to
# fault on cores without 64-bit support (confirm against the caller).
.globl  .OPENSSL_ppc64_probe
.align  4
.OPENSSL_ppc64_probe:
        fcfid   f1,f1
        extrdi  r0,r0,32,0
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe

# OPENSSL_altivec_probe: execute one vector OR (given as a raw opcode word so
# that no assembler vector support is needed) and return.  Presumably the
# caller catches the fault raised when the instruction is unavailable.
.globl  .OPENSSL_altivec_probe
.align  4
.OPENSSL_altivec_probe:
        .long   0x10000484      # vor   v0,v0,v0
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_altivec_probe,.-.OPENSSL_altivec_probe

# OPENSSL_crypto207_probe: load a vector from the address held in r1 and run
# one vcipher on it, then return.  The result is discarded.  lvx_u is not a
# native mnemonic; presumably ppc-xlate.pl expands it (confirm).
.globl  .OPENSSL_crypto207_probe
.align  4
.OPENSSL_crypto207_probe:
        lvx_u   v0,0,r1
        vcipher v0,v0,v0
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe

# OPENSSL_madd300_probe: zero r0, then execute maddld and maddhdu on it and
# return.  r3 is overwritten by both multiply-add results.  Presumably the
# caller catches the fault raised when the instructions are unavailable.
.globl  .OPENSSL_madd300_probe
.align  4
.OPENSSL_madd300_probe:
        xor     r0,r0,r0                # r0 = 0, operand for both multiply-adds
        maddld  r3,r0,r0,r0
        maddhdu r3,r0,r0,r0
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_madd300_probe,.-.OPENSSL_madd300_probe

# OPENSSL_wipe_cpu: overwrite scratch registers and return r1 in r3.
#   - r0 and r4..r12 are cleared with xor
#   - f0..f13 receive a copy of f31 (whatever it holds, not a constant)
#   - r3 = r1 (presumably the stack pointer under the PowerPC ABIs)
# Integer and FP instructions are interleaved, as in the original listing.
.globl  .OPENSSL_wipe_cpu
.align  4
.OPENSSL_wipe_cpu:
        xor     r0,r0,r0
        fmr     f0,f31
        fmr     f1,f31
        fmr     f2,f31
        mr      r3,r1                   # return value: current r1
        fmr     f3,f31
        xor     r4,r4,r4
        fmr     f4,f31
        xor     r5,r5,r5
        fmr     f5,f31
        xor     r6,r6,r6
        fmr     f6,f31
        xor     r7,r7,r7
        fmr     f7,f31
        xor     r8,r8,r8
        fmr     f8,f31
        xor     r9,r9,r9
        fmr     f9,f31
        xor     r10,r10,r10
        fmr     f10,f31
        xor     r11,r11,r11
        fmr     f11,f31
        xor     r12,r12,r12
        fmr     f12,f31
        fmr     f13,f31
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu

# OPENSSL_atomic_add: atomically add r4 to the 32-bit word at address r3.
# Uses a load-reserve / store-conditional pair and retries until the
# conditional store succeeds.  Returns the new value in r3, sign-extended
# from 32 bits on 64-bit flavours and copied as-is otherwise.
.globl  .OPENSSL_atomic_add
.align  4
.OPENSSL_atomic_add:
Ladd:   lwarx   r5,0,r3                 # load word and set reservation
        add     r0,r4,r5                # r0 = old value + addend
        stwcx.  r0,0,r3                 # store only if reservation still held
        bne-    Ladd                    # reservation lost: retry
        $SIGNX  r3,r0
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,2,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_atomic_add,.-.OPENSSL_atomic_add

# OPENSSL_rdtsc_mftb: return the time base, read with the mftb mnemonic,
# in r3.  A sibling routine reads the same counter via mfspr for cores where
# this form does not work (see the title of this change).
.globl  .OPENSSL_rdtsc_mftb
.align  4
.OPENSSL_rdtsc_mftb:
        mftb    r3
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_rdtsc_mftb,.-.OPENSSL_rdtsc_mftb

# OPENSSL_rdtsc_mfspr268: return special-purpose register 268 in r3.
# SPR 268 is presumably the time base as seen through mfspr (confirm against
# the Power ISA); this is the alternative to the mftb-based reader.
.globl  .OPENSSL_rdtsc_mfspr268
.align  4
.OPENSSL_rdtsc_mfspr268:
        mfspr   r3,268
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,0,0     # (presumably a traceback table - confirm)
.size   .OPENSSL_rdtsc_mfspr268,.-.OPENSSL_rdtsc_mfspr268

# OPENSSL_cleanse: store zeros over r4 bytes starting at address r3.
#   - length 0: return immediately
#   - length below 7: byte-at-a-time loop (Little)
#   - otherwise (Lot): store single bytes until the address is 4-byte
#     aligned, then store whole words (Laligned), then finish any remaining
#     1..3 bytes through the byte loop
# On the Laligned path at least 4 bytes remain (7 or more on entry, at most
# 3 used for alignment), so the word counter is never zero.
.globl  .OPENSSL_cleanse
.align  4
.OPENSSL_cleanse:
        $CMPLI  r4,7
        li      r0,0                    # the value stored everywhere
        bge     Lot
        $CMPLI  r4,0
        beqlr-                          # nothing to do for an empty buffer
Little: mtctr   r4                      # byte loop, r4 iterations
        stb     r0,0(r3)
        addi    r3,r3,1
        bdnz    \$-8                    # back two instructions, to the stb
        blr
Lot:    andi.   r5,r3,3                 # low address bits: aligned yet?
        beq     Laligned
        stb     r0,0(r3)
        subi    r4,r4,1
        addi    r3,r3,1
        b       Lot
Laligned:
        $SHRLI  r5,r4,2                 # number of whole words
        mtctr   r5
        stw     r0,0(r3)
        addi    r3,r3,4
        bdnz    \$-8                    # back two instructions, to the stw
        andi.   r4,r4,3                 # leftover bytes after the words
        bne     Little
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,2,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_cleanse,.-.OPENSSL_cleanse

# CRYPTO_memcmp: compare r5 bytes at addresses r3 and r4.
# Every byte pair is XORed and the results ORed into r0; the loop has no
# data-dependent branch and always runs r5 iterations.  Returns 1 in r3 if
# any byte differed and 0 otherwise (also 0 for a zero length).
.globl  .CRYPTO_memcmp
.align  4
.CRYPTO_memcmp:
        $CMPLI  r5,0
        li      r0,0                    # accumulator of differences
        beq     Lno_data
        mtctr   r5
Loop_cmp:
        lbz     r6,0(r3)
        addi    r3,r3,1
        lbz     r7,0(r4)
        addi    r4,r4,1
        xor     r6,r6,r7                # non-zero where the bytes differ
        or      r0,r0,r6
        bdnz    Loop_cmp

Lno_data:
        li      r3,0
        sub     r3,r3,r0                # 0 - accumulator: negative if any difference
        extrwi  r3,r3,1,0               # keep only the top bit of the 32-bit word
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,3,0     # (presumably a traceback table - confirm)
        .long   0
.size   .CRYPTO_memcmp,.-.CRYPTO_memcmp
___
{
# Register aliases interpolated into the instrument_bus routines below.
#   $out  - address of the 32-bit word currently being updated
#   $cnt  - entry count (loop counter in the bus variants, scaled to a
#           byte count in the bus2 variants)
#   $max  - iteration budget, counted down to zero by the bus2 variants
#   $tick/$lasttick - current and previous timer readings
#   $diff/$lastdiff - current and previous deltas between readings
my ($out,$cnt,$max)=("r3","r4","r5");
my ($tick,$lasttick)=("r6","r7");
my ($diff,$lastdiff)=("r8","r9");

$code.=<<___;
# OPENSSL_instrument_bus_mftb(out, cnt): timing probe using mftb.
# After one initial flush-and-add of zero to the first word, each of the cnt
# loop iterations reads the time base, takes the delta from the previous
# reading, flushes the cache line of the current output word, adds the delta
# to that word and steps to the next 32-bit word.  Returns cnt.
# The outcome of the conditional store is not tested; the plain store that
# follows writes the sum regardless.
.globl  .OPENSSL_instrument_bus_mftb
.align  4
.OPENSSL_instrument_bus_mftb:
        mtctr   $cnt

        mftb    $lasttick               # collect 1st tick
        li      $diff,0

        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

Loop:   mftb    $tick
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out
        addi    $out,$out,4             # ++$out
        bdnz    Loop

        mr      r3,$cnt
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,2,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_instrument_bus_mftb,.-.OPENSSL_instrument_bus_mftb

# OPENSSL_instrument_bus2_mftb(out, cnt, max): timing probe using mftb that
# only moves to the next output word when the measured delta changes.
#   - the original cnt is saved in r0 and cnt is scaled to a byte count
#   - each pass flushes the current word's cache line and adds the latest
#     delta to it, then decrements max and stops when max reaches zero
#   - a new delta is taken and compared with the previous one; the step is
#     4 when they differ and 0 when they are equal, and is applied both to
#     the output pointer and to the remaining byte count
#   - the loop also stops when the remaining byte count reaches zero
# Returns the original cnt minus the number of entries still remaining.
.globl  .OPENSSL_instrument_bus2_mftb
.align  4
.OPENSSL_instrument_bus2_mftb:
        mr      r0,$cnt
        slwi    $cnt,$cnt,2

        mftb    $lasttick               # collect 1st tick
        li      $diff,0

        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

        mftb    $tick                   # collect 1st diff
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        mr      $lastdiff,$diff
Loop2:
        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

        addic.  $max,$max,-1
        beq     Ldone2

        mftb    $tick
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        cmplw   7,$diff,$lastdiff
        mr      $lastdiff,$diff

        mfcr    $tick                   # pull cr
        not     $tick,$tick             # flip bits
        rlwinm  $tick,$tick,1,29,29     # isolate flipped eq bit and scale

        sub.    $cnt,$cnt,$tick         # conditional --$cnt
        add     $out,$out,$tick         # conditional ++$out
        bne     Loop2

Ldone2:
        srwi    $cnt,$cnt,2
        sub     r3,r0,$cnt
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,3,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_instrument_bus2_mftb,.-.OPENSSL_instrument_bus2_mftb

# OPENSSL_instrument_bus_mfspr268(out, cnt): timing probe that reads the
# timer through mfspr 268 rather than mftb.
# After one initial flush-and-add of zero to the first word, each of the cnt
# loop iterations reads the timer, takes the delta from the previous
# reading, flushes the cache line of the current output word, adds the delta
# to that word and steps to the next 32-bit word.  Returns cnt.
# The outcome of the conditional store is not tested; the plain store that
# follows writes the sum regardless.
.globl  .OPENSSL_instrument_bus_mfspr268
.align  4
.OPENSSL_instrument_bus_mfspr268:
        mtctr   $cnt

        mfspr   $lasttick,268           # collect 1st tick
        li      $diff,0

        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

Loop3:  mfspr   $tick,268
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out
        addi    $out,$out,4             # ++$out
        bdnz    Loop3

        mr      r3,$cnt
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,2,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_instrument_bus_mfspr268,.-.OPENSSL_instrument_bus_mfspr268

# OPENSSL_instrument_bus2_mfspr268(out, cnt, max): timing probe that reads
# the timer through mfspr 268 and only moves to the next output word when
# the measured delta changes.
#   - the original cnt is saved in r0 and cnt is scaled to a byte count
#   - each pass flushes the current word's cache line and adds the latest
#     delta to it, then decrements max and stops when max reaches zero
#   - a new delta is taken and compared with the previous one; the step is
#     4 when they differ and 0 when they are equal, and is applied both to
#     the output pointer and to the remaining byte count
#   - the loop also stops when the remaining byte count reaches zero
# Returns the original cnt minus the number of entries still remaining.
.globl  .OPENSSL_instrument_bus2_mfspr268
.align  4
.OPENSSL_instrument_bus2_mfspr268:
        mr      r0,$cnt
        slwi    $cnt,$cnt,2

        mfspr   $lasttick,268           # collect 1st tick
        li      $diff,0

        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

        mfspr   $tick,268               # collect 1st diff
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        mr      $lastdiff,$diff
Loop4:
        dcbf    0,$out                  # flush cache line
        lwarx   $tick,0,$out            # load and lock
        add     $tick,$tick,$diff
        stwcx.  $tick,0,$out
        stwx    $tick,0,$out

        addic.  $max,$max,-1
        beq     Ldone4

        mfspr   $tick,268
        sub     $diff,$tick,$lasttick
        mr      $lasttick,$tick
        cmplw   7,$diff,$lastdiff
        mr      $lastdiff,$diff

        mfcr    $tick                   # pull cr
        not     $tick,$tick             # flip bits
        rlwinm  $tick,$tick,1,29,29     # isolate flipped eq bit and scale

        sub.    $cnt,$cnt,$tick         # conditional --$cnt
        add     $out,$out,$tick         # conditional ++$out
        bne     Loop4

Ldone4:
        srwi    $cnt,$cnt,2
        sub     r3,r0,$cnt
        blr
        .long   0                       # data after the return, not code
        .byte   0,12,0x14,0,0,0,3,0     # (presumably a traceback table - confirm)
        .long   0
.size   .OPENSSL_instrument_bus2_mfspr268,.-.OPENSSL_instrument_bus2_mfspr268
___
}

# Replace every backtick-quoted span in the generated text with the result
# of evaluating it as Perl, then send the text down the translator pipe.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is a pipe to the translator: closing it flushes the remaining
# output and waits for the child, so a failure must not be ignored here.
close STDOUT or die "error closing STDOUT: $!";