make update
[openssl.git] / crypto / armv4cpuid.S
1 #include "arm_arch.h"
2
3 .text                         @ switch to the code section
4 .code   32                    @ assemble what follows as 32-bit ARM instructions, not Thumb
5
6 @ Special note about using .byte directives to encode instructions.
7 @ Initial reason for hand-coding instructions was to allow module to
8 @ be compilable by legacy tool-chains. At later point it was pointed
9 @ out that since ARMv7, instructions are always encoded in little-endian
10 @ order, therefore one has to opt for endian-neutral presentation.
11 @ Contemporary tool-chains offer .inst directive for this purpose,
12 @ but not legacy ones. Therefore .byte. But there is an exception,
13 @ namely ARMv7-R profile still allows for big-endian encoding even for
14 @ instructions. This raises the question what if probe instructions
15 @ appear executable to such processor operating in big-endian order?
16 @ They have to be chosen in a way that avoids this problem. As a failed
17 @ NEON probe disables a number of other probes, we only have to ensure
18 @ that the NEON probe instruction doesn't appear executable in big-endian
19 @ order, therefore 'vorr q8,q8,q8', and not some other register. The
20 @ only probe that is not bypassed on failed NEON probe is _armv7_tick,
21 @ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
22 @ that if fetched in the alternative byte order, the instruction should
23 @ crash to denote lack of the probed capability...
24
25 .align  5                     @ power-of-two alignment: 32-byte boundary
26 .global _armv7_neon_probe
27 .type   _armv7_neon_probe,%function
@ _armv7_neon_probe: capability probe that executes one NEON instruction
@ and returns. The instruction is emitted as raw bytes for the reasons
@ given in the note at the top of this file.
@ In:    nothing
@ Out:   nothing; q8 is ORed with itself, so its contents do not change
@ NOTE(review): on a CPU without NEON the vorr is expected to fault. The
@ code that catches the fault is not in this file - confirm in the caller.
28 _armv7_neon_probe:
29         .byte   0xf0,0x01,0x60,0xf2     @ vorr  q8,q8,q8
30         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
31 .size   _armv7_neon_probe,.-_armv7_neon_probe
32
33 .global _armv7_tick
34 .type   _armv7_tick,%function
@ _armv7_tick: read the 64-bit CNTVCT counter through coprocessor 15.
@ In:    nothing
@ Out:   r0 = low word, r1 = high word, as written by the mrrc
@ The leading mov has no effect on the result (r0 is overwritten by the
@ mrrc). According to the note at the top of this file it is there so
@ that the routine crashes if fetched in the alternative byte order.
@ NOTE(review): whether callers consume r1 is not visible here - confirm.
35 _armv7_tick:
36         .byte   0x06,0x00,0xa0,0xe1     @ mov   r0,r6
37         .byte   0x1e,0x0f,0x51,0xec     @ mrrc  p15,1,r0,r1,c14 @ CNTVCT
38         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
39         nop                             @ placed after the return; presumably padding
40 .size   _armv7_tick,.-_armv7_tick
41
42 .global _armv8_aes_probe
43 .type   _armv8_aes_probe,%function
@ _armv8_aes_probe: capability probe that executes one hand-encoded AES
@ instruction and returns.
@ In:    nothing
@ Out:   nothing
@ Clobb: q0 (destination of the probe instruction)
@ NOTE(review): the instruction is expected to fault when the extension
@ is absent; the fault handling is not in this file - confirm in caller.
44 _armv8_aes_probe:
45         .byte   0x00,0x03,0xb0,0xf3     @ aese.8        q0,q0
46         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
47 .size   _armv8_aes_probe,.-_armv8_aes_probe
48
49 .global _armv8_sha1_probe
50 .type   _armv8_sha1_probe,%function
@ _armv8_sha1_probe: capability probe that executes one hand-encoded
@ SHA-1 instruction and returns.
@ In:    nothing
@ Out:   nothing
@ Clobb: q0 (destination of the probe instruction)
@ NOTE(review): the instruction is expected to fault when the extension
@ is absent; the fault handling is not in this file - confirm in caller.
51 _armv8_sha1_probe:
52         .byte   0x40,0x0c,0x00,0xf2     @ sha1c.32      q0,q0,q0
53         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
54 .size   _armv8_sha1_probe,.-_armv8_sha1_probe
55
56 .global _armv8_sha256_probe
57 .type   _armv8_sha256_probe,%function
@ _armv8_sha256_probe: capability probe that executes one hand-encoded
@ SHA-256 instruction and returns.
@ In:    nothing
@ Out:   nothing
@ Clobb: q0 (destination of the probe instruction)
@ NOTE(review): the instruction is expected to fault when the extension
@ is absent; the fault handling is not in this file - confirm in caller.
58 _armv8_sha256_probe:
59         .byte   0x40,0x0c,0x00,0xf3     @ sha256h.32    q0,q0,q0
60         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
61 .size   _armv8_sha256_probe,.-_armv8_sha256_probe
62 .global _armv8_pmull_probe
63 .type   _armv8_pmull_probe,%function
@ _armv8_pmull_probe: capability probe that executes one hand-encoded
@ 64-bit polynomial multiply and returns.
@ In:    nothing
@ Out:   nothing
@ Clobb: q0 (destination of the probe instruction)
@ NOTE(review): the instruction is expected to fault when the extension
@ is absent; the fault handling is not in this file - confirm in caller.
64 _armv8_pmull_probe:
65         .byte   0x00,0x0e,0xa0,0xf2     @ vmull.p64     q0,d0,d0
66         .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
67 .size   _armv8_pmull_probe,.-_armv8_pmull_probe
68
69 .align  5
70 .global OPENSSL_atomic_add
71 .type   OPENSSL_atomic_add,%function
@ OPENSSL_atomic_add: atomically add a value to a 32-bit word in memory.
@ In:    r0 = address of the word, r1 = amount to add
@ Out:   r0 = updated value of the word (same in both build variants)
@ Builds for ARMv6 and later use an ldrex/strex retry loop and touch only
@ r0-r3. Older targets serialize through atomic_add_spinlock, which is
@ taken with swp (-1 = held, 0 = free), calling sched_yield between
@ attempts.
72 OPENSSL_atomic_add:
73 #if __ARM_ARCH__>=6
74 .Ladd:  ldrex   r2,[r0]                 @ r2 = current value, exclusive access begins
75         add     r3,r2,r1                @ r3 = updated value
76         strex   r2,r3,[r0]              @ attempt the store; r2 = 0 on success
77         cmp     r2,#0
78         bne     .Ladd                   @ exclusivity was lost, start over
79         mov     r0,r3                   @ return the updated value
80         bx      lr
81 #else
82         stmdb   sp!,{r4-r6,lr}          @ r4-r6 carry state across the sched_yield call
83         ldr     r2,.Lspinlock           @ r2 = atomic_add_spinlock - .Lspinlock
84         adr     r3,.Lspinlock
85         mov     r4,r0                   @ r4 = address of the word
86         mov     r5,r1                   @ r5 = amount
87         add     r6,r3,r2        @ &spinlock
88         b       .+8                     @ first attempt skips the yield below
89 .Lspin: bl      sched_yield
90         mov     r0,#-1
91         swp     r0,r0,[r6]              @ lock = -1, r0 = previous lock value
92         cmp     r0,#0
93         bne     .Lspin                  @ nonzero: the lock was already held
94
95         ldr     r2,[r4]
96         add     r2,r2,r5
97         str     r2,[r4]                 @ r2 = updated value, now stored
98         str     r0,[r6]         @ release spinlock (r0 is 0 here)
           mov     r0,r2                   @ r0 held the 0 used for the release; return the updated value instead
99         ldmia   sp!,{r4-r6,lr}
100         tst     lr,#1                  @ bit 0 of lr set means a Thumb caller
101         moveq   pc,lr                  @ ARM caller: plain return
102         .word   0xe12fff1e      @ bx    lr
103 #endif
104 .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
105
106 .global OPENSSL_cleanse
107 .type   OPENSSL_cleanse,%function
@ OPENSSL_cleanse: overwrite a memory region with zero bytes.
@ In:    r0 = start address, r1 = length in bytes (compared as unsigned)
@ Out:   no value is set up for the caller; r0 is left pointing just
@        past the last byte written
@ Clobb: r0, r1, ip, flags
@ Regions shorter than 7 bytes are cleared one byte at a time. Longer
@ regions are cleared bytewise up to a 4-byte boundary, then one word at
@ a time, then bytewise for a tail of 1 to 3 bytes.
108 OPENSSL_cleanse:
109         eor     ip,ip,ip                @ ip = 0, the fill value
110         cmp     r1,#7
111         subhs   r1,r1,#4                @ len >= 7: keep len-4 so the word loop can stop on borrow
112         bhs     .Lot
113         cmp     r1,#0
114         beq     .Lcleanse_done          @ len == 0: nothing to write
115 .Little:                                @ byte loop, entered with r1 = bytes left (nonzero)
116         strb    ip,[r0],#1
117         subs    r1,r1,#1
118         bhi     .Little                 @ repeat while bytes remain
119         b       .Lcleanse_done
120
121 .Lot:   tst     r0,#3                   @ is the pointer 4-byte aligned yet?
122         beq     .Laligned
123         strb    ip,[r0],#1              @ no: clear one byte and test again
124         sub     r1,r1,#1
125         b       .Lot
126 .Laligned:                              @ word loop, r1 = bytes left minus 4
127         str     ip,[r0],#4
128         subs    r1,r1,#4
129         bhs     .Laligned               @ no borrow: at least one more full word
130         adds    r1,r1,#4                @ undo the bias, r1 = tail bytes (0 to 3)
131         bne     .Little                 @ clear the tail bytewise
132 .Lcleanse_done:
133 #if __ARM_ARCH__>=5
134         bx      lr
135 #else
136         tst     lr,#1                   @ bit 0 of lr set means a Thumb caller
137         moveq   pc,lr                   @ ARM caller: plain return
138         .word   0xe12fff1e      @ bx    lr
139 #endif
140 .size   OPENSSL_cleanse,.-OPENSSL_cleanse
141
142 .global OPENSSL_wipe_cpu
143 .type   OPENSSL_wipe_cpu,%function
@ OPENSSL_wipe_cpu: zero scratch registers and return the stack pointer.
@ In:    nothing
@ Out:   r0 = current sp
@ Clobb: r1, r2, r3, ip, flags; q0-q3 and q8-q15 when bit 0 of
@        OPENSSL_armcap_P is set
@ r2, r3 and ip are always zeroed. The vector registers are zeroed only
@ when bit 0 of the capability word is set (presumably the NEON flag
@ from arm_arch.h - confirm). q4-q7 are not touched here; they are
@ callee-saved under the ARM procedure call standard.
144 OPENSSL_wipe_cpu:
145         ldr     r0,.LOPENSSL_armcap     @ r0 = OPENSSL_armcap_P - .LOPENSSL_armcap
146         adr     r1,.LOPENSSL_armcap
147         ldr     r0,[r1,r0]              @ r0 = OPENSSL_armcap_P, reached PC-relative
148         eor     r2,r2,r2
149         eor     r3,r3,r3
150         eor     ip,ip,ip
151         tst     r0,#1
152         beq     .Lwipe_done             @ bit 0 clear: skip the vector registers
153         .byte   0x50,0x01,0x00,0xf3     @ veor  q0, q0, q0
154         .byte   0x52,0x21,0x02,0xf3     @ veor  q1, q1, q1
155         .byte   0x54,0x41,0x04,0xf3     @ veor  q2, q2, q2
156         .byte   0x56,0x61,0x06,0xf3     @ veor  q3, q3, q3
157         .byte   0xf0,0x01,0x40,0xf3     @ veor  q8, q8, q8
158         .byte   0xf2,0x21,0x42,0xf3     @ veor  q9, q9, q9
159         .byte   0xf4,0x41,0x44,0xf3     @ veor  q10, q10, q10
160         .byte   0xf6,0x61,0x46,0xf3     @ veor  q11, q11, q11
161         .byte   0xf8,0x81,0x48,0xf3     @ veor  q12, q12, q12
162         .byte   0xfa,0xa1,0x4a,0xf3     @ veor  q13, q13, q13
163         .byte   0xfc,0xc1,0x4c,0xf3     @ veor  q14, q14, q14
164         .byte   0xfe,0xe1,0x4e,0xf3     @ veor  q15, q15, q15
165 .Lwipe_done:
166         mov     r0,sp
167 #if __ARM_ARCH__>=5
168         bx      lr
169 #else
170         tst     lr,#1                   @ bit 0 of lr set means a Thumb caller
171         moveq   pc,lr                   @ ARM caller: plain return
172         .word   0xe12fff1e      @ bx    lr
173 #endif
174 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
175
176 .global OPENSSL_instrument_bus
177 .type   OPENSSL_instrument_bus,%function
@ OPENSSL_instrument_bus: stub that does no measurement on this target.
@ In:    any arguments are ignored
@ Out:   r0 = 0
178 OPENSSL_instrument_bus:
179         eor     r0,r0,r0                @ r0 = 0
180 #if __ARM_ARCH__>=5
181         bx      lr
182 #else
183         tst     lr,#1                   @ bit 0 of lr set means a Thumb caller
184         moveq   pc,lr                   @ ARM caller: plain return
185         .word   0xe12fff1e      @ bx    lr
186 #endif
187 .size   OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
188
189 .global OPENSSL_instrument_bus2
190 .type   OPENSSL_instrument_bus2,%function
@ OPENSSL_instrument_bus2: stub that does no measurement on this target.
@ In:    any arguments are ignored
@ Out:   r0 = 0
191 OPENSSL_instrument_bus2:
192         eor     r0,r0,r0                @ r0 = 0
193 #if __ARM_ARCH__>=5
194         bx      lr
195 #else
196         tst     lr,#1                   @ bit 0 of lr set means a Thumb caller
197         moveq   pc,lr                   @ ARM caller: plain return
198         .word   0xe12fff1e      @ bx    lr
199 #endif
200 .size   OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
201
@ Literal words kept in the code section. Each one stores the distance
@ from its own label to a data symbol, so the routines above can reach
@ that symbol with an adr of the label plus the stored offset.
202 .align  5
203 .LOPENSSL_armcap:
204 .word   OPENSSL_armcap_P-.LOPENSSL_armcap       @ read by OPENSSL_wipe_cpu
205 #if __ARM_ARCH__>=6
206 .align  5
207 #else
208 .Lspinlock:
209 .word   atomic_add_spinlock-.Lspinlock          @ read by the spinlock path of OPENSSL_atomic_add
210 .align  5
211
212 .data
213 .align  2
@ Lock word for the spinlock path of OPENSSL_atomic_add: 0 = free.
214 atomic_add_spinlock:
215 .word   0
216 #endif
217
@ Capability word: a 4-byte common symbol with 4-byte alignment, marked
@ hidden so that it is not exported from the linked object.
218 .comm   OPENSSL_armcap_P,4,4
219 .hidden OPENSSL_armcap_P