SHA512 for ARMv4.
[openssl.git] / crypto / sha / asm / sha512-armv4.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # SHA512 block procedure for ARMv4. September 2007.
11
12 # This code is ~4.5 (four and a half) times faster than code generated
13 # by gcc 3.4 and it spends ~72 clock cycles per byte. 
14
15 # This module currently has a dependency on byte order, namely the
16 # *dword* order in ctx->h[0-7]. I have to think of a way to reliably
17 # detect "endianness" [and flip the two constants below] or arrange
18 # the given dword order in C.
# Byte offsets of the low and high 32-bit halves of every 64-bit word
# that is read from or written to the context. 0/4 denotes a
# little-endian platform.
($lo,$hi)=(0,4);

# Register allocation used throughout the generated code.
($ctx,$inp,$len)=("r0","r1","r2");
($Tlo,$Thi)=("r3","r4");
($Alo,$Ahi)=("r5","r6");
($Elo,$Ehi)=("r7","r8");
($t0,$t1,$t2,$t3)=("r9","r10","r11","r12");
############    r13 is stack pointer
$Ktbl="r14";
############    r15 is program counter

# Stack-frame offsets: one 8-byte slot for each of A..H, followed by the
# slot for X.
($Aoff,$Boff,$Coff,$Doff,$Eoff,$Foff,$Goff,$Hoff,$Xoff)=map { 8*$_ } (0..8);
48
# BODY_00_15 - append the assembly for one SHA-512 round to $code.
#
# Argument:
#   $magic - value compared against the low byte of the low word of the
#            round constant just loaded from $Ktbl. On a match bit 0 of
#            $Ktbl is set, which the enclosing loop tests with
#            "tst $Ktbl,#1" to detect its last iteration.
#
# Emitted code, on entry:
#   $Tlo/$Thi hold the message word for this round, $Alo/$Ahi hold a and
#   $Elo/$Ehi hold e; b, c, d, f, g and h are read from the stack frame.
# Emitted code, on exit:
#   $Elo/$Ehi hold d + T (the next e) and $Alo/$Ahi hold
#   Maj(a,b,c) + T + Sigma0(a) (the next a). The old a and e have been
#   written to their frame slots, sp has been lowered by 8 so that every
#   slot is addressed as the following variable in the next round, and
#   $Ktbl has been advanced by 8 to the next constant.
#
# The sub takes exactly one argument, so it is declared without a
# prototype; the former empty prototype "()" only worked because callers
# bypass prototype checking with the "&" call form.
# The generated text is appended to the global $code; callers in this
# file do not use the return value.
sub BODY_00_15 {
my ($magic) = @_;
$code.=<<___;
        ldr     $t2,[sp,#$Hoff+0]       @ h.lo
        ldr     $t3,[sp,#$Hoff+4]       @ h.hi
        @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
        @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
        @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
        mov     $t0,$Elo,lsr#14
        mov     $t1,$Ehi,lsr#14
        eor     $t0,$t0,$Ehi,lsl#18
        eor     $t1,$t1,$Elo,lsl#18
        eor     $t0,$t0,$Elo,lsr#18
        eor     $t1,$t1,$Ehi,lsr#18
        eor     $t0,$t0,$Ehi,lsl#14
        eor     $t1,$t1,$Elo,lsl#14
        eor     $t0,$t0,$Ehi,lsr#9
        eor     $t1,$t1,$Elo,lsr#9
        eor     $t0,$t0,$Elo,lsl#23
        eor     $t1,$t1,$Ehi,lsl#23     @ Sigma1(e)
        adds    $Tlo,$Tlo,$t0
        adc     $Thi,$Thi,$t1           @ T += Sigma1(e)
        adds    $Tlo,$Tlo,$t2
        adc     $Thi,$Thi,$t3           @ T += h

        ldr     $t0,[sp,#$Foff+0]       @ f.lo
        ldr     $t1,[sp,#$Foff+4]       @ f.hi
        ldr     $t2,[sp,#$Goff+0]       @ g.lo
        ldr     $t3,[sp,#$Goff+4]       @ g.hi
        str     $Elo,[sp,#$Eoff+0]
        str     $Ehi,[sp,#$Eoff+4]
        str     $Alo,[sp,#$Aoff+0]
        str     $Ahi,[sp,#$Aoff+4]

        eor     $t0,$t0,$t2
        eor     $t1,$t1,$t3
        and     $t0,$t0,$Elo
        and     $t1,$t1,$Ehi
        eor     $t0,$t0,$t2
        eor     $t1,$t1,$t3             @ Ch(e,f,g)

        ldr     $t2,[$Ktbl,#4]          @ K[i].lo
        ldr     $t3,[$Ktbl,#0]          @ K[i].hi
        ldr     $Elo,[sp,#$Doff+0]      @ d.lo
        ldr     $Ehi,[sp,#$Doff+4]      @ d.hi

        adds    $Tlo,$Tlo,$t0
        adc     $Thi,$Thi,$t1           @ T += Ch(e,f,g)
        adds    $Tlo,$Tlo,$t2
        adc     $Thi,$Thi,$t3           @ T += K[i]
        adds    $Elo,$Elo,$Tlo
        adc     $Ehi,$Ehi,$Thi          @ d += T

        and     $t0,$t2,#0xff
        teq     $t0,#$magic
        orreq   $Ktbl,$Ktbl,#1

        ldr     $t2,[sp,#$Boff+0]       @ b.lo
        ldr     $t3,[sp,#$Coff+0]               @ c.lo
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
        @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
        mov     $t0,$Alo,lsr#28
        mov     $t1,$Ahi,lsr#28
        eor     $t0,$t0,$Ahi,lsl#4
        eor     $t1,$t1,$Alo,lsl#4
        eor     $t0,$t0,$Ahi,lsr#2
        eor     $t1,$t1,$Alo,lsr#2
        eor     $t0,$t0,$Alo,lsl#30
        eor     $t1,$t1,$Ahi,lsl#30
        eor     $t0,$t0,$Ahi,lsr#7
        eor     $t1,$t1,$Alo,lsr#7
        eor     $t0,$t0,$Alo,lsl#25
        eor     $t1,$t1,$Ahi,lsl#25     @ Sigma0(a)
        adds    $Tlo,$Tlo,$t0
        adc     $Thi,$Thi,$t1           @ T += Sigma0(a)

        and     $t0,$Alo,$t2
        orr     $Alo,$Alo,$t2
        ldr     $t1,[sp,#$Boff+4]       @ b.hi
        ldr     $t2,[sp,#$Coff+4]       @ c.hi
        and     $Alo,$Alo,$t3
        orr     $Alo,$Alo,$t0           @ Maj(a,b,c).lo
        and     $t3,$Ahi,$t1
        orr     $Ahi,$Ahi,$t1
        and     $Ahi,$Ahi,$t2
        orr     $Ahi,$Ahi,$t3           @ Maj(a,b,c).hi
        adds    $Alo,$Alo,$Tlo
        adc     $Ahi,$Ahi,$Thi          @ h += T

        sub     sp,sp,#8
        add     $Ktbl,$Ktbl,#8
___
}
# Start the generated assembly text. This first part holds:
#   - K512, the table of 80 64-bit round constants, each written as its
#     high 32-bit word followed by its low 32-bit word;
#   - the sha512_block_data_order entry point. Its prologue replaces $len
#     by $inp + ($len << 7), i.e. a pointer to the end of the input, saves
#     r4-r12 and lr, sets $Ktbl to the function's own address minus 640
#     (the size of K512, which immediately precedes it) and reserves 9*8
#     bytes of stack: eight working-variable slots plus the X slot;
#   - .Loop, which copies B, C, D, F, G and H into the stack frame while
#     A and E stay in $Alo/$Ahi and $Elo/$Ehi;
#   - the head of .L00_15, which builds one 64-bit message word from eight
#     input bytes, most significant byte first, and stores it at $Xoff.
143 $code=<<___;
144 .text
145 .code   32
146 .type   K512,%object
147 .align  5
148 K512:
149 .word   0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
150 .word   0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
151 .word   0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
152 .word   0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
153 .word   0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
154 .word   0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
155 .word   0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
156 .word   0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
157 .word   0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
158 .word   0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
159 .word   0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
160 .word   0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
161 .word   0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
162 .word   0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
163 .word   0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
164 .word   0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
165 .word   0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
166 .word   0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
167 .word   0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
168 .word   0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
169 .word   0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
170 .word   0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
171 .word   0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
172 .word   0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
173 .word   0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
174 .word   0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
175 .word   0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
176 .word   0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
177 .word   0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
178 .word   0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
179 .word   0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
180 .word   0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
181 .word   0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
182 .word   0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
183 .word   0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
184 .word   0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
185 .word   0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
186 .word   0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
187 .word   0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
188 .word   0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
189 .size   K512,.-K512
190
191 .global sha512_block_data_order
192 .type   sha512_block_data_order,%function
193 sha512_block_data_order:
194         sub     r3,pc,#8                @ sha512_block_data_order
195         add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
196         stmdb   sp!,{r4-r12,lr}
197         sub     $Ktbl,r3,#640           @ K512
198         sub     sp,sp,#9*8
199
200         ldr     $Elo,[$ctx,#$Eoff+$lo]
201         ldr     $Ehi,[$ctx,#$Eoff+$hi]
202         ldr     $t0, [$ctx,#$Goff+$lo]
203         ldr     $t1, [$ctx,#$Goff+$hi]
204         ldr     $t2, [$ctx,#$Hoff+$lo]
205         ldr     $t3, [$ctx,#$Hoff+$hi]
206 .Loop:
207         str     $t0, [sp,#$Goff+0]
208         str     $t1, [sp,#$Goff+4]
209         str     $t2, [sp,#$Hoff+0]
210         str     $t3, [sp,#$Hoff+4]
211         ldr     $Alo,[$ctx,#$Aoff+$lo]
212         ldr     $Ahi,[$ctx,#$Aoff+$hi]
213         ldr     $Tlo,[$ctx,#$Boff+$lo]
214         ldr     $Thi,[$ctx,#$Boff+$hi]
215         ldr     $t0, [$ctx,#$Coff+$lo]
216         ldr     $t1, [$ctx,#$Coff+$hi]
217         ldr     $t2, [$ctx,#$Doff+$lo]
218         ldr     $t3, [$ctx,#$Doff+$hi]
219         str     $Tlo,[sp,#$Boff+0]
220         str     $Thi,[sp,#$Boff+4]
221         str     $t0, [sp,#$Coff+0]
222         str     $t1, [sp,#$Coff+4]
223         str     $t2, [sp,#$Doff+0]
224         str     $t3, [sp,#$Doff+4]
225         ldr     $Tlo,[$ctx,#$Foff+$lo]
226         ldr     $Thi,[$ctx,#$Foff+$hi]
227         str     $Tlo,[sp,#$Foff+0]
228         str     $Thi,[sp,#$Foff+4]
229
230 .L00_15:
231         ldrb    $Tlo,[$inp,#7]
232         ldrb    $t0, [$inp,#6]
233         ldrb    $t1, [$inp,#5]
234         ldrb    $t2, [$inp,#4]
235         ldrb    $Thi,[$inp,#3]
236         ldrb    $t3, [$inp,#2]
237         orr     $Tlo,$Tlo,$t0,lsl#8
238         ldrb    $t0, [$inp,#1]
239         orr     $Tlo,$Tlo,$t1,lsl#16
240         ldrb    $t1, [$inp],#8
241         orr     $Tlo,$Tlo,$t2,lsl#24
242         orr     $Thi,$Thi,$t3,lsl#8
243         orr     $Thi,$Thi,$t0,lsl#16
244         orr     $Thi,$Thi,$t1,lsl#24
245         str     $Tlo,[sp,#$Xoff+0]
246         str     $Thi,[sp,#$Xoff+4]
247 ___
# Emit the round body for the .L00_15 loop. 0x94 is the low byte of the
# 16th table entry (0xc19bf174,0xcf692694); no earlier entry has that
# low byte, so the round body flags the loop's last iteration with it.
248         &BODY_00_15(0x94);
# Close the .L00_15 loop: branch back until the round body has set bit 0
# of $Ktbl, then clear that flag bit again.
#
# .L16_79 then computes each further message word as
#   sigma0(X[i-15]) + sigma1(X[i-2]) + X[i-7] + X[i-16]
# and stores it at $Xoff before the round body runs. Because the round
# body lowers sp by 8 per round, the word stored j rounds earlier is
# found at $Xoff+8*j. The backticked offset expressions are replaced by
# their numeric values by the substitution at the end of the script.
249 $code.=<<___;
250         tst     $Ktbl,#1
251         beq     .L00_15
252         bic     $Ktbl,$Ktbl,#1
253
254 .L16_79:
255         ldr     $t0,[sp,#`$Xoff+8*(16-1)`+0]
256         ldr     $t1,[sp,#`$Xoff+8*(16-1)`+4]
257         ldr     $t2,[sp,#`$Xoff+8*(16-14)`+0]
258         ldr     $t3,[sp,#`$Xoff+8*(16-14)`+4]
259
260         @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
261         @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
262         @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
263         mov     $Tlo,$t0,lsr#1
264         mov     $Thi,$t1,lsr#1
265         eor     $Tlo,$Tlo,$t1,lsl#31
266         eor     $Thi,$Thi,$t0,lsl#31
267         eor     $Tlo,$Tlo,$t0,lsr#8
268         eor     $Thi,$Thi,$t1,lsr#8
269         eor     $Tlo,$Tlo,$t1,lsl#24
270         eor     $Thi,$Thi,$t0,lsl#24
271         eor     $Tlo,$Tlo,$t0,lsr#7
272         eor     $Thi,$Thi,$t1,lsr#7
273         eor     $Tlo,$Tlo,$t1,lsl#25
274
275         @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
276         @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
277         @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
278         mov     $t0,$t2,lsr#19
279         mov     $t1,$t3,lsr#19
280         eor     $t0,$t0,$t3,lsl#13
281         eor     $t1,$t1,$t2,lsl#13
282         eor     $t0,$t0,$t3,lsr#29
283         eor     $t1,$t1,$t2,lsr#29
284         eor     $t0,$t0,$t2,lsl#3
285         eor     $t1,$t1,$t3,lsl#3
286         eor     $t0,$t0,$t2,lsr#6
287         eor     $t1,$t1,$t3,lsr#6
288         eor     $t0,$t0,$t3,lsl#26
289
290         ldr     $t2,[sp,#`$Xoff+8*(16-9)`+0]
291         ldr     $t3,[sp,#`$Xoff+8*(16-9)`+4]
292         adds    $Tlo,$Tlo,$t0
293         adc     $Thi,$Thi,$t1
294
295         ldr     $t0,[sp,#`$Xoff+8*16`+0]
296         ldr     $t1,[sp,#`$Xoff+8*16`+4]
297         adds    $Tlo,$Tlo,$t2
298         adc     $Thi,$Thi,$t3
299         adds    $Tlo,$Tlo,$t0
300         adc     $Thi,$Thi,$t1
301         str     $Tlo,[sp,#$Xoff+0]
302         str     $Thi,[sp,#$Xoff+4]
303 ___
# Emit the round body for the .L16_79 loop. 0x17 is the low byte of the
# final table entry (0x6c44198c,0x4a475817); no entry between the 17th
# and the 79th has that low byte.
304         &BODY_00_15(0x17);
# Close the .L16_79 loop, then fold the working variables into the state
# at $ctx, two 64-bit words at a time: a comes from $Alo/$Ahi, e from
# $Elo/$Ehi, and b, c, d, f, g, h from their stack slots. The updated E
# is left in $Elo/$Ehi and the updated G and H in $t0-$t3, where .Loop
# expects them for the next block.
#
# The "add sp,sp,#640" / "sub $Ktbl,$Ktbl,#640" pair undoes the 8-byte
# steps taken by the 80 rounds. Processing repeats from .Loop until $inp
# equals the end pointer in $len. The epilogue releases the frame,
# restores r4-r12 and lr, and returns with "moveq pc,lr" when bit 0 of lr
# is clear and with "bx lr" otherwise.
305 $code.=<<___;
306         tst     $Ktbl,#1
307         beq     .L16_79
308         bic     $Ktbl,$Ktbl,#1
309
310         ldr     $Tlo,[sp,#$Boff+0]
311         ldr     $Thi,[sp,#$Boff+4]
312         ldr     $t0, [$ctx,#$Aoff+$lo]
313         ldr     $t1, [$ctx,#$Aoff+$hi]
314         ldr     $t2, [$ctx,#$Boff+$lo]
315         ldr     $t3, [$ctx,#$Boff+$hi]
316         adds    $t0,$Alo,$t0
317         adc     $t1,$Ahi,$t1
318         adds    $t2,$Tlo,$t2
319         adc     $t3,$Thi,$t3
320         str     $t0, [$ctx,#$Aoff+$lo]
321         str     $t1, [$ctx,#$Aoff+$hi]
322         str     $t2, [$ctx,#$Boff+$lo]
323         str     $t3, [$ctx,#$Boff+$hi]
324
325         ldr     $Alo,[sp,#$Coff+0]
326         ldr     $Ahi,[sp,#$Coff+4]
327         ldr     $Tlo,[sp,#$Doff+0]
328         ldr     $Thi,[sp,#$Doff+4]
329         ldr     $t0, [$ctx,#$Coff+$lo]
330         ldr     $t1, [$ctx,#$Coff+$hi]
331         ldr     $t2, [$ctx,#$Doff+$lo]
332         ldr     $t3, [$ctx,#$Doff+$hi]
333         adds    $t0,$Alo,$t0
334         adc     $t1,$Ahi,$t1
335         adds    $t2,$Tlo,$t2
336         adc     $t3,$Thi,$t3
337         str     $t0, [$ctx,#$Coff+$lo]
338         str     $t1, [$ctx,#$Coff+$hi]
339         str     $t2, [$ctx,#$Doff+$lo]
340         str     $t3, [$ctx,#$Doff+$hi]
341
342         ldr     $Tlo,[sp,#$Foff+0]
343         ldr     $Thi,[sp,#$Foff+4]
344         ldr     $t0, [$ctx,#$Eoff+$lo]
345         ldr     $t1, [$ctx,#$Eoff+$hi]
346         ldr     $t2, [$ctx,#$Foff+$lo]
347         ldr     $t3, [$ctx,#$Foff+$hi]
348         adds    $Elo,$Elo,$t0
349         adc     $Ehi,$Ehi,$t1
350         adds    $t2,$Tlo,$t2
351         adc     $t3,$Thi,$t3
352         str     $Elo,[$ctx,#$Eoff+$lo]
353         str     $Ehi,[$ctx,#$Eoff+$hi]
354         str     $t2, [$ctx,#$Foff+$lo]
355         str     $t3, [$ctx,#$Foff+$hi]
356
357         ldr     $Alo,[sp,#$Goff+0]
358         ldr     $Ahi,[sp,#$Goff+4]
359         ldr     $Tlo,[sp,#$Hoff+0]
360         ldr     $Thi,[sp,#$Hoff+4]
361         ldr     $t0, [$ctx,#$Goff+$lo]
362         ldr     $t1, [$ctx,#$Goff+$hi]
363         ldr     $t2, [$ctx,#$Hoff+$lo]
364         ldr     $t3, [$ctx,#$Hoff+$hi]
365         adds    $t0,$Alo,$t0
366         adc     $t1,$Ahi,$t1
367         adds    $t2,$Tlo,$t2
368         adc     $t3,$Thi,$t3
369         str     $t0, [$ctx,#$Goff+$lo]
370         str     $t1, [$ctx,#$Goff+$hi]
371         str     $t2, [$ctx,#$Hoff+$lo]
372         str     $t3, [$ctx,#$Hoff+$hi]
373
374         add     sp,sp,#640
375         sub     $Ktbl,$Ktbl,#640
376
377         teq     $inp,$len
378         bne     .Loop
379
380         add     sp,sp,#8*9              @ destroy frame
381         ldmia   sp!,{r4-r12,lr}
382         tst     lr,#1
383         moveq   pc,lr                   @ be binary compatible with V4, yet
384         bx      lr                      @ interoperable with Thumb ISA:-)
385 .size   sha512_block_data_order,.-sha512_block_data_order
386 .asciz  "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
387 ___
388
# Post-process the accumulated assembly text and write it to STDOUT.
#
# Every `...` span in $code holds a Perl arithmetic expression (the
# .L16_79 loads use them for their stack offsets); replace each span with
# the value the expression evaluates to.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Output is normally redirected into an assembly file. A failed write
# (full disk, closed pipe) would otherwise leave a truncated file behind
# while the script still exits successfully, so check both the print and
# the flush performed by close.
print $code or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";