SHA256 for ARMv4.
[openssl.git] / crypto / sha / asm / sha256-armv4.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # SHA256 block procedure for ARMv4.
11
12 # Performance is ~2x better than gcc 3.4 generated code.
13
# Register allocation for the generated ARM code.  $t0/$t1 alias $ctx/$len:
# the context and length arguments are only needed in the prologue/epilogue,
# so their registers are reused as scratch temporaries inside the rounds.
$ctx="r0";      $t0="r0";
$inp="r1";
$len="r2";      $t1="r2";
$T1="r3";
# The eight SHA-256 working variables a..h live in r4-r11 for the whole loop.
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";    # lr doubles as the running pointer into the K256 table

# Rotate/shift amounts for the SHA-256 Sigma/sigma functions (FIPS 180-2).
# NOTE: the last element of @sigma0/@sigma1 is a logical shift (lsr), not a
# rotate -- see the lsr vs ror operands in BODY_16_XX.
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
34
# Emit the assembly for one SHA-256 round.  Used directly for rounds 0..15
# and reused as the tail of every scheduled round via BODY_16_XX.  Arguments
# are the round number and the current rotation of the eight working
# variables; the generated text is appended to the global $code.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

# Rounds 0..15 only: fetch the message word byte-by-byte (the input is not
# guaranteed to be 32-bit aligned) and assemble it big-endian in $T1.  After
# the final load ($i==15) $inp is spilled to the stack so r1 is free as a
# temporary during the scheduled rounds.
$code.=<<___ if ($i<16);
	ldrb	$T1,[$inp,#3]			@ $i
	ldrb	$t2,[$inp,#2]
	ldrb	$t1,[$inp,#1]
	ldrb	$t0,[$inp],#4
	orr	$T1,$T1,$t2,lsl#8
	orr	$T1,$T1,$t1,lsl#16
	orr	$T1,$T1,$t0,lsl#24
	`"str   $inp,[sp,#17*4]"	if ($i==15)`
___
# Common round body:
#   T1 = X[i] + Sigma1(e) + Ch(e,f,g) + h + K[i];  d += T1;
#   h  = Sigma0(a) + Maj(a,b,c) + T1.
# X[i] is also stored into the 16-word circular window at sp for later
# message-schedule expansion.
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	str	$T1,[sp,#`$i%16`*4]
	mov	$t0,$e,ror#$Sigma1[0]
	eor	$t0,$t0,$e,ror#$Sigma1[1]
	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)
	add	$T1,$T1,$t0
	eor	$t1,$f,$g
	and	$t1,$t1,$e
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	add	$T1,$T1,$t1
	add	$T1,$T1,$h
	add	$T1,$T1,$t2
	mov	$h,$a,ror#$Sigma0[0]
	eor	$h,$h,$a,ror#$Sigma0[1]
	eor	$h,$h,$a,ror#$Sigma0[2]		@ Sigma0(a)
	orr	$t0,$a,$b
	and	$t0,$t0,$c
	and	$t1,$a,$b
	orr	$t0,$t0,$t1			@ Maj(a,b,c)
	add	$h,$h,$t0
	add	$d,$d,$T1
	add	$h,$h,$T1
___
}
73
# Emit the message-schedule expansion for rounds 16 and up:
#   X[i] = sigma1(X[i+14]) + X[i+9] + sigma0(X[i+1]) + X[i]
# with the 16-word window kept in the stack frame, then fall through to the
# common round body in BODY_00_15 (which also stores the new X[i] back).
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	ldr	$t1,[sp,#`($i+1)%16`*4]	@ $i
	ldr	$t2,[sp,#`($i+14)%16`*4]
	ldr	$T1,[sp,#`($i+0)%16`*4]
	ldr	$inp,[sp,#`($i+9)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	mov	$t1,$t2,ror#$sigma1[0]
	eor	$t1,$t1,$t2,ror#$sigma1[1]
	eor	$t1,$t1,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
	add	$T1,$T1,$t0
	add	$T1,$T1,$t1
	add	$T1,$T1,$inp
___
	&BODY_00_15(@_);
}
94
# Static preamble of the output: the K256 round-constant table (placed
# immediately before the function so it can be addressed pc-relative) and
# the sha256_block_data_order prologue -- save arguments and callee-saved
# registers, load the eight-word hash state, and carve a 16-word X[] frame
# off the stack.
$code=<<___;
.text
.code	32

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
	sub	r3,pc,#8		@ sha256_block_data_order
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{$ctx,$inp,$len,r4-r12,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256		@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
___
# Unroll the first 16 rounds (which also load the message block), then emit
# two more iterations' worth of scheduled rounds under the .Lrounds_16_xx
# label; the run-time loop re-enters that label until all 64 rounds are
# done.  The working variables are rotated after every round so each BODY_*
# call receives them in the proper a..h order.
for my $round (0 .. 15) {
	BODY_00_15($round, @V);
	unshift @V, pop @V;
}
$code .= ".Lrounds_16_xx:\n";
for my $round (16 .. 31) {
	BODY_16_XX($round, @V);
	unshift @V, pop @V;
}
# Loop epilogue: $t2 still holds the last K256 constant consumed, so the
# low byte 0xf2 (from 0xc67178f2) marks the end of the 64-round schedule.
# Then fold the working variables back into the context, rewind the table
# pointer, and iterate while input remains.
#
# Fix: the .size directive previously computed the function size relative
# to "sha1_block_data_order" -- a symbol that does not exist in this file,
# which makes assemblers reject the output (or record a bogus size).  It
# must reference this function's own symbol.
$code.=<<___;
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2
	bne	.Lrounds_16_xx

	ldr	$T1,[sp,#16*4]		@ pull ctx
	ldr	$t0,[$T1,#0]
	ldr	$t1,[$T1,#4]
	ldr	$t2,[$T1,#8]
	add	$A,$A,$t0
	ldr	$t0,[$T1,#12]
	add	$B,$B,$t1
	ldr	$t1,[$T1,#16]
	add	$C,$C,$t2
	ldr	$t2,[$T1,#20]
	add	$D,$D,$t0
	ldr	$t0,[$T1,#24]
	add	$E,$E,$t1
	ldr	$t1,[$T1,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$T1,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	sha256_block_data_order,.-sha256_block_data_order
.asciz	"SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
___
171
# Post-process the template: evaluate every `...`-quoted Perl fragment
# embedded in the assembly (stack-offset arithmetic, conditional strings)
# and write the finished assembler source to stdout.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;