0c9d999de998c5f11b5bfb53f320afa5c8e255c6
[openssl.git] / crypto / sha / asm / sha256-armv4.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # SHA256 block procedure for ARMv4. May 2007.
11
12 # Performance is ~2x better than gcc 3.4 generated code and in
13 # "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
14 # byte.
15
# The first command-line argument names the file that receives the
# generated assembly; STDOUT is redirected there so that the single
# "print $code" at the end of the script lands in that file.
# With no argument STDOUT is left as it is.  A failed open is fatal:
# continuing would only produce an empty or missing output file.
16 $output=shift;
17 if (defined $output) { open STDOUT,">$output" or die "can't open $output: $!"; }
18
# Register allocation for the generated code.  These are package globals
# that the assembly templates below interpolate by name.
#
# $ctx, $inp and $len name r0, r1 and r2.  $t0 and $t1 name the same
# physical registers as $ctx and $len (r0 and r2), so the round code
# overwrites both; the generated prologue stores them in the stack frame
# first.
19 $ctx="r0";      $t0="r0";
20 $inp="r1";
21 $len="r2";      $t1="r2";
# $T1 accumulates the per-round temporary (message word + Sigma1 + Ch + h + K).
22 $T1="r3";
# r4-r11 hold the eight working variables.
23 $A="r4";
24 $B="r5";
25 $C="r6";
26 $D="r7";
27 $E="r8";
28 $F="r9";
29 $G="r10";
30 $H="r11";
# @V is rotated by one position after every emitted round, so the roles
# a..h move between registers instead of the values being copied.
31 @V=($A,$B,$C,$D,$E,$F,$G,$H);
32 $t2="r12";
# r14 (lr) is used as the pointer into the K256 table.
33 $Ktbl="r14";
34
# Rotate/shift amounts.  In the templates all three Sigma0/Sigma1 entries
# are used with "ror"; for sigma0/sigma1 the first two are used with "ror"
# and the third with "lsr" (a logical shift, not a rotate).
35 @Sigma0=( 2,13,22);
36 @Sigma1=( 6,11,25);
37 @sigma0=( 7,18, 3);
38 @sigma1=(17,19,10);
39
# BODY_00_15 - append the assembly for one SHA-256 round to $code.
#
#   $i        round number; selects the X[] stack slot ($i%16) and, when
#             $i<16, enables the input-loading part below
#   $a..$h    names of the registers that currently play the roles of the
#             working variables a..h (callers pass @V, which they rotate
#             after every round)
#
# Nothing is returned; the only effect is the text appended to $code.
# The scratch registers $t0/$t1 are the same registers as $ctx/$len, so the
# emitted code clobbers r0 and r2.
40 sub BODY_00_15 {
41 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
42
# Rounds 0..15 only: fetch the next input word with four byte loads and
# assemble it into $T1 with the byte at offset 0 in the top 8 bits (i.e.
# big-endian).  The last ldrb post-increments $inp by 4.
# The backtick-quoted line is a Perl expression evaluated by the
# substitution at the bottom of the file: in round 15 it emits a store of
# the advanced $inp into frame slot 17 (BODY_16_XX reuses that register as
# scratch); in every other round it contributes no instruction.
43 $code.=<<___ if ($i<16);
44         ldrb    $T1,[$inp,#3]                   @ $i
45         ldrb    $t2,[$inp,#2]
46         ldrb    $t1,[$inp,#1]
47         ldrb    $t0,[$inp],#4
48         orr     $T1,$T1,$t2,lsl#8
49         orr     $T1,$T1,$t1,lsl#16
50         orr     $T1,$T1,$t0,lsl#24
51         `"str   $inp,[sp,#17*4]"        if ($i==15)`
52 ___
# Common part, $T1 holds the message word on entry:
#   - load the next K256 constant into $t2 (post-incrementing $Ktbl) and
#     store the message word to X[$i%16] on the stack;
#   - T1 += Sigma1(e) + Ch(e,f,g) + h + K;
#   - h = Sigma0(a) + Maj(a,b,c) + T1 and d += T1.
# The register named $h therefore ends up holding the new "a"; the caller's
# rotation of @V accounts for that.  $t2 is not modified after the constant
# is loaded, which the round-loop termination test in the main template
# relies on.
53 $code.=<<___;
54         ldr     $t2,[$Ktbl],#4                  @ *K256++
55         str     $T1,[sp,#`$i%16`*4]
56         mov     $t0,$e,ror#$Sigma1[0]
57         eor     $t0,$t0,$e,ror#$Sigma1[1]
58         eor     $t0,$t0,$e,ror#$Sigma1[2]       @ Sigma1(e)
59         add     $T1,$T1,$t0
60         eor     $t1,$f,$g
61         and     $t1,$t1,$e
62         eor     $t1,$t1,$g                      @ Ch(e,f,g)
63         add     $T1,$T1,$t1
64         add     $T1,$T1,$h
65         add     $T1,$T1,$t2
66         mov     $h,$a,ror#$Sigma0[0]
67         eor     $h,$h,$a,ror#$Sigma0[1]
68         eor     $h,$h,$a,ror#$Sigma0[2]         @ Sigma0(a)
69         orr     $t0,$a,$b
70         and     $t0,$t0,$c
71         and     $t1,$a,$b
72         orr     $t0,$t0,$t1                     @ Maj(a,b,c)
73         add     $h,$h,$t0
74         add     $d,$d,$T1
75         add     $h,$h,$T1
76 ___
77 }
78
# BODY_16_XX - append the assembly for one round that takes its message
# word from the schedule instead of from the input.
#
# Takes the same arguments as BODY_00_15 ($i, then the register names for
# a..h) and forwards them unchanged.  The emitted code first computes
#
#   T1 = X[i] + sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]    (indices mod 16)
#
# from the 16-word window kept on the stack, then BODY_00_15 emits the
# round itself; since $i>=16 there, it skips the input load and its
# "str $T1,[sp,#($i%16)*4]" writes the new word back over X[i].
#
# $inp is used here as a scratch register for X[i+9]; the real input
# pointer was saved to the stack frame by the round-15 code.
79 sub BODY_16_XX {
80 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
81
82 $code.=<<___;
83         ldr     $t1,[sp,#`($i+1)%16`*4] @ $i
84         ldr     $t2,[sp,#`($i+14)%16`*4]
85         ldr     $T1,[sp,#`($i+0)%16`*4]
86         ldr     $inp,[sp,#`($i+9)%16`*4]
87         mov     $t0,$t1,ror#$sigma0[0]
88         eor     $t0,$t0,$t1,ror#$sigma0[1]
89         eor     $t0,$t0,$t1,lsr#$sigma0[2]      @ sigma0(X[i+1])
90         mov     $t1,$t2,ror#$sigma1[0]
91         eor     $t1,$t1,$t2,ror#$sigma1[1]
92         eor     $t1,$t1,$t2,lsr#$sigma1[2]      @ sigma1(X[i+14])
93         add     $T1,$T1,$t0
94         add     $T1,$T1,$t1
95         add     $T1,$T1,$inp
96 ___
97         &BODY_00_15(@_);
98 }
99
# Start of the generated assembly: the K256 constant table (64 words =
# 256 bytes) immediately followed by the entry of sha256_block_data_order.
#
# The entry code takes the function's own address into r3 (see the inline
# comment on the first instruction) and subtracts 256 to obtain the address
# of K256, so the table must stay directly in front of the function.
#
# Stack frame after the prologue, as used by the rest of the template:
#   sp + 0    .. sp + 15*4   X[16], the message-schedule window
#   sp + 16*4                saved $ctx
#   sp + 17*4                saved $inp (rewritten in round 15)
#   sp + 18*4                saved $len, already converted to "end of input"
#   above that               r4-r12, lr
100 $code=<<___;
101 .text
102 .code   32
103
104 .type   K256,%object
105 .align  5
106 K256:
107 .word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
108 .word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
109 .word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
110 .word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
111 .word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
112 .word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
113 .word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
114 .word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
115 .word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
116 .word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
117 .word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
118 .word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
119 .word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
120 .word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
121 .word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
122 .word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
123 .size   K256,.-K256
124
125 .global sha256_block_data_order
126 .type   sha256_block_data_order,%function
127 sha256_block_data_order:
128         sub     r3,pc,#8                @ sha256_block_data_order
129         add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
130         stmdb   sp!,{$ctx,$inp,$len,r4-r12,lr}
131         ldmia   $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
132         sub     $Ktbl,r3,#256           @ K256
133         sub     sp,sp,#16*4             @ alloca(X[16])
134 .Loop:
135 ___
# Rounds 0..15 are emitted fully unrolled.  After each round @V is rotated
# right by one, so the register that received the new "a" becomes $a for
# the next round; after 16 rotations of the 8-element list it is back in
# its original order.
136 for($i=0;$i<16;$i++)    { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
# Only 16 further rounds are emitted, under the label .Lrounds_16_xx.
# The generated code branches back to this label (see the "bne" at the top
# of the template that follows) until the last K256 constant has been
# consumed, so the same 16 rounds execute repeatedly at run time.
137 $code.=".Lrounds_16_xx:\n";
138 for (;$i<32;$i++)       { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Tail of the generated function.
#
# Round-loop control: $t2 still holds the K256 constant loaded by the last
# emitted round.  Its low byte is compared with 0xf2, the low byte of the
# final table entry 0xc67178f2; the constants seen at the two earlier
# passes through this test (0x14292967 and 0x106aa070) end in other bytes,
# so the branch back to .Lrounds_16_xx is taken until the table is
# exhausted.
#
# Then: reload the context pointer from the frame, add the eight working
# variables to the stored state and write it back, reload the input pointer
# and the end-of-input pointer, rewind $Ktbl by the 256 bytes consumed, and
# loop to .Loop while input remains.
#
# Exit: `16+3` words are dropped from the stack (X[16] plus the three saved
# argument registers) before r4-r12 and lr are restored.
139 $code.=<<___;
140         and     $t2,$t2,#0xff
141         cmp     $t2,#0xf2
142         bne     .Lrounds_16_xx
143
144         ldr     $T1,[sp,#16*4]          @ pull ctx
145         ldr     $t0,[$T1,#0]
146         ldr     $t1,[$T1,#4]
147         ldr     $t2,[$T1,#8]
148         add     $A,$A,$t0
149         ldr     $t0,[$T1,#12]
150         add     $B,$B,$t1
151         ldr     $t1,[$T1,#16]
152         add     $C,$C,$t2
153         ldr     $t2,[$T1,#20]
154         add     $D,$D,$t0
155         ldr     $t0,[$T1,#24]
156         add     $E,$E,$t1
157         ldr     $t1,[$T1,#28]
158         add     $F,$F,$t2
159         ldr     $inp,[sp,#17*4]         @ pull inp
160         ldr     $t2,[sp,#18*4]          @ pull inp+len
161         add     $G,$G,$t0
162         add     $H,$H,$t1
163         stmia   $T1,{$A,$B,$C,$D,$E,$F,$G,$H}
164         cmp     $inp,$t2
165         sub     $Ktbl,$Ktbl,#256        @ rewind Ktbl
166         bne     .Loop
167
168         add     sp,sp,#`16+3`*4 @ destroy frame
169         ldmia   sp!,{r4-r12,lr}
170         tst     lr,#1
171         moveq   pc,lr                   @ be binary compatible with V4, yet
172         bx      lr                      @ interoperable with Thumb ISA:-)
173 .size   sha256_block_data_order,.-sha256_block_data_order
174 .asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
175 ___
176
# Post-process the template: every `...` span is a Perl expression (for
# example `16+3`, or the round-15-only store in BODY_00_15) that is
# evaluated with string eval and replaced by its value.
177 $code =~ s/\`([^\`]*)\`/eval $1/gem;
178 print $code;
# Buffered write errors (disk full, quota, ...) only surface when the
# handle is closed.  Treat them as fatal so that a truncated assembly file
# is never mistaken for a successful run.
179 close STDOUT or die "error closing STDOUT: $!"; # enforce flush