sha1-s390x.pl: lingering comment update.
[openssl.git] / crypto / sha / asm / sha1-thumb.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # sha1_block for Thumb.
11 #
12 # January 2007.
13 #
14 # The code does not present direct interest to OpenSSL, because of low
15 # performance. Its purpose is to establish _size_ benchmark. Pretty
16 # useless one I must say, because 30% or 88 bytes larger ARMv4 code
17 # [available on demand] is almost _twice_ as fast. It should also be
18 # noted that in-lining of .Lcommon and .Lrotate improves performance
19 # by over 40%, while code increases by only 10% or 32 bytes. But once
20 # again, the goal was to establish _size_ benchmark, not performance.
21
# The output file name is the first command-line argument.  When no
# (or an empty) name is given STDOUT is left untouched, so the assembly
# goes wherever the caller pointed it.  When a name is given, failing
# to create the file is fatal: silently carrying on would leave the
# build without the generated file and with no diagnostic.
$output=shift;
if (defined($output) && length($output)) {
    open STDOUT,">$output" or die "can't open $output: $!";
}
24
# Generation switches.
#   $inline            - when true the round bodies expand common() and
#                        rotate() in place; when false they emit
#                        "bl .Lcommon" / "bl .Lrotate" and one shared
#                        copy of each is appended after the function.
#   $cheat_on_binutils - left disabled; when set, an ARM-mode stub that
#                        switches to Thumb is emitted at the entry point.
$inline=0;
#$cheat_on_binutils=1;

# Register map.  r0-r2 are scratch, r3-r7 carry the five SHA-1 working
# variables.  r8-r12 are "upper" registers; the code below only ever
# touches them with add, cmp and mov.
($t0,$t1,$t2)=("r0","r1","r2");
($a,$b,$c,$d,$e)=("r3","r4","r5","r6","r7");
($K,$ctx,$inp,$len,$Xi)=("r8","r9","r10","r11","r12");
41
# common() returns the instruction sequence that opens every round.
# It steps the X[] pointer in $t0 down by one word, loads that word
# into $t1, and builds E + K + (A<<5) + (A>>27) + X[i] in $t2.
# $e is used as scratch along the way and ends up holding A>>27.
# The text is either pasted into each round ($inline) or emitted once
# under the .Lcommon label and reached with "bl".
42 sub common {
43 <<___;
44         sub     $t0,#4
45         ldr     $t1,[$t0]
46         add     $e,$K                   @ E+=K_xx_xx
47         lsl     $t2,$a,#5
48         add     $t2,$e
49         lsr     $e,$a,#27
50         add     $t2,$e                  @ E+=ROR(A,27)
51         add     $t2,$t1                 @ E+=X[i]
52 ___
53 }
# rotate() returns the instruction sequence that closes every round.
# It shifts the working variables down one position (E=D, D=C,
# C=B rotated right by 2, B=A) and sets the new A to $t2 + $t1, i.e.
# the sum left by common() plus the round function value the caller
# placed in $t1.  Like common(), the text is either inlined or emitted
# once under the .Lrotate label.
54 sub rotate {
55 <<___;
56         mov     $e,$d                   @ E=D
57         mov     $d,$c                   @ D=C
58         lsl     $c,$b,#30
59         lsr     $b,$b,#2
60         orr     $c,$b                   @ C=ROR(B,2)
61         mov     $b,$a                   @ B=A
62         add     $a,$t2,$t1              @ A=E+F_xx_xx(B,C,D)
63 ___
64 }
65
# Append one round of the 0..19 group to $code.
# The round function left in $t1 is ((C ^ D) & B) ^ D.
sub BODY_00_19 {
    # Opening half of the round: expanded in place or called.
    if ($inline) {
        $code.=&common();
    } else {
        $code.="\tbl  .Lcommon\n";
    }

    $code.=<<___;
        mov     $t1,$c
        eor     $t1,$d
        and     $t1,$b
        eor     $t1,$d                  @ F_00_19(B,C,D)
___

    # Closing half of the round: expanded in place or called.
    if ($inline) {
        $code.=&rotate();
    } else {
        $code.="\tbl  .Lrotate\n";
    }
}
76
# Append one round of the 20..39 group to $code.
# The round function left in $t1 is B ^ C ^ D.  The loop built around
# this body is also re-entered for the last twenty rounds.
sub BODY_20_39 {
    # Opening half of the round: expanded in place or called.
    if ($inline) {
        $code.=&common();
    } else {
        $code.="\tbl  .Lcommon\n";
    }

    $code.=<<___;
        mov     $t1,$b
        eor     $t1,$c
        eor     $t1,$d                  @ F_20_39(B,C,D)
___

    # Closing half of the round: expanded in place or called.
    if ($inline) {
        $code.=&rotate();
    } else {
        $code.="\tbl  .Lrotate\n";
    }
}
86
# Append one round of the 40..59 group to $code.
# The round function left in $t1 is (B & C) | ((B | C) & D).  $e serves
# as a temporary here: common() has already added E into $t2, and
# rotate() reloads $e from $d afterwards.
sub BODY_40_59 {
    # Opening half of the round: expanded in place or called.
    if ($inline) {
        $code.=&common();
    } else {
        $code.="\tbl  .Lcommon\n";
    }

    $code.=<<___;
        mov     $t1,$b
        and     $t1,$c
        mov     $e,$b
        orr     $e,$c
        and     $e,$d
        orr     $t1,$e                  @ F_40_59(B,C,D)
___

    # Closing half of the round: expanded in place or called.
    if ($inline) {
        $code.=&rotate();
    } else {
        $code.="\tbl  .Lrotate\n";
    }
}
99
# Start the generated file: switch the assembler to 16-bit (Thumb)
# encoding, export sha1_block_data_order as a function symbol and
# place its entry label on a 4-byte boundary.
100 $code=<<___;
101 .text
102 .code   16
103
104 .global sha1_block_data_order
105 .type   sha1_block_data_order,%function
106
107 .align  2
108 sha1_block_data_order:
109 ___
# Optional entry stub, only emitted when $cheat_on_binutils is set:
# two ARM-mode (.code 32) instructions that branch to pc+1 with bx,
# which per the emitted comment switches to the Thumb ISA; the rest of
# the function is then assembled as Thumb again.
110 if ($cheat_on_binutils) {
111 $code.=<<___;
112 .code   32
113         add     r3,pc,#1
114         bx      r3                      @ switch to Thumb ISA
115 .code   16
116 ___
117 }
# Function body.  Per the comments in the emitted code the arguments
# arrive as r0 = ctx, r1 = inp, r2 = len.
#
#   prologue - pushes r4-r7, then r8-r12 (copied through r3-r7) and lr.
#              len is shifted left by 6 and added to inp, so $len ends
#              up as the end-of-input pointer.
#              NOTE(review): the shift suggests len counts 64-byte
#              blocks - confirm against the C caller.
#   .Lloop   - .LXload builds 16 words from the input bytes, most
#              significant byte first, and pushes each one; .LXupdate
#              then pushes 64 more words, each the XOR of four earlier
#              words rotated right by 31.  These 80 stack words are
#              released by "add sp,#80*4" after .Ldone.
#   rounds   - the five state words are loaded from ctx into $a..$e and
#              $t0 walks down through the pushed words, one per round
#              (see common()); $Xi marks where each 20-round loop stops.
#   .Ldone   - adds the words stored at ctx to $a..$e and writes the
#              sums back, then loops until inp reaches $len.
#   .Lexit   - restores r8-r12, lr and r4-r7 and returns with bx lr.
#
# NOTE(review): the end-of-input test sits at the bottom of the loop,
# so one block is processed before it is first evaluated; callers
# presumably never pass a zero len - confirm.
118 $code.=<<___;
119         push    {r4-r7}
120         mov     r3,r8
121         mov     r4,r9
122         mov     r5,r10
123         mov     r6,r11
124         mov     r7,r12
125         push    {r3-r7,lr}
126         lsl     r2,#6
127         mov     $ctx,r0                 @ save context
128         mov     $inp,r1                 @ save inp
129         mov     $len,r2                 @ save len
130         add     $len,$inp               @ $len to point at inp end
131
132 .Lloop:
133         mov     $Xi,sp
134         mov     $t2,sp
135         sub     $t2,#16*4               @ [3]
136 .LXload:
137         ldrb    $a,[$t1,#0]             @ $t1 is r1 and holds inp
138         ldrb    $b,[$t1,#1]
139         ldrb    $c,[$t1,#2]
140         ldrb    $d,[$t1,#3]
141         lsl     $a,#24
142         lsl     $b,#16
143         lsl     $c,#8
144         orr     $a,$b
145         orr     $a,$c
146         orr     $a,$d
147         add     $t1,#4
148         push    {$a}
149         cmp     sp,$t2
150         bne     .LXload                 @ [+14*16]
151
152         mov     $inp,$t1                @ update $inp
153         sub     $t2,#32*4
154         sub     $t2,#32*4
155         mov     $e,#31                  @ [+4]
156 .LXupdate:
157         ldr     $a,[sp,#15*4]
158         ldr     $b,[sp,#13*4]
159         ldr     $c,[sp,#7*4]
160         ldr     $d,[sp,#2*4]
161         eor     $a,$b
162         eor     $a,$c
163         eor     $a,$d
164         ror     $a,$e
165         push    {$a}
166         cmp     sp,$t2
167         bne     .LXupdate               @ [+(11+1)*64]
168
169         ldmia   $t0!,{$a,$b,$c,$d,$e}   @ $t0 is r0 and holds ctx
170         mov     $t0,$Xi
171
172         ldr     $t2,.LK_00_19
173         mov     $t1,$t0
174         sub     $t1,#20*4
175         mov     $Xi,$t1
176         mov     $K,$t2                  @ [+7+4]
177 .L_00_19:
178 ___
# One round of the first group; the cmp/bne that follows repeats it
# until $t0 has moved down 20 words to $Xi.
179         &BODY_00_19();
180 $code.=<<___;
181         cmp     $Xi,$t0
182         bne     .L_00_19                @ [+(2+9+4+2+8+2)*20]
183
184         ldr     $t2,.LK_20_39
185         mov     $t1,$t0
186         sub     $t1,#20*4
187         mov     $Xi,$t1
188         mov     $K,$t2                  @ [+5]
189 .L_20_39_or_60_79:
190 ___
# One round of the second group.  The same loop is entered a second
# time further down with $K = K_60_79 and $Xi = sp; the "cmp sp,$t0"
# after the loop tells the two passes apart and branches to .Ldone
# once $t0 has reached the last pushed word.
191         &BODY_20_39();
192 $code.=<<___;
193         cmp     $Xi,$t0
194         bne     .L_20_39_or_60_79       @ [+(2+9+3+2+8+2)*20*2]
195         cmp     sp,$t0
196         beq     .Ldone                  @ [+2]
197
198         ldr     $t2,.LK_40_59
199         mov     $t1,$t0
200         sub     $t1,#20*4
201         mov     $Xi,$t1
202         mov     $K,$t2                  @ [+5]
203 .L_40_59:
204 ___
# One round of the third group, repeated 20 times like the first.
205         &BODY_40_59();
206 $code.=<<___;
207         cmp     $Xi,$t0
208         bne     .L_40_59                @ [+(2+9+6+2+8+2)*20]
209
210         ldr     $t2,.LK_60_79
211         mov     $Xi,sp
212         mov     $K,$t2
213         b       .L_20_39_or_60_79       @ [+4]
214 .Ldone:
215         mov     $t0,$ctx
216         ldr     $t1,[$t0,#0]
217         ldr     $t2,[$t0,#4]
218         add     $a,$t1
219         ldr     $t1,[$t0,#8]
220         add     $b,$t2
221         ldr     $t2,[$t0,#12]
222         add     $c,$t1
223         ldr     $t1,[$t0,#16]
224         add     $d,$t2
225         add     $e,$t1
226         stmia   $t0!,{$a,$b,$c,$d,$e}   @ [+20]
227
228         add     sp,#80*4                @ deallocate stack frame
229         mov     $t0,$ctx                @ restore ctx
230         mov     $t1,$inp                @ restore inp
231         cmp     $t1,$len
232         beq     .Lexit
233         b       .Lloop                  @ [+6] total 3212 cycles
234 .Lexit:
235         pop     {r2-r7}
236         mov     r8,r2
237         mov     r9,r3
238         mov     r10,r4
239         mov     r11,r5
240         mov     r12,r6
241         mov     lr,r7
242         pop     {r4-r7}
243         bx      lr
244 .align  2
245 ___
# When the rounds are not inlined they reach the shared code with
# "bl .Lcommon" / "bl .Lrotate", so emit one labelled copy of each
# sequence here, each returning through "mov pc,lr".
unless ($inline) {
    $code.=".Lcommon:\n".&common()."\tmov   pc,lr\n";
    $code.=".Lrotate:\n".&rotate()."\tmov   pc,lr\n";
}
# Trailer: the four round constants that the function body fetches
# with "ldr $t2,.LK_xx_xx" (kept on a 4-byte boundary), the symbol
# size, and an identification string.
248 $code.=<<___;
249 .align  2
250 .LK_00_19:      .word   0x5a827999
251 .LK_20_39:      .word   0x6ed9eba1
252 .LK_40_59:      .word   0x8f1bbcdc
253 .LK_60_79:      .word   0xca62c1d6
254 .size   sha1_block_data_order,.-sha1_block_data_order
255 .asciz  "SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
256 ___
257
# Write out the generated assembly.  STDOUT is buffered, so a write
# failure (full disk, closed pipe, ...) may only surface when the
# handle is closed.  Check both calls so that a truncated output file
# stops the build instead of being assembled as if it were complete.
print $code or die "error writing output: $!";
close STDOUT or die "error closing STDOUT: $!"; # enforce flush