C64x+ assembly pack: improve EABI support.
[openssl.git] / crypto / sha / asm / sha512-c64xplus.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # SHA512 for C64x+.
11 #
12 # January 2012
13 #
14 # Performance is 19 cycles per processed byte. Compared to block
15 # transform function from sha512.c compiled with cl6x with -mv6400+
16 # -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
17 # Unrolling the loop won't make this implementation any faster, because
18 # it's effectively dominated by SHRU||SHL pairs and you can't schedule
19 # more of them.
20 #
21 # !!! Note that this module uses AMR, which means that all interrupt
22 # service routines are expected to preserve it and, for their own
23 # well-being, zero it upon entry.
24
25 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}     # skip leading arguments until one looks like a file name (name.ext); $output stays false if none does
26 if ($output) { open(STDOUT,">",$output) or die "can't open $output: $!"; }   # redirect only when a file name was found (otherwise keep the inherited STDOUT); 3-arg open, fail loudly
27
28 ($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments: ctx pointer, input pointer, number of blocks
29  $K512="A3";                                     # pointer into the K512 constant table
30 # 64-bit working variables a..h are kept as hi (A-side) / lo (B-side) 32-bit halves, interleaved with the *ctx* copies taken at the top of every block
31 ($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
32  $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));
33 ($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
34  $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));
35 # scratch registers: Sigma1/sigma1, Ch, Sigma0/sigma0 accumulators and shift temporaries (t0)
36 ($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));
37 ($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));
38 ($T1hi,         $T2hi)=         ("A6","A7");    # high halves of T1/T2 ($T1hi shares A6 with $NUM, which is moved to A0 on entry)
39 ($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");   # low halves plus the carry registers written by ADDU (carry:lo pairs B7:B6 and B9:B8)
40 ($Khi,$Klo)=("A9","A8");                        # destination pair of the K512[i] pre-fetch (LDDW)
41 ($MAJhi,$MAJlo)=($T2hi,$T2lo);                  # Maj(a,b,c) is built directly in the T2 registers
42 ($t1hi,$t1lo)=($Khi,"B2");                      # second temporary pair; the high half shares A9 with $Khi
43  $CTXB=$t1lo;                                   # B-side copy of the ctx pointer; shares B2 with $t1lo, so it is re-created before ctx is stored
44
45 ($Xihi,$Xilo)=("A5","B5");                      # pointers to the hi/lo words of X[] in the circular/ring buffer
46
47 $code.=<<___;
48         .text
49         .if     __TI_EABI__
50         .nocmp                                  ; no compact (16-bit) instruction encodings in this section
51         .asg    sha512_block_data_order,_sha512_block_data_order        ; EABI: public symbol carries no leading underscore
52         .endif
53
54         .asg    B3,RA                           ; return address
55         .asg    A15,FP                          ; frame pointer
56         .asg    B15,SP                          ; stack pointer
57
58         .if     .BIG_ENDIAN                     ; K512 is stored high word first, so which register of the
59         .asg    $Khi,KHI                        ; pre-fetched pair holds the high half depends on endianness;
60         .asg    $Klo,KLO                        ; KHI/KLO name the halves by value rather than by register
61         .else
62         .asg    $Khi,KLO
63         .asg    $Klo,KHI
64         .endif
66         .global _sha512_block_data_order
67 _sha512_block_data_order:
68         .asmfunc stack_usage(40+128)            ; 40-byte register save area + 128-byte X[] buffer
69         MV      $NUM,A0                         ; reassign $NUM
70 ||      MVK     -128,B0                         ; -128 is used both as alloca size and as alignment mask
71   [!A0] BNOP    RA                              ; if ($NUM==0) return;
72 || [A0] STW     FP,*SP--(40)                    ; save frame pointer
73 || [A0] MV      SP,FP                           ; FP = SP as it was on entry
74    [A0] STDW    B13:B12,*SP[4]                  ; save B13:B12 (reloaded in the epilogue)
75 || [A0] MVK     0x00404,B1                      ; AMR image, low half: circular mode for A5 and B5
76    [A0] STDW    B11:B10,*SP[3]                  ; save B11:B10
77 || [A0] STDW    A13:A12,*FP[-3]                 ; save A13:A12
78 || [A0] MVKH    0x60000,B1                      ; AMR image, high half: BK0 = 6, i.e. 128-byte block
79    [A0] STDW    A11:A10,*SP[1]                  ; save A11:A10
80 || [A0] MVC     B1,AMR                          ; setup circular addressing
81 || [A0] ADD     B0,SP,SP                        ; alloca(128)
82         .if     __TI_EABI__
83    [A0] AND     B0,SP,SP                        ; align stack at 128 bytes
84 || [A0] ADDKPC  _sha512_block_data_order,B1     ; B1 = run-time address of the entry point
85 || [A0] MVKL    \$PCR_OFFSET(K512,_sha512_block_data_order),$K512       ; offset of K512 from the entry point
86    [A0] MVKH    \$PCR_OFFSET(K512,_sha512_block_data_order),$K512
87 || [A0] SUBAW   SP,2,SP                         ; reserve two words above buffer
88         .else
89    [A0] AND     B0,SP,SP                        ; align stack at 128 bytes
90 || [A0] ADDKPC  _sha512_block_data_order,B1     ; B1 = run-time address of the entry point
91 || [A0] MVKL    (K512-_sha512_block_data_order),$K512   ; offset of K512 from the entry point
92    [A0] MVKH    (K512-_sha512_block_data_order),$K512
93 || [A0] SUBAW   SP,2,SP                         ; reserve two words above buffer
94         .endif
95         ADDAW   SP,3,$Xilo                      ; low words of X[] occupy the odd word slots
96         ADDAW   SP,2,$Xihi                      ; high words the even ones, first at SP+8
97
98 ||      MV      $CTXA,$CTXB                     ; B-side copy of the ctx pointer
99         LDW     *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx
100 ||      LDW     *${CTXB}[1^.LITTLE_ENDIAN],$Alo ; word index is flipped on little-endian
101 ||      ADD     B1,$K512,$K512                  ; offset + entry point = address of K512
102         LDW     *${CTXA}[2^.LITTLE_ENDIAN],$Bhi
103 ||      LDW     *${CTXB}[3^.LITTLE_ENDIAN],$Blo
104         LDW     *${CTXA}[4^.LITTLE_ENDIAN],$Chi
105 ||      LDW     *${CTXB}[5^.LITTLE_ENDIAN],$Clo
106         LDW     *${CTXA}[6^.LITTLE_ENDIAN],$Dhi
107 ||      LDW     *${CTXB}[7^.LITTLE_ENDIAN],$Dlo
108         LDW     *${CTXA}[8^.LITTLE_ENDIAN],$Ehi
109 ||      LDW     *${CTXB}[9^.LITTLE_ENDIAN],$Elo
110         LDW     *${CTXA}[10^.LITTLE_ENDIAN],$Fhi
111 ||      LDW     *${CTXB}[11^.LITTLE_ENDIAN],$Flo
112         LDW     *${CTXA}[12^.LITTLE_ENDIAN],$Ghi
113 ||      LDW     *${CTXB}[13^.LITTLE_ENDIAN],$Glo
114         LDW     *${CTXA}[14^.LITTLE_ENDIAN],$Hhi
115 ||      LDW     *${CTXB}[15^.LITTLE_ENDIAN],$Hlo
116
117         LDNDW   *$INP++,B11:B10                 ; pre-fetch input
118         LDDW    *$K512++,$Khi:$Klo              ; pre-fetch K512[0]
119 outerloop?:                                     ; executed once per 128-byte input block
120         MVK     15,B0                           ; loop counters
121 ||      MVK     64,B1                           ; B0 counts rounds 0..15, B1 rounds 16..79
122 ||      SUB     A0,1,A0                         ; one block fewer left to process
123         MV      $Ahi,$Actxhi                    ; snapshot a..h; the copies are added back at break
124 ||      MV      $Alo,$Actxlo
125 ||      MV      $Bhi,$Bctxhi
126 ||      MV      $Blo,$Bctxlo
127 ||      MV      $Chi,$Cctxhi
128 ||      MV      $Clo,$Cctxlo
129 ||      MVD     $Dhi,$Dctxhi
130 ||      MVD     $Dlo,$Dctxlo
131         MV      $Ehi,$Ectxhi
132 ||      MV      $Elo,$Ectxlo
133 ||      MV      $Fhi,$Fctxhi
134 ||      MV      $Flo,$Fctxlo
135 ||      MV      $Ghi,$Gctxhi
136 ||      MV      $Glo,$Gctxlo
137 ||      MVD     $Hhi,$Hctxhi
138 ||      MVD     $Hlo,$Hctxlo
139 loop0_15?:                                      ; rounds 0..15: X[i] comes straight from the pre-fetched input
140         .if     .BIG_ENDIAN
141         MV      B11,$T1hi                       ; big-endian: use the loaded words as they are
142 ||      MV      B10,$T1lo
143         .else
144         SWAP4   B10,$T1hi                       ; little-endian: SWAP4 followed by SWAP2 reverses the bytes
145 ||      SWAP4   B11,$T1lo                       ; of each word; the two words also trade places
146         SWAP2   $T1hi,$T1hi
147 ||      SWAP2   $T1lo,$T1lo
148         .endif
149 loop16_79?:                                     ; round body shared by all 80 rounds; on entry T1 = X[i]
150         STW     $T1hi,*$Xihi++[2]
151 ||      STW     $T1lo,*$Xilo++[2]                       ; X[i] = T1
152 ||      ADD     $Hhi,$T1hi,$T1hi
153 ||      ADDU    $Hlo,$T1lo,$T1carry:$T1lo               ; T1 += h
154 ||      SHRU    $Ehi,14,$S1hi                           ; Sigma1(e): 64-bit rotates by 14, 18 and 41,
155 ||      SHL     $Ehi,32-14,$S1lo                        ; each assembled from 32-bit SHRU/SHL pairs
156         XOR     $Fhi,$Ghi,$CHhi
157 ||      XOR     $Flo,$Glo,$CHlo
158 ||      ADD     KHI,$T1hi,$T1hi
159 ||      ADDU    KLO,$T1carry:$T1lo,$T1carry:$T1lo       ; T1 += K512[i]
160 ||      SHRU    $Elo,14,$t0lo
161 ||      SHL     $Elo,32-14,$t0hi
162         XOR     $t0hi,$S1hi,$S1hi
163 ||      XOR     $t0lo,$S1lo,$S1lo
164 ||      AND     $Ehi,$CHhi,$CHhi
165 ||      AND     $Elo,$CHlo,$CHlo
166 ||      ROTL    $Ghi,0,$Hhi                             ; rotate by 0 acts as a register copy
167 ||      ROTL    $Glo,0,$Hlo                             ; h = g
168 ||      SHRU    $Ehi,18,$t0hi
169 ||      SHL     $Ehi,32-18,$t0lo
170         XOR     $t0hi,$S1hi,$S1hi
171 ||      XOR     $t0lo,$S1lo,$S1lo
172 ||      XOR     $Ghi,$CHhi,$CHhi
173 ||      XOR     $Glo,$CHlo,$CHlo                        ; Ch(e,f,g) = ((f^g)&e)^g
174 ||      ROTL    $Fhi,0,$Ghi
175 ||      ROTL    $Flo,0,$Glo                             ; g = f
176 ||      SHRU    $Elo,18,$t0lo
177 ||      SHL     $Elo,32-18,$t0hi
178         XOR     $t0hi,$S1hi,$S1hi
179 ||      XOR     $t0lo,$S1lo,$S1lo
180 ||      OR      $Ahi,$Bhi,$MAJhi
181 ||      OR      $Alo,$Blo,$MAJlo
182 ||      ROTL    $Ehi,0,$Fhi
183 ||      ROTL    $Elo,0,$Flo                             ; f = e
184 ||      SHRU    $Ehi,41-32,$t0lo
185 ||      SHL     $Ehi,64-41,$t0hi
186         XOR     $t0hi,$S1hi,$S1hi
187 ||      XOR     $t0lo,$S1lo,$S1lo
188 ||      AND     $Chi,$MAJhi,$MAJhi
189 ||      AND     $Clo,$MAJlo,$MAJlo
190 ||      ROTL    $Dhi,0,$Ehi
191 ||      ROTL    $Dlo,0,$Elo                             ; e = d
192 ||      SHRU    $Elo,41-32,$t0hi
193 ||      SHL     $Elo,64-41,$t0lo
194         XOR     $t0hi,$S1hi,$S1hi
195 ||      XOR     $t0lo,$S1lo,$S1lo                       ; Sigma1(e)
196 ||      AND     $Ahi,$Bhi,$t1hi
197 ||      AND     $Alo,$Blo,$t1lo
198 ||      ROTL    $Chi,0,$Dhi
199 ||      ROTL    $Clo,0,$Dlo                             ; d = c
200 ||      SHRU    $Ahi,28,$S0hi                           ; Sigma0(a): 64-bit rotates by 28, 34 and 39
201 ||      SHL     $Ahi,32-28,$S0lo
202         OR      $t1hi,$MAJhi,$MAJhi
203 ||      OR      $t1lo,$MAJlo,$MAJlo                     ; Maj(a,b,c) = ((a|b)&c)|(a&b)
204 ||      ADD     $CHhi,$T1hi,$T1hi
205 ||      ADDU    $CHlo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += Ch(e,f,g)
206 ||      ROTL    $Bhi,0,$Chi
207 ||      ROTL    $Blo,0,$Clo                             ; c = b
208 ||      SHRU    $Alo,28,$t0lo
209 ||      SHL     $Alo,32-28,$t0hi
210         XOR     $t0hi,$S0hi,$S0hi
211 ||      XOR     $t0lo,$S0lo,$S0lo
212 ||      ADD     $S1hi,$T1hi,$T1hi
213 ||      ADDU    $S1lo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += Sigma1(e)
214 ||      ROTL    $Ahi,0,$Bhi
215 ||      ROTL    $Alo,0,$Blo                             ; b = a
216 ||      SHRU    $Ahi,34-32,$t0lo
217 ||      SHL     $Ahi,64-34,$t0hi
218         XOR     $t0hi,$S0hi,$S0hi
219 ||      XOR     $t0lo,$S0lo,$S0lo
220 ||      ADD     $MAJhi,$T1hi,$T2hi
221 ||      ADDU    $MAJlo,$T1carry:$T1lo,$T2carry:$T2lo    ; T2 = T1+Maj(a,b,c)
222 ||      SHRU    $Alo,34-32,$t0hi
223 ||      SHL     $Alo,64-34,$t0lo
224         XOR     $t0hi,$S0hi,$S0hi
225 ||      XOR     $t0lo,$S0lo,$S0lo
226 ||      ADD     $Ehi,$T1hi,$T1hi
227 ||      ADDU    $Elo,$T1carry:$T1lo,$T1carry:$T1lo      ; T1 += e
228 || [B0] BNOP    loop0_15?                               ; still within rounds 0..15
229 ||      SHRU    $Ahi,39-32,$t0lo
230 ||      SHL     $Ahi,64-39,$t0hi
231         XOR     $t0hi,$S0hi,$S0hi
232 ||      XOR     $t0lo,$S0lo,$S0lo
233 || [B0] LDNDW   *$INP++,B11:B10                         ; pre-fetch input
234 ||[!B1] BNOP    break?                                  ; B1 reaches zero in round 79: leave the loop
235 ||      SHRU    $Alo,39-32,$t0hi
236 ||      SHL     $Alo,64-39,$t0lo
237         XOR     $t0hi,$S0hi,$S0hi
238 ||      XOR     $t0lo,$S0lo,$S0lo                       ; Sigma0(a)
239 ||      ADD     $T1carry,$T1hi,$Ehi                     ; fold the accumulated carry into the high word
240 ||      MV      $T1lo,$Elo                              ; e = T1
241 ||[!B0] LDW     *${Xihi}[28],$T1hi
242 ||[!B0] LDW     *${Xilo}[28],$T1lo                      ; X[i+14]
243         ADD     $S0hi,$T2hi,$T2hi
244 ||      ADDU    $S0lo,$T2carry:$T2lo,$T2carry:$T2lo     ; T2 += Sigma0(a)
245 || [B1] LDDW    *$K512++,$Khi:$Klo                      ; pre-fetch K512[i]
246         NOP                                             ; avoid cross-path stall
247         ADD     $T2carry,$T2hi,$Ahi
248 ||      MV      $T2lo,$Alo                              ; a = T2
249 || [B0] SUB     B0,1,B0                                 ; count down rounds 0..15
250 ;;===== branch to loop0_15? is taken here
251         NOP
252 ;;===== branch to break? is taken here
253         LDW     *${Xihi}[2],$T2hi
254 ||      LDW     *${Xilo}[2],$T2lo                       ; X[i+1]
255 ||      SHRU    $T1hi,19,$S1hi                          ; sigma1(): rotates by 19 and 61, shift by 6
256 ||      SHL     $T1hi,32-19,$S1lo
257         SHRU    $T1lo,19,$t0lo
258 ||      SHL     $T1lo,32-19,$t0hi
259         XOR     $t0hi,$S1hi,$S1hi
260 ||      XOR     $t0lo,$S1lo,$S1lo
261 ||      SHRU    $T1hi,61-32,$t0lo
262 ||      SHL     $T1hi,64-61,$t0hi
263         XOR     $t0hi,$S1hi,$S1hi
264 ||      XOR     $t0lo,$S1lo,$S1lo
265 ||      SHRU    $T1lo,61-32,$t0hi
266 ||      SHL     $T1lo,64-61,$t0lo
267         XOR     $t0hi,$S1hi,$S1hi
268 ||      XOR     $t0lo,$S1lo,$S1lo
269 ||      SHRU    $T1hi,6,$t0hi
270 ||      SHL     $T1hi,32-6,$t0lo
271         XOR     $t0hi,$S1hi,$S1hi
272 ||      XOR     $t0lo,$S1lo,$S1lo
273 ||      SHRU    $T1lo,6,$t0lo
274 ||      LDW     *${Xihi}[18],$T1hi
275 ||      LDW     *${Xilo}[18],$T1lo                      ; X[i+9]
276         XOR     $t0lo,$S1lo,$S1lo                       ; sigma1(Xi[i+14])
277
278 ||      LDW     *${Xihi}[0],$CHhi
279 ||      LDW     *${Xilo}[0],$CHlo                       ; X[i]
280 ||      SHRU    $T2hi,1,$S0hi                           ; sigma0(): rotates by 1 and 8, shift by 7
281 ||      SHL     $T2hi,32-1,$S0lo
282         SHRU    $T2lo,1,$t0lo
283 ||      SHL     $T2lo,32-1,$t0hi
284         XOR     $t0hi,$S0hi,$S0hi
285 ||      XOR     $t0lo,$S0lo,$S0lo
286 ||      SHRU    $T2hi,8,$t0hi
287 ||      SHL     $T2hi,32-8,$t0lo
288         XOR     $t0hi,$S0hi,$S0hi
289 ||      XOR     $t0lo,$S0lo,$S0lo
290 ||      SHRU    $T2lo,8,$t0lo
291 ||      SHL     $T2lo,32-8,$t0hi
292         XOR     $t0hi,$S0hi,$S0hi
293 ||      XOR     $t0lo,$S0lo,$S0lo
294 ||      ADD     $S1hi,$T1hi,$T1hi
295 ||      ADDU    $S1lo,$T1lo,$T1carry:$T1lo              ; T1 = X[i+9]+sigma1()
296 || [B1] BNOP    loop16_79?
297 ||      SHRU    $T2hi,7,$t0hi
298 ||      SHL     $T2hi,32-7,$t0lo
299         XOR     $t0hi,$S0hi,$S0hi
300 ||      XOR     $t0lo,$S0lo,$S0lo
301 ||      ADD     $CHhi,$T1hi,$T1hi
302 ||      ADDU    $CHlo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += X[i]
303 ||      SHRU    $T2lo,7,$t0lo
304         XOR     $t0lo,$S0lo,$S0lo                       ; sigma0(Xi[i+1])
305
306         ADD     $S0hi,$T1hi,$T1hi
307 ||      ADDU    $S0lo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += sigma0()
308 || [B1] SUB     B1,1,B1                                 ; count down rounds 16..79
309         NOP                                             ; avoid cross-path stall
310         ADD     $T1carry,$T1hi,$T1hi                    ; fold the accumulated carry into the high word
311 ;;===== branch to loop16_79? is taken here
312
313 break?:                                         ; block finished: add the saved copies back, then loop or store
314         ADD     $Ahi,$Actxhi,$Ahi               ; accumulate ctx
315 ||      ADDU    $Alo,$Actxlo,$Actxlo:$Alo       ; carry is written to the no longer needed copy register
316 || [A0] LDNDW   *$INP++,B11:B10                 ; pre-fetch input
317 || [A0] ADDK    -640,$K512                      ; rewind pointer to K512
318         ADD     $Bhi,$Bctxhi,$Bhi
319 ||      ADDU    $Blo,$Bctxlo,$Bctxlo:$Blo
320 || [A0] LDDW    *$K512++,$Khi:$Klo              ; pre-fetch K512[0]
321         ADD     $Chi,$Cctxhi,$Chi
322 ||      ADDU    $Clo,$Cctxlo,$Cctxlo:$Clo
323 ||      ADD     $Actxlo,$Ahi,$Ahi               ; propagate the carry into the high word
324 ||[!A0] MV      $CTXA,$CTXB                     ; last block: re-create the B-side ctx pointer
325         ADD     $Dhi,$Dctxhi,$Dhi
326 ||      ADDU    $Dlo,$Dctxlo,$Dctxlo:$Dlo
327 ||      ADD     $Bctxlo,$Bhi,$Bhi
328 ||[!A0] STW     $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx
329 ||[!A0] STW     $Alo,*${CTXB}[1^.LITTLE_ENDIAN]
330         ADD     $Ehi,$Ectxhi,$Ehi
331 ||      ADDU    $Elo,$Ectxlo,$Ectxlo:$Elo
332 ||      ADD     $Cctxlo,$Chi,$Chi
333 || [A0] BNOP    outerloop?                      ; more blocks to process
334 ||[!A0] STW     $Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
335 ||[!A0] STW     $Blo,*${CTXB}[3^.LITTLE_ENDIAN]
336         ADD     $Fhi,$Fctxhi,$Fhi
337 ||      ADDU    $Flo,$Fctxlo,$Fctxlo:$Flo
338 ||      ADD     $Dctxlo,$Dhi,$Dhi
339 ||[!A0] STW     $Chi,*${CTXA}[4^.LITTLE_ENDIAN]
340 ||[!A0] STW     $Clo,*${CTXB}[5^.LITTLE_ENDIAN]
341         ADD     $Ghi,$Gctxhi,$Ghi
342 ||      ADDU    $Glo,$Gctxlo,$Gctxlo:$Glo
343 ||      ADD     $Ectxlo,$Ehi,$Ehi
344 ||[!A0] STW     $Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
345 ||[!A0] STW     $Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
346         ADD     $Hhi,$Hctxhi,$Hhi
347 ||      ADDU    $Hlo,$Hctxlo,$Hctxlo:$Hlo
348 ||      ADD     $Fctxlo,$Fhi,$Fhi
349 ||[!A0] STW     $Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
350 ||[!A0] STW     $Elo,*${CTXB}[9^.LITTLE_ENDIAN]
351         ADD     $Gctxlo,$Ghi,$Ghi
352 ||[!A0] STW     $Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
353 ||[!A0] STW     $Flo,*${CTXB}[11^.LITTLE_ENDIAN]
354         ADD     $Hctxlo,$Hhi,$Hhi
355 ||[!A0] STW     $Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
356 ||[!A0] STW     $Glo,*${CTXB}[13^.LITTLE_ENDIAN]
357 ;;===== branch to outerloop? is taken here
358
359         STW     $Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
360 ||      STW     $Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
361 ||      MVK     -40,B0                          ; FP-40 is where SP stood right after the prologue push
362         ADD     FP,B0,SP                        ; destroy circular buffer
363 ||      LDDW    *FP[-4],A11:A10                 ; reload the registers saved in the prologue
364         LDDW    *SP[2],A13:A12
365 ||      LDDW    *FP[-2],B11:B10
366         LDDW    *SP[4],B13:B12
367 ||      BNOP    RA                              ; return; the packets below fill the delay slots
368         LDW     *++SP(40),FP                    ; restore frame pointer
369         MVK     0,B0
370         MVC     B0,AMR                          ; clear AMR
371         NOP     2                               ; wait till FP is committed
372         .endasmfunc
373
374         .if     __TI_EABI__
375         .sect   ".text:sha_asm.const"           ; EABI build: the table goes into a .text subsection
376         .else
377         .sect   ".const:sha_asm"                ; otherwise it goes into a .const subsection
378         .endif
379         .align  128
380 K512:                                           ; 80 64-bit round constants, each emitted as high word, low word
381         .uword  0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
382         .uword  0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
383         .uword  0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
384         .uword  0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
385         .uword  0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
386         .uword  0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
387         .uword  0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
388         .uword  0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
389         .uword  0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
390         .uword  0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
391         .uword  0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
392         .uword  0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
393         .uword  0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
394         .uword  0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
395         .uword  0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
396         .uword  0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
397         .uword  0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
398         .uword  0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
399         .uword  0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
400         .uword  0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
401         .uword  0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
402         .uword  0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
403         .uword  0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
404         .uword  0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
405         .uword  0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
406         .uword  0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
407         .uword  0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
408         .uword  0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
409         .uword  0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
410         .uword  0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
411         .uword  0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
412         .uword  0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
413         .uword  0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
414         .uword  0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
415         .uword  0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
416         .uword  0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
417         .uword  0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
418         .uword  0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
419         .uword  0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
420         .uword  0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
421         .cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
422         .align  4
423 ___
424
425 print $code;                                        # emit the generated assembly
426 close STDOUT or die "error closing STDOUT: $!";     # buffered write errors (e.g. a full disk) surface only at close; don't leave a truncated file unnoticed