Following the license change, modify the boilerplates in crypto/sha/
[openssl.git] / crypto / sha / asm / sha512-c64xplus.pl
1 #! /usr/bin/env perl
2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9 #
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16 #
17 # SHA512 for C64x+.
18 #
19 # January 2012
20 #
21 # Performance is 19 cycles per processed byte. Compared to block
22 # transform function from sha512.c compiled with cl6x with -mv6400+
23 # -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
24 # Loop unrolling won't make this implementation any faster, because
25 # it's effectively dominated by SHRU||SHL pairs and you can't schedule
26 # more of them.
27 #
28 # !!! Note that this module uses AMR, which means that all interrupt
29 # service routines are expected to preserve it and for own well-being
30 # zero it upon entry.
31
# Select the output file: discard leading command-line arguments until one
# looks like a file name with an extension ("name.ext").  $output ends up
# undefined when no argument is left.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT to that file.  The three-argument form keeps characters in
# the name from being read as part of the open mode, and a failure is fatal
# instead of silently discarding the generated code.  Without a file name
# STDOUT is left alone, so the code is written to the real standard output.
if (defined $output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
34
# Register allocation for the generated code.  Each variable holds a register
# name that is interpolated textually into the assembly template below.
35 ($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments
36  $K512="A3";                                      # pointer into the K512 table
37
# Working variables a..h as 32-bit halves: hi halves in A16..A31, lo halves in
# B16..B31.  Each working register is immediately followed by its "ctx"
# companion, which keeps the value the variable had at the start of the block
# so it can be added back once the rounds are done.
38 ($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
39  $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));
40 ($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
41  $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));
42
# Per-round temporaries: Sigma1, Ch, Sigma0 accumulators and shift scratch.
43 ($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));
44 ($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));
# T1/T2 accumulators; the *carry registers receive the carry out of the
# lo-word ADDU.  $T1hi is A6, the same register as $NUM, which is why the
# generated code copies $NUM to A0 before anything else.
45 ($T1hi,         $T2hi)=         ("A6","A7");
46 ($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");
47 ($Khi,$Klo)=("A9","A8");                          # pair that receives K512[i]
48 ($MAJhi,$MAJlo)=($T2hi,$T2lo);                    # Maj() is built in the T2 registers
49 ($t1hi,$t1lo)=($Khi,"B2");                        # $t1hi reuses $Khi's register
50  $CTXB=$t1lo;                                     # B-side copy of the ctx pointer
51
52 ($Xihi,$Xilo)=("A5","B5");                      # circular/ring buffer
53
54 $code.=<<___;
; Assembler set-up: ABI detection, register aliases and endian-dependent naming
; of the two halves of a round constant.
55         .text
56
; Assemblers older than version 7.0.0 get __TI_EABI__ defined as 0 here,
; presumably because they do not predefine the symbol themselves.
57         .if     .ASSEMBLER_VERSION<7000000
58         .asg    0,__TI_EABI__
59         .endif
; Under EABI the underscore-prefixed name used throughout this file is mapped
; to the unprefixed symbol name.
60         .if     __TI_EABI__
61         .nocmp
62         .asg    sha512_block_data_order,_sha512_block_data_order
63         .endif
64
; Readable aliases for the registers used as return address, frame pointer and
; stack pointer.
65         .asg    B3,RA
66         .asg    A15,FP
67         .asg    B15,SP
68
; KHI/KLO name the two registers of the round-constant pair by content (most
; and least significant word); their roles are exchanged on little-endian.
69         .if     .BIG_ENDIAN
70         .asg    $Khi,KHI
71         .asg    $Klo,KLO
72         .else
73         .asg    $Khi,KLO
74         .asg    $Klo,KHI
75         .endif
76
77         .global _sha512_block_data_order
;-----------------------------------------------------------------------
; sha512_block_data_order: entry point of the SHA-512 block transform.
; In:    A4 = context pointer, B4 = input pointer, A6 = number of input
;        blocks to process (returns at once when it is zero).
; Saves: FP, A10-A13 and B10-B13 in a 40-byte frame; they are reloaded
;        before the final return.
; Stack: 40-byte frame plus a 128-byte aligned, 128-byte buffer for X[].
; Note:  AMR is programmed for circular addressing of the X[] buffer and
;        cleared again on exit.
;-----------------------------------------------------------------------
78 _sha512_block_data_order:
79 __sha512_block:
80         .asmfunc stack_usage(40+128)
81         MV      $NUM,A0                         ; reassign $NUM
82 ||      MVK     -128,B0                         ; used as alloca size and alignment mask
83   [!A0] BNOP    RA                              ; if ($NUM==0) return;
84 || [A0] STW     FP,*SP--(40)                    ; save frame pointer
85 || [A0] MV      SP,FP                           ; FP = stack pointer on entry
86    [A0] STDW    B13:B12,*SP[4]
87 || [A0] MVK     0x00404,B1                      ; low half of the AMR value
88    [A0] STDW    B11:B10,*SP[3]
89 || [A0] STDW    A13:A12,*FP[-3]
90 || [A0] MVKH    0x60000,B1                      ; high half of the AMR value
91    [A0] STDW    A11:A10,*SP[1]
92 || [A0] MVC     B1,AMR                          ; setup circular addressing
93 || [A0] ADD     B0,SP,SP                        ; alloca(128)
; Both branches below are identical except for how the distance from
; __sha512_block to K512 is expressed; ADDKPC supplies the matching
; PC-relative base in B1 and the two are added further down.
94         .if     __TI_EABI__
95    [A0] AND     B0,SP,SP                        ; align stack at 128 bytes
96 || [A0] ADDKPC  __sha512_block,B1
97 || [A0] MVKL    \$PCR_OFFSET(K512,__sha512_block),$K512
98    [A0] MVKH    \$PCR_OFFSET(K512,__sha512_block),$K512
99 || [A0] SUBAW   SP,2,SP                         ; reserve two words above buffer
100         .else
101    [A0] AND     B0,SP,SP                        ; align stack at 128 bytes
102 || [A0] ADDKPC  __sha512_block,B1
103 || [A0] MVKL    (K512-__sha512_block),$K512
104    [A0] MVKH    (K512-__sha512_block),$K512
105 || [A0] SUBAW   SP,2,SP                         ; reserve two words above buffer
106         .endif
107         ADDAW   SP,3,$Xilo                      ; lo-word ring pointer = SP + 3 words
108         ADDAW   SP,2,$Xihi                      ; hi-word ring pointer = SP + 2 words
109
110 ||      MV      $CTXA,$CTXB                     ; B-side copy of the context pointer
111         LDW     *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx
112 ||      LDW     *${CTXB}[1^.LITTLE_ENDIAN],$Alo
113 ||      ADD     B1,$K512,$K512                  ; K512 address = base + offset
114         LDW     *${CTXA}[2^.LITTLE_ENDIAN],$Bhi
115 ||      LDW     *${CTXB}[3^.LITTLE_ENDIAN],$Blo
116         LDW     *${CTXA}[4^.LITTLE_ENDIAN],$Chi
117 ||      LDW     *${CTXB}[5^.LITTLE_ENDIAN],$Clo
118         LDW     *${CTXA}[6^.LITTLE_ENDIAN],$Dhi
119 ||      LDW     *${CTXB}[7^.LITTLE_ENDIAN],$Dlo
120         LDW     *${CTXA}[8^.LITTLE_ENDIAN],$Ehi
121 ||      LDW     *${CTXB}[9^.LITTLE_ENDIAN],$Elo
122         LDW     *${CTXA}[10^.LITTLE_ENDIAN],$Fhi
123 ||      LDW     *${CTXB}[11^.LITTLE_ENDIAN],$Flo
124         LDW     *${CTXA}[12^.LITTLE_ENDIAN],$Ghi
125 ||      LDW     *${CTXB}[13^.LITTLE_ENDIAN],$Glo
126         LDW     *${CTXA}[14^.LITTLE_ENDIAN],$Hhi
127 ||      LDW     *${CTXB}[15^.LITTLE_ENDIAN],$Hlo
128
129         LDNDW   *$INP++,B11:B10                 ; pre-fetch input
130         LDDW    *$K512++,$Khi:$Klo              ; pre-fetch K512[0]
; Per-block set-up: reload the two round counters, count the block off in A0
; and copy the working variables a..h into their ctx companion registers so
; that break? can add them back after the 80 rounds.
131 outerloop?:
132         MVK     15,B0                           ; loop counters
133 ||      MVK     64,B1                           ; iterations of the 16..79 phase
134 ||      SUB     A0,1,A0                         ; one block less to go
135         MV      $Ahi,$Actxhi
136 ||      MV      $Alo,$Actxlo
137 ||      MV      $Bhi,$Bctxhi
138 ||      MV      $Blo,$Bctxlo
139 ||      MV      $Chi,$Cctxhi
140 ||      MV      $Clo,$Cctxlo
141 ||      MVD     $Dhi,$Dctxhi
142 ||      MVD     $Dlo,$Dctxlo
143         MV      $Ehi,$Ectxhi
144 ||      MV      $Elo,$Ectxlo
145 ||      MV      $Fhi,$Fctxhi
146 ||      MV      $Flo,$Fctxlo
147 ||      MV      $Ghi,$Gctxhi
148 ||      MV      $Glo,$Gctxlo
149 ||      MVD     $Hhi,$Hctxhi
150 ||      MVD     $Hlo,$Hctxlo
; Rounds 0..15: the message word is taken from the pre-fetched input double
; word in B11:B10 and placed in the T1 hi:lo pair in big-endian word order.
151 loop0_15?:
152         .if     .BIG_ENDIAN
153         MV      B11,$T1hi
154 ||      MV      B10,$T1lo
155         .else
; Little-endian: SWAP4 followed by SWAP2 reverses the byte order of each word,
; and B10/B11 exchange roles compared with the big-endian case.
156         SWAP4   B10,$T1hi
157 ||      SWAP4   B11,$T1lo
158         SWAP2   $T1hi,$T1hi
159 ||      SWAP2   $T1lo,$T1lo
160         .endif
; One SHA-512 round, shared by both phases.  Rounds 0..15 fall through from
; loop0_15?; rounds 16..79 branch here with T1 already holding the scheduled
; message word.  64-bit quantities live in hi:lo register pairs: rotations are
; assembled from SHRU||SHL pairs, the carry out of each lo-word ADDU is
; collected in the carry register and folded into the hi word once at the end.
; ROTL with a zero count serves as a plain register move.
161 loop16_79?:
162         STW     $T1hi,*$Xihi++[2]
163 ||      STW     $T1lo,*$Xilo++[2]                       ; X[i] = T1
164 ||      ADD     $Hhi,$T1hi,$T1hi
165 ||      ADDU    $Hlo,$T1lo,$T1carry:$T1lo               ; T1 += h
166 ||      SHRU    $Ehi,14,$S1hi
167 ||      SHL     $Ehi,32-14,$S1lo
168         XOR     $Fhi,$Ghi,$CHhi
169 ||      XOR     $Flo,$Glo,$CHlo
170 ||      ADD     KHI,$T1hi,$T1hi
171 ||      ADDU    KLO,$T1carry:$T1lo,$T1carry:$T1lo       ; T1 += K512[i]
172 ||      SHRU    $Elo,14,$t0lo
173 ||      SHL     $Elo,32-14,$t0hi
174         XOR     $t0hi,$S1hi,$S1hi
175 ||      XOR     $t0lo,$S1lo,$S1lo
176 ||      AND     $Ehi,$CHhi,$CHhi
177 ||      AND     $Elo,$CHlo,$CHlo
178 ||      ROTL    $Ghi,0,$Hhi
179 ||      ROTL    $Glo,0,$Hlo                             ; h = g
180 ||      SHRU    $Ehi,18,$t0hi
181 ||      SHL     $Ehi,32-18,$t0lo
182         XOR     $t0hi,$S1hi,$S1hi
183 ||      XOR     $t0lo,$S1lo,$S1lo
184 ||      XOR     $Ghi,$CHhi,$CHhi
185 ||      XOR     $Glo,$CHlo,$CHlo                        ; Ch(e,f,g) = ((f^g)&e)^g
186 ||      ROTL    $Fhi,0,$Ghi
187 ||      ROTL    $Flo,0,$Glo                             ; g = f
188 ||      SHRU    $Elo,18,$t0lo
189 ||      SHL     $Elo,32-18,$t0hi
190         XOR     $t0hi,$S1hi,$S1hi
191 ||      XOR     $t0lo,$S1lo,$S1lo
192 ||      OR      $Ahi,$Bhi,$MAJhi
193 ||      OR      $Alo,$Blo,$MAJlo
194 ||      ROTL    $Ehi,0,$Fhi
195 ||      ROTL    $Elo,0,$Flo                             ; f = e
196 ||      SHRU    $Ehi,41-32,$t0lo
197 ||      SHL     $Ehi,64-41,$t0hi
198         XOR     $t0hi,$S1hi,$S1hi
199 ||      XOR     $t0lo,$S1lo,$S1lo
200 ||      AND     $Chi,$MAJhi,$MAJhi
201 ||      AND     $Clo,$MAJlo,$MAJlo
202 ||      ROTL    $Dhi,0,$Ehi
203 ||      ROTL    $Dlo,0,$Elo                             ; e = d
204 ||      SHRU    $Elo,41-32,$t0hi
205 ||      SHL     $Elo,64-41,$t0lo
206         XOR     $t0hi,$S1hi,$S1hi
207 ||      XOR     $t0lo,$S1lo,$S1lo                       ; Sigma1(e)
208 ||      AND     $Ahi,$Bhi,$t1hi
209 ||      AND     $Alo,$Blo,$t1lo
210 ||      ROTL    $Chi,0,$Dhi
211 ||      ROTL    $Clo,0,$Dlo                             ; d = c
212 ||      SHRU    $Ahi,28,$S0hi
213 ||      SHL     $Ahi,32-28,$S0lo
214         OR      $t1hi,$MAJhi,$MAJhi
215 ||      OR      $t1lo,$MAJlo,$MAJlo                     ; Maj(a,b,c) = ((a|b)&c)|(a&b)
216 ||      ADD     $CHhi,$T1hi,$T1hi
217 ||      ADDU    $CHlo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += Ch(e,f,g)
218 ||      ROTL    $Bhi,0,$Chi
219 ||      ROTL    $Blo,0,$Clo                             ; c = b
220 ||      SHRU    $Alo,28,$t0lo
221 ||      SHL     $Alo,32-28,$t0hi
222         XOR     $t0hi,$S0hi,$S0hi
223 ||      XOR     $t0lo,$S0lo,$S0lo
224 ||      ADD     $S1hi,$T1hi,$T1hi
225 ||      ADDU    $S1lo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += Sigma1(e)
226 ||      ROTL    $Ahi,0,$Bhi
227 ||      ROTL    $Alo,0,$Blo                             ; b = a
228 ||      SHRU    $Ahi,34-32,$t0lo
229 ||      SHL     $Ahi,64-34,$t0hi
230         XOR     $t0hi,$S0hi,$S0hi
231 ||      XOR     $t0lo,$S0lo,$S0lo
232 ||      ADD     $MAJhi,$T1hi,$T2hi
233 ||      ADDU    $MAJlo,$T1carry:$T1lo,$T2carry:$T2lo    ; T2 = T1+Maj(a,b,c)
234 ||      SHRU    $Alo,34-32,$t0hi
235 ||      SHL     $Alo,64-34,$t0lo
236         XOR     $t0hi,$S0hi,$S0hi
237 ||      XOR     $t0lo,$S0lo,$S0lo
238 ||      ADD     $Ehi,$T1hi,$T1hi
239 ||      ADDU    $Elo,$T1carry:$T1lo,$T1carry:$T1lo      ; T1 += e
240 || [B0] BNOP    loop0_15?                               ; more input words to consume
241 ||      SHRU    $Ahi,39-32,$t0lo
242 ||      SHL     $Ahi,64-39,$t0hi
243         XOR     $t0hi,$S0hi,$S0hi
244 ||      XOR     $t0lo,$S0lo,$S0lo
245 || [B0] LDNDW   *$INP++,B11:B10                         ; pre-fetch input
246 ||[!B1] BNOP    break?                                  ; schedule counter exhausted: last round
247 ||      SHRU    $Alo,39-32,$t0hi
248 ||      SHL     $Alo,64-39,$t0lo
249         XOR     $t0hi,$S0hi,$S0hi
250 ||      XOR     $t0lo,$S0lo,$S0lo                       ; Sigma0(a)
251 ||      ADD     $T1carry,$T1hi,$Ehi
252 ||      MV      $T1lo,$Elo                              ; e = T1
253 ||[!B0] LDW     *${Xihi}[28],$T1hi
254 ||[!B0] LDW     *${Xilo}[28],$T1lo                      ; X[i+14]
255         ADD     $S0hi,$T2hi,$T2hi
256 ||      ADDU    $S0lo,$T2carry:$T2lo,$T2carry:$T2lo     ; T2 += Sigma0(a)
257 || [B1] LDDW    *$K512++,$Khi:$Klo                      ; pre-fetch K512[i]
258         NOP                                             ; avoid cross-path stall
259         ADD     $T2carry,$T2hi,$Ahi
260 ||      MV      $T2lo,$Alo                              ; a = T2
261 || [B0] SUB     B0,1,B0
262 ;;===== branch to loop0_15? is taken here
263         NOP
264 ;;===== branch to break? is taken here
; Message schedule for the next round, built up in T1 from the ring buffer:
; sigma1(X[i+14]) + X[i+9] + sigma0(X[i+1]) + X[i].  The store at the top of
; loop16_79? then writes the result into the ring buffer.
265         LDW     *${Xihi}[2],$T2hi
266 ||      LDW     *${Xilo}[2],$T2lo                       ; X[i+1]
267 ||      SHRU    $T1hi,19,$S1hi
268 ||      SHL     $T1hi,32-19,$S1lo
269         SHRU    $T1lo,19,$t0lo
270 ||      SHL     $T1lo,32-19,$t0hi
271         XOR     $t0hi,$S1hi,$S1hi
272 ||      XOR     $t0lo,$S1lo,$S1lo
273 ||      SHRU    $T1hi,61-32,$t0lo
274 ||      SHL     $T1hi,64-61,$t0hi
275         XOR     $t0hi,$S1hi,$S1hi
276 ||      XOR     $t0lo,$S1lo,$S1lo
277 ||      SHRU    $T1lo,61-32,$t0hi
278 ||      SHL     $T1lo,64-61,$t0lo
279         XOR     $t0hi,$S1hi,$S1hi
280 ||      XOR     $t0lo,$S1lo,$S1lo
281 ||      SHRU    $T1hi,6,$t0hi
282 ||      SHL     $T1hi,32-6,$t0lo
283         XOR     $t0hi,$S1hi,$S1hi
284 ||      XOR     $t0lo,$S1lo,$S1lo
285 ||      SHRU    $T1lo,6,$t0lo
286 ||      LDW     *${Xihi}[18],$T1hi
287 ||      LDW     *${Xilo}[18],$T1lo                      ; X[i+9]
288         XOR     $t0lo,$S1lo,$S1lo                       ; sigma1(Xi[i+14])
289
290 ||      LDW     *${Xihi}[0],$CHhi
291 ||      LDW     *${Xilo}[0],$CHlo                       ; X[i]
292 ||      SHRU    $T2hi,1,$S0hi
293 ||      SHL     $T2hi,32-1,$S0lo
294         SHRU    $T2lo,1,$t0lo
295 ||      SHL     $T2lo,32-1,$t0hi
296         XOR     $t0hi,$S0hi,$S0hi
297 ||      XOR     $t0lo,$S0lo,$S0lo
298 ||      SHRU    $T2hi,8,$t0hi
299 ||      SHL     $T2hi,32-8,$t0lo
300         XOR     $t0hi,$S0hi,$S0hi
301 ||      XOR     $t0lo,$S0lo,$S0lo
302 ||      SHRU    $T2lo,8,$t0lo
303 ||      SHL     $T2lo,32-8,$t0hi
304         XOR     $t0hi,$S0hi,$S0hi
305 ||      XOR     $t0lo,$S0lo,$S0lo
306 ||      ADD     $S1hi,$T1hi,$T1hi
307 ||      ADDU    $S1lo,$T1lo,$T1carry:$T1lo              ; T1 = X[i+9]+sigma1()
308 || [B1] BNOP    loop16_79?
309 ||      SHRU    $T2hi,7,$t0hi
310 ||      SHL     $T2hi,32-7,$t0lo
311         XOR     $t0hi,$S0hi,$S0hi
312 ||      XOR     $t0lo,$S0lo,$S0lo
313 ||      ADD     $CHhi,$T1hi,$T1hi
314 ||      ADDU    $CHlo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += X[i]
315 ||      SHRU    $T2lo,7,$t0lo
316         XOR     $t0lo,$S0lo,$S0lo                       ; sigma0(Xi[i+1])
317
318         ADD     $S0hi,$T1hi,$T1hi
319 ||      ADDU    $S0lo,$T1carry:$T1lo,$T1carry:$T1lo     ; T1 += sigma0()
320 || [B1] SUB     B1,1,B1
321         NOP                                             ; avoid cross-path stall
322         ADD     $T1carry,$T1hi,$T1hi
323 ;;===== branch to loop16_79? is taken here
324
; End of a block: add the values saved in the ctx companion registers back to
; a..h.  Each lo-word ADDU leaves its carry in the ctx-lo register, which is
; added to the hi word a couple of packets later.  While A0 is non-zero the
; next input word and K512[0] are pre-fetched and control returns to
; outerloop?; once A0 is zero the same packets store the result to the context
; and execution falls through into the epilogue.
325 break?:
326         ADD     $Ahi,$Actxhi,$Ahi               ; accumulate ctx
327 ||      ADDU    $Alo,$Actxlo,$Actxlo:$Alo
328 || [A0] LDNDW   *$INP++,B11:B10                 ; pre-fetch input
329 || [A0] ADDK    -640,$K512                      ; rewind pointer to K512
330         ADD     $Bhi,$Bctxhi,$Bhi
331 ||      ADDU    $Blo,$Bctxlo,$Bctxlo:$Blo
332 || [A0] LDDW    *$K512++,$Khi:$Klo              ; pre-fetch K512[0]
333         ADD     $Chi,$Cctxhi,$Chi
334 ||      ADDU    $Clo,$Cctxlo,$Cctxlo:$Clo
335 ||      ADD     $Actxlo,$Ahi,$Ahi
336 ||[!A0] MV      $CTXA,$CTXB
337         ADD     $Dhi,$Dctxhi,$Dhi
338 ||      ADDU    $Dlo,$Dctxlo,$Dctxlo:$Dlo
339 ||      ADD     $Bctxlo,$Bhi,$Bhi
340 ||[!A0] STW     $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx
341 ||[!A0] STW     $Alo,*${CTXB}[1^.LITTLE_ENDIAN]
342         ADD     $Ehi,$Ectxhi,$Ehi
343 ||      ADDU    $Elo,$Ectxlo,$Ectxlo:$Elo
344 ||      ADD     $Cctxlo,$Chi,$Chi
345 || [A0] BNOP    outerloop?
346 ||[!A0] STW     $Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
347 ||[!A0] STW     $Blo,*${CTXB}[3^.LITTLE_ENDIAN]
348         ADD     $Fhi,$Fctxhi,$Fhi
349 ||      ADDU    $Flo,$Fctxlo,$Fctxlo:$Flo
350 ||      ADD     $Dctxlo,$Dhi,$Dhi
351 ||[!A0] STW     $Chi,*${CTXA}[4^.LITTLE_ENDIAN]
352 ||[!A0] STW     $Clo,*${CTXB}[5^.LITTLE_ENDIAN]
353         ADD     $Ghi,$Gctxhi,$Ghi
354 ||      ADDU    $Glo,$Gctxlo,$Gctxlo:$Glo
355 ||      ADD     $Ectxlo,$Ehi,$Ehi
356 ||[!A0] STW     $Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
357 ||[!A0] STW     $Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
358         ADD     $Hhi,$Hctxhi,$Hhi
359 ||      ADDU    $Hlo,$Hctxlo,$Hctxlo:$Hlo
360 ||      ADD     $Fctxlo,$Fhi,$Fhi
361 ||[!A0] STW     $Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
362 ||[!A0] STW     $Elo,*${CTXB}[9^.LITTLE_ENDIAN]
363         ADD     $Gctxlo,$Ghi,$Ghi
364 ||[!A0] STW     $Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
365 ||[!A0] STW     $Flo,*${CTXB}[11^.LITTLE_ENDIAN]
366         ADD     $Hctxlo,$Hhi,$Hhi
367 ||[!A0] STW     $Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
368 ||[!A0] STW     $Glo,*${CTXB}[13^.LITTLE_ENDIAN]
369 ;;===== branch to outerloop? is taken here
370
; Epilogue, reached only after the last block: store h, release the buffer,
; reload the registers saved in the prologue and return.  The instructions
; after BNOP execute in its delay slots.
371         STW     $Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
372 ||      STW     $Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
373 ||      MVK     -40,B0                          ; frame size, negated
374         ADD     FP,B0,SP                        ; destroy circular buffer
375 ||      LDDW    *FP[-4],A11:A10
376         LDDW    *SP[2],A13:A12
377 ||      LDDW    *FP[-2],B11:B10
378         LDDW    *SP[4],B13:B12
379 ||      BNOP    RA                              ; return to caller
380         LDW     *++SP(40),FP                    ; restore frame pointer
381         MVK     0,B0
382         MVC     B0,AMR                          ; clear AMR
383         NOP     2                               ; wait till FP is committed
384         .endasmfunc
385
; K512: the 80 SHA-512 round constants, two per line, each stored as a pair of
; 32-bit words with the most significant word first.  The code reads one
; constant per round with a double-word load and rewinds the pointer by
; 640 bytes (80 entries of 8 bytes) between blocks.  The section name depends
; on whether the build is EABI; the table is aligned at 128 bytes.
386         .if     __TI_EABI__
387         .sect   ".text:sha_asm.const"
388         .else
389         .sect   ".const:sha_asm"
390         .endif
391         .align  128
392 K512:
393         .uword  0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
394         .uword  0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
395         .uword  0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
396         .uword  0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
397         .uword  0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
398         .uword  0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
399         .uword  0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
400         .uword  0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
401         .uword  0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
402         .uword  0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
403         .uword  0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
404         .uword  0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
405         .uword  0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
406         .uword  0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
407         .uword  0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
408         .uword  0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
409         .uword  0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
410         .uword  0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
411         .uword  0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
412         .uword  0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
413         .uword  0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
414         .uword  0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
415         .uword  0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
416         .uword  0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
417         .uword  0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
418         .uword  0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
419         .uword  0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
420         .uword  0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
421         .uword  0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
422         .uword  0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
423         .uword  0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
424         .uword  0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
425         .uword  0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
426         .uword  0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
427         .uword  0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
428         .uword  0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
429         .uword  0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
430         .uword  0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
431         .uword  0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
432         .uword  0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
; Identification string placed after the table.
433         .cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
434         .align  4
435 ___
436
# Emit the generated assembly.  Buffered data is only written out when STDOUT
# is closed, so a failure there (for example a full disk) must be fatal;
# otherwise a truncated file would be left behind with a zero exit status.
print $code;
close STDOUT or die "error closing STDOUT: $!";