#! /usr/bin/env perl
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA256 for C64x+.
#
# January 2012
#
# Performance is just below 10 cycles per processed byte, which is
# almost 40% faster than compiler-generated code. Unrolling is unlikely
# to give more than ~8% improvement...
#
# !!! Note that this module uses AMR, which means that all interrupt
# service routines are expected to preserve it and, for their own
# well-being, zero it upon entry.
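#
# For orientation, a minimal sketch of the C-level contract this
# routine is assumed to satisfy (the standard OpenSSL block-transform
# signature):
#
#       void sha256_block_data_order(SHA256_CTX *ctx,
#                                    const void *inp, size_t num);
#
# ctx->h[0..7] holds the running state A-H, inp points at num 64-byte
# message blocks, and the three arguments arrive in A4, B4 and A6
# respectively, as reflected in the register assignment below.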

$output = pop and open STDOUT,">$output";

($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments
 $K256="A3";

($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14)
        =map("A$_",(16..31));
($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15)
        =map("B$_",(16..31));

($Xia,$Xib)=("A5","B5");                        # circular/ring buffer
 $CTXB=$t2e;

($Xn,$X0,$K)=("B7","B8","B9");
($Maj,$Ch)=($T2,"B6");

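# For reference, the scheduled code below implements the standard
# SHA-256 round and message expansion, with right rotations rewritten
# as left rotations, ROTL(x,n) == ROTR(x,32-n):
#
#   Sigma0(a) = ROTL(a,30) ^ ROTL(a,19) ^ ROTL(a,10)
#   Sigma1(e) = ROTL(e,26) ^ ROTL(e,21) ^ ROTL(e,7)
#   sigma0(x) = ROTL(x,25) ^ ROTL(x,14) ^ (x >> 3)
#   sigma1(x) = ROTL(x,15) ^ ROTL(x,13) ^ (x >> 10)
#   T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
#   T2 = Sigma0(a) + Maj(a,b,c)
#   X[i] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2])
#
# X[] is kept in a 64-byte circular buffer, so X[i-16] and X[i-7] are
# addressed as X[i] and X[i+9] relative to the ring pointers.
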
$code.=<<___;
        .text

        .if     .ASSEMBLER_VERSION<7000000
        .asg    0,__TI_EABI__
        .endif
        .if     __TI_EABI__
        .nocmp
        .asg    sha256_block_data_order,_sha256_block_data_order
        .endif

        .asg    B3,RA
        .asg    A15,FP
        .asg    B15,SP

        .if     .BIG_ENDIAN
        .asg    SWAP2,MV
        .asg    SWAP4,MV
        .endif

        .global _sha256_block_data_order
_sha256_block_data_order:
__sha256_block:
        .asmfunc stack_usage(64)
        MV      $NUM,A0                         ; reassign $NUM
||      MVK     -64,B0
  [!A0] BNOP    RA                              ; if ($NUM==0) return;
|| [A0] STW     FP,*SP--[16]                    ; save frame pointer and alloca(64)
|| [A0] MV      SP,FP
   [A0] ADDKPC  __sha256_block,B2
|| [A0] AND     B0,SP,SP                        ; align stack at 64 bytes
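        ;; The constant composed into B1 below is 0x50404: the AMR mode
        ;; fields for A5 (Xia) and B5 (Xib) are set to 01b, circular
        ;; addressing through BK0, and BK0=5 selects a block size of
        ;; 2^(5+1)=64 bytes, matching the X[] ring buffer placed at the
        ;; 64-byte-aligned stack pointer.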
        .if     __TI_EABI__
   [A0] MVK     0x00404,B1
|| [A0] MVKL    \$PCR_OFFSET(K256,__sha256_block),$K256
   [A0] MVKH    0x50000,B1
|| [A0] MVKH    \$PCR_OFFSET(K256,__sha256_block),$K256
        .else
   [A0] MVK     0x00404,B1
|| [A0] MVKL    (K256-__sha256_block),$K256
   [A0] MVKH    0x50000,B1
|| [A0] MVKH    (K256-__sha256_block),$K256
        .endif
   [A0] MVC     B1,AMR                          ; setup circular addressing
|| [A0] MV      SP,$Xia
   [A0] MV      SP,$Xib
|| [A0] ADD     B2,$K256,$K256
|| [A0] MV      $CTXA,$CTXB
|| [A0] SUBAW   SP,2,SP                         ; reserve two words above buffer
        LDW     *${CTXA}[0],$A                  ; load ctx
||      LDW     *${CTXB}[4],$E
        LDW     *${CTXA}[1],$B
||      LDW     *${CTXB}[5],$F
        LDW     *${CTXA}[2],$C
||      LDW     *${CTXB}[6],$G
        LDW     *${CTXA}[3],$D
||      LDW     *${CTXB}[7],$H

        LDNW    *$INP++,$Xn                     ; pre-fetch input
        LDW     *$K256++,$K                     ; pre-fetch K256[0]
        MVK     14,B0                           ; loop counters
        MVK     47,B1
||      ADDAW   $Xia,9,$Xia
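        ;; B0 and B1 are the trip counts for the two SPLOOPs below:
        ;; B0=14 covers rounds 0-14 and B1=47 covers rounds 16-63;
        ;; round 15 is handled standalone in between, where the
        ;; modulo-scheduled message expansion is primed.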
outerloop?:
        SUB     A0,1,A0
||      MV      $A,$Actx
||      MV      $E,$Ectx
||      MVD     $B,$Bctx
||      MVD     $F,$Fctx
        MV      $C,$Cctx
||      MV      $G,$Gctx
||      MVD     $D,$Dctx
||      MVD     $H,$Hctx
||      SWAP4   $Xn,$X0

        SPLOOPD 8                               ; BODY_00_14
||      MVC     B0,ILC
||      SWAP2   $X0,$X0

        LDNW    *$INP++,$Xn
||      ROTL    $A,30,$S0
||      OR      $A,$B,$Maj
||      AND     $A,$B,$t2a
||      ROTL    $E,26,$S1
||      AND     $F,$E,$Ch
||      ANDN    $G,$E,$t2e
        ROTL    $A,19,$t0a
||      AND     $C,$Maj,$Maj
||      ROTL    $E,21,$t0e
||      XOR     $t2e,$Ch,$Ch                    ; Ch(e,f,g) = (e&f)^(~e&g)
        ROTL    $A,10,$t1a
||      OR      $t2a,$Maj,$Maj                  ; Maj(a,b,c) = ((a|b)&c)|(a&b)
||      ROTL    $E,7,$t1e
||      ADD     $K,$H,$T1                       ; T1 = h + K256[i]
        ADD     $X0,$T1,$T1                     ; T1 += X[i];
||      STW     $X0,*$Xib++
||      XOR     $t0a,$S0,$S0
||      XOR     $t0e,$S1,$S1
        XOR     $t1a,$S0,$S0                    ; Sigma0(a)
||      XOR     $t1e,$S1,$S1                    ; Sigma1(e)
||      LDW     *$K256++,$K                     ; pre-fetch K256[i+1]
||      ADD     $Ch,$T1,$T1                     ; T1 += Ch(e,f,g)
        ADD     $S1,$T1,$T1                     ; T1 += Sigma1(e)
||      ADD     $S0,$Maj,$T2                    ; T2 = Sigma0(a) + Maj(a,b,c)
||      ROTL    $G,0,$H                         ; h = g
||      MV      $F,$G                           ; g = f
||      MV      $X0,$X14
||      SWAP4   $Xn,$X0
        SWAP2   $X0,$X0
||      MV      $E,$F                           ; f = e
||      ADD     $D,$T1,$E                       ; e = d + T1
||      MV      $C,$D                           ; d = c
        MV      $B,$C                           ; c = b
||      MV      $A,$B                           ; b = a
||      ADD     $T1,$T2,$A                      ; a = T1 + T2
        SPKERNEL

        ROTL    $A,30,$S0                       ; BODY_15
||      OR      $A,$B,$Maj
||      AND     $A,$B,$t2a
||      ROTL    $E,26,$S1
||      AND     $F,$E,$Ch
||      ANDN    $G,$E,$t2e
||      LDW     *${Xib}[1],$Xn                  ; modulo-scheduled
        ROTL    $A,19,$t0a
||      AND     $C,$Maj,$Maj
||      ROTL    $E,21,$t0e
||      XOR     $t2e,$Ch,$Ch                    ; Ch(e,f,g) = (e&f)^(~e&g)
||      LDW     *${Xib}[2],$X1                  ; modulo-scheduled
        ROTL    $A,10,$t1a
||      OR      $t2a,$Maj,$Maj                  ; Maj(a,b,c) = ((a|b)&c)|(a&b)
||      ROTL    $E,7,$t1e
||      ADD     $K,$H,$T1                       ; T1 = h + K256[i]
        ADD     $X0,$T1,$T1                     ; T1 += X[i];
||      STW     $X0,*$Xib++
||      XOR     $t0a,$S0,$S0
||      XOR     $t0e,$S1,$S1
        XOR     $t1a,$S0,$S0                    ; Sigma0(a)
||      XOR     $t1e,$S1,$S1                    ; Sigma1(e)
||      LDW     *$K256++,$K                     ; pre-fetch K256[i+1]
||      ADD     $Ch,$T1,$T1                     ; T1 += Ch(e,f,g)
        ADD     $S1,$T1,$T1                     ; T1 += Sigma1(e)
||      ADD     $S0,$Maj,$T2                    ; T2 = Sigma0(a) + Maj(a,b,c)
||      ROTL    $G,0,$H                         ; h = g
||      MV      $F,$G                           ; g = f
||      MV      $X0,$X15
        MV      $E,$F                           ; f = e
||      ADD     $D,$T1,$E                       ; e = d + T1
||      MV      $C,$D                           ; d = c
||      MV      $Xn,$X0                         ; modulo-scheduled
||      LDW     *$Xia,$X9                       ; modulo-scheduled
||      ROTL    $X1,25,$t0e                     ; modulo-scheduled
||      ROTL    $X14,15,$t0a                    ; modulo-scheduled
        SHRU    $X1,3,$s0                       ; modulo-scheduled
||      SHRU    $X14,10,$s1                     ; modulo-scheduled
||      ROTL    $B,0,$C                         ; c = b
||      MV      $A,$B                           ; b = a
||      ADD     $T1,$T2,$A                      ; a = T1 + T2

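        ;; BODY_16_63: besides the round computation proper, each
        ;; iteration extends the message schedule: X[i+9] is fetched
        ;; through Xia while the freshly computed X[i] is stored
        ;; through Xib, both wrapping within the 64-byte ring buffer
        ;; set up through AMR.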
        SPLOOPD 10                              ; BODY_16_63
||      MVC     B1,ILC
||      ROTL    $X1,14,$t1e                     ; modulo-scheduled
||      ROTL    $X14,13,$t1a                    ; modulo-scheduled

        XOR     $t0e,$s0,$s0
||      XOR     $t0a,$s1,$s1
||      MV      $X15,$X14
||      MV      $X1,$Xn
        XOR     $t1e,$s0,$s0                    ; sigma0(X[i+1])
||      XOR     $t1a,$s1,$s1                    ; sigma1(X[i+14])
||      LDW     *${Xib}[2],$X1                  ; modulo-scheduled
        ROTL    $A,30,$S0
||      OR      $A,$B,$Maj
||      AND     $A,$B,$t2a
||      ROTL    $E,26,$S1
||      AND     $F,$E,$Ch
||      ANDN    $G,$E,$t2e
||      ADD     $X9,$X0,$X0                     ; X[i] += X[i+9]
        ROTL    $A,19,$t0a
||      AND     $C,$Maj,$Maj
||      ROTL    $E,21,$t0e
||      XOR     $t2e,$Ch,$Ch                    ; Ch(e,f,g) = (e&f)^(~e&g)
||      ADD     $s0,$X0,$X0                     ; X[i] += sigma0(X[i+1])
        ROTL    $A,10,$t1a
||      OR      $t2a,$Maj,$Maj                  ; Maj(a,b,c) = ((a|b)&c)|(a&b)
||      ROTL    $E,7,$t1e
||      ADD     $H,$K,$T1                       ; T1 = h + K256[i]
||      ADD     $s1,$X0,$X0                     ; X[i] += sigma1(X[i+14])
        XOR     $t0a,$S0,$S0
||      XOR     $t0e,$S1,$S1
||      ADD     $X0,$T1,$T1                     ; T1 += X[i]
||      STW     $X0,*$Xib++
        XOR     $t1a,$S0,$S0                    ; Sigma0(a)
||      XOR     $t1e,$S1,$S1                    ; Sigma1(e)
||      ADD     $Ch,$T1,$T1                     ; T1 += Ch(e,f,g)
||      MV      $X0,$X15
||      ROTL    $G,0,$H                         ; h = g
||      LDW     *$K256++,$K                     ; pre-fetch K256[i+1]
        ADD     $S1,$T1,$T1                     ; T1 += Sigma1(e)
||      ADD     $S0,$Maj,$T2                    ; T2 = Sigma0(a) + Maj(a,b,c)
||      MV      $F,$G                           ; g = f
||      MV      $Xn,$X0                         ; modulo-scheduled
||      LDW     *++$Xia,$X9                     ; modulo-scheduled
||      ROTL    $X1,25,$t0e                     ; modulo-scheduled
||      ROTL    $X14,15,$t0a                    ; modulo-scheduled
        ROTL    $X1,14,$t1e                     ; modulo-scheduled
||      ROTL    $X14,13,$t1a                    ; modulo-scheduled
||      MV      $E,$F                           ; f = e
||      ADD     $D,$T1,$E                       ; e = d + T1
||      MV      $C,$D                           ; d = c
||      MV      $B,$C                           ; c = b
        MV      $A,$B                           ; b = a
||      ADD     $T1,$T2,$A                      ; a = T1 + T2
||      SHRU    $X1,3,$s0                       ; modulo-scheduled
||      SHRU    $X14,10,$s1                     ; modulo-scheduled
        SPKERNEL

   [A0] B       outerloop?
|| [A0] LDNW    *$INP++,$Xn                     ; pre-fetch input
|| [A0] ADDK    -260,$K256                      ; rewind K256 (65 words fetched)
||      ADD     $Actx,$A,$A                     ; accumulate ctx
||      ADD     $Ectx,$E,$E
||      ADD     $Bctx,$B,$B
        ADD     $Fctx,$F,$F
||      ADD     $Cctx,$C,$C
||      ADD     $Gctx,$G,$G
||      ADD     $Dctx,$D,$D
||      ADD     $Hctx,$H,$H
|| [A0] LDW     *$K256++,$K                     ; pre-fetch K256[0]

  [!A0] BNOP    RA
||[!A0] MV      $CTXA,$CTXB
  [!A0] MV      FP,SP                           ; restore stack pointer
||[!A0] LDW     *FP[0],FP                       ; restore frame pointer
  [!A0] STW     $A,*${CTXA}[0]                  ; save ctx
||[!A0] STW     $E,*${CTXB}[4]
||[!A0] MVK     0,B0
  [!A0] STW     $B,*${CTXA}[1]
||[!A0] STW     $F,*${CTXB}[5]
||[!A0] MVC     B0,AMR                          ; clear AMR
        STW     $C,*${CTXA}[2]
||      STW     $G,*${CTXB}[6]
        STW     $D,*${CTXA}[3]
||      STW     $H,*${CTXB}[7]
        .endasmfunc

        .if     __TI_EABI__
        .sect   ".text:sha_asm.const"
        .else
        .sect   ".const:sha_asm"
        .endif
        .align  128
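        ;; SHA-256 round constants: the first 32 bits of the fractional
        ;; parts of the cube roots of the first sixty-four primes 2..311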
K256:
        .uword  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
        .uword  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
        .uword  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
        .uword  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
        .uword  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
        .uword  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
        .uword  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
        .uword  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
        .uword  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
        .uword  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
        .uword  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
        .uword  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
        .uword  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
        .uword  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
        .uword  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
        .uword  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
        .cstring "SHA256 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
        .align  4

___

print $code;
close STDOUT or die "error closing STDOUT: $!";