Fix bug introduced in cn#16195.
[openssl.git] / crypto / aes / asm / aes-s390x.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for s390x.
11
12 # April 2007.
13 #
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB.
20 #
21 # As for hardware acceleration support, it's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably, support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Also, the software key schedule could be postponed until hardware
25 # support detection... Performance improvement over assembler is
26 # reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
27 # proper support is implemented.
28
# Symbolic names for the s390x general registers used by the assembly
# templates in this file; they are interpolated into the heredocs.
#
# Two registers deliberately carry two names each:
#   %r2 is $inp on entry and is reused as the scratch register $t3 once
#       the 16 input bytes have been loaded into $s0-$s3;
#   %r3 is $out on entry (the public entry points save it on the stack and
#       reload it before storing the result) and is reused as $mask, the
#       byte-index mask, inside the round functions.
29 $t1="%r0";                      # scratch
30 $t2="%r1";                      # scratch
31 $t3="%r2";      $inp="%r2";     # scratch / input block pointer
32 $out="%r3";     $mask="%r3";    # output block pointer / index mask
33 $key="%r4";                     # pointer into the key schedule
34 $i1="%r5";                      # table index scratch
35 $i2="%r6";                      # table index scratch
36 $i3="%r7";                      # table index scratch
37 $s0="%r8";                      # state word 0
38 $s1="%r9";                      # state word 1
39 $s2="%r10";                     # state word 2
40 $s3="%r11";                     # state word 3
41 $tbl="%r12";                    # base address of AES_Te / AES_Td
42 $rounds="%r13";                 # loop counter for the round loop
43 $ra="%r14";                     # return address (bras/br linkage)
44 $sp="%r15";                     # stack pointer
45
# _data_word(LIST)
#
# Appends one assembler ".long" line per list element to the global
# $code buffer. Every 32-bit value is written twice ("v,v"), so each
# table entry occupies 8 bytes; the assembly in this file relies on that
# layout when it loads words at byte offsets 0..3 within an entry.
#
# No prototype is declared on purpose: the sub takes a variable-length
# list. An empty prototype "()" would make any plain _data_word(...) call
# a compile-time error and would only work through the prototype-bypassing
# "&_data_word(...)" call form.
#
# Consumption stops at the first undefined element; a value of 0 is
# emitted normally because the loop tests definedness, not truth.
# Returns nothing useful.
sub _data_word
{   my $i;
    while (defined($i = shift)) {
        $code .= sprintf ".long\t0x%08x,0x%08x\n", $i, $i;
    }
    return;
}
50
# Start the output buffer: open the .text section and declare the
# 64-byte-aligned object AES_Te that holds the encryption lookup table.
51 $code=<<___;
52 .text
53
54 .type   AES_Te,\@object
55 .align  64
56 AES_Te:
57 ___
# AES_Te contents: 256 32-bit constants. _data_word emits each constant
# twice, so every entry is 8 bytes and the table is 2KB in total. The
# round code indexes it with (byte << 3) and loads at byte offsets 0, 1,
# 2 and 3 within an entry, which it labels Te0, Te3, Te2 and Te1; the
# final round reads the single byte at offset 2 (labelled Te4).
58 &_data_word(
59         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
60         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
61         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
62         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
63         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
64         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
65         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
66         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
67         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
68         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
69         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
70         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
71         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
72         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
73         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
74         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
75         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
76         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
77         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
78         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
79         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
80         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
81         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
82         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
83         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
84         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
85         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
86         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
87         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
88         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
89         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
90         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
91         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
92         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
93         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
94         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
95         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
96         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
97         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
98         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
99         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
100         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
101         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
102         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
103         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
104         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
105         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
106         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
107         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
108         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
109         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
110         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
111         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
112         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
113         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
114         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
115         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
116         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
117         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
118         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
119         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
120         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
121         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
122         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Encryption routines. This heredoc closes the AES_Te object and emits two
# functions.
#
# AES_encrypt(in, out, key) -- public single-block entry point.
#   * If the word at 240($key) equals 10, the KM instruction (emitted as a
#     raw .long) is issued with function code 0 to store the capability
#     vector at 16($sp). When the bit selected by mask 0x8000>>2 is set
#     (presumably the bit for function code 0x12 -- confirm against the
#     z/Architecture Principles of Operation), KM is issued again with
#     function code 0x00|0x12 on a single 16-byte block, and the routine
#     returns through "bcr 8,%r14" when that branch is taken (presumably
#     condition code 0, i.e. normal completion -- confirm). If the branch
#     is not taken, $out and $key are restored from %r4/%r1 and execution
#     falls into the software path.
#   * Software path (.Lesoft): saves %r3-%r15 at 24($sp), computes the
#     address of AES_Te relative to .Lepic, loads the four input words
#     into $s0-$s3, sets $mask to 0xff<<3 and calls _s390x_AES_encrypt.
#     Afterwards it reloads $out from 24($sp), stores $s0-$s3 there and
#     restores %r6-%r15.
#
# _s390x_AES_encrypt -- internal round function, called via "bras $ra".
#   In:  $s0-$s3 = state, $key = key schedule, $tbl = AES_Te,
#        $mask = 0xff<<3.
#   Out: $s0-$s3 = result; returns with "br $ra".
#   Writes $t1-$t3, $i1-$i3, $rounds and advances $key.
#   The state is XORed with the first 16 key bytes, then the loop runs
#   (word at 240($key)) - 1 times. Each iteration turns every state byte
#   into an 8-byte-scaled table index, combines four table words per
#   output word, advances $key by 16 and XORs in the next round key. The
#   final round looks up single bytes at offset 2 of the table entries,
#   reassembles them with shifts and ORs, and XORs with the key at
#   16($key).
123 $code.=<<___;
124 .size   AES_Te,.-AES_Te
125
126 # void AES_encrypt(const unsigned char *in, unsigned char *out,
127 #                const AES_KEY *key) {
128 .globl  AES_encrypt
129 .type   AES_encrypt,\@function
130 AES_encrypt:
131         lghi    %r0,10
132         c       %r0,240($key)
133         jne     .Lesoft
134         lghi    %r0,0           # query capability vector
135         la      %r1,16($sp)
136         .long   0xb92e0042      # km %r4,%r2
137         lg      %r0,16($sp)
138         tmhl    %r0,`0x8000>>2`
139         jz      .Lesoft
140         lghi    %r0,`0x00|0x12` # encrypt AES-128
141         la      %r1,0($key)
142         #la     %r2,0($inp)
143         la      %r4,0($out)
144         lghi    %r3,16          # single block length
145         .long   0xb92e0042      # km %r4,%r2
146         bcr     8,%r14
147         la      $out,0(%r4)     # restore arguments
148         la      $key,0(%r1)
149 .Lesoft:
150         stmg    %r3,%r15,24($sp)
151
152         bras    $tbl,.Lepic
153 .Lepic: aghi    $tbl,AES_Te-.Lepic
154
155         llgf    $s0,0($inp)
156         llgf    $s1,4($inp)
157         llgf    $s2,8($inp)
158         llgf    $s3,12($inp)
159
160         llill   $mask,`0xff<<3`
161         bras    $ra,_s390x_AES_encrypt
162
163         lg      $out,24($sp)
164         st      $s0,0($out)
165         st      $s1,4($out)
166         st      $s2,8($out)
167         st      $s3,12($out)
168
169         lmg     %r6,%r15,48($sp)
170         br      %r14
171 .size   AES_encrypt,.-AES_encrypt
172
173 .type   _s390x_AES_encrypt,\@function
174 .align  16
175 _s390x_AES_encrypt:
176         x       $s0,0($key)
177         x       $s1,4($key)
178         x       $s2,8($key)
179         x       $s3,12($key)
180         l       $rounds,240($key)
181         aghi    $rounds,-1
182
183 .Lenc_loop:
184         sllg    $i1,$s0,`0+3`
185         srlg    $i2,$s0,`8-3`
186         srlg    $i3,$s0,`16-3`
187         srl     $s0,`24-3`
188         nr      $s0,$mask
189         ngr     $i1,$mask
190         nr      $i2,$mask
191         nr      $i3,$mask
192         l       $s0,0($s0,$tbl) # Te0[s0>>24]
193         l       $t1,1($i1,$tbl) # Te3[s0>>0]
194         l       $t2,2($i2,$tbl) # Te2[s0>>8]
195         l       $t3,3($i3,$tbl) # Te1[s0>>16]
196
197         srlg    $i1,$s1,`16-3`  # i0
198         sllg    $i2,$s1,`0+3`
199         srlg    $i3,$s1,`8-3`
200         srl     $s1,`24-3`
201         nr      $i1,$mask
202         nr      $s1,$mask
203         ngr     $i2,$mask
204         nr      $i3,$mask
205         x       $s0,3($i1,$tbl) # Te1[s1>>16]
206         l       $s1,0($s1,$tbl) # Te0[s1>>24]
207         x       $t2,1($i2,$tbl) # Te3[s1>>0]
208         x       $t3,2($i3,$tbl) # Te2[s1>>8]
209         xr      $s1,$t1
210
211         srlg    $i1,$s2,`8-3`   # i0
212         srlg    $i2,$s2,`16-3`  # i1
213         sllg    $i3,$s2,`0+3`
214         srl     $s2,`24-3`
215         nr      $i1,$mask
216         nr      $i2,$mask
217         nr      $s2,$mask
218         ngr     $i3,$mask
219         x       $s0,2($i1,$tbl) # Te2[s2>>8]
220         x       $s1,3($i2,$tbl) # Te1[s2>>16]
221         l       $s2,0($s2,$tbl) # Te0[s2>>24]
222         x       $t3,1($i3,$tbl) # Te3[s2>>0]
223         xr      $s2,$t2
224
225         sllg    $i1,$s3,`0+3`   # i0
226         srlg    $i2,$s3,`8-3`   # i1
227         srlg    $i3,$s3,`16-3`  # i2
228         srl     $s3,`24-3`
229         ngr     $i1,$mask
230         nr      $i2,$mask
231         nr      $i3,$mask
232         nr      $s3,$mask
233         x       $s0,1($i1,$tbl) # Te3[s3>>0]
234         x       $s1,2($i2,$tbl) # Te2[s3>>8]
235         x       $s2,3($i3,$tbl) # Te1[s3>>16]
236         l       $s3,0($s3,$tbl) # Te0[s3>>24]
237         xr      $s3,$t3
238
239         la      $key,16($key)
240         x       $s0,0($key)
241         x       $s1,4($key)
242         x       $s2,8($key)
243         x       $s3,12($key)
244
245         brct    $rounds,.Lenc_loop
246
247         sllg    $i1,$s0,`0+3`
248         srlg    $i2,$s0,`8-3`
249         srlg    $i3,$s0,`16-3`
250         srl     $s0,`24-3`
251         nr      $s0,$mask
252         ngr     $i1,$mask
253         nr      $i2,$mask
254         nr      $i3,$mask
255         llgc    $s0,2($s0,$tbl) # Te4[s0>>24]
256         llgc    $t1,2($i1,$tbl) # Te4[s0>>0]
257         llgc    $t2,2($i2,$tbl) # Te4[s0>>8]
258         llgc    $t3,2($i3,$tbl) # Te4[s0>>16]
259         sll     $s0,24
260         sll     $t2,8
261         sll     $t3,16
262
263         srlg    $i1,$s1,`16-3`  # i0
264         sllg    $i2,$s1,`0+3`
265         srlg    $i3,$s1,`8-3`
266         srl     $s1,`24-3`
267         nr      $i1,$mask
268         nr      $s1,$mask
269         ngr     $i2,$mask
270         nr      $i3,$mask
271         llgc    $i1,2($i1,$tbl) # Te4[s1>>16]
272         llgc    $s1,2($s1,$tbl) # Te4[s1>>24]
273         llgc    $i2,2($i2,$tbl) # Te4[s1>>0]
274         llgc    $i3,2($i3,$tbl) # Te4[s1>>8]
275         sll     $i1,16
276         sll     $s1,24
277         sll     $i3,8
278         or      $s0,$i1
279         or      $s1,$t1
280         or      $t2,$i2
281         or      $t3,$i3
282         
283         srlg    $i1,$s2,`8-3`   # i0
284         srlg    $i2,$s2,`16-3`  # i1
285         sllg    $i3,$s2,`0+3`
286         srl     $s2,`24-3`
287         nr      $i1,$mask
288         nr      $i2,$mask
289         nr      $s2,$mask
290         ngr     $i3,$mask
291         llgc    $i1,2($i1,$tbl) # Te4[s2>>8]
292         llgc    $i2,2($i2,$tbl) # Te4[s2>>16]
293         llgc    $s2,2($s2,$tbl) # Te4[s2>>24]
294         llgc    $i3,2($i3,$tbl) # Te4[s2>>0]
295         sll     $i1,8
296         sll     $i2,16
297         sll     $s2,24
298         or      $s0,$i1
299         or      $s1,$i2
300         or      $s2,$t2
301         or      $t3,$i3
302
303         sllg    $i1,$s3,`0+3`   # i0
304         srlg    $i2,$s3,`8-3`   # i1
305         srlg    $i3,$s3,`16-3`  # i2
306         srl     $s3,`24-3`
307         ngr     $i1,$mask
308         nr      $i2,$mask
309         nr      $i3,$mask
310         nr      $s3,$mask
311         llgc    $i1,2($i1,$tbl) # Te4[s3>>0]
312         llgc    $i2,2($i2,$tbl) # Te4[s3>>8]
313         llgc    $i3,2($i3,$tbl) # Te4[s3>>16]
314         llgc    $s3,2($s3,$tbl) # Te4[s3>>24]
315         sll     $i2,8
316         sll     $i3,16
317         sll     $s3,24
318         or      $s0,$i1
319         or      $s1,$i2
320         or      $s2,$i3
321         or      $s3,$t3
322
323         x       $s0,16($key)
324         x       $s1,20($key)
325         x       $s2,24($key)
326         x       $s3,28($key)
327
328         br      $ra     
329 .size   _s390x_AES_encrypt,.-_s390x_AES_encrypt
330 ___
331
# Declare the 64-byte-aligned object AES_Td that holds the decryption
# lookup tables.
332 $code.=<<___;
333 .type   AES_Td,\@object
334 .align  64
335 AES_Td:
336 ___
# First part of AES_Td: 256 32-bit constants, each emitted twice by
# _data_word (8 bytes per entry, 2KB in total). The decryption round code
# loads at byte offsets 0, 3, 2 and 1 within an entry, which it labels
# Td0, Td1, Td2 and Td3.
337 &_data_word(
338         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
339         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
340         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
341         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
342         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
343         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
344         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
345         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
346         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
347         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
348         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
349         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
350         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
351         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
352         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
353         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
354         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
355         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
356         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
357         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
358         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
359         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
360         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
361         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
362         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
363         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
364         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
365         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
366         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
367         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
368         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
369         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
370         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
371         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
372         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
373         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
374         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
375         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
376         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
377         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
378         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
379         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
380         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
381         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
382         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
383         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
384         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
385         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
386         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
387         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
388         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
389         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
390         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
391         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
392         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
393         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
394         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
395         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
396         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
397         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
398         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
399         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
400         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
401         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Decryption routines. This heredoc first appends a 256-entry single-byte
# table directly after the 2KB word table (the final round addresses it
# as 2048($tbl), labelled Td4), closes the AES_Td object, and then emits
# two functions.
#
# AES_decrypt(in, out, key) -- public single-block entry point.
#   Same structure as AES_encrypt: when the word at 240($key) equals 10 it
#   queries KM (function code 0) and, if the bit selected by mask
#   0x8000>>2 is set, issues KM with function code 0x80|0x12 using
#   160($key) as the parameter-block address, returning through
#   "bcr 8,%r14" when that branch is taken (presumably condition code 0 --
#   confirm against the z/Architecture Principles of Operation). If the
#   branch is not taken, $out is restored from %r4 and $key is recomputed
#   as %r1 - 160 before falling into the software path.
#   Software path (.Ldsoft): saves %r3-%r15 at 24($sp), computes the
#   address of AES_Td relative to .Ldpic, loads the input words into
#   $s0-$s3, sets $mask to 0xff<<3, calls _s390x_AES_decrypt, stores
#   $s0-$s3 to the reloaded $out and restores %r6-%r15.
#
# _s390x_AES_decrypt -- internal round function, called via "bras $ra".
#   In:  $s0-$s3 = state, $key = key schedule, $tbl = AES_Td,
#        $mask = 0xff<<3.
#   Out: $s0-$s3 = result; returns with "br $ra".
#   Writes $t1-$t3, $i1-$i3, $rounds, $mask and advances $key.
#   After the initial key XOR the loop runs (word at 240($key)) - 1 times
#   using 8-byte-scaled indices into the word table. Before the final
#   round it issues eight loads at 32-byte strides across the byte table
#   ("prefetch Td4"), switches $mask to 0xff for unscaled byte indices,
#   performs the byte lookups at 2048($tbl), reassembles the words with
#   shifts and ORs, and XORs with the key at 16($key).
402 $code.=<<___;
403 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
404 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
405 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
406 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
407 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
408 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
409 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
410 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
411 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
412 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
413 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
414 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
415 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
416 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
417 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
418 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
419 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
420 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
421 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
422 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
423 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
424 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
425 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
426 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
427 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
428 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
429 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
430 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
431 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
432 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
433 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
434 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
435 .size   AES_Td,.-AES_Td
436
437 # void AES_decrypt(const unsigned char *in, unsigned char *out,
438 #                const AES_KEY *key) {
439 .globl  AES_decrypt
440 .type   AES_decrypt,\@function
441 AES_decrypt:
442         lghi    %r0,10
443         c       %r0,240($key)
444         jne     .Ldsoft
445         lghi    %r0,0           # query capability vector
446         la      %r1,16($sp)
447         .long   0xb92e0042      # km %r4,%r2
448         lg      %r0,16($sp)
449         tmhl    %r0,`0x8000>>2`
450         jz      .Ldsoft
451         lghi    %r0,`0x80|0x12` # decrypt AES-128
452         la      %r1,160($key)
453         #la     %r2,0($inp)
454         la      %r4,0($out)
455         lghi    %r3,16          # single block length
456         .long   0xb92e0042      # km %r4,%r2
457         bcr     8,%r14
458         la      $out,0(%r4)     # restore arguments
459         lghi    $key,-160
460         la      $key,0($key,%r1)
461 .Ldsoft:
462         stmg    %r3,%r15,24($sp)
463
464         bras    $tbl,.Ldpic
465 .Ldpic: aghi    $tbl,AES_Td-.Ldpic
466
467         llgf    $s0,0($inp)
468         llgf    $s1,4($inp)
469         llgf    $s2,8($inp)
470         llgf    $s3,12($inp)
471
472         llill   $mask,`0xff<<3`
473         bras    $ra,_s390x_AES_decrypt
474
475         lg      $out,24($sp)
476         st      $s0,0($out)
477         st      $s1,4($out)
478         st      $s2,8($out)
479         st      $s3,12($out)
480
481         lmg     %r6,%r15,48($sp)
482         br      %r14
483 .size   AES_decrypt,.-AES_decrypt
484
485 .type   _s390x_AES_decrypt,\@function
486 .align  16
487 _s390x_AES_decrypt:
488         x       $s0,0($key)
489         x       $s1,4($key)
490         x       $s2,8($key)
491         x       $s3,12($key)
492         l       $rounds,240($key)
493         aghi    $rounds,-1
494
495 .Ldec_loop:
496         srlg    $i1,$s0,`16-3`
497         srlg    $i2,$s0,`8-3`
498         sllg    $i3,$s0,`0+3`
499         srl     $s0,`24-3`
500         nr      $s0,$mask
501         nr      $i1,$mask
502         nr      $i2,$mask
503         ngr     $i3,$mask
504         l       $s0,0($s0,$tbl) # Td0[s0>>24]
505         l       $t1,3($i1,$tbl) # Td1[s0>>16]
506         l       $t2,2($i2,$tbl) # Td2[s0>>8]
507         l       $t3,1($i3,$tbl) # Td3[s0>>0]
508
509         sllg    $i1,$s1,`0+3`   # i0
510         srlg    $i2,$s1,`16-3`
511         srlg    $i3,$s1,`8-3`
512         srl     $s1,`24-3`
513         ngr     $i1,$mask
514         nr      $s1,$mask
515         nr      $i2,$mask
516         nr      $i3,$mask
517         x       $s0,1($i1,$tbl) # Td3[s1>>0]
518         l       $s1,0($s1,$tbl) # Td0[s1>>24]
519         x       $t2,3($i2,$tbl) # Td1[s1>>16]
520         x       $t3,2($i3,$tbl) # Td2[s1>>8]
521         xr      $s1,$t1
522
523         srlg    $i1,$s2,`8-3`   # i0
524         sllg    $i2,$s2,`0+3`   # i1
525         srlg    $i3,$s2,`16-3`
526         srl     $s2,`24-3`
527         nr      $i1,$mask
528         ngr     $i2,$mask
529         nr      $s2,$mask
530         nr      $i3,$mask
531         x       $s0,2($i1,$tbl) # Td2[s2>>8]
532         x       $s1,1($i2,$tbl) # Td3[s2>>0]
533         l       $s2,0($s2,$tbl) # Td0[s2>>24]
534         x       $t3,3($i3,$tbl) # Td1[s2>>16]
535         xr      $s2,$t2
536
537         srlg    $i1,$s3,`16-3`  # i0
538         srlg    $i2,$s3,`8-3`   # i1
539         sllg    $i3,$s3,`0+3`   # i2
540         srl     $s3,`24-3`
541         nr      $i1,$mask
542         nr      $i2,$mask
543         ngr     $i3,$mask
544         nr      $s3,$mask
545         x       $s0,3($i1,$tbl) # Td1[s3>>16]
546         x       $s1,2($i2,$tbl) # Td2[s3>>8]
547         x       $s2,1($i3,$tbl) # Td3[s3>>0]
548         l       $s3,0($s3,$tbl) # Td0[s3>>24]
549         xr      $s3,$t3
550
551         la      $key,16($key)
552         x       $s0,0($key)
553         x       $s1,4($key)
554         x       $s2,8($key)
555         x       $s3,12($key)
556
557         brct    $rounds,.Ldec_loop
558
559         l       $t1,`2048+0`($tbl)      # prefetch Td4
560         l       $t2,`2048+32`($tbl)
561         l       $t3,`2048+64`($tbl)
562         l       $i1,`2048+96`($tbl)
563         l       $i2,`2048+128`($tbl)
564         l       $i3,`2048+160`($tbl)
565         l       $t1,`2048+192`($tbl)
566         l       $t2,`2048+224`($tbl)
567         llill   $mask,0xff
568
569         srlg    $i3,$s0,24      # i0
570         srlg    $i1,$s0,16
571         srlg    $i2,$s0,8
572         nr      $s0,$mask       # i3
573         nr      $i1,$mask
574         nr      $i2,$mask
575         llgc    $i3,2048($i3,$tbl)      # Td4[s0>>24]
576         llgc    $t1,2048($i1,$tbl)      # Td4[s0>>16]
577         llgc    $t2,2048($i2,$tbl)      # Td4[s0>>8]
578         llgc    $t3,2048($s0,$tbl)      # Td4[s0>>0]
579         sllg    $s0,$i3,24
580         sll     $t1,16
581         sll     $t2,8
582
583         srlg    $i1,$s1,24
584         srlg    $i2,$s1,16
585         srlg    $i3,$s1,8
586         nr      $s1,$mask       # i0
587         nr      $i2,$mask
588         nr      $i3,$mask
589         llgc    $s1,2048($s1,$tbl)      # Td4[s1>>0]
590         llgc    $i1,2048($i1,$tbl)      # Td4[s1>>24]
591         llgc    $i2,2048($i2,$tbl)      # Td4[s1>>16]
592         llgc    $i3,2048($i3,$tbl)      # Td4[s1>>8]
593         sll     $i1,24
594         sll     $i2,16
595         sll     $i3,8
596         or      $s0,$s1
597         or      $t1,$i1
598         or      $t2,$i2
599         or      $t3,$i3
600
601         srlg    $i1,$s2,8       # i0
602         srlg    $i2,$s2,24
603         srlg    $i3,$s2,16
604         nr      $s2,$mask       # i1
605         nr      $i1,$mask
606         nr      $i3,$mask
607         llgc    $i1,2048($i1,$tbl)      # Td4[s2>>8]
608         llgc    $s1,2048($s2,$tbl)      # Td4[s2>>0]
609         llgc    $i2,2048($i2,$tbl)      # Td4[s2>>24]
610         llgc    $i3,2048($i3,$tbl)      # Td4[s2>>16]
611         sll     $i1,8
612         sll     $i2,24
613         sll     $i3,16
614         or      $s0,$i1
615         or      $s1,$t1
616         or      $t2,$i2
617         or      $t3,$i3
618
619         srlg    $i1,$s3,16      # i0
620         srlg    $i2,$s3,8       # i1
621         srlg    $i3,$s3,24
622         nr      $s3,$mask       # i2
623         nr      $i1,$mask
624         nr      $i2,$mask
625         llgc    $i1,2048($i1,$tbl)      # Td4[s3>>16]
626         llgc    $i2,2048($i2,$tbl)      # Td4[s3>>8]
627         llgc    $s2,2048($s3,$tbl)      # Td4[s3>>0]
628         llgc    $s3,2048($i3,$tbl)      # Td4[s3>>24]
629         sll     $i1,16
630         sll     $i2,8
631         sll     $s3,24
632         or      $s0,$i1
633         or      $s1,$i2
634         or      $s2,$t2
635         or      $s3,$t3
636
637         x       $s0,16($key)
638         x       $s1,20($key)
639         x       $s2,24($key)
640         x       $s3,28($key)
641
642         br      $ra     
643 .size   _s390x_AES_decrypt,.-_s390x_AES_decrypt
644 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
645 ___
646
# Post-process the assembled template: every `...` span is replaced by the
# result of evaluating its contents as a Perl expression (this is how
# constants such as `0xff<<3` or `2048+32` become plain numbers), then the
# finished assembly text is written to standard output.
647 $code =~ s/\`([^\`]*)\`/eval $1/gem;
648 print $code;