s390x assembler pack.
[openssl.git] / crypto / aes / asm / aes-s390x.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for s390x.
11
12 # April 2007.
13 #
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB.
20 #
21 # As for hardware acceleration support, it's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably, support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Also, the software key schedule could be postponed until hardware
25 # support detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >15x [naturally on larger chunks] if proper
27 # support is implemented.
28
# Symbolic names for the general registers used by the generated code.
# They are interpolated into the assembly heredocs below.
#
# Note the two deliberate aliases: $t3 shares %r2 with $inp and $mask
# shares %r3 with $out.  The generated code loads the whole input block
# before it first writes $t3, and it saves %r3 on the stack before it
# sets $mask (reloading $out from that slot afterwards).

# Scratch registers.
($t1, $t2) = ("%r0", "%r1");

# Pointers used by AES_encrypt/AES_decrypt: input, output, key schedule.
($inp, $out, $key) = ("%r2", "%r3", "%r4");

# Aliases that reuse the input/output pointer registers.
($t3, $mask) = ($inp, $out);

# Table index temporaries.
($i1, $i2, $i3) = ("%r5", "%r6", "%r7");

# The four 32-bit words of the cipher state.
($s0, $s1, $s2, $s3) = ("%r8", "%r9", "%r10", "%r11");

# Lookup table base and remaining-round counter.
($tbl, $rounds) = ("%r12", "%r13");

# Return address of the internal round routines, and the stack pointer.
($ra, $sp) = ("%r14", "%r15");
45
# Append one lookup-table entry per argument to the global $code buffer.
#
# Each argument is a 32-bit value; it is emitted twice on a single
# ".long" line, so every table entry occupies 8 bytes.  The generated
# code relies on this doubling: a 4-byte load at byte offset 1, 2 or 3
# inside an entry reads a byte-rotated copy of the word.
#
# Processing stops at the first undefined argument, as it always has.
#
# The sub used to carry an empty prototype "()", which contradicts the
# fact that it takes arguments and only worked because every call site
# uses the prototype-bypassing "&_data_word(...)" form.  The prototype
# is removed; existing "&"-style calls behave exactly as before.
sub _data_word
{
    foreach my $word (@_) {
        last unless defined($word);
        $code .= sprintf(".long\t0x%08x,0x%08x\n", $word, $word);
    }
}
50
# Start the generated assembly: select the .text section and open the
# 64-byte aligned AES_Te object.  The table contents are appended by the
# _data_word call that follows this heredoc.
51 $code=<<___;
52 .text
53
54 .type   AES_Te,\@object
55 .align  64
56 AES_Te:
57 ___
# Contents of AES_Te: 256 32-bit words.  _data_word emits each word
# twice, so the table is 256 entries of 8 bytes (2KB).  The encryption
# code indexes it with byte values pre-scaled by 8 and reads at byte
# offsets 0..3 within an entry.
58 &_data_word(
59         0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
60         0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
61         0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
62         0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
63         0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
64         0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
65         0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
66         0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
67         0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
68         0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
69         0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
70         0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
71         0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
72         0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
73         0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
74         0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
75         0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
76         0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
77         0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
78         0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
79         0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
80         0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
81         0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
82         0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
83         0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
84         0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
85         0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
86         0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
87         0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
88         0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
89         0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
90         0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
91         0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
92         0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
93         0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
94         0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
95         0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
96         0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
97         0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
98         0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
99         0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
100         0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
101         0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
102         0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
103         0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
104         0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
105         0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
106         0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
107         0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
108         0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
109         0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
110         0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
111         0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
112         0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
113         0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
114         0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
115         0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
116         0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
117         0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
118         0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
119         0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
120         0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
121         0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
122         0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Close the AES_Te object and emit AES_encrypt together with its
# software core, _s390x_AES_encrypt.
#
# AES_encrypt: when the word at 240($key) equals 10, the KM instruction
# (hand-encoded as .long 0xb92e0042) is first issued with function code
# 0 so that it stores its capability vector at 16($sp).  If the bit
# selected by the tmhl mask is set, KM is issued again with function
# code 0x12 on a single 16-byte block and the routine returns.  In all
# other cases control reaches .Lesoft: %r3-%r15 are saved at 24($sp),
# $tbl is pointed at AES_Te position-independently (bras/aghi), the
# block is loaded into $s0-$s3, $mask is set to 0xff<<3 and the core is
# called.  $out is reloaded from its save slot afterwards because %r3
# doubles as $mask.
#
# _s390x_AES_encrypt: xors the state with the first 16 bytes at $key,
# then runs the table-lookup loop (rounds - 1) times, advancing $key by
# 16 per iteration.  Table indices are pre-scaled by 8 (shift amounts
# written as N-3 / 0+3, mask 0xff<<3) because each AES_Te entry is 8
# bytes; loads at byte offsets 0, 3, 2, 1 serve as Te0, Te1, Te2, Te3 as
# noted on the individual instructions.  The final round reads single
# bytes at offset 2 of an entry (llgc), reassembles the words with
# shifts and ors, and xors in the 16 bytes at 16($key).
#
# Backtick-quoted expressions are evaluated by the substitution at the
# end of this script.
123 $code.=<<___;
124 .size   AES_Te,.-AES_Te
125
126 # void AES_encrypt(const unsigned char *in, unsigned char *out,
127 #                const AES_KEY *key) {
128 .globl  AES_encrypt
129 .type   AES_encrypt,\@function
130 AES_encrypt:
131         lghi    %r0,10
132         c       %r0,240($key)
133         jne     .Lesoft
134         lghi    %r0,0           # query capability vector
135         la      %r1,16($sp)
136         .long   0xb92e0042      # km %r4,%r2
137         lg      %r0,16($sp)
138         tmhl    %r0,`0x8000>>2`
139         jz      .Lesoft
140         lghi    %r0,`0x00|0x12` # encrypt AES-128
141         la      %r1,0($key)
142         la      %r2,0($inp)
143         la      %r4,0($out)
144         lghi    %r3,16          # single block length
145         .long   0xb92e0042      # km %r4,%r2
146         br      %r14
147 .Lesoft:
148         stmg    %r3,%r15,24($sp)
149
150         bras    $tbl,.Lepic
151 .Lepic: aghi    $tbl,AES_Te-.Lepic
152
153         llgf    $s0,0($inp)
154         llgf    $s1,4($inp)
155         llgf    $s2,8($inp)
156         llgf    $s3,12($inp)
157
158         llill   $mask,`0xff<<3`
159         bras    $ra,_s390x_AES_encrypt
160
161         lg      $out,24($sp)
162         st      $s0,0($out)
163         st      $s1,4($out)
164         st      $s2,8($out)
165         st      $s3,12($out)
166
167         lmg     %r6,%r15,48($sp)
168         br      %r14
169 .size   AES_encrypt,.-AES_encrypt
170
171 .type   _s390x_AES_encrypt,\@function
172 .align  16
173 _s390x_AES_encrypt:
174         x       $s0,0($key)
175         x       $s1,4($key)
176         x       $s2,8($key)
177         x       $s3,12($key)
178         l       $rounds,240($key)
179         aghi    $rounds,-1
180
181 .Lenc_loop:
182         sllg    $i1,$s0,`0+3`
183         srlg    $i2,$s0,`8-3`
184         srlg    $i3,$s0,`16-3`
185         srl     $s0,`24-3`
186         nr      $s0,$mask
187         ngr     $i1,$mask
188         nr      $i2,$mask
189         nr      $i3,$mask
190         l       $s0,0($s0,$tbl) # Te0[s0>>24]
191         l       $t1,1($i1,$tbl) # Te3[s0>>0]
192         l       $t2,2($i2,$tbl) # Te2[s0>>8]
193         l       $t3,3($i3,$tbl) # Te1[s0>>16]
194
195         srlg    $i1,$s1,`16-3`  # i0
196         sllg    $i2,$s1,`0+3`
197         srlg    $i3,$s1,`8-3`
198         srl     $s1,`24-3`
199         nr      $i1,$mask
200         nr      $s1,$mask
201         ngr     $i2,$mask
202         nr      $i3,$mask
203         x       $s0,3($i1,$tbl) # Te1[s1>>16]
204         l       $s1,0($s1,$tbl) # Te0[s1>>24]
205         x       $t2,1($i2,$tbl) # Te3[s1>>0]
206         x       $t3,2($i3,$tbl) # Te2[s1>>8]
207         xr      $s1,$t1
208
209         srlg    $i1,$s2,`8-3`   # i0
210         srlg    $i2,$s2,`16-3`  # i1
211         sllg    $i3,$s2,`0+3`
212         srl     $s2,`24-3`
213         nr      $i1,$mask
214         nr      $i2,$mask
215         nr      $s2,$mask
216         ngr     $i3,$mask
217         x       $s0,2($i1,$tbl) # Te2[s2>>8]
218         x       $s1,3($i2,$tbl) # Te1[s2>>16]
219         l       $s2,0($s2,$tbl) # Te0[s2>>24]
220         x       $t3,1($i3,$tbl) # Te3[s2>>0]
221         xr      $s2,$t2
222
223         sllg    $i1,$s3,`0+3`   # i0
224         srlg    $i2,$s3,`8-3`   # i1
225         srlg    $i3,$s3,`16-3`  # i2
226         srl     $s3,`24-3`
227         ngr     $i1,$mask
228         nr      $i2,$mask
229         nr      $i3,$mask
230         nr      $s3,$mask
231         x       $s0,1($i1,$tbl) # Te3[s3>>0]
232         x       $s1,2($i2,$tbl) # Te2[s3>>8]
233         x       $s2,3($i3,$tbl) # Te1[s3>>16]
234         l       $s3,0($s3,$tbl) # Te0[s3>>24]
235         xr      $s3,$t3
236
237         la      $key,16($key)
238         x       $s0,0($key)
239         x       $s1,4($key)
240         x       $s2,8($key)
241         x       $s3,12($key)
242
243         brct    $rounds,.Lenc_loop
244
245         sllg    $i1,$s0,`0+3`
246         srlg    $i2,$s0,`8-3`
247         srlg    $i3,$s0,`16-3`
248         srl     $s0,`24-3`
249         nr      $s0,$mask
250         ngr     $i1,$mask
251         nr      $i2,$mask
252         nr      $i3,$mask
253         llgc    $s0,2($s0,$tbl) # Te4[s0>>24]
254         llgc    $t1,2($i1,$tbl) # Te4[s0>>0]
255         llgc    $t2,2($i2,$tbl) # Te4[s0>>8]
256         llgc    $t3,2($i3,$tbl) # Te4[s0>>16]
257         sll     $s0,24
258         sll     $t2,8
259         sll     $t3,16
260
261         srlg    $i1,$s1,`16-3`  # i0
262         sllg    $i2,$s1,`0+3`
263         srlg    $i3,$s1,`8-3`
264         srl     $s1,`24-3`
265         nr      $i1,$mask
266         nr      $s1,$mask
267         ngr     $i2,$mask
268         nr      $i3,$mask
269         llgc    $i1,2($i1,$tbl) # Te4[s1>>16]
270         llgc    $s1,2($s1,$tbl) # Te4[s1>>24]
271         llgc    $i2,2($i2,$tbl) # Te4[s1>>0]
272         llgc    $i3,2($i3,$tbl) # Te4[s1>>8]
273         sll     $i1,16
274         sll     $s1,24
275         sll     $i3,8
276         or      $s0,$i1
277         or      $s1,$t1
278         or      $t2,$i2
279         or      $t3,$i3
280         
281         srlg    $i1,$s2,`8-3`   # i0
282         srlg    $i2,$s2,`16-3`  # i1
283         sllg    $i3,$s2,`0+3`
284         srl     $s2,`24-3`
285         nr      $i1,$mask
286         nr      $i2,$mask
287         nr      $s2,$mask
288         ngr     $i3,$mask
289         llgc    $i1,2($i1,$tbl) # Te4[s2>>8]
290         llgc    $i2,2($i2,$tbl) # Te4[s2>>16]
291         llgc    $s2,2($s2,$tbl) # Te4[s2>>24]
292         llgc    $i3,2($i3,$tbl) # Te4[s2>>0]
293         sll     $i1,8
294         sll     $i2,16
295         sll     $s2,24
296         or      $s0,$i1
297         or      $s1,$i2
298         or      $s2,$t2
299         or      $t3,$i3
300
301         sllg    $i1,$s3,`0+3`   # i0
302         srlg    $i2,$s3,`8-3`   # i1
303         srlg    $i3,$s3,`16-3`  # i2
304         srl     $s3,`24-3`
305         ngr     $i1,$mask
306         nr      $i2,$mask
307         nr      $i3,$mask
308         nr      $s3,$mask
309         llgc    $i1,2($i1,$tbl) # Te4[s3>>0]
310         llgc    $i2,2($i2,$tbl) # Te4[s3>>8]
311         llgc    $i3,2($i3,$tbl) # Te4[s3>>16]
312         llgc    $s3,2($s3,$tbl) # Te4[s3>>24]
313         sll     $i2,8
314         sll     $i3,16
315         sll     $s3,24
316         or      $s0,$i1
317         or      $s1,$i2
318         or      $s2,$i3
319         or      $s3,$t3
320
321         x       $s0,16($key)
322         x       $s1,20($key)
323         x       $s2,24($key)
324         x       $s3,28($key)
325
326         br      $ra     
327 .size   _s390x_AES_encrypt,.-_s390x_AES_encrypt
328 ___
329
# Open the 64-byte aligned AES_Td object.  Its word table is appended by
# the _data_word call that follows, and the object is closed (.size)
# only after the .byte rows emitted by the heredoc after that call.
330 $code.=<<___;
331 .type   AES_Td,\@object
332 .align  64
333 AES_Td:
334 ___
# Contents of the AES_Td word table: 256 32-bit words.  _data_word emits
# each word twice, so this part of AES_Td is 256 entries of 8 bytes
# (2048 bytes); the decryption code addresses the byte table that
# follows it at offset 2048 from AES_Td.
335 &_data_word(
336         0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
337         0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
338         0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
339         0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
340         0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
341         0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
342         0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
343         0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
344         0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
345         0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
346         0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
347         0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
348         0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
349         0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
350         0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
351         0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
352         0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
353         0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
354         0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
355         0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
356         0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
357         0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
358         0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
359         0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
360         0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
361         0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
362         0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
363         0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
364         0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
365         0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
366         0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
367         0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
368         0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
369         0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
370         0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
371         0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
372         0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
373         0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
374         0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
375         0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
376         0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
377         0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
378         0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
379         0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
380         0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
381         0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
382         0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
383         0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
384         0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
385         0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
386         0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
387         0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
388         0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
389         0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
390         0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
391         0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
392         0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
393         0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
394         0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
395         0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
396         0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
397         0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
398         0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
399         0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Finish the AES_Td object and emit AES_decrypt together with its
# software core, _s390x_AES_decrypt.
#
# The 32 .byte rows (256 bytes) are the byte table the core refers to as
# Td4; they sit directly after the 2048-byte word table, hence the
# 2048($tbl) displacements below.
#
# AES_decrypt mirrors AES_encrypt: when the word at 240($key) equals 10
# it queries KM (function code 0, result stored at 16($sp)) and, if the
# tested capability bit is set, issues KM with function code 0x80|0x12
# on a single 16-byte block, passing 160($key) as the key pointer.
# Otherwise .Ldsoft saves %r3-%r15 at 24($sp), points $tbl at AES_Td,
# loads the block into $s0-$s3, sets $mask to 0xff<<3, calls the core
# and stores the result through $out reloaded from its save slot.
#
# _s390x_AES_decrypt: xors the state with the first 16 bytes at $key and
# runs the table-lookup loop (rounds - 1) times, advancing $key by 16
# per iteration; loads at byte offsets 0, 3, 2, 1 of an 8-byte entry
# serve as Td0, Td1, Td2, Td3 as noted on the instructions.  Before the
# final round it touches Td4 at 32-byte strides (marked "prefetch Td4")
# and switches $mask to 0xff, since Td4 is indexed by unscaled byte
# values.  The final round looks up single bytes, reassembles the words
# with shifts and ors, and xors in the 16 bytes at 16($key).
#
# Backtick-quoted expressions are evaluated by the substitution at the
# end of this script.
400 $code.=<<___;
401 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
402 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
403 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
404 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
405 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
406 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
407 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
408 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
409 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
410 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
411 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
412 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
413 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
414 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
415 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
416 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
417 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
418 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
419 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
420 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
421 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
422 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
423 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
424 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
425 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
426 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
427 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
428 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
429 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
430 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
431 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
432 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
433 .size   AES_Td,.-AES_Td
434
435 # void AES_decrypt(const unsigned char *in, unsigned char *out,
436 #                const AES_KEY *key) {
437 .globl  AES_decrypt
438 .type   AES_decrypt,\@function
439 AES_decrypt:
440         lghi    %r0,10
441         c       %r0,240($key)
442         jne     .Ldsoft
443         lghi    %r0,0           # query capability vector
444         la      %r1,16($sp)
445         .long   0xb92e0042      # km %r4,%r2
446         lg      %r0,16($sp)
447         tmhl    %r0,`0x8000>>2`
448         jz      .Ldsoft
449         lghi    %r0,`0x80|0x12` # decrypt AES-128
450         la      %r1,160($key)
451         la      %r2,0($inp)
452         la      %r4,0($out)
453         lghi    %r3,16          # single block length
454         .long   0xb92e0042      # km %r4,%r2
455         br      %r14
456 .Ldsoft:
457         stmg    %r3,%r15,24($sp)
458
459         bras    $tbl,.Ldpic
460 .Ldpic: aghi    $tbl,AES_Td-.Ldpic
461
462         llgf    $s0,0($inp)
463         llgf    $s1,4($inp)
464         llgf    $s2,8($inp)
465         llgf    $s3,12($inp)
466
467         llill   $mask,`0xff<<3`
468         bras    $ra,_s390x_AES_decrypt
469
470         lg      $out,24($sp)
471         st      $s0,0($out)
472         st      $s1,4($out)
473         st      $s2,8($out)
474         st      $s3,12($out)
475
476         lmg     %r6,%r15,48($sp)
477         br      %r14
478 .size   AES_decrypt,.-AES_decrypt
479
480 .type   _s390x_AES_decrypt,\@function
481 .align  16
482 _s390x_AES_decrypt:
483         x       $s0,0($key)
484         x       $s1,4($key)
485         x       $s2,8($key)
486         x       $s3,12($key)
487         l       $rounds,240($key)
488         aghi    $rounds,-1
489
490 .Ldec_loop:
491         srlg    $i1,$s0,`16-3`
492         srlg    $i2,$s0,`8-3`
493         sllg    $i3,$s0,`0+3`
494         srl     $s0,`24-3`
495         nr      $s0,$mask
496         nr      $i1,$mask
497         nr      $i2,$mask
498         ngr     $i3,$mask
499         l       $s0,0($s0,$tbl) # Td0[s0>>24]
500         l       $t1,3($i1,$tbl) # Td1[s0>>16]
501         l       $t2,2($i2,$tbl) # Td2[s0>>8]
502         l       $t3,1($i3,$tbl) # Td3[s0>>0]
503
504         sllg    $i1,$s1,`0+3`   # i0
505         srlg    $i2,$s1,`16-3`
506         srlg    $i3,$s1,`8-3`
507         srl     $s1,`24-3`
508         ngr     $i1,$mask
509         nr      $s1,$mask
510         nr      $i2,$mask
511         nr      $i3,$mask
512         x       $s0,1($i1,$tbl) # Td3[s1>>0]
513         l       $s1,0($s1,$tbl) # Td0[s1>>24]
514         x       $t2,3($i2,$tbl) # Td1[s1>>16]
515         x       $t3,2($i3,$tbl) # Td2[s1>>8]
516         xr      $s1,$t1
517
518         srlg    $i1,$s2,`8-3`   # i0
519         sllg    $i2,$s2,`0+3`   # i1
520         srlg    $i3,$s2,`16-3`
521         srl     $s2,`24-3`
522         nr      $i1,$mask
523         ngr     $i2,$mask
524         nr      $s2,$mask
525         nr      $i3,$mask
526         x       $s0,2($i1,$tbl) # Td2[s2>>8]
527         x       $s1,1($i2,$tbl) # Td3[s2>>0]
528         l       $s2,0($s2,$tbl) # Td0[s2>>24]
529         x       $t3,3($i3,$tbl) # Td1[s2>>16]
530         xr      $s2,$t2
531
532         srlg    $i1,$s3,`16-3`  # i0
533         srlg    $i2,$s3,`8-3`   # i1
534         sllg    $i3,$s3,`0+3`   # i2
535         srl     $s3,`24-3`
536         nr      $i1,$mask
537         nr      $i2,$mask
538         ngr     $i3,$mask
539         nr      $s3,$mask
540         x       $s0,3($i1,$tbl) # Td1[s3>>16]
541         x       $s1,2($i2,$tbl) # Td2[s3>>8]
542         x       $s2,1($i3,$tbl) # Td3[s3>>0]
543         l       $s3,0($s3,$tbl) # Td0[s3>>24]
544         xr      $s3,$t3
545
546         la      $key,16($key)
547         x       $s0,0($key)
548         x       $s1,4($key)
549         x       $s2,8($key)
550         x       $s3,12($key)
551
552         brct    $rounds,.Ldec_loop
553
554         l       $t1,`2048+0`($tbl)      # prefetch Td4
555         l       $t2,`2048+32`($tbl)
556         l       $t3,`2048+64`($tbl)
557         l       $i1,`2048+96`($tbl)
558         l       $i2,`2048+128`($tbl)
559         l       $i3,`2048+160`($tbl)
560         l       $t1,`2048+192`($tbl)
561         l       $t2,`2048+224`($tbl)
562         llill   $mask,0xff
563
564         srlg    $i3,$s0,24      # i0
565         srlg    $i1,$s0,16
566         srlg    $i2,$s0,8
567         nr      $s0,$mask       # i3
568         nr      $i1,$mask
569         nr      $i2,$mask
570         llgc    $i3,2048($i3,$tbl)      # Td4[s0>>24]
571         llgc    $t1,2048($i1,$tbl)      # Td4[s0>>16]
572         llgc    $t2,2048($i2,$tbl)      # Td4[s0>>8]
573         llgc    $t3,2048($s0,$tbl)      # Td4[s0>>0]
574         sllg    $s0,$i3,24
575         sll     $t1,16
576         sll     $t2,8
577
578         srlg    $i1,$s1,24
579         srlg    $i2,$s1,16
580         srlg    $i3,$s1,8
581         nr      $s1,$mask       # i0
582         nr      $i2,$mask
583         nr      $i3,$mask
584         llgc    $s1,2048($s1,$tbl)      # Td4[s1>>0]
585         llgc    $i1,2048($i1,$tbl)      # Td4[s1>>24]
586         llgc    $i2,2048($i2,$tbl)      # Td4[s1>>16]
587         llgc    $i3,2048($i3,$tbl)      # Td4[s1>>8]
588         sll     $i1,24
589         sll     $i2,16
590         sll     $i3,8
591         or      $s0,$s1
592         or      $t1,$i1
593         or      $t2,$i2
594         or      $t3,$i3
595
596         srlg    $i1,$s2,8       # i0
597         srlg    $i2,$s2,24
598         srlg    $i3,$s2,16
599         nr      $s2,$mask       # i1
600         nr      $i1,$mask
601         nr      $i3,$mask
602         llgc    $i1,2048($i1,$tbl)      # Td4[s2>>8]
603         llgc    $s1,2048($s2,$tbl)      # Td4[s2>>0]
604         llgc    $i2,2048($i2,$tbl)      # Td4[s2>>24]
605         llgc    $i3,2048($i3,$tbl)      # Td4[s2>>16]
606         sll     $i1,8
607         sll     $i2,24
608         sll     $i3,16
609         or      $s0,$i1
610         or      $s1,$t1
611         or      $t2,$i2
612         or      $t3,$i3
613
614         srlg    $i1,$s3,16      # i0
615         srlg    $i2,$s3,8       # i1
616         srlg    $i3,$s3,24
617         nr      $s3,$mask       # i2
618         nr      $i1,$mask
619         nr      $i2,$mask
620         llgc    $i1,2048($i1,$tbl)      # Td4[s3>>16]
621         llgc    $i2,2048($i2,$tbl)      # Td4[s3>>8]
622         llgc    $s2,2048($s3,$tbl)      # Td4[s3>>0]
623         llgc    $s3,2048($i3,$tbl)      # Td4[s3>>24]
624         sll     $i1,16
625         sll     $i2,8
626         sll     $s3,24
627         or      $s0,$i1
628         or      $s1,$i2
629         or      $s2,$t2
630         or      $s3,$t3
631
632         x       $s0,16($key)
633         x       $s1,20($key)
634         x       $s2,24($key)
635         x       $s3,28($key)
636
637         br      $ra     
638 .size   _s390x_AES_decrypt,.-_s390x_AES_decrypt
639 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
640 ___
641
# Post-process the accumulated assembly: every backtick-quoted span
# (for example `0xff<<3` or `2048+32`) is replaced by the result of
# evaluating its contents as a Perl expression, so the assembler only
# ever sees plain constants.  The finished text is then printed.
642 $code =~ s/\`([^\`]*)\`/eval $1/gem;
643 print $code;