3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
21 # As for hardware acceleration support: it's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably, support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Then software key schedule can be postponed till hardware support
25 # detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27 # support is implemented.
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
36 # Add support for hardware AES192/256 and reschedule instructions to
37 # minimize/avoid Address Generation Interlock hazard and to favour
38 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39 # almost 50% on z9. The gain is smaller on z10, because being dual-
40 # issue z10 makes it impossible to eliminate the interlock condition:
41 # the critical path is not long enough. Yet it spends ~24 cycles per byte
42 # processed with 128-bit key.
44 # Unlike the previous version, hardware support detection takes place only
45 # at the moment of key schedule setup, which is denoted in key->rounds.
46 # This is done because deferred key setup can't be made MT-safe, not
47 # for key lengths longer than 128 bits.
49 # Add AES_cbc_encrypt, which gives incredible performance improvement,
50 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
51 # because software implementation was optimized.
55 # Add AES_ctr32_encrypt.
57 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} # consume arguments until one looks like an output file name ("name.ext") or the list runs out
58 if ($output) { open(STDOUT,">",$output) or die "can't open $output: $!"; } # checked 3-arg open; when no file name was found the inherited STDOUT is kept instead of being closed by a failed open
60 $softonly=0; # allow hardware support; code sections appended with "if (!$softonly)" are emitted only while this is false
62 $t0="%r0"; $mask="%r0"; # two names for the same register, %r0
64 $t2="%r2"; $inp="%r2"; # two names for the same register, %r2
65 $t3="%r3"; $out="%r3"; $bits="%r3"; # three names for the same register, %r3
81 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
92 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
93 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
94 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
95 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
96 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
97 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
98 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
99 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
100 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
101 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
102 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
103 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
104 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
105 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
106 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
107 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
108 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
109 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
110 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
111 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
112 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
113 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
114 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
115 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
116 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
117 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
118 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
119 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
120 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
121 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
122 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
123 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
124 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
125 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
126 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
127 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
128 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
129 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
130 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
131 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
132 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
133 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
134 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
135 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
136 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
137 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
138 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
139 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
140 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
141 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
142 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
143 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
144 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
145 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
146 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
147 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
148 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
149 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
150 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
151 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
152 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
153 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
154 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
155 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
158 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
159 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
160 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
161 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
162 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
163 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
164 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
165 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
166 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
167 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
168 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
169 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
170 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
171 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
172 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
173 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
174 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
175 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
176 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
177 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
178 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
179 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
180 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
181 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
182 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
183 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
184 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
185 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
186 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
187 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
188 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
189 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
191 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
192 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
193 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
195 .size AES_Te,.-AES_Te
197 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
198 # const AES_KEY *key) {
200 .type AES_encrypt,\@function
203 $code.=<<___ if (!$softonly);
212 lghi %r3,16 # single block length
213 .long 0xb92e0042 # km %r4,%r2
214 brc 1,.-4 # can this happen?
228 bras $ra,_s390x_AES_encrypt
238 .size AES_encrypt,.-AES_encrypt
240 .type _s390x_AES_encrypt,\@function
249 llill $mask,`0xff<<3`
263 srlg $i1,$s1,`16-3` # i0
272 l $s0,0($s0,$tbl) # Te0[s0>>24]
273 l $t1,1($t1,$tbl) # Te3[s0>>0]
274 l $t2,2($t2,$tbl) # Te2[s0>>8]
275 l $t3,3($t3,$tbl) # Te1[s0>>16]
277 x $s0,3($i1,$tbl) # Te1[s1>>16]
278 l $s1,0($s1,$tbl) # Te0[s1>>24]
279 x $t2,1($i2,$tbl) # Te3[s1>>0]
280 x $t3,2($i3,$tbl) # Te2[s1>>8]
282 srlg $i1,$s2,`8-3` # i0
283 srlg $i2,$s2,`16-3` # i1
292 srlg $ra,$s3,`8-3` # i1
293 sllg $t1,$s3,`0+3` # i0
298 x $s0,2($i1,$tbl) # Te2[s2>>8]
299 x $s1,3($i2,$tbl) # Te1[s2>>16]
300 l $s2,0($s2,$tbl) # Te0[s2>>24]
301 x $t3,1($i3,$tbl) # Te3[s2>>0]
303 srlg $i3,$s3,`16-3` # i2
314 x $s0,1($t1,$tbl) # Te3[s3>>0]
315 x $s1,2($ra,$tbl) # Te2[s3>>8]
316 x $s2,3($i3,$tbl) # Te1[s3>>16]
317 l $s3,0($s3,$tbl) # Te0[s3>>24]
320 brct $rounds,.Lenc_loop
332 srlg $i1,$s1,`16-3` # i0
341 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
342 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
344 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
345 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
349 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
350 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
351 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
352 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
361 srlg $i1,$s2,`8-3` # i0
362 srlg $i2,$s2,`16-3` # i1
370 sllg $t1,$s3,`0+3` # i0
371 srlg $ra,$s3,`8-3` # i1
374 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
375 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
377 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
378 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
387 srlg $i3,$s3,`16-3` # i2
395 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
396 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
397 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
398 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
414 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
418 .type AES_Td,\@object
423 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
424 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
425 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
426 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
427 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
428 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
429 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
430 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
431 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
432 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
433 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
434 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
435 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
436 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
437 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
438 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
439 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
440 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
441 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
442 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
443 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
444 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
445 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
446 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
447 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
448 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
449 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
450 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
451 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
452 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
453 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
454 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
455 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
456 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
457 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
458 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
459 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
460 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
461 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
462 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
463 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
464 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
465 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
466 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
467 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
468 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
469 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
470 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
471 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
472 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
473 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
474 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
475 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
476 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
477 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
478 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
479 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
480 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
481 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
482 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
483 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
484 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
485 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
486 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
489 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
490 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
491 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
492 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
493 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
494 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
495 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
496 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
497 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
498 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
499 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
500 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
501 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
502 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
503 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
504 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
505 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
506 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
507 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
508 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
509 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
510 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
511 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
512 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
513 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
514 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
515 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
516 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
517 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
518 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
519 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
520 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
521 .size AES_Td,.-AES_Td
523 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
524 # const AES_KEY *key) {
526 .type AES_decrypt,\@function
529 $code.=<<___ if (!$softonly);
538 lghi %r3,16 # single block length
539 .long 0xb92e0042 # km %r4,%r2
540 brc 1,.-4 # can this happen?
554 bras $ra,_s390x_AES_decrypt
564 .size AES_decrypt,.-AES_decrypt
566 .type _s390x_AES_decrypt,\@function
575 llill $mask,`0xff<<3`
589 sllg $i1,$s1,`0+3` # i0
598 l $s0,0($s0,$tbl) # Td0[s0>>24]
599 l $t1,3($t1,$tbl) # Td1[s0>>16]
600 l $t2,2($t2,$tbl) # Td2[s0>>8]
601 l $t3,1($t3,$tbl) # Td3[s0>>0]
603 x $s0,1($i1,$tbl) # Td3[s1>>0]
604 l $s1,0($s1,$tbl) # Td0[s1>>24]
605 x $t2,3($i2,$tbl) # Td1[s1>>16]
606 x $t3,2($i3,$tbl) # Td2[s1>>8]
608 srlg $i1,$s2,`8-3` # i0
609 sllg $i2,$s2,`0+3` # i1
618 srlg $ra,$s3,`8-3` # i1
619 srlg $t1,$s3,`16-3` # i0
624 x $s0,2($i1,$tbl) # Td2[s2>>8]
625 x $s1,1($i2,$tbl) # Td3[s2>>0]
626 l $s2,0($s2,$tbl) # Td0[s2>>24]
627 x $t3,3($i3,$tbl) # Td1[s2>>16]
629 sllg $i3,$s3,`0+3` # i2
640 x $s0,3($t1,$tbl) # Td1[s3>>16]
641 x $s1,2($ra,$tbl) # Td2[s3>>8]
642 x $s2,1($i3,$tbl) # Td3[s3>>0]
643 l $s3,0($s3,$tbl) # Td0[s3>>24]
646 brct $rounds,.Ldec_loop
649 l $t1,`2048+0`($tbl) # prefetch Td4
650 l $t2,`2048+64`($tbl)
651 l $t3,`2048+128`($tbl)
652 l $i1,`2048+192`($tbl)
669 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
670 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
671 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
673 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
677 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
678 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
679 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
681 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
695 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
696 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
697 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
698 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
718 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
719 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
721 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
722 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
736 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
740 # void AES_set_encrypt_key(const unsigned char *in, int bits,
742 .globl AES_set_encrypt_key
743 .type AES_set_encrypt_key,\@function
767 $code.=<<___ if (!$softonly);
768 # convert bits to km code, [128,192,256]->[18,19,20]
775 larl %r1,OPENSSL_s390xcap_P
777 tmhl %r0,0x4000 # check for message-security assist
780 lghi %r0,0 # query capability vector
782 .long 0xb92f0042 # kmc %r4,%r2
789 lmg %r0,%r1,0($inp) # just copy 128 bits...
799 1: st $bits,236($key) # save bits
800 st %r5,240($key) # save km code
807 stmg %r6,%r13,48($sp) # all non-volatile regs
809 larl $tbl,AES_Te+2048
828 llgfr $t2,$s3 # temp=rk[3]
842 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
843 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
844 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
845 icm $t2,1,0($i3) # Te4[rk[3]>>24]
846 x $t2,256($t3,$tbl) # rcon[i]
847 xr $s0,$t2 # rk[4]=rk[0]^...
848 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
849 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
850 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
852 llgfr $t2,$s3 # temp=rk[3]
864 la $key,16($key) # key+=4
866 brct $rounds,.L128_loop
900 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
901 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
902 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
903 icm $t1,1,0($i3) # Te4[rk[5]>>24]
904 x $t1,256($t3,$tbl) # rcon[i]
905 xr $s0,$t1 # rk[6]=rk[0]^...
906 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
907 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
908 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
914 brct $rounds,.L192_continue
922 x $t1,16($key) # rk[10]=rk[4]^rk[9]
924 x $t1,20($key) # rk[11]=rk[5]^rk[10]
934 la $key,24($key) # key+=6
963 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
964 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
965 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
966 icm $t1,1,0($i3) # Te4[rk[7]>>24]
967 x $t1,256($t3,$tbl) # rcon[i]
968 xr $s0,$t1 # rk[8]=rk[0]^...
969 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
970 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
971 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
976 brct $rounds,.L256_continue
983 lgr $t1,$s3 # temp=rk[11]
994 llgc $t1,0($t1) # Te4[rk[11]>>0]
995 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
996 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
997 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
998 x $t1,16($key) # rk[12]=rk[4]^...
1000 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1002 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1004 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1014 la $key,32($key) # key+=8
1021 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1023 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1025 .globl AES_set_decrypt_key
1026 .type AES_set_decrypt_key,\@function
1028 AES_set_decrypt_key:
1029 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1030 stg $ra,112($sp) # save non-volatile registers!
1031 bras $ra,AES_set_encrypt_key
1037 $code.=<<___ if (!$softonly);
1042 oill $t0,0x80 # set "decrypt" bit
1050 bras $ra,.Lekey_internal
1056 .Lgo: llgf $rounds,240($key)
1064 .Linv: lmg $s0,$s1,0($i1)
1076 llgf $rounds,240($key)
1078 sll $rounds,2 # (rounds-1)*4
1079 llilh $mask80,0x8080
1080 llilh $mask1b,0x1b1b
1081 llilh $maskfe,0xfefe
1087 .Lmix: l $s0,16($key) # tp1
1115 xr $s1,$s0 # tp2^tp1
1116 xr $s2,$s0 # tp4^tp1
1117 rll $s0,$s0,24 # = ROTATE(tp1,8)
1119 xr $s0,$s1 # ^=tp2^tp1
1120 xr $s1,$s3 # tp2^tp1^tp8
1121 xr $s0,$s2 # ^=tp4^tp1^tp8
1124 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1126 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1127 xr $s0,$s3 # ^= ROTATE(tp8,8)
1133 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1136 .size AES_set_decrypt_key,.-AES_set_decrypt_key
1139 #void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1140 # size_t length, const AES_KEY *key,
1141 # unsigned char *ivec, const int enc)
1144 my $out="%r4"; # length and out are swapped
1150 .globl AES_cbc_encrypt
1151 .type AES_cbc_encrypt,\@function
1154 xgr %r3,%r4 # flip %r3 and %r4, out and len
1158 $code.=<<___ if (!$softonly);
1163 lg %r0,0($ivp) # copy ivec
1165 stmg %r0,%r1,16($sp)
1166 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1167 stmg %r0,%r1,32($sp)
1168 lmg %r0,%r1,16($key)
1169 stmg %r0,%r1,48($sp)
1170 l %r0,240($key) # load kmc code
1171 lghi $key,15 # res=len%16, len-=res;
1174 la %r1,16($sp) # parameter block - ivec || key
1176 .long 0xb92f0042 # kmc %r4,%r2
1177 brc 1,.-4 # pay attention to "partial completion"
1181 lmg %r0,%r1,16($sp) # copy ivec to caller
1187 ahi $key,-1 # it's the way it's encoded in mvc
1189 jnz .Lkmc_truncated_dec
1194 mvc 128(1,$sp),0($inp)
1196 la %r1,16($sp) # restore parameter block
1199 .long 0xb92f0042 # kmc %r4,%r2
1202 .Lkmc_truncated_dec:
1206 .long 0xb92f0042 # kmc %r4,%r2
1209 mvc 0(1,$out),128($sp)
1216 stmg $key,$ra,40($sp)
1230 brc 4,.Lcbc_enc_tail # if borrow
1232 stmg $inp,$out,16($sp)
1239 bras $ra,_s390x_AES_encrypt
1241 lmg $inp,$key,16($sp)
1253 brc 4,.Lcbc_enc_tail # if borrow
1273 mvc 128(1,$sp),0($inp)
1285 stmg $t0,$t1,128($sp)
1288 stmg $inp,$out,16($sp)
1295 bras $ra,_s390x_AES_decrypt
1297 lmg $inp,$key,16($sp)
1309 brc 4,.Lcbc_dec_tail # if borrow
1310 brc 2,.Lcbc_dec_done # if zero
1313 stmg $t0,$t1,128($sp)
1323 lmg $ivp,$ra,48($sp)
1324 stmg $t0,$t1,0($ivp)
1334 mvc 0(1,$out),128($sp)
1337 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1340 #void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1341 # size_t blocks, const AES_KEY *key,
1342 # const unsigned char *ivec)
1347 my $key="%r5"; my $iv0="%r5";
1352 .globl AES_ctr32_encrypt
1353 .type AES_ctr32_encrypt,\@function
1357 $code.=<<___ if (!$softonly);
1363 stmg %r6,$s3,48($sp)
1366 la %r1,0($key) # %r1 is permanent copy of $key
1367 lg $iv0,0($ivp) # load ivec
1370 # prepare and allocate stack frame
1371 lghi $s0,-272 # guarantee at least 256-bytes buffer
1375 ngr $s0,$s1 # align at page boundary
1376 la $sp,0($s0) # alloca
1377 stg $fp,0($s0) # back-chain
1379 # calculate resultant buffer size
1380 la $s0,16($s0) # buffer starts at offset of 16
1382 srlg $fp,$fp,4 # $fp is buffer length in blocks, minimum 16
1386 brc 1,.Lctr32_hw_loop # not zero, no borrow
1398 ahi $ivp,1 # 32-bit increment, preserves upper half
1399 brct $s3,.Lctr32_hw_prepare
1401 la $s0,16($sp) # inp
1402 sllg $s1,$fp,4 # len
1403 la $s2,16($sp) # out
1404 .long 0xb92e00a8 # km %r10,%r8
1405 brc 1,.-4 # pay attention to "partial completion"
1415 stg $s0,0($out,$inp)
1416 stg $s1,8($out,$inp)
1418 brct $s3,.Lctr32_hw_xor
1421 brc 1,.Lctr32_hw_loop # not zero, no borrow
1424 brc 4+1,.Lctr32_hw_loop # not zero
1433 brct $s1,.Lctr32_hw_zap
1442 stmg $key,$ra,40($sp)
1448 stmg $inp,$len,16($sp)
1456 bras $ra,_s390x_AES_encrypt
1458 lmg $inp,$ivp,16($sp)
1467 st $s3,12($out,$inp)
1470 ahi $t1,1 # 32-bit increment
1471 brct $len,.Lctr32_loop
1475 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1479 .comm OPENSSL_s390xcap_P,8,8
1480 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1483 $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted expression in the generated text (e.g. 0xff<<3) with the result of evaluating it
1485 close STDOUT or die "error closing STDOUT: $!"; # force flush; buffered write errors only surface at close, so fail loudly instead of leaving a truncated file