3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
21 # As for hardware acceleration support. It's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Then software key schedule can be postponed till hardware support
25 # detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27 # support is implemented.
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
36 # Add support for hardware AES192/256 and reschedule instructions to
37 # minimize/avoid Address Generation Interlock hazard and to favour
38 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39 # almost 50% on z9. The gain is smaller on z10, because being dual-
40 # issue z10 makes it impossible to eliminate the interlock condition:
41 # critical path is not long enough. Yet it spends ~24 cycles per byte
42 # processed with 128-bit key.
44 # Unlike previous version hardware support detection takes place only
45 # at the moment of key schedule setup, which is denoted in key->rounds.
46 # This is done, because deferred key setup can't be made MT-safe, not
47 # for keys longer than 128 bits.
49 # Add AES_cbc_encrypt, which gives incredible performance improvement,
50 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
51 # because software implementation was optimized.
55 # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
56 # performance improvement over "generic" counter mode routine relying
57 # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
58 # to the fact that exact throughput value depends on current stack
59 # frame alignment within 4KB page. In worst case you get ~75% of the
60 # maximum, but *on average* it would be as much as ~98%. Meaning that
61 # worst case is unlikely; it's like hitting a ravine on a plateau.
65 # Adapt for -m31 build. If kernel supports what's called "highgprs"
66 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
67 # instructions and achieve "64-bit" performance even in 31-bit legacy
68 # application context. The feature is not specific to any particular
69 # processor, as long as it's "z-CPU". Latter implies that the code
70 # remains z/Architecture specific. On z990 it was measured to perform
71 # 2x better than code generated by gcc 4.3.
75 if ($flavour =~ /3[12]/) {
83 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}	# keep shifting values until one looks like a file name (word chars, a dot, an extension) or none are left
84 open STDOUT,">$output";	# everything printed afterwards goes to that file
86 $softonly=0; # allow hardware support; heredocs guarded by !$softonly are emitted only while this stays 0
88 $t0="%r0"; $mask="%r0";	# register aliases: names on one line share the same register
90 $t2="%r2"; $inp="%r2";
91 $t3="%r3"; $out="%r3"; $bits="%r3";
105 $stdframe=16*$SIZE_T+4*8;	# frame size: sixteen $SIZE_T-sized slots plus 4*8 bytes
109 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }	# append each shifted value to $code twice, as a pair of .long words
115 .type AES_Te,\@object
120 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
121 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
122 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
123 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
124 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
125 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
126 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
127 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
128 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
129 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
130 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
131 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
132 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
133 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
134 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
135 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
136 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
137 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
138 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
139 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
140 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
141 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
142 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
143 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
144 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
145 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
146 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
147 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
148 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
149 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
150 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
151 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
152 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
153 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
154 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
155 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
156 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
157 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
158 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
159 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
160 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
161 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
162 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
163 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
164 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
165 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
166 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
167 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
168 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
169 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
170 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
171 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
172 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
173 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
174 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
175 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
176 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
177 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
178 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
179 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
180 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
181 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
182 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
183 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
186 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
187 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
188 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
189 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
190 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
191 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
192 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
193 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
194 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
195 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
196 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
197 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
198 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
199 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
200 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
201 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
202 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
203 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
204 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
205 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
206 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
207 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
208 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
209 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
210 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
211 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
212 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
213 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
214 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
215 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
216 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
217 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
219 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
220 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
221 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
223 .size AES_Te,.-AES_Te
225 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
226 # const AES_KEY *key) {
228 .type AES_encrypt,\@function
231 $code.=<<___ if (!$softonly);	# hardware (km) path, appended only when hardware support is allowed
240 lghi %r3,16 # single block length
241 .long 0xb92e0042 # km %r4,%r2 (instruction given as a raw opcode word)
242 brc 1,.-4 # can this happen? (condition code 3 branches back to the km word; the CBC and CTR paths call this partial completion)
248 stm${g} %r3,$ra,3*$SIZE_T($sp) # save %r3 up to the link register; the first slot holds out
256 bras $ra,_s390x_AES_encrypt # call the software block routine
258 l${g} $out,3*$SIZE_T($sp) # reload out from the slot written by the stm above
264 lm${g} %r6,$ra,6*$SIZE_T($sp) # restore %r6 up to the link register
266 .size AES_encrypt,.-AES_encrypt
268 .type _s390x_AES_encrypt,\@function
271 st${g} $ra,`$stdframe-$SIZE_T`($sp) # stash the link register; it is reused as a scratch index below
277 llill $mask,`0xff<<3` # 0xff pre-shifted by 3: table entries are 8 bytes apart
291 srlg $i1,$s1,`16-3` # i0
300 l $s0,0($s0,$tbl) # Te0[s0>>24]; each entry stores its word twice, so displacements 0/1/2/3 read it rotated left by 0/8/16/24 bits
301 l $t1,1($t1,$tbl) # Te3[s0>>0]
302 l $t2,2($t2,$tbl) # Te2[s0>>8]
303 l $t3,3($t3,$tbl) # Te1[s0>>16]
305 x $s0,3($i1,$tbl) # Te1[s1>>16]
306 l $s1,0($s1,$tbl) # Te0[s1>>24]
307 x $t2,1($i2,$tbl) # Te3[s1>>0]
308 x $t3,2($i3,$tbl) # Te2[s1>>8]
310 srlg $i1,$s2,`8-3` # i0
311 srlg $i2,$s2,`16-3` # i1
320 srlg $ra,$s3,`8-3` # i1
321 sllg $t1,$s3,`0+3` # i0
326 x $s0,2($i1,$tbl) # Te2[s2>>8]
327 x $s1,3($i2,$tbl) # Te1[s2>>16]
328 l $s2,0($s2,$tbl) # Te0[s2>>24]
329 x $t3,1($i3,$tbl) # Te3[s2>>0]
331 srlg $i3,$s3,`16-3` # i2
342 x $s0,1($t1,$tbl) # Te3[s3>>0]
343 x $s1,2($ra,$tbl) # Te2[s3>>8]
344 x $s2,3($i3,$tbl) # Te1[s3>>16]
345 l $s3,0($s3,$tbl) # Te0[s3>>24]
348 brct $rounds,.Lenc_loop # count down the rounds, looping while non-zero
360 srlg $i1,$s1,`16-3` # i0
369 llgc $s0,2($s0,$tbl) # Te4[s0>>24]; byte 2 of an entry is the plain S-box byte
370 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
372 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
373 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
377 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
378 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
379 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
380 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
389 srlg $i1,$s2,`8-3` # i0
390 srlg $i2,$s2,`16-3` # i1
398 sllg $t1,$s3,`0+3` # i0
399 srlg $ra,$s3,`8-3` # i1
402 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
403 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
405 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
406 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
415 srlg $i3,$s3,`16-3` # i2
423 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
424 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
425 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
426 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
435 l${g} $ra,`$stdframe-$SIZE_T`($sp) # recover the link register stashed on entry
442 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
446 .type AES_Td,\@object
451 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
452 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
453 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
454 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
455 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
456 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
457 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
458 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
459 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
460 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
461 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
462 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
463 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
464 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
465 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
466 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
467 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
468 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
469 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
470 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
471 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
472 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
473 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
474 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
475 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
476 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
477 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
478 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
479 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
480 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
481 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
482 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
483 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
484 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
485 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
486 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
487 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
488 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
489 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
490 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
491 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
492 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
493 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
494 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
495 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
496 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
497 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
498 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
499 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
500 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
501 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
502 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
503 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
504 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
505 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
506 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
507 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
508 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
509 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
510 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
511 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
512 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
513 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
514 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
517 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
518 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
519 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
520 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
521 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
522 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
523 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
524 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
525 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
526 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
527 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
528 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
529 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
530 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
531 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
532 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
533 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
534 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
535 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
536 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
537 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
538 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
539 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
540 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
541 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
542 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
543 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
544 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
545 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
546 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
547 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
548 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
549 .size AES_Td,.-AES_Td
551 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
552 # const AES_KEY *key) {
554 .type AES_decrypt,\@function
557 $code.=<<___ if (!$softonly);	# hardware (km) path, appended only when hardware support is allowed
566 lghi %r3,16 # single block length
567 .long 0xb92e0042 # km %r4,%r2 (instruction given as a raw opcode word)
568 brc 1,.-4 # can this happen? (condition code 3 branches back to the km word; the CBC and CTR paths call this partial completion)
574 stm${g} %r3,$ra,3*$SIZE_T($sp) # save %r3 up to the link register; the first slot holds out
582 bras $ra,_s390x_AES_decrypt # call the software block routine
584 l${g} $out,3*$SIZE_T($sp) # reload out from the slot written by the stm above
590 lm${g} %r6,$ra,6*$SIZE_T($sp) # restore %r6 up to the link register
592 .size AES_decrypt,.-AES_decrypt
594 .type _s390x_AES_decrypt,\@function
597 st${g} $ra,`$stdframe-$SIZE_T`($sp) # stash the link register; it is reused as a scratch index below
603 llill $mask,`0xff<<3` # 0xff pre-shifted by 3: table entries are 8 bytes apart
617 sllg $i1,$s1,`0+3` # i0
626 l $s0,0($s0,$tbl) # Td0[s0>>24]; each entry stores its word twice, so displacements 0/1/2/3 read it rotated left by 0/8/16/24 bits
627 l $t1,3($t1,$tbl) # Td1[s0>>16]
628 l $t2,2($t2,$tbl) # Td2[s0>>8]
629 l $t3,1($t3,$tbl) # Td3[s0>>0]
631 x $s0,1($i1,$tbl) # Td3[s1>>0]
632 l $s1,0($s1,$tbl) # Td0[s1>>24]
633 x $t2,3($i2,$tbl) # Td1[s1>>16]
634 x $t3,2($i3,$tbl) # Td2[s1>>8]
636 srlg $i1,$s2,`8-3` # i0
637 sllg $i2,$s2,`0+3` # i1
646 srlg $ra,$s3,`8-3` # i1
647 srlg $t1,$s3,`16-3` # i0
652 x $s0,2($i1,$tbl) # Td2[s2>>8]
653 x $s1,1($i2,$tbl) # Td3[s2>>0]
654 l $s2,0($s2,$tbl) # Td0[s2>>24]
655 x $t3,3($i3,$tbl) # Td1[s2>>16]
657 sllg $i3,$s3,`0+3` # i2
668 x $s0,3($t1,$tbl) # Td1[s3>>16]
669 x $s1,2($ra,$tbl) # Td2[s3>>8]
670 x $s2,1($i3,$tbl) # Td3[s3>>0]
671 l $s3,0($s3,$tbl) # Td0[s3>>24]
674 brct $rounds,.Ldec_loop # count down the rounds, looping while non-zero
677 l $t1,`2048+0`($tbl) # prefetch Td4
678 l $t2,`2048+64`($tbl) # one load every 64 bytes across the 256-byte table
679 l $t3,`2048+128`($tbl)
680 l $i1,`2048+192`($tbl)
697 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]; the byte table starts 2048 bytes past the word table
698 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
699 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
701 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
705 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
706 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
707 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
709 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
723 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
724 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
725 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
726 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
741 l${g} $ra,`$stdframe-$SIZE_T`($sp) # recover the link register stashed on entry
746 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
747 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
749 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
750 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
764 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
768 # void AES_set_encrypt_key(const unsigned char *in, int bits,
770 .globl AES_set_encrypt_key
771 .type AES_set_encrypt_key,\@function
795 $code.=<<___ if (!$softonly);	# hardware detection path, appended only when hardware support is allowed
796 # convert bits to km code, [128,192,256]->[18,19,20]
803 larl %r1,OPENSSL_s390xcap_P
805 tmhl %r0,0x4000 # check for message-security assist
808 lghi %r0,0 # query capability vector
810 .long 0xb92f0042 # kmc %r4,%r2 (instruction given as a raw opcode word)
817 lmg %r0,%r1,0($inp) # just copy 128 bits...
827 1: st $bits,236($key) # save bits
828 st %r5,240($key) # save km code; offset 240 is the word other routines load as rounds or as the kmc code
835 stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs
837 larl $tbl,AES_Te+2048 # point at the S-box bytes that follow the 2048-byte word table
856 llgfr $t2,$s3 # temp=rk[3]
870 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
871 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
872 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
873 icm $t2,1,0($i3) # Te4[rk[3]>>24]
874 x $t2,256($t3,$tbl) # rcon[i]; the rcon words sit right after the 256 S-box bytes
875 xr $s0,$t2 # rk[4]=rk[0]^...
876 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
877 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
878 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
880 llgfr $t2,$s3 # temp=rk[3]
892 la $key,16($key) # key+=4
894 brct $rounds,.L128_loop
896 lm${g} %r6,%r13,6*$SIZE_T($sp) # restore the registers saved by the stm above
928 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
929 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
930 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
931 icm $t1,1,0($i3) # Te4[rk[5]>>24]
932 x $t1,256($t3,$tbl) # rcon[i]
933 xr $s0,$t1 # rk[6]=rk[0]^...
934 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
935 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
936 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
942 brct $rounds,.L192_continue
944 lm${g} %r6,%r13,6*$SIZE_T($sp) # restore the registers saved by the stm above
950 x $t1,16($key) # rk[10]=rk[4]^rk[9]
952 x $t1,20($key) # rk[11]=rk[5]^rk[10]
962 la $key,24($key) # key+=6
991 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
992 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
993 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
994 icm $t1,1,0($i3) # Te4[rk[7]>>24]
995 x $t1,256($t3,$tbl) # rcon[i]
996 xr $s0,$t1 # rk[8]=rk[0]^...
997 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
998 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
999 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
1004 brct $rounds,.L256_continue
1006 lm${g} %r6,%r13,6*$SIZE_T($sp) # restore the registers saved by the stm above
1011 lgr $t1,$s3 # temp=rk[11]
1022 llgc $t1,0($t1) # Te4[rk[11]>>0]; this step places the bytes without rotation and adds no rcon
1023 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1024 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1025 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1026 x $t1,16($key) # rk[12]=rk[4]^...
1028 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1030 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1032 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1042 la $key,32($key) # key+=8
1049 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1051 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1053 .globl AES_set_decrypt_key
1054 .type AES_set_decrypt_key,\@function
1056 AES_set_decrypt_key:
1057 st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1058 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers!
1059 bras $ra,AES_set_encrypt_key # build the encryption schedule first
1060 l${g} $key,4*$SIZE_T($sp) # key pointer and link register come back from the slots written above
1061 l${g} $ra,14*$SIZE_T($sp)
1065 $code.=<<___ if (!$softonly);	# hardware-specific handling, appended only when hardware support is allowed
1070 oill $t0,0x80 # set "decrypt" bit
1076 st${g} $key,4*$SIZE_T($sp)
1077 st${g} $ra,14*$SIZE_T($sp)
1078 bras $ra,.Lekey_internal # second call, this time to an internal label of the key setup
1079 l${g} $key,4*$SIZE_T($sp)
1080 l${g} $ra,14*$SIZE_T($sp)
1084 .Lgo: llgf $rounds,240($key) # word at offset 240 is the one AES_set_encrypt_key stores last
1092 .Linv: lmg $s0,$s1,0($i1)
1104 llgf $rounds,240($key)
1106 sll $rounds,2 # (rounds-1)*4
1107 llilh $mask80,0x8080
1108 llilh $mask1b,0x1b1b
1109 llilh $maskfe,0xfefe
1115 .Lmix: l $s0,16($key) # tp1
1143 xr $s1,$s0 # tp2^tp1
1144 xr $s2,$s0 # tp4^tp1
1145 rll $s0,$s0,24 # = ROTATE(tp1,8)
1147 xr $s0,$s1 # ^=tp2^tp1
1148 xr $s1,$s3 # tp2^tp1^tp8
1149 xr $s0,$s2 # ^=tp4^tp1^tp8
1152 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1154 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1155 xr $s0,$s3 # ^= ROTATE(tp8,8)
1161 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1164 .size AES_set_decrypt_key,.-AES_set_decrypt_key
1167 #void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1168 # size_t length, const AES_KEY *key,
1169 # unsigned char *ivec, const int enc)
1172 my $out="%r4"; # length and out are swapped
1178 .globl AES_cbc_encrypt
1179 .type AES_cbc_encrypt,\@function
1182 xgr %r3,%r4 # flip %r3 and %r4, out and len
1186 $code.=<<___ if (!$softonly);	# hardware (kmc) path, appended only when hardware support is allowed
1191 lg %r0,0($ivp) # copy ivec
1193 stmg %r0,%r1,16($sp) # ivec occupies the first 16 bytes of the parameter block
1194 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1195 stmg %r0,%r1,32($sp) # key bytes 0..15 follow the ivec
1196 lmg %r0,%r1,16($key)
1197 stmg %r0,%r1,48($sp) # key bytes 16..31
1198 l %r0,240($key) # load kmc code
1199 lghi $key,15 # res=len%16, len-=res;
1202 la %r1,16($sp) # parameter block - ivec || key
1204 .long 0xb92f0042 # kmc %r4,%r2
1205 brc 1,.-4 # pay attention to "partial completion"
1209 lmg %r0,%r1,16($sp) # copy ivec to caller
1215 ahi $key,-1 # it's the way it's encoded in mvc
1217 jnz .Lkmc_truncated_dec
1219 stg %r1,16*$SIZE_T($sp)
1220 stg %r1,16*$SIZE_T+8($sp)
1222 mvc 16*$SIZE_T(1,$sp),0($inp)
1224 la %r1,16($sp) # restore parameter block
1225 la $inp,16*$SIZE_T($sp) # the final block is read from the stack scratch area
1227 .long 0xb92f0042 # kmc %r4,%r2
1230 .Lkmc_truncated_dec:
1231 st${g} $out,4*$SIZE_T($sp) # keep the real output pointer while kmc writes to the scratch area
1232 la $out,16*$SIZE_T($sp)
1234 .long 0xb92f0042 # kmc %r4,%r2
1235 l${g} $out,4*$SIZE_T($sp) # bring the real output pointer back
1237 mvc 0(1,$out),16*$SIZE_T($sp)
1244 stm${g} $key,$ra,5*$SIZE_T($sp)
1246 cl %r0,`$stdframe+$SIZE_T-4`($sp) # compares with a word above the standard frame; presumably the enc argument - TODO confirm
1258 brc 4,.Lcbc_enc_tail # if borrow
1260 stm${g} $inp,$out,2*$SIZE_T($sp)
1267 bras $ra,_s390x_AES_encrypt
1269 lm${g} $inp,$key,2*$SIZE_T($sp)
1281 brc 4,.Lcbc_enc_tail # if borrow
1285 l${g} $ivp,6*$SIZE_T($sp)
1291 lm${g} %r7,$ra,7*$SIZE_T($sp)
1298 stg $t0,16*$SIZE_T($sp)
1299 stg $t0,16*$SIZE_T+8($sp)
1301 mvc 16*$SIZE_T(1,$sp),0($inp)
1304 la $inp,16*$SIZE_T($sp)
1313 stmg $t0,$t1,16*$SIZE_T($sp) # 16 bytes parked here are xored into the decrypted block below
1316 stm${g} $inp,$out,2*$SIZE_T($sp)
1323 bras $ra,_s390x_AES_decrypt
1325 lm${g} $inp,$key,2*$SIZE_T($sp)
1333 xg $s0,16*$SIZE_T($sp) # xor with the 16 bytes parked at this slot
1334 xg $s2,16*$SIZE_T+8($sp)
1337 brc 4,.Lcbc_dec_tail # if borrow
1338 brc 2,.Lcbc_dec_done # if zero
1341 stmg $t0,$t1,16*$SIZE_T($sp)
1351 lm${g} %r6,$ra,6*$SIZE_T($sp)
1352 stmg $t0,$t1,0($ivp) # store the register pair through the ivec pointer
1359 stg $s0,16*$SIZE_T($sp)
1360 stg $s2,16*$SIZE_T+8($sp)
1362 mvc 0(1,$out),16*$SIZE_T($sp)
1365 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1368 #void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1369 # size_t blocks, const AES_KEY *key,
1370 # const unsigned char *ivec)
1375 my $key="%r5"; my $iv0="%r5";
1380 .globl AES_ctr32_encrypt
1381 .type AES_ctr32_encrypt,\@function
1384 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
1386 $code.=<<___ if (!$softonly);	# hardware (km) path, appended only when hardware support is allowed
1392 stm${g} %r6,$s3,6*$SIZE_T($sp)
1395 la %r1,0($key) # %r1 is permanent copy of $key
1396 lg $iv0,0($ivp) # load ivec
1399 # prepare and allocate stack frame at the top of 4K page
1400 # with 1K reserved for eventual signal handling
1401 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1405 ngr $s0,$s1 # align at page boundary
1406 slgr $fp,$s0 # total buffer size
1408 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1409 slgr $fp,$s1 # deduct reservation to get usable buffer size
1410 # buffer size is at least 256 and at most 3072+256-16
1412 la $sp,1024($s0) # alloca
1413 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1414 st${g} $s2,0($sp) # back-chain
1415 st${g} $fp,$SIZE_T($sp) # remember the block count in the new frame
1418 brc 1,.Lctr32_hw_loop # not zero, no borrow
1419 algr $fp,$len # input is shorter than allocated buffer
1421 st${g} $fp,$SIZE_T($sp) # update the remembered block count
1430 ahi $ivp,1 # 32-bit increment, preserves upper half
1431 brct $s3,.Lctr32_hw_prepare
1433 la $s0,16($sp) # inp
1434 sllg $s1,$fp,4 # len
1435 la $s2,16($sp) # out (same address as inp: km works in place on the stack buffer)
1436 .long 0xb92e00a8 # km %r10,%r8
1437 brc 1,.-4 # pay attention to "partial completion"
1447 stg $s0,0($out,$inp)
1448 stg $s1,8($out,$inp)
1450 brct $s3,.Lctr32_hw_xor
1453 brc 1,.Lctr32_hw_loop # not zero, no borrow
1456 brc 4+1,.Lctr32_hw_loop # not zero
1459 l${g} $s1,$SIZE_T($sp) # block count remembered in the frame earlier
1465 brct $s1,.Lctr32_hw_zap
1468 lm${g} %r6,$s3,6*$SIZE_T($sp) # restore the registers saved on entry to this path
1474 stm${g} $key,$ra,5*$SIZE_T($sp)
1480 stm${g} $inp,$len,2*$SIZE_T($sp)
1485 st $t1,16*$SIZE_T($sp) # park the 32-bit counter in the frame across the call
1488 bras $ra,_s390x_AES_encrypt
1490 lm${g} $inp,$ivp,2*$SIZE_T($sp)
1491 llgf $t1,16*$SIZE_T($sp) # reload the parked counter
1499 st $s3,12($out,$inp)
1502 ahi $t1,1 # 32-bit increment
1503 brct $len,.Lctr32_loop
1505 lm${g} %r6,$ra,6*$SIZE_T($sp)
1507 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1511 .comm OPENSSL_s390xcap_P,16,8
1512 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1515 $code =~ s/\`([^\`]*)\`/eval $1/gem;	# replace every backtick-quoted span in $code with the result of evaluating it as Perl
1517 close STDOUT; # force flush; NOTE(review): the result of close is not checked, so a failed write would pass silently