2 # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
21 # Software performance improvement over gcc-generated code is ~70% and
22 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
23 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
24 # *strictly* in-order execution and issued instruction [in this case
25 # load value from memory is critical] has to complete before execution
26 # flow proceeds. S-boxes are compressed to 2KB[+256B].
28 # As for hardware acceleration support. It's basically a "teaser," as
29 # it can and should be improved in several ways. Most notably support
30 # for CBC is not utilized, nor multiple blocks are ever processed.
31 # Then software key schedule can be postponed till hardware support
32 # detection... Performance improvement over assembler is reportedly
33 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
34 # support is implemented.
38 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
39 # for 128-bit keys, if hardware support is detected.
43 # Add support for hardware AES192/256 and reschedule instructions to
44 # minimize/avoid Address Generation Interlock hazard and to favour
45 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
46 # almost 50% on z9. The gain is smaller on z10, because being dual-
47 # issue z10 makes it impossible to eliminate the interlock condition:
48 # critical path is not long enough. Yet it spends ~24 cycles per byte
49 # processed with 128-bit key.
51 # Unlike previous version hardware support detection takes place only
52 # at the moment of key schedule setup, which is denoted in key->rounds.
53 # This is done, because deferred key setup can't be made MT-safe, not
54 # for keys longer than 128 bits.
56 # Add AES_cbc_encrypt, which gives incredible performance improvement,
57 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
58 # because software implementation was optimized.
62 # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
63 # performance improvement over "generic" counter mode routine relying
64 # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
65 # to the fact that exact throughput value depends on current stack
66 # frame alignment within 4KB page. In worst case you get ~75% of the
67 # maximum, but *on average* it would be as much as ~98%. Meaning that
68 # worst case is unlikely, it's like hitting ravine on plateau.
72 # Adapt for -m31 build. If kernel supports what's called "highgprs"
73 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
74 # instructions and achieve "64-bit" performance even in 31-bit legacy
75 # application context. The feature is not specific to any particular
76 # processor, as long as it's "z-CPU". Latter implies that the code
77 # remains z/Architecture specific. On z990 it was measured to perform
78 # 2x better than code generated by gcc 4.3.
82 # Add support for z196 "cipher message with counter" instruction.
83 # Note however that it's disengaged, because it was measured to
84 # perform ~12% worse than vanilla km-based code...
88 # Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
89 # instructions, which deliver ~70% improvement at 8KB block size over
90 # vanilla km-based code, 37% - at most like 512-bytes block size.
94 if ($flavour =~ /3[12]/) {
102 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
103 open STDOUT,">$output";
105 $softonly=0; # allow hardware support
107 $t0="%r0"; $mask="%r0";
109 $t2="%r2"; $inp="%r2";
110 $t3="%r3"; $out="%r3"; $bits="%r3";
124 $stdframe=16*$SIZE_T+4*8;
128 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
134 .type AES_Te,\@object
139 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
140 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
141 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
142 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
143 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
144 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
145 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
146 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
147 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
148 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
149 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
150 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
151 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
152 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
153 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
154 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
155 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
156 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
157 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
158 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
159 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
160 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
161 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
162 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
163 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
164 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
165 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
166 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
167 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
168 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
169 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
170 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
171 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
172 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
173 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
174 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
175 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
176 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
177 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
178 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
179 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
180 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
181 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
182 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
183 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
184 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
185 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
186 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
187 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
188 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
189 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
190 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
191 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
192 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
193 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
194 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
195 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
196 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
197 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
198 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
199 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
200 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
201 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
202 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
205 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
206 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
207 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
208 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
209 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
210 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
211 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
212 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
213 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
214 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
215 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
216 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
217 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
218 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
219 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
220 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
221 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
222 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
223 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
224 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
225 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
226 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
227 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
228 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
229 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
230 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
231 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
232 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
233 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
234 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
235 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
236 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
238 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
239 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
240 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
242 .size AES_Te,.-AES_Te
244 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
245 # const AES_KEY *key) {
247 .type AES_encrypt,\@function
250 $code.=<<___ if (!$softonly);
259 lghi %r3,16 # single block length
260 .long 0xb92e0042 # km %r4,%r2
261 brc 1,.-4 # can this happen?
267 stm${g} %r3,$ra,3*$SIZE_T($sp)
275 bras $ra,_s390x_AES_encrypt
277 l${g} $out,3*$SIZE_T($sp)
283 lm${g} %r6,$ra,6*$SIZE_T($sp)
285 .size AES_encrypt,.-AES_encrypt
287 .type _s390x_AES_encrypt,\@function
290 st${g} $ra,15*$SIZE_T($sp)
296 llill $mask,`0xff<<3`
310 srlg $i1,$s1,`16-3` # i0
319 l $s0,0($s0,$tbl) # Te0[s0>>24]
320 l $t1,1($t1,$tbl) # Te3[s0>>0]
321 l $t2,2($t2,$tbl) # Te2[s0>>8]
322 l $t3,3($t3,$tbl) # Te1[s0>>16]
324 x $s0,3($i1,$tbl) # Te1[s1>>16]
325 l $s1,0($s1,$tbl) # Te0[s1>>24]
326 x $t2,1($i2,$tbl) # Te3[s1>>0]
327 x $t3,2($i3,$tbl) # Te2[s1>>8]
329 srlg $i1,$s2,`8-3` # i0
330 srlg $i2,$s2,`16-3` # i1
339 srlg $ra,$s3,`8-3` # i1
340 sllg $t1,$s3,`0+3` # i0
345 x $s0,2($i1,$tbl) # Te2[s2>>8]
346 x $s1,3($i2,$tbl) # Te1[s2>>16]
347 l $s2,0($s2,$tbl) # Te0[s2>>24]
348 x $t3,1($i3,$tbl) # Te3[s2>>0]
350 srlg $i3,$s3,`16-3` # i2
361 x $s0,1($t1,$tbl) # Te3[s3>>0]
362 x $s1,2($ra,$tbl) # Te2[s3>>8]
363 x $s2,3($i3,$tbl) # Te1[s3>>16]
364 l $s3,0($s3,$tbl) # Te0[s3>>24]
367 brct $rounds,.Lenc_loop
379 srlg $i1,$s1,`16-3` # i0
388 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
389 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
391 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
392 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
396 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
397 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
398 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
399 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
408 srlg $i1,$s2,`8-3` # i0
409 srlg $i2,$s2,`16-3` # i1
417 sllg $t1,$s3,`0+3` # i0
418 srlg $ra,$s3,`8-3` # i1
421 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
422 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
424 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
425 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
434 srlg $i3,$s3,`16-3` # i2
442 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
443 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
444 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
445 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
454 l${g} $ra,15*$SIZE_T($sp)
461 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
465 .type AES_Td,\@object
470 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
471 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
472 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
473 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
474 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
475 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
476 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
477 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
478 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
479 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
480 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
481 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
482 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
483 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
484 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
485 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
486 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
487 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
488 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
489 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
490 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
491 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
492 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
493 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
494 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
495 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
496 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
497 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
498 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
499 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
500 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
501 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
502 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
503 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
504 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
505 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
506 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
507 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
508 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
509 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
510 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
511 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
512 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
513 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
514 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
515 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
516 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
517 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
518 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
519 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
520 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
521 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
522 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
523 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
524 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
525 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
526 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
527 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
528 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
529 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
530 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
531 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
532 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
533 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
536 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
537 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
538 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
539 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
540 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
541 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
542 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
543 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
544 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
545 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
546 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
547 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
548 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
549 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
550 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
551 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
552 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
553 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
554 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
555 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
556 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
557 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
558 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
559 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
560 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
561 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
562 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
563 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
564 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
565 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
566 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
567 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
568 .size AES_Td,.-AES_Td
570 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
571 # const AES_KEY *key) {
573 .type AES_decrypt,\@function
576 $code.=<<___ if (!$softonly);
585 lghi %r3,16 # single block length
586 .long 0xb92e0042 # km %r4,%r2
587 brc 1,.-4 # can this happen?
593 stm${g} %r3,$ra,3*$SIZE_T($sp)
601 bras $ra,_s390x_AES_decrypt
603 l${g} $out,3*$SIZE_T($sp)
609 lm${g} %r6,$ra,6*$SIZE_T($sp)
611 .size AES_decrypt,.-AES_decrypt
613 .type _s390x_AES_decrypt,\@function
616 st${g} $ra,15*$SIZE_T($sp)
622 llill $mask,`0xff<<3`
636 sllg $i1,$s1,`0+3` # i0
645 l $s0,0($s0,$tbl) # Td0[s0>>24]
646 l $t1,3($t1,$tbl) # Td1[s0>>16]
647 l $t2,2($t2,$tbl) # Td2[s0>>8]
648 l $t3,1($t3,$tbl) # Td3[s0>>0]
650 x $s0,1($i1,$tbl) # Td3[s1>>0]
651 l $s1,0($s1,$tbl) # Td0[s1>>24]
652 x $t2,3($i2,$tbl) # Td1[s1>>16]
653 x $t3,2($i3,$tbl) # Td2[s1>>8]
655 srlg $i1,$s2,`8-3` # i0
656 sllg $i2,$s2,`0+3` # i1
665 srlg $ra,$s3,`8-3` # i1
666 srlg $t1,$s3,`16-3` # i0
671 x $s0,2($i1,$tbl) # Td2[s2>>8]
672 x $s1,1($i2,$tbl) # Td3[s2>>0]
673 l $s2,0($s2,$tbl) # Td0[s2>>24]
674 x $t3,3($i3,$tbl) # Td1[s2>>16]
676 sllg $i3,$s3,`0+3` # i2
687 x $s0,3($t1,$tbl) # Td1[s3>>16]
688 x $s1,2($ra,$tbl) # Td2[s3>>8]
689 x $s2,1($i3,$tbl) # Td3[s3>>0]
690 l $s3,0($s3,$tbl) # Td0[s3>>24]
693 brct $rounds,.Ldec_loop
696 l $t1,`2048+0`($tbl) # prefetch Td4
697 l $t2,`2048+64`($tbl)
698 l $t3,`2048+128`($tbl)
699 l $i1,`2048+192`($tbl)
716 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
717 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
718 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
720 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
724 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
725 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
726 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
728 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
742 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
743 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
744 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
745 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
760 l${g} $ra,15*$SIZE_T($sp)
765 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
766 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
768 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
769 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
783 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
787 # void AES_set_encrypt_key(const unsigned char *in, int bits,
789 .globl AES_set_encrypt_key
790 .type AES_set_encrypt_key,\@function
793 _s390x_AES_set_encrypt_key:
815 $code.=<<___ if (!$softonly);
816 # convert bits to km(c) code, [128,192,256]->[18,19,20]
823 larl %r1,OPENSSL_s390xcap_P
826 ng %r0,32(%r1) # check availability of both km...
827 ng %r0,48(%r1) # ...and kmc support for given key length
830 lmg %r0,%r1,0($inp) # just copy 128 bits...
840 1: st $bits,236($key) # save bits [for debugging purposes]
842 st %r5,240($key) # save km(c) code
849 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
851 larl $tbl,AES_Te+2048
870 llgfr $t2,$s3 # temp=rk[3]
884 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
885 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
886 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
887 icm $t2,1,0($i3) # Te4[rk[3]>>24]
888 x $t2,256($t3,$tbl) # rcon[i]
889 xr $s0,$t2 # rk[4]=rk[0]^...
890 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
891 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
892 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
894 llgfr $t2,$s3 # temp=rk[3]
906 la $key,16($key) # key+=4
908 brct $rounds,.L128_loop
911 lm${g} %r4,%r13,4*$SIZE_T($sp)
943 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
944 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
945 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
946 icm $t1,1,0($i3) # Te4[rk[5]>>24]
947 x $t1,256($t3,$tbl) # rcon[i]
948 xr $s0,$t1 # rk[6]=rk[0]^...
949 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
950 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
951 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
957 brct $rounds,.L192_continue
960 lm${g} %r4,%r13,4*$SIZE_T($sp)
966 x $t1,16($key) # rk[10]=rk[4]^rk[9]
968 x $t1,20($key) # rk[11]=rk[5]^rk[10]
978 la $key,24($key) # key+=6
1007 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
1008 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
1009 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
1010 icm $t1,1,0($i3) # Te4[rk[7]>>24]
1011 x $t1,256($t3,$tbl) # rcon[i]
1012 xr $s0,$t1 # rk[8]=rk[0]^...
1013 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
1014 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
1015 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
1020 brct $rounds,.L256_continue
1023 lm${g} %r4,%r13,4*$SIZE_T($sp)
1028 lgr $t1,$s3 # temp=rk[11]
1039 llgc $t1,0($t1) # Te4[rk[11]>>0]
1040 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1041 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1042 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1043 x $t1,16($key) # rk[12]=rk[4]^...
1045 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1047 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1049 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1059 la $key,32($key) # key+=8
1066 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1068 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1070 .globl AES_set_decrypt_key
1071 .type AES_set_decrypt_key,\@function
1073 AES_set_decrypt_key:
1074 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1075 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
1076 bras $ra,_s390x_AES_set_encrypt_key
1077 #l${g} $key,4*$SIZE_T($sp)
1078 l${g} $ra,14*$SIZE_T($sp)
1082 $code.=<<___ if (!$softonly);
1087 oill $t0,0x80 # set "decrypt" bit
1093 .Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
1101 .Linv: lmg $s0,$s1,0($i1)
1113 llgf $rounds,240($key)
1115 sll $rounds,2 # (rounds-1)*4
1116 llilh $mask80,0x8080
1117 llilh $mask1b,0x1b1b
1118 llilh $maskfe,0xfefe
1124 .Lmix: l $s0,16($key) # tp1
1152 xr $s1,$s0 # tp2^tp1
1153 xr $s2,$s0 # tp4^tp1
1154 rll $s0,$s0,24 # = ROTATE(tp1,8)
1156 xr $s0,$s1 # ^=tp2^tp1
1157 xr $s1,$s3 # tp2^tp1^tp8
1158 xr $s0,$s2 # ^=tp4^tp1^tp8
1161 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1163 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1164 xr $s0,$s3 # ^= ROTATE(tp8,8)
1170 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1173 .size AES_set_decrypt_key,.-AES_set_decrypt_key
1176 ########################################################################
1177 # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1178 # size_t length, const AES_KEY *key,
1179 # unsigned char *ivec, const int enc)
1182 my $out="%r4"; # length and out are swapped
1188 .globl AES_cbc_encrypt
1189 .type AES_cbc_encrypt,\@function
1192 xgr %r3,%r4 # flip %r3 and %r4, out and len
1196 $code.=<<___ if (!$softonly);
1201 lg %r0,0($ivp) # copy ivec
1203 stmg %r0,%r1,16($sp)
1204 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1205 stmg %r0,%r1,32($sp)
1206 lmg %r0,%r1,16($key)
1207 stmg %r0,%r1,48($sp)
1208 l %r0,240($key) # load kmc code
1209 lghi $key,15 # res=len%16, len-=res;
1212 la %r1,16($sp) # parameter block - ivec || key
1214 .long 0xb92f0042 # kmc %r4,%r2
1215 brc 1,.-4 # pay attention to "partial completion"
1219 lmg %r0,%r1,16($sp) # copy ivec to caller
1225 ahi $key,-1 # it's the way it's encoded in mvc
1227 jnz .Lkmc_truncated_dec
1229 stg %r1,16*$SIZE_T($sp)
1230 stg %r1,16*$SIZE_T+8($sp)
1232 mvc 16*$SIZE_T(1,$sp),0($inp)
1234 la %r1,16($sp) # restore parameter block
1235 la $inp,16*$SIZE_T($sp)
1237 .long 0xb92f0042 # kmc %r4,%r2
1240 .Lkmc_truncated_dec:
1241 st${g} $out,4*$SIZE_T($sp)
1242 la $out,16*$SIZE_T($sp)
1244 .long 0xb92f0042 # kmc %r4,%r2
1245 l${g} $out,4*$SIZE_T($sp)
1247 mvc 0(1,$out),16*$SIZE_T($sp)
1254 stm${g} $key,$ra,5*$SIZE_T($sp)
1256 cl %r0,`$stdframe+$SIZE_T-4`($sp)
1268 brc 4,.Lcbc_enc_tail # if borrow
1270 stm${g} $inp,$out,2*$SIZE_T($sp)
1277 bras $ra,_s390x_AES_encrypt
1279 lm${g} $inp,$key,2*$SIZE_T($sp)
1291 brc 4,.Lcbc_enc_tail # if borrow
1295 l${g} $ivp,6*$SIZE_T($sp)
1301 lm${g} %r7,$ra,7*$SIZE_T($sp)
1308 stg $t0,16*$SIZE_T($sp)
1309 stg $t0,16*$SIZE_T+8($sp)
1311 mvc 16*$SIZE_T(1,$sp),0($inp)
1314 la $inp,16*$SIZE_T($sp)
1323 stmg $t0,$t1,16*$SIZE_T($sp)
1326 stm${g} $inp,$out,2*$SIZE_T($sp)
1333 bras $ra,_s390x_AES_decrypt
1335 lm${g} $inp,$key,2*$SIZE_T($sp)
1343 xg $s0,16*$SIZE_T($sp)
1344 xg $s2,16*$SIZE_T+8($sp)
1347 brc 4,.Lcbc_dec_tail # if borrow
1348 brc 2,.Lcbc_dec_done # if zero
1351 stmg $t0,$t1,16*$SIZE_T($sp)
1361 lm${g} %r6,$ra,6*$SIZE_T($sp)
1362 stmg $t0,$t1,0($ivp)
1369 stg $s0,16*$SIZE_T($sp)
1370 stg $s2,16*$SIZE_T+8($sp)
1372 mvc 0(1,$out),16*$SIZE_T($sp)
1375 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1378 ########################################################################
1379 # void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1380 # size_t blocks, const AES_KEY *key,
1381 # const unsigned char *ivec)
1384 my $out="%r4"; # blocks and out are swapped
1386 my $key="%r5"; my $iv0="%r5";
1391 .globl AES_ctr32_encrypt
1392 .type AES_ctr32_encrypt,\@function
1395 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1398 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
1400 $code.=<<___ if (!$softonly);
1406 stm${g} %r6,$s3,6*$SIZE_T($sp)
1409 la %r1,0($key) # %r1 is permanent copy of $key
1410 lg $iv0,0($ivp) # load ivec
1413 # prepare and allocate stack frame at the top of 4K page
1414 # with 1K reserved for eventual signal handling
1415 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1419 ngr $s0,$s1 # align at page boundary
1420 slgr $fp,$s0 # total buffer size
1422 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1423 slgr $fp,$s1 # deduct reservation to get usable buffer size
1424 # buffer size is at least 256 and at most 3072+256-16
1426 la $sp,1024($s0) # alloca
1427 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1428 st${g} $s2,0($sp) # back-chain
1429 st${g} $fp,$SIZE_T($sp)
1432 brc 1,.Lctr32_hw_switch # not zero, no borrow
1433 algr $fp,$len # input is shorter than allocated buffer
1435 st${g} $fp,$SIZE_T($sp)
1439 $code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
1442 larl %r1,OPENSSL_s390xcap_P
1443 llihh %r0,0x8000 # check if kmctr supports the function code
1445 ng %r0,64(%r1) # check kmctr capability vector
1451 algr $out,$inp # restore $out
1452 lgr $s1,$len # $s1 undertakes $len
1453 j .Lctr32_kmctr_loop
1458 .Lctr32_kmctr_prepare:
1462 ahi $ivp,1 # 32-bit increment, preserves upper half
1463 brct $s3,.Lctr32_kmctr_prepare
1465 #la $inp,0($inp) # inp
1466 sllg $len,$fp,4 # len
1467 #la $out,0($out) # out
1469 .long 0xb92da042 # kmctr $out,$s2,$inp
1470 brc 1,.-4 # pay attention to "partial completion"
1473 brc 1,.Lctr32_kmctr_loop # not zero, no borrow
1476 brc 4+1,.Lctr32_kmctr_loop # not zero
1479 lm${g} %r6,$s3,6*$SIZE_T($sp)
1483 $code.=<<___ if (!$softonly);
1491 ahi $ivp,1 # 32-bit increment, preserves upper half
1492 brct $s3,.Lctr32_km_prepare
1494 la $s0,16($sp) # inp
1495 sllg $s1,$fp,4 # len
1496 la $s2,16($sp) # out
1497 .long 0xb92e00a8 # km %r10,%r8
1498 brc 1,.-4 # pay attention to "partial completion"
1508 stg $s0,0($out,$inp)
1509 stg $s1,8($out,$inp)
1511 brct $s3,.Lctr32_km_xor
1514 brc 1,.Lctr32_km_loop # not zero, no borrow
1517 brc 4+1,.Lctr32_km_loop # not zero
1520 l${g} $s1,$SIZE_T($sp)
1526 brct $s1,.Lctr32_km_zap
1529 lm${g} %r6,$s3,6*$SIZE_T($sp)
1535 stm${g} $key,$ra,5*$SIZE_T($sp)
1541 stm${g} $inp,$out,2*$SIZE_T($sp)
1546 st $t1,16*$SIZE_T($sp)
1549 bras $ra,_s390x_AES_encrypt
1551 lm${g} $inp,$ivp,2*$SIZE_T($sp)
1552 llgf $t1,16*$SIZE_T($sp)
1560 ahi $t1,1 # 32-bit increment
1561 brct $len,.Lctr32_loop
1563 lm${g} %r6,$ra,6*$SIZE_T($sp)
1565 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1569 ########################################################################
1570 # void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
1571 # size_t len, const AES_KEY *key1, const AES_KEY *key2,
1572 # const unsigned char iv[16]);
1576 my $out="%r4"; # len and out are swapped
1578 my $key1="%r5"; # $i1
1579 my $key2="%r6"; # $i2
1581 my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
1584 .type _s390x_xts_km,\@function
1589 llgfr $s0,%r0 # put aside the function code
1592 larl %r1,OPENSSL_s390xcap_P
1594 srlg %r0,%r0,32($s1) # check for 32+function code
1595 ng %r0,32(%r1) # check km capability vector
1596 lgr %r0,$s0 # restore the function code
1597 la %r1,0($key1) # restore $key1
1600 lmg $i2,$i3,$tweak($sp) # put aside the tweak value
1603 oill %r0,32 # switch to xts function code
1605 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16
1606 la %r1,$tweak-16($sp)
1607 slgr %r1,$s1 # parameter block position
1608 lmg $s0,$s3,0($key1) # load 256 bits of key material,
1609 stmg $s0,$s3,0(%r1) # and copy it to parameter block.
1610 # yes, it contains junk and overlaps
1611 # with the tweak in 128-bit case.
1612 # it's done to avoid conditional
1614 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value
1616 .long 0xb92e0042 # km %r4,%r2
1617 brc 1,.-4 # pay attention to "partial completion"
1619 lrvg $s0,$tweak+0($sp) # load the last tweak
1620 lrvg $s1,$tweak+8($sp)
1621 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
1623 nill %r0,0xffdf # switch back to original function code
1624 la %r1,0($key1) # restore pointer to $key1
1627 llgc $len,2*$SIZE_T-1($sp)
1628 nill $len,0x0f # $len%=16
1635 # prepare and allocate stack frame at the top of 4K page
1636 # with 1K reserved for eventual signal handling
1637 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1641 ngr $s0,$s1 # align at page boundary
1642 slgr $fp,$s0 # total buffer size
1644 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1645 slgr $fp,$s1 # deduct reservation to get usable buffer size
1646 # buffer size is at least 256 and at most 3072+256-16
1648 la $sp,1024($s0) # alloca
1649 nill $fp,0xfff0 # round to 16*n
1650 st${g} $s2,0($sp) # back-chain
1651 nill $len,0xfff0 # redundant
1652 st${g} $fp,$SIZE_T($sp)
1655 brc 1,.Lxts_km_go # not zero, no borrow
1656 algr $fp,$len # input is shorter than allocated buffer
1658 st${g} $fp,$SIZE_T($sp)
1661 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
1662 lrvg $s1,$tweak+8($s2)
1664 la $s2,16($sp) # vector of ascending tweak values
1675 srag $i2,$s1,63 # broadcast upper bit
1681 lrvgr $i1,$s0 # flip byte order
1687 stg $i1,0($out,$inp)
1688 stg $i2,8($out,$inp)
1690 brct $s3,.Lxts_km_prepare
1692 slgr $inp,$fp # rewind $inp
1695 .long 0xb92e00aa # km $s2,$s2
1696 brc 1,.-4 # pay attention to "partial completion"
1706 stg $i1,0($out,$inp)
1707 stg $i2,8($out,$inp)
1709 brct $s3,.Lxts_km_xor
1712 brc 1,.Lxts_km_loop # not zero, no borrow
1715 brc 4+1,.Lxts_km_loop # not zero
1717 l${g} $i1,0($sp) # back-chain
1718 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
1725 brct $fp,.Lxts_km_zap
1728 llgc $len,2*$SIZE_T-1($i1)
1729 nill $len,0x0f # $len%=16
1732 # generate one more tweak...
1734 srag $i2,$s1,63 # broadcast upper bit
1740 ltr $len,$len # clear zero flag
1742 .size _s390x_xts_km,.-_s390x_xts_km
1744 .globl AES_xts_encrypt
1745 .type AES_xts_encrypt,\@function
1748 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1752 $code.=<<___ if ($SIZE_T==4);
1756 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1757 srag $len,$len,4 # formally wrong, because it expands
1758 # sign byte, but who can afford asking
1759 # to process more than 2^63-1 bytes?
1760 # I use it, because it sets condition
1762 bcr 8,$ra # abort if zero (i.e. less than 16)
1764 $code.=<<___ if (!$softonly);
1768 jl .Lxts_enc_software
1770 st${g} $ra,5*$SIZE_T($sp)
1771 stm${g} %r6,$s3,6*$SIZE_T($sp)
1773 sllg $len,$len,4 # $len&=~15
1776 # generate the tweak value
1777 l${g} $s3,$stdframe($sp) # pointer to iv
1782 la %r1,0($key2) # $key2 is not needed anymore
1783 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1784 brc 1,.-4 # can this happen?
1787 la %r1,0($key1) # $key1 is not needed anymore
1788 bras $ra,_s390x_xts_km
1789 jz .Lxts_enc_km_done
1791 aghi $inp,-16 # take one step back
1792 la $i3,0($out,$inp) # put aside real $out
1795 llgc $i2,0($out,$inp)
1796 stc $i1,0($out,$inp)
1797 stc $i2,16($out,$inp)
1799 brct $len,.Lxts_enc_km_steal
1803 lrvgr $i1,$s0 # flip byte order
1809 .long 0xb92e00aa # km $s2,$s2
1810 brc 1,.-4 # can this happen?
1811 lrvgr $i1,$s0 # flip byte order
1819 stg $sp,$tweak+0($sp) # wipe tweak
1820 stg $sp,$tweak+8($sp)
1821 l${g} $ra,5*$SIZE_T($sp)
1822 lm${g} %r6,$s3,6*$SIZE_T($sp)
1828 stm${g} %r6,$ra,6*$SIZE_T($sp)
1832 l${g} $s3,$stdframe($sp) # ivp
1833 llgf $s0,0($s3) # load iv
1837 stm${g} %r2,%r5,2*$SIZE_T($sp)
1840 bras $ra,_s390x_AES_encrypt # generate the tweak
1841 lm${g} %r2,%r5,2*$SIZE_T($sp)
1842 stm $s0,$s3,$tweak($sp) # save the tweak
1847 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1848 lrvg $s3,$tweak+8($sp)
1850 srag %r0,$s3,63 # broadcast upper bit
1855 lrvgr $s1,$s1 # flip byte order
1857 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1858 stg $s1,$tweak+0($sp) # save the tweak
1861 stg $s3,$tweak+8($sp)
1863 la $inp,16($inp) # $inp+=16
1865 x $s0,0($inp) # ^=*($inp)
1869 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
1871 bras $ra,_s390x_AES_encrypt
1872 lm${g} %r2,%r5,2*$SIZE_T($sp)
1873 x $s0,$tweak+0($sp) # ^=tweak
1876 x $s3,$tweak+12($sp)
1880 st $s3,12($out,$inp)
1881 brct${g} $len,.Lxts_enc_loop
1883 llgc $len,`2*$SIZE_T-1`($sp)
1884 nill $len,0x0f # $len%16
1887 la $i3,0($inp,$out) # put aside real $out
1890 llgc %r1,0($out,$inp)
1891 stc %r0,0($out,$inp)
1892 stc %r1,16($out,$inp)
1894 brct $len,.Lxts_enc_steal
1895 la $out,0($i3) # restore real $out
1897 # generate last tweak...
1898 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1899 lrvg $s3,$tweak+8($sp)
1901 srag %r0,$s3,63 # broadcast upper bit
1906 lrvgr $s1,$s1 # flip byte order
1908 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1909 stg $s1,$tweak+0($sp) # save the tweak
1912 stg $s3,$tweak+8($sp)
1915 x $s0,0($out) # ^=*(inp)|stolen cipher-text
1919 st${g} $out,4*$SIZE_T($sp)
1921 bras $ra,_s390x_AES_encrypt
1922 l${g} $out,4*$SIZE_T($sp)
1923 x $s0,`$tweak+0`($sp) # ^=tweak
1924 x $s1,`$tweak+4`($sp)
1925 x $s2,`$tweak+8`($sp)
1926 x $s3,`$tweak+12`($sp)
1933 stg $sp,$tweak+0($sp) # wipe tweak
1934 stg $sp,$tweak+8($sp) # fix: was "$twesk" (undeclared, interpolates empty) — upper tweak half was never wiped
1935 lm${g} %r6,$ra,6*$SIZE_T($sp)
1937 .size AES_xts_encrypt,.-AES_xts_encrypt
1939 # void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
1940 # size_t len, const AES_KEY *key1, const AES_KEY *key2,
1941 # const unsigned char iv[16]);
1944 .globl AES_xts_decrypt
1945 .type AES_xts_decrypt,\@function
1948 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1952 $code.=<<___ if ($SIZE_T==4);
1956 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1958 bcr 4,$ra # abort if less than zero. formally
1959 # wrong, because $len is unsigned,
1960 # but who can afford asking to
1961 # process more than 2^63-1 bytes?
1963 jnz .Lxts_dec_proceed
1967 $code.=<<___ if (!$softonly);
1971 jl .Lxts_dec_software
1973 st${g} $ra,5*$SIZE_T($sp)
1974 stm${g} %r6,$s3,6*$SIZE_T($sp)
1976 nill $len,0xfff0 # $len&=~15
1979 # generate the tweak value
1980 l${g} $s3,$stdframe($sp) # pointer to iv
1985 la %r1,0($key2) # $key2 is not needed past this point
1986 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1987 brc 1,.-4 # can this happen?
1990 la %r1,0($key1) # $key1 is not needed anymore
1993 jz .Lxts_dec_km_short
1994 bras $ra,_s390x_xts_km
1995 jz .Lxts_dec_km_done
1997 lrvgr $s2,$s0 # make copy in reverse byte order
1999 j .Lxts_dec_km_2ndtweak
2002 llgc $len,`2*$SIZE_T-1`($sp)
2003 nill $len,0x0f # $len%=16
2004 lrvg $s0,$tweak+0($sp) # load the tweak
2005 lrvg $s1,$tweak+8($sp)
2006 lrvgr $s2,$s0 # make copy in reverse byte order
2009 .Lxts_dec_km_2ndtweak:
2011 srag $i2,$s1,63 # broadcast upper bit
2016 lrvgr $i1,$s0 # flip byte order
2021 stg $i1,0($out,$inp)
2022 stg $i2,8($out,$inp)
2025 .long 0xb92e0066 # km $i2,$i2
2026 brc 1,.-4 # can this happen?
2031 stg $i1,0($out,$inp)
2032 stg $i2,8($out,$inp)
2034 la $i3,0($out,$inp) # put aside real $out
2037 llgc $i2,0($out,$inp)
2038 stc $i1,0($out,$inp)
2039 stc $i2,16($out,$inp)
2041 brct $len,.Lxts_dec_km_steal
2051 .long 0xb92e0088 # km $s0,$s0
2052 brc 1,.-4 # can this happen?
2058 stg $sp,$tweak+0($sp) # wipe tweak
2059 stg $sp,$tweak+8($sp)
2060 l${g} $ra,5*$SIZE_T($sp)
2061 lm${g} %r6,$s3,6*$SIZE_T($sp)
2067 stm${g} %r6,$ra,6*$SIZE_T($sp)
2072 l${g} $s3,$stdframe($sp) # ivp
2073 llgf $s0,0($s3) # load iv
2077 stm${g} %r2,%r5,2*$SIZE_T($sp)
2080 bras $ra,_s390x_AES_encrypt # generate the tweak
2081 lm${g} %r2,%r5,2*$SIZE_T($sp)
2084 stm $s0,$s3,$tweak($sp) # save the tweak
2090 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2091 lrvg $s3,$tweak+8($sp)
2093 srag %r0,$s3,63 # broadcast upper bit
2098 lrvgr $s1,$s1 # flip byte order
2100 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2101 stg $s1,$tweak+0($sp) # save the tweak
2104 stg $s3,$tweak+8($sp)
2107 x $s0,0($inp) # tweak^=*(inp)
2111 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
2113 bras $ra,_s390x_AES_decrypt
2114 lm${g} %r2,%r5,2*$SIZE_T($sp)
2115 x $s0,$tweak+0($sp) # ^=tweak
2118 x $s3,$tweak+12($sp)
2122 st $s3,12($out,$inp)
2124 brct${g} $len,.Lxts_dec_loop
2126 llgc $len,`2*$SIZE_T-1`($sp)
2127 nill $len,0x0f # $len%16
2130 # generate pair of tweaks...
2131 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2132 lrvg $s3,$tweak+8($sp)
2134 srag %r0,$s3,63 # broadcast upper bit
2139 lrvgr $i2,$s1 # flip byte order
2141 stmg $i2,$i3,$tweak($sp) # save the 1st tweak
2142 j .Lxts_dec_2ndtweak
2146 llgc $len,`2*$SIZE_T-1`($sp)
2147 nill $len,0x0f # $len%16
2148 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2149 lrvg $s3,$tweak+8($sp)
2152 srag %r0,$s3,63 # broadcast upper bit
2157 lrvgr $s1,$s1 # flip byte order
2159 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2160 stg $s1,$tweak-16+0($sp) # save the 2nd tweak
2163 stg $s3,$tweak-16+8($sp)
2166 x $s0,0($inp) # tweak_the_2nd^=*(inp)
2170 stm${g} %r2,%r3,2*$SIZE_T($sp)
2172 bras $ra,_s390x_AES_decrypt
2173 lm${g} %r2,%r5,2*$SIZE_T($sp)
2174 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd
2175 x $s1,$tweak-16+4($sp)
2176 x $s2,$tweak-16+8($sp)
2177 x $s3,$tweak-16+12($sp)
2181 st $s3,12($out,$inp)
2183 la $i3,0($out,$inp) # put aside real $out
2186 llgc %r1,0($out,$inp)
2187 stc %r0,0($out,$inp)
2188 stc %r1,16($out,$inp)
2190 brct $len,.Lxts_dec_steal
2191 la $out,0($i3) # restore real $out
2193 lm $s0,$s3,$tweak($sp) # load the 1st tweak
2194 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text
2198 st${g} $out,4*$SIZE_T($sp)
2200 bras $ra,_s390x_AES_decrypt
2201 l${g} $out,4*$SIZE_T($sp)
2202 x $s0,$tweak+0($sp) # ^=tweak
2205 x $s3,$tweak+12($sp)
2210 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak
2211 stg $sp,$tweak-16+8($sp)
2213 stg $sp,$tweak+0($sp) # wipe tweak
2214 stg $sp,$tweak+8($sp) # fix: was "$twesk" (undeclared, interpolates empty) — upper tweak half was never wiped
2215 lm${g} %r6,$ra,6*$SIZE_T($sp)
2217 .size AES_xts_decrypt,.-AES_xts_decrypt
2221 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
2222 .comm OPENSSL_s390xcap_P,80,8
2225 $code =~ s/\`([^\`]*)\`/eval $1/gem; # evaluate constant expressions enclosed in backticks before output
2227 close STDOUT; # force flush