2 # Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. Rights for redistribution and usage in source and binary
13 # forms are granted according to the License.
14 # ====================================================================
18 # The major reason for undertaking this effort was to mitigate the hazard
19 # of cache-timing attacks. This is [currently and initially!] addressed in
20 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
21 # 2. References to them are scheduled for L2 cache latency, meaning
22 # that the tables don't have to reside in L1 cache. Once again, this
23 # is an initial draft and one should expect more countermeasures to
26 # Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
29 # Even though performance was not the primary goal [on the contrary,
30 # extra shifts "induced" by compressed S-box and longer loop epilogue
31 # "induced" by scheduling for L2 have negative effect on performance],
32 # the code turned out to run in ~23 cycles per processed byte en-/
33 # decrypted with a 128-bit key. This is a pretty good result for code
34 # with the mentioned qualities on an UltraSPARC core. Compared to Sun C
35 # generated code, my encrypt procedure runs just a few percent faster,
36 # while the decrypt one is a whole 50% faster [yes, Sun C failed to
37 # generate an optimal decrypt procedure]. Compared to GNU C generated
38 # code, both procedures are more than 60% faster:-)
# Redirect STDOUT to the file named by the last command-line argument, if one
# was given. The three-argument form of open keeps the file name from being
# parsed as part of the mode string, and a failed open aborts immediately
# with the system error instead of surfacing only at the final close.
# When no argument is supplied, pop yields undef and STDOUT is left alone.
40 $output = pop and (open STDOUT,">",$output or die "can't open $output: $!");
# Register used as the round counter in the generated code. %i7 is also where
# the return address lives, so the routines store %i7 to the stack frame on
# entry and reload it before returning. After the final loop iteration the
# same register is reused as the base address of the byte table located
# 2048 bytes past $tbl.
77 $rounds="%i7"; # aliases with return address, which is off-loaded to stack
# Drain the pending list one value at a time and append each value to $code
# as a ".long" directive that carries the same 32-bit word twice, so every
# input word becomes one 8-byte table entry (matching the 2040 = 255*8 index
# mask applied before the ldx table loads).
# NOTE(review): the enclosing scope is not visible here; presumably this is
# the body of a helper sub and `shift` consumes its argument list -- confirm.
81 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
85 #include "sparc_arch.h"
88 .register %g2,#scratch
89 .register %g3,#scratch
91 .section ".text",#alloc,#execinstr
97 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
98 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
99 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
100 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
101 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
102 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
103 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
104 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
105 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
106 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
107 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
108 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
109 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
110 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
111 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
112 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
113 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
114 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
115 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
116 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
117 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
118 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
119 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
120 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
121 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
122 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
123 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
124 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
125 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
126 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
127 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
128 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
129 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
130 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
131 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
132 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
133 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
134 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
135 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
136 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
137 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
138 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
139 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
140 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
141 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
142 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
143 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
144 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
145 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
146 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
147 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
148 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
149 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
150 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
151 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
152 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
153 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
154 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
155 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
156 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
157 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
158 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
159 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
160 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
162 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
163 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
164 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
165 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
166 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
167 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
168 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
169 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
170 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
171 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
172 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
173 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
174 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
175 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
176 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
177 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
178 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
179 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
180 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
181 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
182 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
183 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
184 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
185 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
186 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
187 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
188 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
189 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
190 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
191 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
192 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
193 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
195 .size AES_Te,(.-AES_Te)
199 _sparcv9_AES_encrypt:
200 save %sp,-$frame-$locals,%sp
201 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
202 ld [$key+240],$rounds
206 srl $rounds,1,$rounds
223 ldx [$tbl+$acc0],$acc0
226 ldx [$tbl+$acc1],$acc1
229 ldx [$tbl+$acc2],$acc2 !
232 ldx [$tbl+$acc3],$acc3
235 ldx [$tbl+$acc4],$acc4
239 ldx [$tbl+$acc5],$acc5
242 ldx [$tbl+$acc6],$acc6
245 ldx [$tbl+$acc7],$acc7 !
248 ldx [$tbl+$acc8],$acc8
250 and $acc10,2040,$acc10
251 ldx [$tbl+$acc9],$acc9
254 and $acc11,2040,$acc11
255 ldx [$tbl+$acc10],$acc10
257 and $acc12,2040,$acc12
258 ldx [$tbl+$acc11],$acc11
260 and $acc13,2040,$acc13
261 ldx [$tbl+$acc12],$acc12 !
263 and $acc14,2040,$acc14
264 ldx [$tbl+$acc13],$acc13
265 and $acc15,2040,$acc15
267 ldx [$tbl+$acc14],$acc14
269 subcc $rounds,1,$rounds !
270 ldx [$tbl+$acc15],$acc15
271 bz,a,pn %icc,.Lenc_last
272 add $tbl,2048,$rounds
278 srlx $acc2,16,$acc2 !
294 srlx $acc10,16,$acc10 !
296 srlx $acc11,24,$acc11
300 srlx $acc14,16,$acc14
302 srlx $acc15,24,$acc15 !
304 xor $acc12,$acc14,$acc14
311 and $acc0,2040,$acc0 !
314 ldx [$tbl+$acc0],$acc0
317 ldx [$tbl+$acc1],$acc1
321 ldx [$tbl+$acc2],$acc2
324 ldx [$tbl+$acc3],$acc3
327 ldx [$tbl+$acc4],$acc4 !
330 ldx [$tbl+$acc5],$acc5
333 ldx [$tbl+$acc6],$acc6
337 ldx [$tbl+$acc7],$acc7
340 ldx [$tbl+$acc8],$acc8
342 and $acc10,2040,$acc10
343 ldx [$tbl+$acc9],$acc9 !
345 and $acc11,2040,$acc11
346 ldx [$tbl+$acc10],$acc10
348 and $acc12,2040,$acc12
349 ldx [$tbl+$acc11],$acc11
352 and $acc13,2040,$acc13
353 ldx [$tbl+$acc12],$acc12
355 and $acc14,2040,$acc14
356 ldx [$tbl+$acc13],$acc13
358 and $acc15,2040,$acc15
359 ldx [$tbl+$acc14],$acc14 !
363 ldx [$tbl+$acc15],$acc15
379 ldx [$tbl+2048+0],%g0 ! prefetch te4
380 srlx $acc10,16,$acc10
382 ldx [$tbl+2048+32],%g0 ! prefetch te4
383 srlx $acc11,24,$acc11
385 ldx [$tbl+2048+64],%g0 ! prefetch te4
388 ldx [$tbl+2048+96],%g0 ! prefetch te4
389 srlx $acc14,16,$acc14 !
391 ldx [$tbl+2048+128],%g0 ! prefetch te4
392 srlx $acc15,24,$acc15
394 ldx [$tbl+2048+160],%g0 ! prefetch te4
397 ldx [$tbl+2048+192],%g0 ! prefetch te4
398 xor $acc12,$acc14,$acc14
400 ldx [$tbl+2048+224],%g0 ! prefetch te4
427 srlx $acc10,16,$acc10
429 srlx $acc11,24,$acc11
433 srlx $acc14,16,$acc14 !
435 srlx $acc15,24,$acc15
437 xor $acc12,$acc14,$acc14
446 ldub [$rounds+$acc0],$acc0
449 ldub [$rounds+$acc1],$acc1
452 ldub [$rounds+$acc2],$acc2
453 ldub [$rounds+$acc3],$acc3
456 ldub [$rounds+$acc4],$acc4
460 ldub [$rounds+$acc5],$acc5
463 ldub [$rounds+$acc6],$acc6
464 ldub [$rounds+$acc7],$acc7
468 ldub [$rounds+$acc8],$acc8
470 and $acc10,255,$acc10
471 ldub [$rounds+$acc9],$acc9
474 ldub [$rounds+$acc10],$acc10 !
476 and $acc13,255,$acc13
477 ldub [$rounds+$acc11],$acc11
478 ldub [$rounds+$acc12],$acc12
479 and $acc14,255,$acc14
480 ldub [$rounds+$acc13],$acc13
482 ldub [$rounds+$acc14],$acc14 !
486 ldub [$rounds+$acc15],$acc15
489 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
507 sll $acc13,16,$acc13 !
511 xor $acc12,$acc14,$acc14
518 .type _sparcv9_AES_encrypt,#function
519 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
526 bnz,pn %xcc,.Lunaligned_enc
535 add %o7,AES_Te-1b,%o4
536 call _sparcv9_AES_encrypt
595 add %o7,AES_Te-1b,%o4
596 call _sparcv9_AES_encrypt
633 .type AES_encrypt,#function
634 .size AES_encrypt,(.-AES_encrypt)
643 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
644 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
645 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
646 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
647 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
648 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
649 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
650 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
651 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
652 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
653 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
654 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
655 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
656 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
657 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
658 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
659 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
660 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
661 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
662 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
663 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
664 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
665 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
666 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
667 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
668 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
669 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
670 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
671 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
672 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
673 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
674 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
675 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
676 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
677 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
678 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
679 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
680 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
681 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
682 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
683 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
684 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
685 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
686 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
687 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
688 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
689 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
690 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
691 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
692 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
693 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
694 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
695 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
696 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
697 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
698 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
699 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
700 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
701 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
702 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
703 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
704 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
705 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
706 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
708 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
709 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
710 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
711 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
712 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
713 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
714 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
715 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
716 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
717 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
718 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
719 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
720 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
721 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
722 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
723 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
724 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
725 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
726 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
727 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
728 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
729 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
730 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
731 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
732 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
733 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
734 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
735 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
736 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
737 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
738 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
739 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
741 .size AES_Td,(.-AES_Td)
745 _sparcv9_AES_decrypt:
746 save %sp,-$frame-$locals,%sp
747 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
748 ld [$key+240],$rounds
753 srl $rounds,1,$rounds
769 ldx [$tbl+$acc0],$acc0
772 ldx [$tbl+$acc1],$acc1
775 ldx [$tbl+$acc2],$acc2 !
778 ldx [$tbl+$acc3],$acc3
781 ldx [$tbl+$acc4],$acc4
785 ldx [$tbl+$acc5],$acc5
788 ldx [$tbl+$acc6],$acc6
791 ldx [$tbl+$acc7],$acc7 !
794 ldx [$tbl+$acc8],$acc8
796 and $acc10,2040,$acc10
797 ldx [$tbl+$acc9],$acc9
800 and $acc11,2040,$acc11
801 ldx [$tbl+$acc10],$acc10
803 and $acc12,2040,$acc12
804 ldx [$tbl+$acc11],$acc11
806 and $acc13,2040,$acc13
807 ldx [$tbl+$acc12],$acc12 !
809 and $acc14,2040,$acc14
810 ldx [$tbl+$acc13],$acc13
811 and $acc15,2040,$acc15
813 ldx [$tbl+$acc14],$acc14
815 subcc $rounds,1,$rounds !
816 ldx [$tbl+$acc15],$acc15
817 bz,a,pn %icc,.Ldec_last
818 add $tbl,2048,$rounds
824 srlx $acc2,16,$acc2 !
840 srlx $acc10,16,$acc10 !
842 srlx $acc11,24,$acc11
846 srlx $acc14,16,$acc14
848 srlx $acc15,24,$acc15 !
850 xor $acc12,$acc14,$acc14
857 and $acc0,2040,$acc0 !
860 ldx [$tbl+$acc0],$acc0
863 ldx [$tbl+$acc1],$acc1
867 ldx [$tbl+$acc2],$acc2
870 ldx [$tbl+$acc3],$acc3
873 ldx [$tbl+$acc4],$acc4 !
876 ldx [$tbl+$acc5],$acc5
879 ldx [$tbl+$acc6],$acc6
883 ldx [$tbl+$acc7],$acc7
886 ldx [$tbl+$acc8],$acc8
888 and $acc10,2040,$acc10
889 ldx [$tbl+$acc9],$acc9 !
891 and $acc11,2040,$acc11
892 ldx [$tbl+$acc10],$acc10
894 and $acc12,2040,$acc12
895 ldx [$tbl+$acc11],$acc11
898 and $acc13,2040,$acc13
899 ldx [$tbl+$acc12],$acc12
901 and $acc14,2040,$acc14
902 ldx [$tbl+$acc13],$acc13
904 and $acc15,2040,$acc15
905 ldx [$tbl+$acc14],$acc14 !
909 ldx [$tbl+$acc15],$acc15
925 ldx [$tbl+2048+0],%g0 ! prefetch td4
926 srlx $acc10,16,$acc10
928 ldx [$tbl+2048+32],%g0 ! prefetch td4
929 srlx $acc11,24,$acc11
931 ldx [$tbl+2048+64],%g0 ! prefetch td4
934 ldx [$tbl+2048+96],%g0 ! prefetch td4
935 srlx $acc14,16,$acc14 !
937 ldx [$tbl+2048+128],%g0 ! prefetch td4
938 srlx $acc15,24,$acc15
940 ldx [$tbl+2048+160],%g0 ! prefetch td4
943 ldx [$tbl+2048+192],%g0 ! prefetch td4
944 xor $acc12,$acc14,$acc14
946 ldx [$tbl+2048+224],%g0 ! prefetch td4
947 and $acc0,2040,$acc0 !
973 srlx $acc10,16,$acc10
975 srlx $acc11,24,$acc11
979 srlx $acc14,16,$acc14 !
981 srlx $acc15,24,$acc15
983 xor $acc12,$acc14,$acc14
992 ldub [$rounds+$acc0],$acc0
995 ldub [$rounds+$acc1],$acc1
998 ldub [$rounds+$acc2],$acc2
999 ldub [$rounds+$acc3],$acc3
1002 ldub [$rounds+$acc4],$acc4
1006 ldub [$rounds+$acc5],$acc5
1009 ldub [$rounds+$acc6],$acc6
1010 ldub [$rounds+$acc7],$acc7
1014 ldub [$rounds+$acc8],$acc8
1016 and $acc10,255,$acc10
1017 ldub [$rounds+$acc9],$acc9
1020 ldub [$rounds+$acc10],$acc10 !
1022 and $acc13,255,$acc13
1023 ldub [$rounds+$acc11],$acc11
1024 ldub [$rounds+$acc12],$acc12
1025 and $acc14,255,$acc14
1026 ldub [$rounds+$acc13],$acc13
1028 ldub [$rounds+$acc14],$acc14 !
1032 ldub [$rounds+$acc15],$acc15
1035 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1045 sll $acc8,24,$acc8 !
1051 sll $acc12,24,$acc12
1053 sll $acc13,16,$acc13 !
1057 xor $acc12,$acc14,$acc14
1064 .type _sparcv9_AES_decrypt,#function
1065 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1072 bnz,pn %xcc,.Lunaligned_dec
1073 save %sp,-$frame,%sp
1081 add %o7,AES_Td-1b,%o4
1082 call _sparcv9_AES_decrypt
1141 add %o7,AES_Td-1b,%o4
1142 call _sparcv9_AES_decrypt
1179 .type AES_decrypt,#function
1180 .size AES_decrypt,(.-AES_decrypt)
1183 # fmovs instructions substituting for FP nops were originally added
1184 # to meet specific instruction alignment requirements to maximize ILP.
1185 # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1186 # an undesired effect, so just omit them and sacrifice a fraction of a
1187 # percent in performance...
# Delete every fmovs instruction from the generated text: the match runs from
# the mnemonic to the end of its line (/m lets $ match at each line end, /g
# applies the substitution to all occurrences).
1188 $code =~ s/fmovs.*$//gm;
# Close explicitly so buffered output is flushed and write errors are seen;
# $! is included so that a failure reports the underlying system error.
1191 close STDOUT or die "error closing STDOUT: $!"; # ensure flush