2 # Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. Rights for redistribution and usage in source and binary
13 # forms are granted according to the License.
14 # ====================================================================
18 # The major reason for this effort was to mitigate the hazard of
19 # cache-timing attacks. This is [currently and initially!] addressed in
20 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B each.
21 # 2. References to them are scheduled for L2 cache latency, meaning
22 # that the tables don't have to reside in L1 cache. Once again, this
23 # is an initial draft and one should expect more countermeasures to
26 # Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
29 # Even though performance was not the primary goal [on the contrary,
30 # extra shifts "induced" by the compressed S-box and the longer loop
31 # epilogue "induced" by scheduling for L2 hurt performance], the code
32 # turned out to run in ~23 cycles per processed byte en-/decrypted
33 # with a 128-bit key. This is a pretty good result for code with the
34 # mentioned qualities on an UltraSPARC core. Compared to Sun C
35 # generated code, my encrypt procedure runs just a few percent faster,
36 # while the decrypt one is a whole 50% faster [yes, Sun C failed to
37 # generate an optimal decrypt procedure]. Compared to GNU C generated
38 # code, both procedures are more than 60% faster:-)
40 $output = pop and (open STDOUT,">",$output or die "can't open $output: $!"); # last argument, if any, names the output file; 3-arg open keeps the name out of the mode string, and a failed open is fatal instead of silently writing to the inherited STDOUT
77 $rounds="%i7"; # aliases with return address, which is off-loaded to stack
81 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } # each shifted value is appended to $code twice, as one ".long x,x" line
86 # define __ASSEMBLER__ 1
88 #include "crypto/sparc_arch.h"
91 .register %g2,#scratch
92 .register %g3,#scratch
94 .section ".text",#alloc,#execinstr
100 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
101 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
102 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
103 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
104 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
105 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
106 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
107 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
108 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
109 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
110 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
111 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
112 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
113 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
114 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
115 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
116 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
117 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
118 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
119 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
120 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
121 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
122 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
123 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
124 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
125 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
126 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
127 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
128 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
129 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
130 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
131 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
132 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
133 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
134 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
135 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
136 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
137 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
138 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
139 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
140 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
141 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
142 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
143 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
144 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
145 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
146 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
147 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
148 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
149 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
150 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
151 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
152 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
153 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
154 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
155 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
156 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
157 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
158 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
159 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
160 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
161 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
162 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
163 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
165 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
166 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
167 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
168 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
169 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
170 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
171 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
172 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
173 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
174 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
175 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
176 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
177 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
178 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
179 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
180 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
181 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
182 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
183 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
184 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
185 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
186 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
187 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
188 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
189 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
190 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
191 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
192 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
193 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
194 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
195 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
196 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
198 .size AES_Te,(.-AES_Te)
202 _sparcv9_AES_encrypt:
203 save %sp,-$frame-$locals,%sp
204 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
205 ld [$key+240],$rounds
209 srl $rounds,1,$rounds
226 ldx [$tbl+$acc0],$acc0
229 ldx [$tbl+$acc1],$acc1
232 ldx [$tbl+$acc2],$acc2 !
235 ldx [$tbl+$acc3],$acc3
238 ldx [$tbl+$acc4],$acc4
242 ldx [$tbl+$acc5],$acc5
245 ldx [$tbl+$acc6],$acc6
248 ldx [$tbl+$acc7],$acc7 !
251 ldx [$tbl+$acc8],$acc8
253 and $acc10,2040,$acc10
254 ldx [$tbl+$acc9],$acc9
257 and $acc11,2040,$acc11
258 ldx [$tbl+$acc10],$acc10
260 and $acc12,2040,$acc12
261 ldx [$tbl+$acc11],$acc11
263 and $acc13,2040,$acc13
264 ldx [$tbl+$acc12],$acc12 !
266 and $acc14,2040,$acc14
267 ldx [$tbl+$acc13],$acc13
268 and $acc15,2040,$acc15
270 ldx [$tbl+$acc14],$acc14
272 subcc $rounds,1,$rounds !
273 ldx [$tbl+$acc15],$acc15
274 bz,a,pn %icc,.Lenc_last
275 add $tbl,2048,$rounds
281 srlx $acc2,16,$acc2 !
297 srlx $acc10,16,$acc10 !
299 srlx $acc11,24,$acc11
303 srlx $acc14,16,$acc14
305 srlx $acc15,24,$acc15 !
307 xor $acc12,$acc14,$acc14
314 and $acc0,2040,$acc0 !
317 ldx [$tbl+$acc0],$acc0
320 ldx [$tbl+$acc1],$acc1
324 ldx [$tbl+$acc2],$acc2
327 ldx [$tbl+$acc3],$acc3
330 ldx [$tbl+$acc4],$acc4 !
333 ldx [$tbl+$acc5],$acc5
336 ldx [$tbl+$acc6],$acc6
340 ldx [$tbl+$acc7],$acc7
343 ldx [$tbl+$acc8],$acc8
345 and $acc10,2040,$acc10
346 ldx [$tbl+$acc9],$acc9 !
348 and $acc11,2040,$acc11
349 ldx [$tbl+$acc10],$acc10
351 and $acc12,2040,$acc12
352 ldx [$tbl+$acc11],$acc11
355 and $acc13,2040,$acc13
356 ldx [$tbl+$acc12],$acc12
358 and $acc14,2040,$acc14
359 ldx [$tbl+$acc13],$acc13
361 and $acc15,2040,$acc15
362 ldx [$tbl+$acc14],$acc14 !
366 ldx [$tbl+$acc15],$acc15
382 ldx [$tbl+2048+0],%g0 ! prefetch te4
383 srlx $acc10,16,$acc10
385 ldx [$tbl+2048+32],%g0 ! prefetch te4
386 srlx $acc11,24,$acc11
388 ldx [$tbl+2048+64],%g0 ! prefetch te4
391 ldx [$tbl+2048+96],%g0 ! prefetch te4
392 srlx $acc14,16,$acc14 !
394 ldx [$tbl+2048+128],%g0 ! prefetch te4
395 srlx $acc15,24,$acc15
397 ldx [$tbl+2048+160],%g0 ! prefetch te4
400 ldx [$tbl+2048+192],%g0 ! prefetch te4
401 xor $acc12,$acc14,$acc14
403 ldx [$tbl+2048+224],%g0 ! prefetch te4
430 srlx $acc10,16,$acc10
432 srlx $acc11,24,$acc11
436 srlx $acc14,16,$acc14 !
438 srlx $acc15,24,$acc15
440 xor $acc12,$acc14,$acc14
449 ldub [$rounds+$acc0],$acc0
452 ldub [$rounds+$acc1],$acc1
455 ldub [$rounds+$acc2],$acc2
456 ldub [$rounds+$acc3],$acc3
459 ldub [$rounds+$acc4],$acc4
463 ldub [$rounds+$acc5],$acc5
466 ldub [$rounds+$acc6],$acc6
467 ldub [$rounds+$acc7],$acc7
471 ldub [$rounds+$acc8],$acc8
473 and $acc10,255,$acc10
474 ldub [$rounds+$acc9],$acc9
477 ldub [$rounds+$acc10],$acc10 !
479 and $acc13,255,$acc13
480 ldub [$rounds+$acc11],$acc11
481 ldub [$rounds+$acc12],$acc12
482 and $acc14,255,$acc14
483 ldub [$rounds+$acc13],$acc13
485 ldub [$rounds+$acc14],$acc14 !
489 ldub [$rounds+$acc15],$acc15
492 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
510 sll $acc13,16,$acc13 !
514 xor $acc12,$acc14,$acc14
521 .type _sparcv9_AES_encrypt,#function
522 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
529 bnz,pn %xcc,.Lunaligned_enc
538 add %o7,AES_Te-1b,%o4
539 call _sparcv9_AES_encrypt
598 add %o7,AES_Te-1b,%o4
599 call _sparcv9_AES_encrypt
636 .type AES_encrypt,#function
637 .size AES_encrypt,(.-AES_encrypt)
646 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
647 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
648 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
649 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
650 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
651 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
652 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
653 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
654 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
655 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
656 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
657 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
658 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
659 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
660 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
661 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
662 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
663 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
664 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
665 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
666 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
667 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
668 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
669 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
670 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
671 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
672 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
673 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
674 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
675 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
676 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
677 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
678 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
679 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
680 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
681 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
682 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
683 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
684 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
685 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
686 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
687 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
688 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
689 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
690 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
691 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
692 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
693 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
694 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
695 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
696 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
697 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
698 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
699 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
700 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
701 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
702 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
703 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
704 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
705 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
706 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
707 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
708 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
709 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
711 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
712 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
713 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
714 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
715 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
716 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
717 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
718 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
719 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
720 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
721 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
722 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
723 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
724 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
725 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
726 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
727 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
728 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
729 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
730 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
731 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
732 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
733 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
734 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
735 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
736 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
737 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
738 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
739 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
740 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
741 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
742 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
744 .size AES_Td,(.-AES_Td)
748 _sparcv9_AES_decrypt:
749 save %sp,-$frame-$locals,%sp
750 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
751 ld [$key+240],$rounds
756 srl $rounds,1,$rounds
772 ldx [$tbl+$acc0],$acc0
775 ldx [$tbl+$acc1],$acc1
778 ldx [$tbl+$acc2],$acc2 !
781 ldx [$tbl+$acc3],$acc3
784 ldx [$tbl+$acc4],$acc4
788 ldx [$tbl+$acc5],$acc5
791 ldx [$tbl+$acc6],$acc6
794 ldx [$tbl+$acc7],$acc7 !
797 ldx [$tbl+$acc8],$acc8
799 and $acc10,2040,$acc10
800 ldx [$tbl+$acc9],$acc9
803 and $acc11,2040,$acc11
804 ldx [$tbl+$acc10],$acc10
806 and $acc12,2040,$acc12
807 ldx [$tbl+$acc11],$acc11
809 and $acc13,2040,$acc13
810 ldx [$tbl+$acc12],$acc12 !
812 and $acc14,2040,$acc14
813 ldx [$tbl+$acc13],$acc13
814 and $acc15,2040,$acc15
816 ldx [$tbl+$acc14],$acc14
818 subcc $rounds,1,$rounds !
819 ldx [$tbl+$acc15],$acc15
820 bz,a,pn %icc,.Ldec_last
821 add $tbl,2048,$rounds
827 srlx $acc2,16,$acc2 !
843 srlx $acc10,16,$acc10 !
845 srlx $acc11,24,$acc11
849 srlx $acc14,16,$acc14
851 srlx $acc15,24,$acc15 !
853 xor $acc12,$acc14,$acc14
860 and $acc0,2040,$acc0 !
863 ldx [$tbl+$acc0],$acc0
866 ldx [$tbl+$acc1],$acc1
870 ldx [$tbl+$acc2],$acc2
873 ldx [$tbl+$acc3],$acc3
876 ldx [$tbl+$acc4],$acc4 !
879 ldx [$tbl+$acc5],$acc5
882 ldx [$tbl+$acc6],$acc6
886 ldx [$tbl+$acc7],$acc7
889 ldx [$tbl+$acc8],$acc8
891 and $acc10,2040,$acc10
892 ldx [$tbl+$acc9],$acc9 !
894 and $acc11,2040,$acc11
895 ldx [$tbl+$acc10],$acc10
897 and $acc12,2040,$acc12
898 ldx [$tbl+$acc11],$acc11
901 and $acc13,2040,$acc13
902 ldx [$tbl+$acc12],$acc12
904 and $acc14,2040,$acc14
905 ldx [$tbl+$acc13],$acc13
907 and $acc15,2040,$acc15
908 ldx [$tbl+$acc14],$acc14 !
912 ldx [$tbl+$acc15],$acc15
928 ldx [$tbl+2048+0],%g0 ! prefetch td4
929 srlx $acc10,16,$acc10
931 ldx [$tbl+2048+32],%g0 ! prefetch td4
932 srlx $acc11,24,$acc11
934 ldx [$tbl+2048+64],%g0 ! prefetch td4
937 ldx [$tbl+2048+96],%g0 ! prefetch td4
938 srlx $acc14,16,$acc14 !
940 ldx [$tbl+2048+128],%g0 ! prefetch td4
941 srlx $acc15,24,$acc15
943 ldx [$tbl+2048+160],%g0 ! prefetch td4
946 ldx [$tbl+2048+192],%g0 ! prefetch td4
947 xor $acc12,$acc14,$acc14
949 ldx [$tbl+2048+224],%g0 ! prefetch td4
950 and $acc0,2040,$acc0 !
976 srlx $acc10,16,$acc10
978 srlx $acc11,24,$acc11
982 srlx $acc14,16,$acc14 !
984 srlx $acc15,24,$acc15
986 xor $acc12,$acc14,$acc14
995 ldub [$rounds+$acc0],$acc0
998 ldub [$rounds+$acc1],$acc1
1001 ldub [$rounds+$acc2],$acc2
1002 ldub [$rounds+$acc3],$acc3
1005 ldub [$rounds+$acc4],$acc4
1009 ldub [$rounds+$acc5],$acc5
1012 ldub [$rounds+$acc6],$acc6
1013 ldub [$rounds+$acc7],$acc7
1017 ldub [$rounds+$acc8],$acc8
1019 and $acc10,255,$acc10
1020 ldub [$rounds+$acc9],$acc9
1023 ldub [$rounds+$acc10],$acc10 !
1025 and $acc13,255,$acc13
1026 ldub [$rounds+$acc11],$acc11
1027 ldub [$rounds+$acc12],$acc12
1028 and $acc14,255,$acc14
1029 ldub [$rounds+$acc13],$acc13
1031 ldub [$rounds+$acc14],$acc14 !
1035 ldub [$rounds+$acc15],$acc15
1038 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1048 sll $acc8,24,$acc8 !
1054 sll $acc12,24,$acc12
1056 sll $acc13,16,$acc13 !
1060 xor $acc12,$acc14,$acc14
1067 .type _sparcv9_AES_decrypt,#function
1068 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1075 bnz,pn %xcc,.Lunaligned_dec
1076 save %sp,-$frame,%sp
1084 add %o7,AES_Td-1b,%o4
1085 call _sparcv9_AES_decrypt
1144 add %o7,AES_Td-1b,%o4
1145 call _sparcv9_AES_decrypt
1182 .type AES_decrypt,#function
1183 .size AES_decrypt,(.-AES_decrypt)
1186 # The fmovs instructions, which stand in for FP nops, were originally
1187 # added to meet specific instruction alignment requirements and so
1188 # maximize ILP. As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP
1189 # nops can have an undesired effect there, so just strip them from the
1190 # generated code and sacrifice a fraction of a percent in performance...
1191 $code =~ s/fmovs.*$//gm; # on every line of $code, drop everything from "fmovs" to end of line
1194 close STDOUT or die "error closing STDOUT: $!"; # ensure flush