3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. Rights for redistribution and usage in source and binary
6 # forms are granted according to the OpenSSL license.
7 # ====================================================================
11 # The major reason for undertaking this effort was to mitigate the hazard
12 # of cache-timing attacks. This is [currently and initially!] addressed in
13 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14 # 2. References to them are scheduled for L2 cache latency, meaning
15 # that the tables don't have to reside in L1 cache. Once again, this
16 # is an initial draft and one should expect more countermeasures to
19 # Even though performance was not the primary goal [on the contrary,
20 # extra shifts "induced" by compressed S-box and longer loop epilogue
21 # "induced" by scheduling for L2 have a negative effect on performance],
22 # the code turned out to run in ~23 cycles per processed byte en-/
23 # decrypted with a 128-bit key. This is a pretty good result for code
24 # with the mentioned qualities and an UltraSPARC core. Compared to Sun C
25 # generated code, my encrypt procedure runs just a few percent faster,
26 # while the decrypt one is a whole 50% faster [yes, Sun C failed to
27 # generate an optimal decrypt procedure]. Compared to GNU C generated code,
28 # both procedures are more than 60% faster:-)
# Select the target ABI from the command line: any argument matching
# -m64 or -xarch=v9 switches code generation to 64-bit mode.
# $bias is later added to %sp when addressing stack slots (see the
# "off-load return address" stores), and $frame is part of the amount
# subtracted from %sp by the `save` instructions.
# NOTE(review): 2047/192 vs. 0/112 are presumably the SPARC V9 64-bit and
# 32-bit stack bias / minimum frame sizes -- confirm against the ABI.
31 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
32 if ($bits==64) { $bias=2047; $frame=192; }
33 else { $bias=0; $frame=112; }
67 $rounds="%i7"; # aliases with return address, which is off-loaded to stack
71 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
74 $code.=<<___ if ($bits==64);
75 .register %g2,#scratch
76 .register %g3,#scratch
79 .section ".text",#alloc,#execinstr
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
150 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
151 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
152 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
153 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
154 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
155 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
156 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
157 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
158 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
159 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
160 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
161 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
162 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
163 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
164 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
165 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
166 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
167 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
168 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
169 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
170 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
171 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
172 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
173 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
174 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
175 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
176 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
177 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
178 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
179 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
180 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
181 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183 .size AES_Te,(.-AES_Te)
187 _sparcv9_AES_encrypt:
188 save %sp,-$frame-$locals,%sp
189 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
190 ld [$key+240],$rounds
194 srl $rounds,1,$rounds
211 ldx [$tbl+$acc0],$acc0
214 ldx [$tbl+$acc1],$acc1
217 ldx [$tbl+$acc2],$acc2 !
220 ldx [$tbl+$acc3],$acc3
223 ldx [$tbl+$acc4],$acc4
227 ldx [$tbl+$acc5],$acc5
230 ldx [$tbl+$acc6],$acc6
233 ldx [$tbl+$acc7],$acc7 !
236 ldx [$tbl+$acc8],$acc8
238 and $acc10,2040,$acc10
239 ldx [$tbl+$acc9],$acc9
242 and $acc11,2040,$acc11
243 ldx [$tbl+$acc10],$acc10
245 and $acc12,2040,$acc12
246 ldx [$tbl+$acc11],$acc11
248 and $acc13,2040,$acc13
249 ldx [$tbl+$acc12],$acc12 !
251 and $acc14,2040,$acc14
252 ldx [$tbl+$acc13],$acc13
253 and $acc15,2040,$acc15
255 ldx [$tbl+$acc14],$acc14
257 subcc $rounds,1,$rounds !
258 ldx [$tbl+$acc15],$acc15
259 bz,a,pn %icc,.Lenc_last
260 add $tbl,2048,$rounds
266 srlx $acc2,16,$acc2 !
282 srlx $acc10,16,$acc10 !
284 srlx $acc11,24,$acc11
288 srlx $acc14,16,$acc14
290 srlx $acc15,24,$acc15 !
292 xor $acc12,$acc14,$acc14
299 and $acc0,2040,$acc0 !
302 ldx [$tbl+$acc0],$acc0
305 ldx [$tbl+$acc1],$acc1
309 ldx [$tbl+$acc2],$acc2
312 ldx [$tbl+$acc3],$acc3
315 ldx [$tbl+$acc4],$acc4 !
318 ldx [$tbl+$acc5],$acc5
321 ldx [$tbl+$acc6],$acc6
325 ldx [$tbl+$acc7],$acc7
328 ldx [$tbl+$acc8],$acc8
330 and $acc10,2040,$acc10
331 ldx [$tbl+$acc9],$acc9 !
333 and $acc11,2040,$acc11
334 ldx [$tbl+$acc10],$acc10
336 and $acc12,2040,$acc12
337 ldx [$tbl+$acc11],$acc11
340 and $acc13,2040,$acc13
341 ldx [$tbl+$acc12],$acc12
343 and $acc14,2040,$acc14
344 ldx [$tbl+$acc13],$acc13
346 and $acc15,2040,$acc15
347 ldx [$tbl+$acc14],$acc14 !
351 ldx [$tbl+$acc15],$acc15
367 srlx $acc10,16,$acc10
369 srlx $acc11,24,$acc11
373 srlx $acc14,16,$acc14 !
375 srlx $acc15,24,$acc15
379 xor $acc12,$acc14,$acc14
407 srlx $acc10,16,$acc10
409 srlx $acc11,24,$acc11
413 srlx $acc14,16,$acc14 !
415 srlx $acc15,24,$acc15
417 xor $acc12,$acc14,$acc14
426 ldub [$rounds+$acc0],$acc0
429 ldub [$rounds+$acc1],$acc1
432 ldub [$rounds+$acc2],$acc2
433 ldub [$rounds+$acc3],$acc3
436 ldub [$rounds+$acc4],$acc4
440 ldub [$rounds+$acc5],$acc5
443 ldub [$rounds+$acc6],$acc6
444 ldub [$rounds+$acc7],$acc7
448 ldub [$rounds+$acc8],$acc8
450 and $acc10,255,$acc10
451 ldub [$rounds+$acc9],$acc9
454 ldub [$rounds+$acc10],$acc10 !
456 and $acc13,255,$acc13
457 ldub [$rounds+$acc11],$acc11
458 ldub [$rounds+$acc12],$acc12
459 and $acc14,255,$acc14
460 ldub [$rounds+$acc13],$acc13
462 ldub [$rounds+$acc14],$acc14 !
466 ldub [$rounds+$acc15],$acc15
469 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
487 sll $acc13,16,$acc13 !
491 xor $acc12,$acc14,$acc14
498 .type _sparcv9_AES_encrypt,#function
499 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
506 bnz,pn %xcc,.Lunaligned_enc
516 1: call _sparcv9_AES_encrypt
517 sub %o7,1b-AES_Te,%o4
576 1: call _sparcv9_AES_encrypt
577 sub %o7,1b-AES_Te,%o4
613 .type AES_encrypt,#function
614 .size AES_encrypt,(.-AES_encrypt)
623 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
624 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
625 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
626 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
627 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
628 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
629 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
630 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
631 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
632 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
633 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
634 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
635 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
636 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
637 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
638 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
639 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
640 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
641 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
642 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
643 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
644 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
645 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
646 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
647 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
648 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
649 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
650 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
651 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
652 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
653 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
654 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
655 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
656 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
657 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
658 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
659 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
660 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
661 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
662 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
663 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
664 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
665 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
666 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
667 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
668 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
669 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
670 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
671 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
672 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
673 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
674 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
675 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
676 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
677 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
678 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
679 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
680 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
681 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
682 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
683 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
684 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
685 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
686 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
688 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
689 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
690 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
691 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
692 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
693 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
694 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
695 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
696 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
697 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
698 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
699 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
700 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
701 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
702 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
703 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
704 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
705 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
706 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
707 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
708 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
709 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
710 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
711 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
712 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
713 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
714 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
715 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
716 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
717 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
718 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
719 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
721 .size AES_Td,(.-AES_Td)
725 _sparcv9_AES_decrypt:
726 save %sp,-$frame-$locals,%sp
727 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
728 ld [$key+240],$rounds
733 srl $rounds,1,$rounds
749 ldx [$tbl+$acc0],$acc0
752 ldx [$tbl+$acc1],$acc1
755 ldx [$tbl+$acc2],$acc2 !
758 ldx [$tbl+$acc3],$acc3
761 ldx [$tbl+$acc4],$acc4
765 ldx [$tbl+$acc5],$acc5
768 ldx [$tbl+$acc6],$acc6
771 ldx [$tbl+$acc7],$acc7 !
774 ldx [$tbl+$acc8],$acc8
776 and $acc10,2040,$acc10
777 ldx [$tbl+$acc9],$acc9
780 and $acc11,2040,$acc11
781 ldx [$tbl+$acc10],$acc10
783 and $acc12,2040,$acc12
784 ldx [$tbl+$acc11],$acc11
786 and $acc13,2040,$acc13
787 ldx [$tbl+$acc12],$acc12 !
789 and $acc14,2040,$acc14
790 ldx [$tbl+$acc13],$acc13
791 and $acc15,2040,$acc15
793 ldx [$tbl+$acc14],$acc14
795 subcc $rounds,1,$rounds !
796 ldx [$tbl+$acc15],$acc15
797 bz,a,pn %icc,.Ldec_last
798 add $tbl,2048,$rounds
804 srlx $acc2,16,$acc2 !
820 srlx $acc10,16,$acc10 !
822 srlx $acc11,24,$acc11
826 srlx $acc14,16,$acc14
828 srlx $acc15,24,$acc15 !
830 xor $acc12,$acc14,$acc14
837 and $acc0,2040,$acc0 !
840 ldx [$tbl+$acc0],$acc0
843 ldx [$tbl+$acc1],$acc1
847 ldx [$tbl+$acc2],$acc2
850 ldx [$tbl+$acc3],$acc3
853 ldx [$tbl+$acc4],$acc4 !
856 ldx [$tbl+$acc5],$acc5
859 ldx [$tbl+$acc6],$acc6
863 ldx [$tbl+$acc7],$acc7
866 ldx [$tbl+$acc8],$acc8
868 and $acc10,2040,$acc10
869 ldx [$tbl+$acc9],$acc9 !
871 and $acc11,2040,$acc11
872 ldx [$tbl+$acc10],$acc10
874 and $acc12,2040,$acc12
875 ldx [$tbl+$acc11],$acc11
878 and $acc13,2040,$acc13
879 ldx [$tbl+$acc12],$acc12
881 and $acc14,2040,$acc14
882 ldx [$tbl+$acc13],$acc13
884 and $acc15,2040,$acc15
885 ldx [$tbl+$acc14],$acc14 !
889 ldx [$tbl+$acc15],$acc15
905 srlx $acc10,16,$acc10
907 srlx $acc11,24,$acc11
911 srlx $acc14,16,$acc14 !
913 srlx $acc15,24,$acc15
917 xor $acc12,$acc14,$acc14
919 and $acc0,2040,$acc0 !
945 srlx $acc10,16,$acc10
947 srlx $acc11,24,$acc11
951 srlx $acc14,16,$acc14 !
953 srlx $acc15,24,$acc15
955 xor $acc12,$acc14,$acc14
964 ldub [$rounds+$acc0],$acc0
967 ldub [$rounds+$acc1],$acc1
970 ldub [$rounds+$acc2],$acc2
971 ldub [$rounds+$acc3],$acc3
974 ldub [$rounds+$acc4],$acc4
978 ldub [$rounds+$acc5],$acc5
981 ldub [$rounds+$acc6],$acc6
982 ldub [$rounds+$acc7],$acc7
986 ldub [$rounds+$acc8],$acc8
988 and $acc10,255,$acc10
989 ldub [$rounds+$acc9],$acc9
992 ldub [$rounds+$acc10],$acc10 !
994 and $acc13,255,$acc13
995 ldub [$rounds+$acc11],$acc11
996 ldub [$rounds+$acc12],$acc12
997 and $acc14,255,$acc14
998 ldub [$rounds+$acc13],$acc13
1000 ldub [$rounds+$acc14],$acc14 !
1004 ldub [$rounds+$acc15],$acc15
1007 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1017 sll $acc8,24,$acc8 !
1023 sll $acc12,24,$acc12
1025 sll $acc13,16,$acc13 !
1029 xor $acc12,$acc14,$acc14
1036 .type _sparcv9_AES_decrypt,#function
1037 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1044 bnz,pn %xcc,.Lunaligned_dec
1045 save %sp,-$frame,%sp
1054 1: call _sparcv9_AES_decrypt
1055 sub %o7,1b-AES_Td,%o4
1114 1: call _sparcv9_AES_decrypt
1115 sub %o7,1b-AES_Td,%o4
1151 .type AES_decrypt,#function
1152 .size AES_decrypt,(.-AES_decrypt)
1155 # fmovs instructions substituting for FP nops were originally added
1156 # to meet specific instruction alignment requirements to maximize ILP.
1157 # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1158 # an undesired effect, so just omit them and sacrifice a fraction of
1159 # a percent in performance...
# Remove every fmovs instruction from the generated code, deleting from the
# mnemonic to the end of its line. Flags: /g = all occurrences, /m = `$`
# matches at the end of each line, /e = evaluate the (empty) replacement as
# Perl code.
# NOTE(review): with an empty replacement, /e looks redundant -- confirm
# before dropping it.
1160 $code =~ s/fmovs.*$//gem;