3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. Rights for redistribution and usage in source and binary
6 # forms are granted according to the OpenSSL license.
7 # ====================================================================
11 # The major reason for undertaking this effort was to mitigate the hazard
12 # of cache-timing attacks. This is [currently and initially!] addressed in
13 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14 # 2. References to them are scheduled for L2 cache latency, meaning
15 # that the tables don't have to reside in L1 cache. Once again, this
16 # is an initial draft and one should expect more countermeasures to
19 # Even though performance was not the primary goal [on the contrary,
20 # extra shifts "induced" by compressed S-box and longer loop epilogue
21 # "induced" by scheduling for L2 have negative effect on performance],
22 # the code turned out to run in ~23 cycles per processed byte en-/
23 # decrypted with a 128-bit key. This is a pretty good result for code
24 # with the mentioned qualities on an UltraSPARC core. Compared to Sun C
25 # generated code my encrypt procedure runs just a few percent faster,
26 # while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
27 # optimal decrypt procedure]. Compared to GNU C generated code both
28 # procedures are more than 60% faster:-)
# Pick the target ABI from the command-line flags: an argument matching
# -m64 or -xarch=v9 selects the 64-bit build.
31 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
# $bias and $frame are interpolated into the generated assembly: $frame
# sizes the register-window save (save %sp,-$frame-$locals,%sp) and both
# are added to %sp to address the local slot ([%sp+$bias+$frame+0]).
# NOTE(review): 2047 is presumably the SPARC V9 ABI stack bias -- confirm.
32 if ($bits==64) { $bias=2047; $frame=192; }
33 else { $bias=0; $frame=112; }
# %i7 does double duty: the cipher routines spill it to
# [%sp+$bias+$frame+0] on entry and reload it from there afterwards, which
# frees the register to act as the round counter (and, for the last round,
# as the pointer to the byte table at $tbl+2048) in between.
67 $rounds="%i7"; # aliases with return address, which is off-loaded to stack
71 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
74 $code.=<<___ if ($bits==64);
75 .register %g2,#scratch
76 .register %g3,#scratch
79 .section ".text",#alloc,#execinstr
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
150 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
151 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
152 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
153 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
154 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
155 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
156 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
157 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
158 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
159 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
160 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
161 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
162 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
163 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
164 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
165 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
166 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
167 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
168 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
169 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
170 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
171 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
172 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
173 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
174 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
175 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
176 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
177 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
178 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
179 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
180 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
181 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183 .size AES_Te,(.-AES_Te)
187 _sparcv9_AES_encrypt:
188 save %sp,-$frame-$locals,%sp
189 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
190 ld [$key+240],$rounds
194 srl $rounds,1,$rounds
211 ldx [$tbl+$acc0],$acc0
214 ldx [$tbl+$acc1],$acc1
217 ldx [$tbl+$acc2],$acc2 !
220 ldx [$tbl+$acc3],$acc3
223 ldx [$tbl+$acc4],$acc4
227 ldx [$tbl+$acc5],$acc5
230 ldx [$tbl+$acc6],$acc6
233 ldx [$tbl+$acc7],$acc7 !
236 ldx [$tbl+$acc8],$acc8
238 and $acc10,2040,$acc10
239 ldx [$tbl+$acc9],$acc9
242 and $acc11,2040,$acc11
243 ldx [$tbl+$acc10],$acc10
245 and $acc12,2040,$acc12
246 ldx [$tbl+$acc11],$acc11
248 and $acc13,2040,$acc13
249 ldx [$tbl+$acc12],$acc12 !
251 and $acc14,2040,$acc14
252 ldx [$tbl+$acc13],$acc13
253 and $acc15,2040,$acc15
255 ldx [$tbl+$acc14],$acc14
257 subcc $rounds,1,$rounds !
258 ldx [$tbl+$acc15],$acc15
259 bz,a,pn %icc,.Lenc_last
260 add $tbl,2048,$rounds
266 srlx $acc2,16,$acc2 !
282 srlx $acc10,16,$acc10 !
284 srlx $acc11,24,$acc11
288 srlx $acc14,16,$acc14
290 srlx $acc15,24,$acc15 !
292 xor $acc12,$acc14,$acc14
299 and $acc0,2040,$acc0 !
302 ldx [$tbl+$acc0],$acc0
305 ldx [$tbl+$acc1],$acc1
309 ldx [$tbl+$acc2],$acc2
312 ldx [$tbl+$acc3],$acc3
315 ldx [$tbl+$acc4],$acc4 !
318 ldx [$tbl+$acc5],$acc5
321 ldx [$tbl+$acc6],$acc6
325 ldx [$tbl+$acc7],$acc7
328 ldx [$tbl+$acc8],$acc8
330 and $acc10,2040,$acc10
331 ldx [$tbl+$acc9],$acc9 !
333 and $acc11,2040,$acc11
334 ldx [$tbl+$acc10],$acc10
336 and $acc12,2040,$acc12
337 ldx [$tbl+$acc11],$acc11
340 and $acc13,2040,$acc13
341 ldx [$tbl+$acc12],$acc12
343 and $acc14,2040,$acc14
344 ldx [$tbl+$acc13],$acc13
346 and $acc15,2040,$acc15
347 ldx [$tbl+$acc14],$acc14 !
351 ldx [$tbl+$acc15],$acc15
367 srlx $acc10,16,$acc10
369 srlx $acc11,24,$acc11
373 srlx $acc14,16,$acc14 !
375 srlx $acc15,24,$acc15
379 xor $acc12,$acc14,$acc14
407 srlx $acc10,16,$acc10
409 srlx $acc11,24,$acc11
413 srlx $acc14,16,$acc14 !
415 srlx $acc15,24,$acc15
417 xor $acc12,$acc14,$acc14
426 ldub [$rounds+$acc0],$acc0
429 ldub [$rounds+$acc1],$acc1
432 ldub [$rounds+$acc2],$acc2
433 ldub [$rounds+$acc3],$acc3
436 ldub [$rounds+$acc4],$acc4
440 ldub [$rounds+$acc5],$acc5
443 ldub [$rounds+$acc6],$acc6
444 ldub [$rounds+$acc7],$acc7
448 ldub [$rounds+$acc8],$acc8
450 and $acc10,255,$acc10
451 ldub [$rounds+$acc9],$acc9
454 ldub [$rounds+$acc10],$acc10 !
456 and $acc13,255,$acc13
457 ldub [$rounds+$acc11],$acc11
458 ldub [$rounds+$acc12],$acc12
459 and $acc14,255,$acc14
460 ldub [$rounds+$acc13],$acc13
462 ldub [$rounds+$acc14],$acc14 !
466 ldub [$rounds+$acc15],$acc15
469 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
487 sll $acc13,16,$acc13 !
491 xor $acc12,$acc14,$acc14
498 .type _sparcv9_AES_encrypt,#function
499 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
506 bnz,pn %xcc,.Lunaligned_enc
516 sub %o7,1b-AES_Te,%o4
517 call _sparcv9_AES_encrypt
577 sub %o7,1b-AES_Te,%o4
578 call _sparcv9_AES_encrypt
615 .type AES_encrypt,#function
616 .size AES_encrypt,(.-AES_encrypt)
625 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
626 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
627 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
628 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
629 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
630 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
631 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
632 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
633 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
634 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
635 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
636 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
637 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
638 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
639 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
640 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
641 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
642 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
643 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
644 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
645 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
646 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
647 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
648 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
649 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
650 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
651 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
652 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
653 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
654 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
655 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
656 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
657 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
658 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
659 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
660 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
661 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
662 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
663 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
664 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
665 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
666 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
667 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
668 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
669 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
670 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
671 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
672 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
673 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
674 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
675 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
676 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
677 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
678 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
679 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
680 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
681 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
682 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
683 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
684 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
685 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
686 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
687 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
688 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
690 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
691 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
692 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
693 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
694 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
695 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
696 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
697 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
698 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
699 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
700 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
701 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
702 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
703 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
704 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
705 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
706 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
707 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
708 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
709 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
710 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
711 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
712 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
713 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
714 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
715 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
716 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
717 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
718 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
719 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
720 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
721 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
723 .size AES_Td,(.-AES_Td)
727 _sparcv9_AES_decrypt:
728 save %sp,-$frame-$locals,%sp
729 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
730 ld [$key+240],$rounds
735 srl $rounds,1,$rounds
751 ldx [$tbl+$acc0],$acc0
754 ldx [$tbl+$acc1],$acc1
757 ldx [$tbl+$acc2],$acc2 !
760 ldx [$tbl+$acc3],$acc3
763 ldx [$tbl+$acc4],$acc4
767 ldx [$tbl+$acc5],$acc5
770 ldx [$tbl+$acc6],$acc6
773 ldx [$tbl+$acc7],$acc7 !
776 ldx [$tbl+$acc8],$acc8
778 and $acc10,2040,$acc10
779 ldx [$tbl+$acc9],$acc9
782 and $acc11,2040,$acc11
783 ldx [$tbl+$acc10],$acc10
785 and $acc12,2040,$acc12
786 ldx [$tbl+$acc11],$acc11
788 and $acc13,2040,$acc13
789 ldx [$tbl+$acc12],$acc12 !
791 and $acc14,2040,$acc14
792 ldx [$tbl+$acc13],$acc13
793 and $acc15,2040,$acc15
795 ldx [$tbl+$acc14],$acc14
797 subcc $rounds,1,$rounds !
798 ldx [$tbl+$acc15],$acc15
799 bz,a,pn %icc,.Ldec_last
800 add $tbl,2048,$rounds
806 srlx $acc2,16,$acc2 !
822 srlx $acc10,16,$acc10 !
824 srlx $acc11,24,$acc11
828 srlx $acc14,16,$acc14
830 srlx $acc15,24,$acc15 !
832 xor $acc12,$acc14,$acc14
839 and $acc0,2040,$acc0 !
842 ldx [$tbl+$acc0],$acc0
845 ldx [$tbl+$acc1],$acc1
849 ldx [$tbl+$acc2],$acc2
852 ldx [$tbl+$acc3],$acc3
855 ldx [$tbl+$acc4],$acc4 !
858 ldx [$tbl+$acc5],$acc5
861 ldx [$tbl+$acc6],$acc6
865 ldx [$tbl+$acc7],$acc7
868 ldx [$tbl+$acc8],$acc8
870 and $acc10,2040,$acc10
871 ldx [$tbl+$acc9],$acc9 !
873 and $acc11,2040,$acc11
874 ldx [$tbl+$acc10],$acc10
876 and $acc12,2040,$acc12
877 ldx [$tbl+$acc11],$acc11
880 and $acc13,2040,$acc13
881 ldx [$tbl+$acc12],$acc12
883 and $acc14,2040,$acc14
884 ldx [$tbl+$acc13],$acc13
886 and $acc15,2040,$acc15
887 ldx [$tbl+$acc14],$acc14 !
891 ldx [$tbl+$acc15],$acc15
907 srlx $acc10,16,$acc10
909 srlx $acc11,24,$acc11
913 srlx $acc14,16,$acc14 !
915 srlx $acc15,24,$acc15
919 xor $acc12,$acc14,$acc14
921 and $acc0,2040,$acc0 !
947 srlx $acc10,16,$acc10
949 srlx $acc11,24,$acc11
953 srlx $acc14,16,$acc14 !
955 srlx $acc15,24,$acc15
957 xor $acc12,$acc14,$acc14
966 ldub [$rounds+$acc0],$acc0
969 ldub [$rounds+$acc1],$acc1
972 ldub [$rounds+$acc2],$acc2
973 ldub [$rounds+$acc3],$acc3
976 ldub [$rounds+$acc4],$acc4
980 ldub [$rounds+$acc5],$acc5
983 ldub [$rounds+$acc6],$acc6
984 ldub [$rounds+$acc7],$acc7
988 ldub [$rounds+$acc8],$acc8
990 and $acc10,255,$acc10
991 ldub [$rounds+$acc9],$acc9
994 ldub [$rounds+$acc10],$acc10 !
996 and $acc13,255,$acc13
997 ldub [$rounds+$acc11],$acc11
998 ldub [$rounds+$acc12],$acc12
999 and $acc14,255,$acc14
1000 ldub [$rounds+$acc13],$acc13
1002 ldub [$rounds+$acc14],$acc14 !
1006 ldub [$rounds+$acc15],$acc15
1009 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1019 sll $acc8,24,$acc8 !
1025 sll $acc12,24,$acc12
1027 sll $acc13,16,$acc13 !
1031 xor $acc12,$acc14,$acc14
1038 .type _sparcv9_AES_decrypt,#function
1039 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1046 bnz,pn %xcc,.Lunaligned_dec
1047 save %sp,-$frame,%sp
1056 sub %o7,1b-AES_Td,%o4
1057 call _sparcv9_AES_decrypt
1117 sub %o7,1b-AES_Td,%o4
1118 call _sparcv9_AES_decrypt
1155 .type AES_decrypt,#function
1156 .size AES_decrypt,(.-AES_decrypt)
1159 # fmovs instructions substituting for FP nops were originally added
1160 # to meet specific instruction alignment requirements to maximize ILP.
1161 # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1162 # an undesired effect, so just omit them and sacrifice a fraction of a
1163 # percent in performance...
# The replacement is a constant empty string, so the /e modifier (evaluate
# the replacement as Perl code) is unnecessary and has been dropped. /g
# handles every occurrence; /m lets `$` match at each line end, so each
# fmovs is removed through the end of its own line and the newline stays.
1164 $code =~ s/fmovs.*$//gm;