3 # ====================================================================
4 # Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. October 2012. All rights reserved.
7 # ====================================================================
9 ######################################################################
10 # Camellia for SPARC T4.
12 # As with AES below results [for aligned data] are virtually identical
13 # to critical path lengths for 3-cycle instruction latency:
15 # 128-bit key 192/256-
16 # CBC encrypt 4.14/4.21(*) 5.46/5.52
17 # (*) numbers after slash are for
20 # As with Intel AES-NI, the question is whether it's possible to improve
21 # performance of parallelizable modes by interleaving round
22 # instructions. In Camellia every instruction is dependent on the
23 # previous one, which means that there is room for 2 additional ones
24 # in between two dependent ones. Can we expect 3x performance improvement?
25 # At least one can argue that it should be possible to break 2x
26 # barrier... For some reason not even 2x appears to be possible:
28 # 128-bit key 192/256-
29 # CBC decrypt 2.21/2.74 2.99/3.40
30 # CTR 2.15/2.68(*) 2.93/3.34
31 # (*) numbers after slash are for
34 # This is for 2x interleave. But compared to 1x interleave CBC decrypt
35 # improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
36 # So that out-of-order execution logic can take non-interleaved code
37 # to 1.87x, but can't take 2x interleaved one any further. There
38 # surely is some explanation... As a result, 3x interleave was not even
39 # attempted. Instead an effort was made to share specific modes
40 # implementations with AES module (therefore sparcv9_modes.pl).
42 # To anchor to something else, software C implementation processes
43 # one byte in 38 cycles with 128-bit key on same processor.
# Work out where this script lives: the regex captures everything in $0 up to
# and including the last '/' or '\' separator, and that prefix becomes $dir.
# NOTE(review): $dir is assigned from $1 whether or not the match succeeded;
# confirm that running the script without a directory component is acceptable.
45 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Let require search the script's own directory and ../../perlasm beneath it.
46 push(@INC,"${dir}","${dir}../../perlasm");
# Shared helper file; presumably the source of the alg_*_implement generators
# called further down (they are not defined in the visible text) - confirm.
47 require "sparcv9_modes.pl";
51 $::evp=1; # if $evp is set to 0, script generates module with
52 # Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
53 # entry points. These are fully compatible with openssl/camellia.h.
55 ######################################################################
56 # single-round subroutines
# Symbolic register names: the six variables are bound, in order, to the
# strings "%o0".."%o5" and are interpolated into the assembly text that
# follows (e.g. "andcc $inp, 7, %g1").
59 my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
64 .globl cmll_t4_encrypt
67 andcc $inp, 7, %g1 ! is input aligned?
86 ld [$key + 272], $rounds ! grandRounds, 3 or 4
97 sub $rounds, 1, $rounds
103 camellia_f %f12, %f2, %f0, %f2
105 sub $rounds,1,$rounds
106 camellia_f %f14, %f0, %f2, %f0
108 camellia_f %f16, %f2, %f0, %f2
109 ldd [$key + 16], %f16
110 camellia_f %f18, %f0, %f2, %f0
111 ldd [$key + 24], %f18
112 camellia_f %f20, %f2, %f0, %f2
113 ldd [$key + 32], %f20
114 camellia_f %f22, %f0, %f2, %f0
115 ldd [$key + 40], %f22
116 camellia_fl %f24, %f0, %f0
117 ldd [$key + 48], %f24
118 camellia_fli %f26, %f2, %f2
119 ldd [$key + 56], %f26
120 brnz,pt $rounds, .Lenc
123 andcc $out, 7, $tmp ! is output aligned?
124 camellia_f %f12, %f2, %f0, %f2
125 camellia_f %f14, %f0, %f2, %f0
126 camellia_f %f16, %f2, %f0, %f2
127 camellia_f %f18, %f0, %f2, %f0
128 camellia_f %f20, %f2, %f0, %f4
129 camellia_f %f22, %f0, %f4, %f2
140 2: alignaddrl $out, %g0, $out
142 srl $mask, $tmp, $mask
144 faligndata %f0, %f0, %f4
145 faligndata %f0, %f2, %f6
146 faligndata %f2, %f2, %f8
148 stda %f4, [$out + $mask]0xc0 ! partial store
151 orn %g0, $mask, $mask
153 stda %f8, [$out + $mask]0xc0 ! partial store
154 .type cmll_t4_encrypt,#function
155 .size cmll_t4_encrypt,.-cmll_t4_encrypt
157 .globl cmll_t4_decrypt
160 ld [$key + 272], $rounds ! grandRounds, 3 or 4
161 andcc $inp, 7, %g1 ! is input aligned?
164 sll $rounds, 6, $rounds
165 add $rounds, $key, $key
170 ldx [$inp + 16], $inp
183 ldd [$key - 16], %f14
186 ldd [$key - 24], %f16
187 ldd [$key - 32], %f18
190 ldd [$key - 40], %f20
191 ldd [$key - 48], %f22
192 sub $rounds, 64, $rounds
193 ldd [$key - 56], %f24
194 ldd [$key - 64], %f26
198 camellia_f %f12, %f2, %f0, %f2
200 sub $rounds, 64, $rounds
201 camellia_f %f14, %f0, %f2, %f0
202 ldd [$key - 16], %f14
203 camellia_f %f16, %f2, %f0, %f2
204 ldd [$key - 24], %f16
205 camellia_f %f18, %f0, %f2, %f0
206 ldd [$key - 32], %f18
207 camellia_f %f20, %f2, %f0, %f2
208 ldd [$key - 40], %f20
209 camellia_f %f22, %f0, %f2, %f0
210 ldd [$key - 48], %f22
211 camellia_fl %f24, %f0, %f0
212 ldd [$key - 56], %f24
213 camellia_fli %f26, %f2, %f2
214 ldd [$key - 64], %f26
215 brnz,pt $rounds, .Ldec
218 andcc $out, 7, $tmp ! is output aligned?
219 camellia_f %f12, %f2, %f0, %f2
220 camellia_f %f14, %f0, %f2, %f0
221 camellia_f %f16, %f2, %f0, %f2
222 camellia_f %f18, %f0, %f2, %f0
223 camellia_f %f20, %f2, %f0, %f4
224 camellia_f %f22, %f0, %f4, %f2
235 2: alignaddrl $out, %g0, $out
237 srl $mask, $tmp, $mask
239 faligndata %f0, %f0, %f4
240 faligndata %f0, %f2, %f6
241 faligndata %f2, %f2, %f8
243 stda %f4, [$out + $mask]0xc0 ! partial store
246 orn %g0, $mask, $mask
248 stda %f8, [$out + $mask]0xc0 ! partial store
249 .type cmll_t4_decrypt,#function
250 .size cmll_t4_decrypt,.-cmll_t4_decrypt
254 ######################################################################
255 # key setup subroutines
261 "srlx %o4, 64-$rot, %g4\n\t".
262 "sllx %o4, $rot, %o4\n\t".
263 "srlx %o5, 64-$rot, %g5\n\t".
264 "sllx %o5, $rot, %o5\n\t".
265 "or %o4, %g5, %o4\n\t".
# Binds $inp, $bits, $out and $tmp to "%o0".."%o3". map() yields six names
# ("%o0".."%o5") but only the first four are assigned; the surplus is simply
# discarded by the list assignment. %o4 and %o5 are referred to by their
# literal names in the key-setup text (see the stx stores further down).
269 my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
271 .globl cmll_t4_set_key
275 alignaddr $inp, %g0, $inp
284 brz,pt $tmp, .L256aligned
288 faligndata %f0, %f2, %f0
289 faligndata %f2, %f4, %f2
290 faligndata %f4, %f6, %f4
292 faligndata %f6, %f8, %f6
296 brz,a,pt $tmp, .L256aligned
301 faligndata %f0, %f2, %f0
302 faligndata %f2, %f4, %f2
303 faligndata %f4, %f6, %f4
307 std %f0, [$out + 0] ! k[0, 1]
309 std %f2, [$out + 8] ! k[2, 3]
317 brz,pt $tmp, .L128aligned
322 faligndata %f0, %f2, %f0
323 faligndata %f2, %f4, %f2
326 std %f0, [$out + 0] ! k[0, 1]
328 std %f2, [$out + 8] ! k[2, 3]
334 add %o7, SIGMA-1b, %o4
342 camellia_f %f16, %f2, %f0, %f2
343 camellia_f %f18, %f0, %f2, %f0
346 camellia_f %f20, %f2, %f0, %f2
347 camellia_f %f22, %f0, %f2, %f0
349 bge,pn %icc, .L256key
351 std %f0, [$out + 0x10] ! k[ 4, 5]
352 std %f2, [$out + 0x18] ! k[ 6, 7]
357 stx %o4, [$out + 0x30] ! k[12, 13]
358 stx %o5, [$out + 0x38] ! k[14, 15]
360 stx %o4, [$out + 0x40] ! k[16, 17]
361 stx %o5, [$out + 0x48] ! k[18, 19]
363 stx %o4, [$out + 0x60] ! k[24, 25]
365 stx %o4, [$out + 0x70] ! k[28, 29]
366 stx %o5, [$out + 0x78] ! k[30, 31]
368 stx %o4, [$out + 0xa0] ! k[40, 41]
369 stx %o5, [$out + 0xa8] ! k[42, 43]
371 stx %o4, [$out + 0xc0] ! k[48, 49]
372 stx %o5, [$out + 0xc8] ! k[50, 51]
374 movdtox %f28, %o4 ! k[ 0, 1]
375 movdtox %f30, %o5 ! k[ 2, 3]
377 stx %o4, [$out + 0x20] ! k[ 8, 9]
378 stx %o5, [$out + 0x28] ! k[10, 11]
380 stx %o4, [$out + 0x50] ! k[20, 21]
381 stx %o5, [$out + 0x58] ! k[22, 23]
383 stx %o5, [$out + 0x68] ! k[26, 27]
385 stx %o4, [$out + 0x80] ! k[32, 33]
386 stx %o5, [$out + 0x88] ! k[34, 35]
388 stx %o4, [$out + 0x90] ! k[36, 37]
389 stx %o5, [$out + 0x98] ! k[38, 39]
391 stx %o4, [$out + 0xb0] ! k[44, 45]
392 stx %o5, [$out + 0xb8] ! k[46, 47]
395 st $tmp, [$out + 0x110]
404 std %f0, [$out + 0x30] ! k[12, 13]
405 std %f2, [$out + 0x38] ! k[14, 15]
409 camellia_f %f24, %f2, %f0, %f2
410 camellia_f %f26, %f0, %f2, %f0
412 std %f0, [$out + 0x10] ! k[ 4, 5]
413 std %f2, [$out + 0x18] ! k[ 6, 7]
418 stx %o4, [$out + 0x50] ! k[20, 21]
419 stx %o5, [$out + 0x58] ! k[22, 23]
421 stx %o4, [$out + 0xa0] ! k[40, 41]
422 stx %o5, [$out + 0xa8] ! k[42, 43]
424 stx %o4, [$out + 0x100] ! k[64, 65]
425 stx %o5, [$out + 0x108] ! k[66, 67]
427 movdtox %f4, %o4 ! k[ 8, 9]
428 movdtox %f6, %o5 ! k[10, 11]
430 stx %o4, [$out + 0x20] ! k[ 8, 9]
431 stx %o5, [$out + 0x28] ! k[10, 11]
433 stx %o4, [$out + 0x40] ! k[16, 17]
434 stx %o5, [$out + 0x48] ! k[18, 19]
436 stx %o4, [$out + 0x90] ! k[36, 37]
437 stx %o5, [$out + 0x98] ! k[38, 39]
439 stx %o4, [$out + 0xd0] ! k[52, 53]
440 stx %o5, [$out + 0xd8] ! k[54, 55]
441 ldx [$out + 0x30], %o4 ! k[12, 13]
442 ldx [$out + 0x38], %o5 ! k[14, 15]
444 stx %o4, [$out + 0x30] ! k[12, 13]
445 stx %o5, [$out + 0x38] ! k[14, 15]
447 stx %o4, [$out + 0x70] ! k[28, 29]
448 stx %o5, [$out + 0x78] ! k[30, 31]
451 st %o4, [$out + 0xc0] ! k[48]
452 st %g5, [$out + 0xc4] ! k[49]
453 st %o5, [$out + 0xc8] ! k[50]
454 st %g4, [$out + 0xcc] ! k[51]
456 stx %o4, [$out + 0xe0] ! k[56, 57]
457 stx %o5, [$out + 0xe8] ! k[58, 59]
459 movdtox %f28, %o4 ! k[ 0, 1]
460 movdtox %f30, %o5 ! k[ 2, 3]
462 stx %o4, [$out + 0x60] ! k[24, 25]
463 stx %o5, [$out + 0x68] ! k[26, 27]
465 stx %o4, [$out + 0x80] ! k[32, 33]
466 stx %o5, [$out + 0x88] ! k[34, 35]
468 stx %o4, [$out + 0xb0] ! k[44, 45]
469 stx %o5, [$out + 0xb8] ! k[46, 47]
471 stx %o4, [$out + 0xf0] ! k[60, 61]
472 stx %o5, [$out + 0xf8] ! k[62, 63]
475 st $tmp, [$out + 0x110]
478 .type cmll_t4_set_key,#function
479 .size cmll_t4_set_key,.-cmll_t4_set_key
482 .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
483 .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
484 .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
487 .asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
# Symbolic names for "%i0".."%i5", in order; $key is the base register used by
# the key-schedule loads and the 256-bit round helpers below.
492 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
# Five names bound to "%l1".."%l5". map() also produces "%l6" and "%l7", which
# are left unassigned by this statement.
493 my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
497 _cmll128_load_enckey:
# Encrypt-side key-schedule load: for i = 2..25 emit one doubleword load of
# [$key + 8*i] into %f(12+2*i), so offsets 16..200 land in %f16..%f62 in
# ascending order. The backquoted arithmetic is written into the text as-is
# here; presumably a later pass (not visible in this excerpt) evaluates it.
501 for ($i=2; $i<26;$i++) { # load key schedule
503 ldd [$key + `8*$i`], %f`12+2*$i`
509 .type _cmll128_load_enckey,#function
510 .size _cmll128_load_enckey,.-_cmll128_load_enckey
511 _cmll256_load_enckey=_cmll128_load_enckey
514 _cmll256_load_deckey:
515 ldd [$key + 64], %f62
516 ldd [$key + 72], %f60
519 _cmll128_load_deckey:
# Decrypt-side key-schedule load: for i = 2..23 emit one doubleword load of
# [$key + 8*i] into %f(62-2*i), so offsets 16..184 land in %f58 down to %f16,
# i.e. the register order is the reverse of the encrypt-side loader.
524 for ($i=2; $i<24;$i++) { # load key schedule
526 ldd [$key + `8*$i`], %f`62-2*$i`
530 ldx [$key + 192], %g4
532 ldx [$key + 200], %g5
533 .type _cmll256_load_deckey,#function
534 .size _cmll256_load_deckey,.-_cmll256_load_deckey
# Emits the round sequence for passes i = 0, 1, 2. Each pass addresses the key
# registers starting at %f(16+16*i); every pass emits four camellia_f rounds
# that alternate between updating %f2 and %f0.
539 for ($i=0; $i<3; $i++) {
541 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
542 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
543 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
544 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
# Only on the first two passes (i < 2): two further camellia_f rounds and then
# the camellia_fl / camellia_fli pair applied to %f0 and %f2 respectively.
546 $code.=<<___ if ($i<2);
547 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
548 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
549 camellia_fl %f`16+16*$i+12`, %f0, %f0
550 camellia_fli %f`16+16*$i+14`, %f2, %f2
554 camellia_f %f56, %f2, %f0, %f4
555 camellia_f %f58, %f0, %f4, %f2
559 .type _cmll128_encrypt_1x,#function
560 .size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
561 _cmll128_decrypt_1x=_cmll128_encrypt_1x
# Two-block variant of the round emitter, again for passes i = 0, 1, 2. Every
# round instruction is emitted twice with the same key register: once for the
# %f0/%f2 pair and once for the %f4/%f6 pair, interleaved line by line.
566 for ($i=0; $i<3; $i++) {
568 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
569 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
570 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
571 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
572 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
573 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
574 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
575 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
# Only on the first two passes (i < 2): two further camellia_f rounds per
# block, then camellia_fl on %f0/%f4 and camellia_fli on %f2/%f6.
577 $code.=<<___ if ($i<2);
578 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
579 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
580 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
581 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
582 camellia_fl %f`16+16*$i+12`, %f0, %f0
583 camellia_fl %f`16+16*$i+12`, %f4, %f4
584 camellia_fli %f`16+16*$i+14`, %f2, %f2
585 camellia_fli %f`16+16*$i+14`, %f6, %f6
589 camellia_f %f56, %f2, %f0, %f8
590 camellia_f %f56, %f6, %f4, %f10
591 camellia_f %f58, %f0, %f8, %f2
592 camellia_f %f58, %f4, %f10, %f6
598 .type _cmll128_encrypt_2x,#function
599 .size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
600 _cmll128_decrypt_2x=_cmll128_encrypt_2x
604 camellia_f %f16, %f2, %f0, %f2
605 camellia_f %f18, %f0, %f2, %f0
606 ldd [$key + 208], %f16
607 ldd [$key + 216], %f18
608 camellia_f %f20, %f2, %f0, %f2
609 camellia_f %f22, %f0, %f2, %f0
610 ldd [$key + 224], %f20
611 ldd [$key + 232], %f22
612 camellia_f %f24, %f2, %f0, %f2
613 camellia_f %f26, %f0, %f2, %f0
614 ldd [$key + 240], %f24
615 ldd [$key + 248], %f26
616 camellia_fl %f28, %f0, %f0
617 camellia_fli %f30, %f2, %f2
618 ldd [$key + 256], %f28
619 ldd [$key + 264], %f30
621 for ($i=1; $i<3; $i++) {
623 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
624 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
625 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
626 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
627 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
628 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
629 camellia_fl %f`16+16*$i+12`, %f0, %f0
630 camellia_fli %f`16+16*$i+14`, %f2, %f2
634 camellia_f %f16, %f2, %f0, %f2
635 camellia_f %f18, %f0, %f2, %f0
636 ldd [$key + 16], %f16
637 ldd [$key + 24], %f18
638 camellia_f %f20, %f2, %f0, %f2
639 camellia_f %f22, %f0, %f2, %f0
640 ldd [$key + 32], %f20
641 ldd [$key + 40], %f22
642 camellia_f %f24, %f2, %f0, %f4
643 camellia_f %f26, %f0, %f4, %f2
644 ldd [$key + 48], %f24
645 ldd [$key + 56], %f26
648 ldd [$key + 64], %f28
650 ldd [$key + 72], %f30
651 .type _cmll256_encrypt_1x,#function
652 .size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
656 camellia_f %f16, %f2, %f0, %f2
657 camellia_f %f16, %f6, %f4, %f6
658 camellia_f %f18, %f0, %f2, %f0
659 camellia_f %f18, %f4, %f6, %f4
660 ldd [$key + 208], %f16
661 ldd [$key + 216], %f18
662 camellia_f %f20, %f2, %f0, %f2
663 camellia_f %f20, %f6, %f4, %f6
664 camellia_f %f22, %f0, %f2, %f0
665 camellia_f %f22, %f4, %f6, %f4
666 ldd [$key + 224], %f20
667 ldd [$key + 232], %f22
668 camellia_f %f24, %f2, %f0, %f2
669 camellia_f %f24, %f6, %f4, %f6
670 camellia_f %f26, %f0, %f2, %f0
671 camellia_f %f26, %f4, %f6, %f4
672 ldd [$key + 240], %f24
673 ldd [$key + 248], %f26
674 camellia_fl %f28, %f0, %f0
675 camellia_fl %f28, %f4, %f4
676 camellia_fli %f30, %f2, %f2
677 camellia_fli %f30, %f6, %f6
678 ldd [$key + 256], %f28
679 ldd [$key + 264], %f30
681 for ($i=1; $i<3; $i++) {
683 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
684 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
685 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
686 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
687 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
688 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
689 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
690 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
691 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
692 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
693 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
694 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
695 camellia_fl %f`16+16*$i+12`, %f0, %f0
696 camellia_fl %f`16+16*$i+12`, %f4, %f4
697 camellia_fli %f`16+16*$i+14`, %f2, %f2
698 camellia_fli %f`16+16*$i+14`, %f6, %f6
702 camellia_f %f16, %f2, %f0, %f2
703 camellia_f %f16, %f6, %f4, %f6
704 camellia_f %f18, %f0, %f2, %f0
705 camellia_f %f18, %f4, %f6, %f4
706 ldd [$key + 16], %f16
707 ldd [$key + 24], %f18
708 camellia_f %f20, %f2, %f0, %f2
709 camellia_f %f20, %f6, %f4, %f6
710 camellia_f %f22, %f0, %f2, %f0
711 camellia_f %f22, %f4, %f6, %f4
712 ldd [$key + 32], %f20
713 ldd [$key + 40], %f22
714 camellia_f %f24, %f2, %f0, %f8
715 camellia_f %f24, %f6, %f4, %f10
716 camellia_f %f26, %f0, %f8, %f2
717 camellia_f %f26, %f4, %f10, %f6
718 ldd [$key + 48], %f24
719 ldd [$key + 56], %f26
724 ldd [$key + 64], %f28
726 ldd [$key + 72], %f30
727 .type _cmll256_encrypt_2x,#function
728 .size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
732 camellia_f %f16, %f2, %f0, %f2
733 camellia_f %f18, %f0, %f2, %f0
735 ldd [$key - 16], %f18
736 camellia_f %f20, %f2, %f0, %f2
737 camellia_f %f22, %f0, %f2, %f0
738 ldd [$key - 24], %f20
739 ldd [$key - 32], %f22
740 camellia_f %f24, %f2, %f0, %f2
741 camellia_f %f26, %f0, %f2, %f0
742 ldd [$key - 40], %f24
743 ldd [$key - 48], %f26
744 camellia_fl %f28, %f0, %f0
745 camellia_fli %f30, %f2, %f2
746 ldd [$key - 56], %f28
747 ldd [$key - 64], %f30
749 for ($i=1; $i<3; $i++) {
751 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
752 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
753 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
754 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
755 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
756 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
757 camellia_fl %f`16+16*$i+12`, %f0, %f0
758 camellia_fli %f`16+16*$i+14`, %f2, %f2
762 camellia_f %f16, %f2, %f0, %f2
763 camellia_f %f18, %f0, %f2, %f0
764 ldd [$key + 184], %f16
765 ldd [$key + 176], %f18
766 camellia_f %f20, %f2, %f0, %f2
767 camellia_f %f22, %f0, %f2, %f0
768 ldd [$key + 168], %f20
769 ldd [$key + 160], %f22
770 camellia_f %f24, %f2, %f0, %f4
771 camellia_f %f26, %f0, %f4, %f2
772 ldd [$key + 152], %f24
773 ldd [$key + 144], %f26
776 ldd [$key + 136], %f28
778 ldd [$key + 128], %f30
779 .type _cmll256_decrypt_1x,#function
780 .size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
784 camellia_f %f16, %f2, %f0, %f2
785 camellia_f %f16, %f6, %f4, %f6
786 camellia_f %f18, %f0, %f2, %f0
787 camellia_f %f18, %f4, %f6, %f4
789 ldd [$key - 16], %f18
790 camellia_f %f20, %f2, %f0, %f2
791 camellia_f %f20, %f6, %f4, %f6
792 camellia_f %f22, %f0, %f2, %f0
793 camellia_f %f22, %f4, %f6, %f4
794 ldd [$key - 24], %f20
795 ldd [$key - 32], %f22
796 camellia_f %f24, %f2, %f0, %f2
797 camellia_f %f24, %f6, %f4, %f6
798 camellia_f %f26, %f0, %f2, %f0
799 camellia_f %f26, %f4, %f6, %f4
800 ldd [$key - 40], %f24
801 ldd [$key - 48], %f26
802 camellia_fl %f28, %f0, %f0
803 camellia_fl %f28, %f4, %f4
804 camellia_fli %f30, %f2, %f2
805 camellia_fli %f30, %f6, %f6
806 ldd [$key - 56], %f28
807 ldd [$key - 64], %f30
809 for ($i=1; $i<3; $i++) {
811 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
812 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
813 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
814 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
815 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
816 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
817 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
818 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
819 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
820 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
821 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
822 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
823 camellia_fl %f`16+16*$i+12`, %f0, %f0
824 camellia_fl %f`16+16*$i+12`, %f4, %f4
825 camellia_fli %f`16+16*$i+14`, %f2, %f2
826 camellia_fli %f`16+16*$i+14`, %f6, %f6
830 camellia_f %f16, %f2, %f0, %f2
831 camellia_f %f16, %f6, %f4, %f6
832 camellia_f %f18, %f0, %f2, %f0
833 camellia_f %f18, %f4, %f6, %f4
834 ldd [$key + 184], %f16
835 ldd [$key + 176], %f18
836 camellia_f %f20, %f2, %f0, %f2
837 camellia_f %f20, %f6, %f4, %f6
838 camellia_f %f22, %f0, %f2, %f0
839 camellia_f %f22, %f4, %f6, %f4
840 ldd [$key + 168], %f20
841 ldd [$key + 160], %f22
842 camellia_f %f24, %f2, %f0, %f8
843 camellia_f %f24, %f6, %f4, %f10
844 camellia_f %f26, %f0, %f8, %f2
845 camellia_f %f26, %f4, %f10, %f6
846 ldd [$key + 152], %f24
847 ldd [$key + 144], %f26
852 ldd [$key + 136], %f28
854 ldd [$key + 128], %f30
855 .type _cmll256_decrypt_2x,#function
856 .size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
# Instantiate the mode routines for the "cmll" prefix, once with 128 and once
# with 256 as the second argument (presumably the key-size variant - confirm).
# The alg_*_implement generators are not defined in the visible text;
# presumably they come from the required sparcv9_modes.pl - confirm.
# CBC encrypt:
859 &alg_cbc_encrypt_implement("cmll",128);
860 &alg_cbc_encrypt_implement("cmll",256);
# CBC decrypt:
862 &alg_cbc_decrypt_implement("cmll",128);
863 &alg_cbc_decrypt_implement("cmll",256);
# CTR32 (going by the generator's name):
866 &alg_ctr32_implement("cmll",128);
867 &alg_ctr32_implement("cmll",256);
873 .global Camellia_encrypt
874 Camellia_encrypt=cmll_t4_encrypt
875 .global Camellia_decrypt
876 Camellia_decrypt=cmll_t4_decrypt
877 .global Camellia_set_key
880 andcc %o2, 7, %g0 ! double-check alignment
887 andncc %o1, 0x1c0, %g0
897 .type Camellia_set_key,#function
898 .size Camellia_set_key,.-Camellia_set_key
# Symbolic names for "%o0".."%o5", in order, used by the Camellia_cbc_encrypt
# dispatcher text below (e.g. "brz $enc, .Lcbc_decrypt" tests %o5).
901 my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
904 .globl Camellia_cbc_encrypt
906 Camellia_cbc_encrypt:
909 brz $enc, .Lcbc_decrypt
912 be,pt %icc, cmll128_t4_cbc_encrypt
914 ba cmll256_t4_cbc_encrypt
918 be,pt %icc, cmll128_t4_cbc_decrypt
920 ba cmll256_t4_cbc_decrypt
922 .type Camellia_cbc_encrypt,#function
923 .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt