2 # Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. October 2012. All rights reserved.
14 # ====================================================================
16 ######################################################################
17 # Camellia for SPARC T4.
19 # As with AES, the results below [for aligned data] are virtually
20 # identical to critical path lengths for 3-cycle instruction latency:
22 # 128-bit key 192/256-
23 # CBC encrypt 4.14/4.21(*) 5.46/5.52
24 # (*) numbers after slash are for misaligned data.
27 # As with Intel AES-NI, the question is whether it is possible to
28 # improve performance of parallelizable modes by interleaving round
29 # instructions. In Camellia every instruction depends on the
30 # previous one, which means that there is room for 2 additional ones
31 # in between two dependent ones. Can we expect 3x performance improvement?
32 # At least one can argue that it should be possible to break the 2x
33 # barrier... For some reason not even 2x appears to be possible:
35 # 128-bit key 192/256-
36 # CBC decrypt 2.21/2.74 2.99/3.40
37 # CTR 2.15/2.68(*) 2.93/3.34
38 # (*) numbers after slash are for misaligned data.
41 # This is for 2x interleave. But compared to 1x interleave CBC decrypt
42 # improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
43 # So out-of-order execution logic can take non-interleaved code
44 # to 1.87x, but can't take 2x interleaved code any further. There
45 # surely is some explanation... As a result, 3x interleave was not even
46 # attempted. Instead an effort was made to share specific mode
47 # implementations with the AES module (hence sparcv9_modes.pl).
49 # To anchor to something else, software C implementation processes
50 # one byte in 38 cycles with 128-bit key on same processor.
# Derive the directory holding this script from $0 (everything up to and
# including the last "/" or "\") so that helper modules can be located
# relative to the script rather than the current working directory.
52 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
53 push(@INC,"${dir}","${dir}../../perlasm");
# Presumably supplies the alg_*_implement generators called near the end
# of this script (they are not defined in this file) - TODO confirm.
54 require "sparcv9_modes.pl";
# The last command-line argument, if any, names the output file.  Use the
# three-argument form of open and fail loudly when the file cannot be
# created, matching the checked close at the end of the script.  Without
# an argument the generated code still goes to the inherited STDOUT.
56 $output = pop and (open STDOUT,">",$output or die "can't open $output: $!");
58 $::evp=1; # if $::evp is set to 0, script generates module with
59 # Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
60 # entry points. These are fully compatible with openssl/camellia.h.
62 ######################################################################
63 # single-round subroutines
# Single-block entry points cmll_t4_encrypt and cmll_t4_decrypt.
#
# Six operand names are bound to %o0-%o5, in the order listed in the
# declaration below (input, output, key schedule, loop counter, scratch,
# store mask).
#
# Both routines follow the same outline, as far as the emitted code shows:
#  - test the low three bits of the input pointer (the handling of the
#    unaligned case is only partly visible in this listing);
#  - read the grand-round count, annotated as 3 or 4, from offset 272 of
#    the key schedule;
#  - loop: six camellia_f rounds followed by camellia_fl/camellia_fli,
#    with round-key loads interleaved between the rounds, until the
#    counter reaches zero;
#  - six closing camellia_f rounds, the last two routed through %f4
#    instead of updating %f0 in place;
#  - store the result, going through alignaddrl/faligndata and stda
#    stores marked "partial store" (ASI 0xc0) when the output pointer is
#    not 8-byte aligned.
#
# Decryption additionally shifts the count left by 6, adds it to the key
# pointer and then reads the schedule at negative offsets, stepping the
# counter down by 64 per pass.
66 my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
70 # define __ASSEMBLER__ 1
72 #include "crypto/sparc_arch.h"
76 .globl cmll_t4_encrypt
79 andcc $inp, 7, %g1 ! is input aligned?
98 ld [$key + 272], $rounds ! grandRounds, 3 or 4
100 ldd [$key + 24], %f14
103 ldd [$key + 32], %f16
104 ldd [$key + 40], %f18
107 ldd [$key + 48], %f20
108 ldd [$key + 56], %f22
109 sub $rounds, 1, $rounds
110 ldd [$key + 64], %f24
111 ldd [$key + 72], %f26
115 camellia_f %f12, %f2, %f0, %f2
117 sub $rounds,1,$rounds
118 camellia_f %f14, %f0, %f2, %f0
120 camellia_f %f16, %f2, %f0, %f2
121 ldd [$key + 16], %f16
122 camellia_f %f18, %f0, %f2, %f0
123 ldd [$key + 24], %f18
124 camellia_f %f20, %f2, %f0, %f2
125 ldd [$key + 32], %f20
126 camellia_f %f22, %f0, %f2, %f0
127 ldd [$key + 40], %f22
128 camellia_fl %f24, %f0, %f0
129 ldd [$key + 48], %f24
130 camellia_fli %f26, %f2, %f2
131 ldd [$key + 56], %f26
132 brnz,pt $rounds, .Lenc
135 andcc $out, 7, $tmp ! is output aligned?
136 camellia_f %f12, %f2, %f0, %f2
137 camellia_f %f14, %f0, %f2, %f0
138 camellia_f %f16, %f2, %f0, %f2
139 camellia_f %f18, %f0, %f2, %f0
140 camellia_f %f20, %f2, %f0, %f4
141 camellia_f %f22, %f0, %f4, %f2
152 2: alignaddrl $out, %g0, $out
154 srl $mask, $tmp, $mask
156 faligndata %f0, %f0, %f4
157 faligndata %f0, %f2, %f6
158 faligndata %f2, %f2, %f8
160 stda %f4, [$out + $mask]0xc0 ! partial store
163 orn %g0, $mask, $mask
165 stda %f8, [$out + $mask]0xc0 ! partial store
166 .type cmll_t4_encrypt,#function
167 .size cmll_t4_encrypt,.-cmll_t4_encrypt
169 .globl cmll_t4_decrypt
172 ld [$key + 272], $rounds ! grandRounds, 3 or 4
173 andcc $inp, 7, %g1 ! is input aligned?
176 sll $rounds, 6, $rounds
177 add $rounds, $key, $key
182 ldx [$inp + 16], $inp
195 ldd [$key - 16], %f14
198 ldd [$key - 24], %f16
199 ldd [$key - 32], %f18
202 ldd [$key - 40], %f20
203 ldd [$key - 48], %f22
204 sub $rounds, 64, $rounds
205 ldd [$key - 56], %f24
206 ldd [$key - 64], %f26
210 camellia_f %f12, %f2, %f0, %f2
212 sub $rounds, 64, $rounds
213 camellia_f %f14, %f0, %f2, %f0
214 ldd [$key - 16], %f14
215 camellia_f %f16, %f2, %f0, %f2
216 ldd [$key - 24], %f16
217 camellia_f %f18, %f0, %f2, %f0
218 ldd [$key - 32], %f18
219 camellia_f %f20, %f2, %f0, %f2
220 ldd [$key - 40], %f20
221 camellia_f %f22, %f0, %f2, %f0
222 ldd [$key - 48], %f22
223 camellia_fl %f24, %f0, %f0
224 ldd [$key - 56], %f24
225 camellia_fli %f26, %f2, %f2
226 ldd [$key - 64], %f26
227 brnz,pt $rounds, .Ldec
230 andcc $out, 7, $tmp ! is output aligned?
231 camellia_f %f12, %f2, %f0, %f2
232 camellia_f %f14, %f0, %f2, %f0
233 camellia_f %f16, %f2, %f0, %f2
234 camellia_f %f18, %f0, %f2, %f0
235 camellia_f %f20, %f2, %f0, %f4
236 camellia_f %f22, %f0, %f4, %f2
247 2: alignaddrl $out, %g0, $out
249 srl $mask, $tmp, $mask
251 faligndata %f0, %f0, %f4
252 faligndata %f0, %f2, %f6
253 faligndata %f2, %f2, %f8
255 stda %f4, [$out + $mask]0xc0 ! partial store
258 orn %g0, $mask, $mask
260 stda %f8, [$out + $mask]0xc0 ! partial store
261 .type cmll_t4_decrypt,#function
262 .size cmll_t4_decrypt,.-cmll_t4_decrypt
266 ######################################################################
267 # key setup subroutines
# Assembly text for one rotation step, parameterised by $rot: %o4 and
# %o5 are each shifted left by $rot bits while the bits shifted out of
# the top are captured in %g4 and %g5 respectively, and the bits that
# left %o5 are OR-ed into the bottom of %o4.
# NOTE(review): the concatenation continues past the lines shown here;
# presumably a matching "or" folds %g4 into %o5 to complete a 128-bit
# rotate-left of the %o4:%o5 pair - confirm against the full definition.
273 "srlx %o4, 64-$rot, %g4\n\t".
274 "sllx %o4, $rot, %o4\n\t".
275 "srlx %o5, 64-$rot, %g5\n\t".
276 "sllx %o5, $rot, %o5\n\t".
277 "or %o4, %g5, %o4\n\t".
# cmll_t4_set_key: key schedule generation.
#
# Four names are bound to %o0-%o3 (input key, bit length, output
# schedule, scratch - judging by the variable names); the map yields six
# registers but only the first four are assigned.
#
# Outline, as far as the emitted assembly shows:
#  - the user key is read through alignaddr/faligndata; there are
#    separate paths branching to .L256aligned and .L128aligned, each
#    followed by stores of k[0..3] at offsets 0 and 8 of the schedule;
#  - an address is formed as %o7 + (SIGMA-1b) and camellia_f is applied
#    with %f16-%f22 (later in the routine also with %f24/%f26) to derive
#    further key material;
#  - a branch to .L256key separates the longer-key schedule from the
#    128-bit one;
#  - the subkeys are written with stx/std/st to the offsets annotated
#    k[n, n+1] on each store;
#  - each path ends by storing a word at offset 0x110 (272 decimal), the
#    same offset cmll_t4_encrypt and cmll_t4_decrypt read as the
#    grand-round count.
#
# NOTE(review): the .long table near the end is presumably the data
# labelled SIGMA (the label itself is not among the lines shown here) -
# confirm against the full file.
281 my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
283 .globl cmll_t4_set_key
287 alignaddr $inp, %g0, $inp
296 brz,pt $tmp, .L256aligned
300 faligndata %f0, %f2, %f0
301 faligndata %f2, %f4, %f2
302 faligndata %f4, %f6, %f4
304 faligndata %f6, %f8, %f6
308 brz,a,pt $tmp, .L256aligned
313 faligndata %f0, %f2, %f0
314 faligndata %f2, %f4, %f2
315 faligndata %f4, %f6, %f4
319 std %f0, [$out + 0] ! k[0, 1]
321 std %f2, [$out + 8] ! k[2, 3]
329 brz,pt $tmp, .L128aligned
334 faligndata %f0, %f2, %f0
335 faligndata %f2, %f4, %f2
338 std %f0, [$out + 0] ! k[0, 1]
340 std %f2, [$out + 8] ! k[2, 3]
346 add %o7, SIGMA-1b, %o4
354 camellia_f %f16, %f2, %f0, %f2
355 camellia_f %f18, %f0, %f2, %f0
358 camellia_f %f20, %f2, %f0, %f2
359 camellia_f %f22, %f0, %f2, %f0
361 bge,pn %icc, .L256key
363 std %f0, [$out + 0x10] ! k[ 4, 5]
364 std %f2, [$out + 0x18] ! k[ 6, 7]
369 stx %o4, [$out + 0x30] ! k[12, 13]
370 stx %o5, [$out + 0x38] ! k[14, 15]
372 stx %o4, [$out + 0x40] ! k[16, 17]
373 stx %o5, [$out + 0x48] ! k[18, 19]
375 stx %o4, [$out + 0x60] ! k[24, 25]
377 stx %o4, [$out + 0x70] ! k[28, 29]
378 stx %o5, [$out + 0x78] ! k[30, 31]
380 stx %o4, [$out + 0xa0] ! k[40, 41]
381 stx %o5, [$out + 0xa8] ! k[42, 43]
383 stx %o4, [$out + 0xc0] ! k[48, 49]
384 stx %o5, [$out + 0xc8] ! k[50, 51]
386 movdtox %f28, %o4 ! k[ 0, 1]
387 movdtox %f30, %o5 ! k[ 2, 3]
389 stx %o4, [$out + 0x20] ! k[ 8, 9]
390 stx %o5, [$out + 0x28] ! k[10, 11]
392 stx %o4, [$out + 0x50] ! k[20, 21]
393 stx %o5, [$out + 0x58] ! k[22, 23]
395 stx %o5, [$out + 0x68] ! k[26, 27]
397 stx %o4, [$out + 0x80] ! k[32, 33]
398 stx %o5, [$out + 0x88] ! k[34, 35]
400 stx %o4, [$out + 0x90] ! k[36, 37]
401 stx %o5, [$out + 0x98] ! k[38, 39]
403 stx %o4, [$out + 0xb0] ! k[44, 45]
404 stx %o5, [$out + 0xb8] ! k[46, 47]
407 st $tmp, [$out + 0x110]
416 std %f0, [$out + 0x30] ! k[12, 13]
417 std %f2, [$out + 0x38] ! k[14, 15]
421 camellia_f %f24, %f2, %f0, %f2
422 camellia_f %f26, %f0, %f2, %f0
424 std %f0, [$out + 0x10] ! k[ 4, 5]
425 std %f2, [$out + 0x18] ! k[ 6, 7]
430 stx %o4, [$out + 0x50] ! k[20, 21]
431 stx %o5, [$out + 0x58] ! k[22, 23]
433 stx %o4, [$out + 0xa0] ! k[40, 41]
434 stx %o5, [$out + 0xa8] ! k[42, 43]
436 stx %o4, [$out + 0x100] ! k[64, 65]
437 stx %o5, [$out + 0x108] ! k[66, 67]
439 movdtox %f4, %o4 ! k[ 8, 9]
440 movdtox %f6, %o5 ! k[10, 11]
442 stx %o4, [$out + 0x20] ! k[ 8, 9]
443 stx %o5, [$out + 0x28] ! k[10, 11]
445 stx %o4, [$out + 0x40] ! k[16, 17]
446 stx %o5, [$out + 0x48] ! k[18, 19]
448 stx %o4, [$out + 0x90] ! k[36, 37]
449 stx %o5, [$out + 0x98] ! k[38, 39]
451 stx %o4, [$out + 0xd0] ! k[52, 53]
452 stx %o5, [$out + 0xd8] ! k[54, 55]
453 ldx [$out + 0x30], %o4 ! k[12, 13]
454 ldx [$out + 0x38], %o5 ! k[14, 15]
456 stx %o4, [$out + 0x30] ! k[12, 13]
457 stx %o5, [$out + 0x38] ! k[14, 15]
459 stx %o4, [$out + 0x70] ! k[28, 29]
460 stx %o5, [$out + 0x78] ! k[30, 31]
463 st %o4, [$out + 0xc0] ! k[48]
464 st %g5, [$out + 0xc4] ! k[49]
465 st %o5, [$out + 0xc8] ! k[50]
466 st %g4, [$out + 0xcc] ! k[51]
468 stx %o4, [$out + 0xe0] ! k[56, 57]
469 stx %o5, [$out + 0xe8] ! k[58, 59]
471 movdtox %f28, %o4 ! k[ 0, 1]
472 movdtox %f30, %o5 ! k[ 2, 3]
474 stx %o4, [$out + 0x60] ! k[24, 25]
475 stx %o5, [$out + 0x68] ! k[26, 27]
477 stx %o4, [$out + 0x80] ! k[32, 33]
478 stx %o5, [$out + 0x88] ! k[34, 35]
480 stx %o4, [$out + 0xb0] ! k[44, 45]
481 stx %o5, [$out + 0xb8] ! k[46, 47]
483 stx %o4, [$out + 0xf0] ! k[60, 61]
484 stx %o5, [$out + 0xf8] ! k[62, 63]
487 st $tmp, [$out + 0x110]
490 .type cmll_t4_set_key,#function
491 .size cmll_t4_set_key,.-cmll_t4_set_key
494 .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
495 .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
496 .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
499 .asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
# Helpers that preload the key schedule into floating-point registers,
# presumably for the bulk mode routines instantiated later in this script
# (their callers are not visible here).
#
# The six arguments live in %i0-%i5 under the names declared below; five
# scratch names are bound to %l1-%l5 (the map yields %l1-%l7, the last
# two values are dropped).
504 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
505 my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
509 _cmll128_load_enckey:
# Encryption order: 8-byte schedule entries 2..25 are loaded into
# %f16, %f18, ... %f62.
513 for ($i=2; $i<26;$i++) { # load key schedule
515 ldd [$key + `8*$i`], %f`12+2*$i`
521 .type _cmll128_load_enckey,#function
522 .size _cmll128_load_enckey,.-_cmll128_load_enckey
523 _cmll256_load_enckey=_cmll128_load_enckey
526 _cmll256_load_deckey:
527 ldd [$key + 64], %f62
528 ldd [$key + 72], %f60
531 _cmll128_load_deckey:
# Decryption order: 8-byte schedule entries 2..23 are loaded into
# %f58, %f56, ... %f16, i.e. the same registers filled from the other end.
536 for ($i=2; $i<24;$i++) { # load key schedule
538 ldd [$key + `8*$i`], %f`62-2*$i`
542 ldx [$key + 192], %g4
544 ldx [$key + 200], %g5
545 .type _cmll256_load_deckey,#function
546 .size _cmll256_load_deckey,.-_cmll256_load_deckey
# Body of _cmll128_encrypt_1x (also published as _cmll128_decrypt_1x by
# the alias at the end): one block held in %f0/%f2 passes through three
# groups of rounds keyed from %f16 upwards, 16 register numbers per group.
# Every group starts with the four camellia_f rounds emitted here.
551 for ($i=0; $i<3; $i++) {
553 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
554 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
555 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
556 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
# Only the first two groups append two more rounds plus the
# camellia_fl/camellia_fli pair; the third group is completed by the two
# rounds keyed with %f56/%f58 that follow the loop.
558 $code.=<<___ if ($i<2);
559 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
560 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
561 camellia_fl %f`16+16*$i+12`, %f0, %f0
562 camellia_fli %f`16+16*$i+14`, %f2, %f2
566 camellia_f %f56, %f2, %f0, %f4
567 camellia_f %f58, %f0, %f4, %f2
571 .type _cmll128_encrypt_1x,#function
572 .size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
573 _cmll128_decrypt_1x=_cmll128_encrypt_1x
# Body of _cmll128_encrypt_2x (also published as _cmll128_decrypt_2x by
# the alias at the end): same round sequence as the single-block variant,
# but every round is issued twice back to back, once for the block in
# %f0/%f2 and once for the block in %f4/%f6, using the same round key.
578 for ($i=0; $i<3; $i++) {
580 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
581 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
582 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
583 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
584 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
585 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
586 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
587 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
# As in the 1x variant, only the first two groups get the extra two
# rounds and the camellia_fl/camellia_fli pair; the closing rounds keyed
# with %f56/%f58 come after the loop and go through %f8/%f10.
589 $code.=<<___ if ($i<2);
590 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
591 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
592 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
593 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
594 camellia_fl %f`16+16*$i+12`, %f0, %f0
595 camellia_fl %f`16+16*$i+12`, %f4, %f4
596 camellia_fli %f`16+16*$i+14`, %f2, %f2
597 camellia_fli %f`16+16*$i+14`, %f6, %f6
601 camellia_f %f56, %f2, %f0, %f8
602 camellia_f %f56, %f6, %f4, %f10
603 camellia_f %f58, %f0, %f8, %f2
604 camellia_f %f58, %f4, %f10, %f6
610 .type _cmll128_encrypt_2x,#function
611 .size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
612 _cmll128_decrypt_2x=_cmll128_encrypt_2x
616 camellia_f %f16, %f2, %f0, %f2
617 camellia_f %f18, %f0, %f2, %f0
618 ldd [$key + 208], %f16
619 ldd [$key + 216], %f18
620 camellia_f %f20, %f2, %f0, %f2
621 camellia_f %f22, %f0, %f2, %f0
622 ldd [$key + 224], %f20
623 ldd [$key + 232], %f22
624 camellia_f %f24, %f2, %f0, %f2
625 camellia_f %f26, %f0, %f2, %f0
626 ldd [$key + 240], %f24
627 ldd [$key + 248], %f26
628 camellia_fl %f28, %f0, %f0
629 camellia_fli %f30, %f2, %f2
630 ldd [$key + 256], %f28
631 ldd [$key + 264], %f30
# _cmll256_encrypt_1x has four groups of rounds but reuses %f16-%f30 for
# two of them: the first group (emitted before this loop) consumes
# %f16-%f30 and refills them from key offsets 208..264 as it goes; the
# two groups generated by this loop use %f32-%f46 and %f48-%f62; the
# closing rounds after the loop use the refilled %f16-%f26 and reload
# %f16-%f30 from key offsets 16..72, presumably so the registers are
# ready for the next block.
633 for ($i=1; $i<3; $i++) {
635 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
636 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
637 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
638 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
639 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
640 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
641 camellia_fl %f`16+16*$i+12`, %f0, %f0
642 camellia_fli %f`16+16*$i+14`, %f2, %f2
646 camellia_f %f16, %f2, %f0, %f2
647 camellia_f %f18, %f0, %f2, %f0
648 ldd [$key + 16], %f16
649 ldd [$key + 24], %f18
650 camellia_f %f20, %f2, %f0, %f2
651 camellia_f %f22, %f0, %f2, %f0
652 ldd [$key + 32], %f20
653 ldd [$key + 40], %f22
654 camellia_f %f24, %f2, %f0, %f4
655 camellia_f %f26, %f0, %f4, %f2
656 ldd [$key + 48], %f24
657 ldd [$key + 56], %f26
660 ldd [$key + 64], %f28
662 ldd [$key + 72], %f30
663 .type _cmll256_encrypt_1x,#function
664 .size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
668 camellia_f %f16, %f2, %f0, %f2
669 camellia_f %f16, %f6, %f4, %f6
670 camellia_f %f18, %f0, %f2, %f0
671 camellia_f %f18, %f4, %f6, %f4
672 ldd [$key + 208], %f16
673 ldd [$key + 216], %f18
674 camellia_f %f20, %f2, %f0, %f2
675 camellia_f %f20, %f6, %f4, %f6
676 camellia_f %f22, %f0, %f2, %f0
677 camellia_f %f22, %f4, %f6, %f4
678 ldd [$key + 224], %f20
679 ldd [$key + 232], %f22
680 camellia_f %f24, %f2, %f0, %f2
681 camellia_f %f24, %f6, %f4, %f6
682 camellia_f %f26, %f0, %f2, %f0
683 camellia_f %f26, %f4, %f6, %f4
684 ldd [$key + 240], %f24
685 ldd [$key + 248], %f26
686 camellia_fl %f28, %f0, %f0
687 camellia_fl %f28, %f4, %f4
688 camellia_fli %f30, %f2, %f2
689 camellia_fli %f30, %f6, %f6
690 ldd [$key + 256], %f28
691 ldd [$key + 264], %f30
# _cmll256_encrypt_2x: two-block form of the 256-bit routine.  Each round
# is issued for the block in %f0/%f2 and then for the block in %f4/%f6
# with the same key register.  Key handling mirrors the 1x routine: the
# first group (before this loop) consumes %f16-%f30 and refills them from
# key offsets 208..264, this loop covers %f32-%f62, and the closing
# rounds after it use the refilled registers, go through %f8/%f10 for the
# last two rounds, and reload %f16-%f30 from key offsets 16..72.
693 for ($i=1; $i<3; $i++) {
695 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
696 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
697 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
698 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
699 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
700 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
701 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
702 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
703 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
704 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
705 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
706 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
707 camellia_fl %f`16+16*$i+12`, %f0, %f0
708 camellia_fl %f`16+16*$i+12`, %f4, %f4
709 camellia_fli %f`16+16*$i+14`, %f2, %f2
710 camellia_fli %f`16+16*$i+14`, %f6, %f6
714 camellia_f %f16, %f2, %f0, %f2
715 camellia_f %f16, %f6, %f4, %f6
716 camellia_f %f18, %f0, %f2, %f0
717 camellia_f %f18, %f4, %f6, %f4
718 ldd [$key + 16], %f16
719 ldd [$key + 24], %f18
720 camellia_f %f20, %f2, %f0, %f2
721 camellia_f %f20, %f6, %f4, %f6
722 camellia_f %f22, %f0, %f2, %f0
723 camellia_f %f22, %f4, %f6, %f4
724 ldd [$key + 32], %f20
725 ldd [$key + 40], %f22
726 camellia_f %f24, %f2, %f0, %f8
727 camellia_f %f24, %f6, %f4, %f10
728 camellia_f %f26, %f0, %f8, %f2
729 camellia_f %f26, %f4, %f10, %f6
730 ldd [$key + 48], %f24
731 ldd [$key + 56], %f26
736 ldd [$key + 64], %f28
738 ldd [$key + 72], %f30
739 .type _cmll256_encrypt_2x,#function
740 .size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
744 camellia_f %f16, %f2, %f0, %f2
745 camellia_f %f18, %f0, %f2, %f0
747 ldd [$key - 16], %f18
748 camellia_f %f20, %f2, %f0, %f2
749 camellia_f %f22, %f0, %f2, %f0
750 ldd [$key - 24], %f20
751 ldd [$key - 32], %f22
752 camellia_f %f24, %f2, %f0, %f2
753 camellia_f %f26, %f0, %f2, %f0
754 ldd [$key - 40], %f24
755 ldd [$key - 48], %f26
756 camellia_fl %f28, %f0, %f0
757 camellia_fli %f30, %f2, %f2
758 ldd [$key - 56], %f28
759 ldd [$key - 64], %f30
# _cmll256_decrypt_1x: same round structure as the 256-bit encrypt
# routine, with the low key registers refilled from the other end of the
# schedule.  The first group (before this loop) refills its registers
# from negative key offsets (-16 down to -64 in the lines shown); this
# loop covers %f32-%f62; the closing rounds after it reload %f16-%f30
# from key offsets 184 down to 128, i.e. in descending address order.
761 for ($i=1; $i<3; $i++) {
763 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
764 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
765 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
766 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
767 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
768 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
769 camellia_fl %f`16+16*$i+12`, %f0, %f0
770 camellia_fli %f`16+16*$i+14`, %f2, %f2
774 camellia_f %f16, %f2, %f0, %f2
775 camellia_f %f18, %f0, %f2, %f0
776 ldd [$key + 184], %f16
777 ldd [$key + 176], %f18
778 camellia_f %f20, %f2, %f0, %f2
779 camellia_f %f22, %f0, %f2, %f0
780 ldd [$key + 168], %f20
781 ldd [$key + 160], %f22
782 camellia_f %f24, %f2, %f0, %f4
783 camellia_f %f26, %f0, %f4, %f2
784 ldd [$key + 152], %f24
785 ldd [$key + 144], %f26
788 ldd [$key + 136], %f28
790 ldd [$key + 128], %f30
791 .type _cmll256_decrypt_1x,#function
792 .size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
796 camellia_f %f16, %f2, %f0, %f2
797 camellia_f %f16, %f6, %f4, %f6
798 camellia_f %f18, %f0, %f2, %f0
799 camellia_f %f18, %f4, %f6, %f4
801 ldd [$key - 16], %f18
802 camellia_f %f20, %f2, %f0, %f2
803 camellia_f %f20, %f6, %f4, %f6
804 camellia_f %f22, %f0, %f2, %f0
805 camellia_f %f22, %f4, %f6, %f4
806 ldd [$key - 24], %f20
807 ldd [$key - 32], %f22
808 camellia_f %f24, %f2, %f0, %f2
809 camellia_f %f24, %f6, %f4, %f6
810 camellia_f %f26, %f0, %f2, %f0
811 camellia_f %f26, %f4, %f6, %f4
812 ldd [$key - 40], %f24
813 ldd [$key - 48], %f26
814 camellia_fl %f28, %f0, %f0
815 camellia_fl %f28, %f4, %f4
816 camellia_fli %f30, %f2, %f2
817 camellia_fli %f30, %f6, %f6
818 ldd [$key - 56], %f28
819 ldd [$key - 64], %f30
# _cmll256_decrypt_2x: two-block form of the 256-bit decrypt routine.
# Each round is issued for the block in %f0/%f2 and then for the block in
# %f4/%f6 with the same key register.  As in the 1x decrypt routine the
# first group refills its key registers from negative key offsets, this
# loop covers %f32-%f62, and the closing rounds after it (the last two
# going through %f8/%f10) reload %f16-%f30 from key offsets 184 down to
# 128.
821 for ($i=1; $i<3; $i++) {
823 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
824 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
825 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
826 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
827 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
828 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
829 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
830 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
831 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
832 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
833 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
834 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
835 camellia_fl %f`16+16*$i+12`, %f0, %f0
836 camellia_fl %f`16+16*$i+12`, %f4, %f4
837 camellia_fli %f`16+16*$i+14`, %f2, %f2
838 camellia_fli %f`16+16*$i+14`, %f6, %f6
842 camellia_f %f16, %f2, %f0, %f2
843 camellia_f %f16, %f6, %f4, %f6
844 camellia_f %f18, %f0, %f2, %f0
845 camellia_f %f18, %f4, %f6, %f4
846 ldd [$key + 184], %f16
847 ldd [$key + 176], %f18
848 camellia_f %f20, %f2, %f0, %f2
849 camellia_f %f20, %f6, %f4, %f6
850 camellia_f %f22, %f0, %f2, %f0
851 camellia_f %f22, %f4, %f6, %f4
852 ldd [$key + 168], %f20
853 ldd [$key + 160], %f22
854 camellia_f %f24, %f2, %f0, %f8
855 camellia_f %f24, %f6, %f4, %f10
856 camellia_f %f26, %f0, %f8, %f2
857 camellia_f %f26, %f4, %f10, %f6
858 ldd [$key + 152], %f24
859 ldd [$key + 144], %f26
864 ldd [$key + 136], %f28
866 ldd [$key + 128], %f30
867 .type _cmll256_decrypt_2x,#function
868 .size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
# Instantiate the bulk mode routines for Camellia under the "cmll" prefix,
# once for 128 and once for 256 per mode.  The generators are not defined
# in this file; presumably they come from sparcv9_modes.pl - TODO confirm.
# CBC encryption:
871 &alg_cbc_encrypt_implement("cmll",128);
872 &alg_cbc_encrypt_implement("cmll",256);
# CBC decryption:
874 &alg_cbc_decrypt_implement("cmll",128);
875 &alg_cbc_decrypt_implement("cmll",256);
# 32-bit counter mode:
878 &alg_ctr32_implement("cmll",128);
879 &alg_ctr32_implement("cmll",256);
885 .global Camellia_encrypt
886 Camellia_encrypt=cmll_t4_encrypt
887 .global Camellia_decrypt
888 Camellia_decrypt=cmll_t4_decrypt
889 .global Camellia_set_key
892 andcc %o2, 7, %g0 ! double-check alignment
899 andncc %o1, 0x1c0, %g0
909 .type Camellia_set_key,#function
910 .size Camellia_set_key,.-Camellia_set_key
# Compatibility entry points.  The assembly emitted just before this
# point aliases Camellia_encrypt and Camellia_decrypt to the cmll_t4_*
# routines and defines Camellia_set_key.  What follows is
# Camellia_cbc_encrypt, with its six arguments in %o0-%o5: a zero
# direction flag takes the .Lcbc_decrypt branch, and each direction then
# branches to either the cmll128_t4_cbc_* or the cmll256_t4_cbc_* routine.
913 my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
916 .globl Camellia_cbc_encrypt
918 Camellia_cbc_encrypt:
921 brz $enc, .Lcbc_decrypt
924 be,pt %icc, cmll128_t4_cbc_encrypt
926 ba cmll256_t4_cbc_encrypt
930 be,pt %icc, cmll128_t4_cbc_decrypt
932 ba cmll256_t4_cbc_decrypt
934 .type Camellia_cbc_encrypt,#function
935 .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
# Close explicitly and check the result: buffered write errors only
# surface here, and a truncated assembly file must not pass silently.
941 close STDOUT or die "error closing STDOUT: $!";