3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. March 2013. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # As with other hardware-assisted ciphers CBC encrypt results [for
13 # aligned data] are virtually identical to critical path lengths:
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47
19 # (*) numbers after slash are for
21 # (**) this is result for largest
22 # block size, unlike all other
23 # cases smaller blocks results
# Derive the directory this script lives in: $1 captures everything up to
# and including the last '/' or '\' in $0. That directory and the perlasm
# directory two levels above it are appended to @INC so that the shared
# SPARCv9 helper module can be loaded with a bare file name.
26 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
27 push(@INC,"${dir}","${dir}../../perlasm");
28 require "sparcv9_modes.pl";
# Redirect STDOUT into the file named by $output ($output is assigned
# outside the lines visible here).
# NOTE(review): this is a two-argument open whose result is not checked,
# so a failure to open the file goes unnoticed at this point.
31 open STDOUT,">$output";
34 #include "sparc_arch.h"
37 .register %g2,#scratch
38 .register %g3,#scratch
# des_t4_key_expand
#
# Register roles inside this block: $inp is %o0 (address the key is read
# from) and $out is %o1 (address the expanded schedule is written to).
#
# Input handling: alignaddr is applied to $inp, two consecutive doublewords
# are loaded from [$inp + 0x00] and [$inp + 0x08], and faligndata merges
# them into %f0, so a key that does not sit on an 8-byte boundary is still
# picked up as one 64-bit value. Local label "1:" marks the first
# des_kexpand; presumably a branch that is not visible here jumps to it
# when no realignment is needed -- confirm against the full file.
#
# Expansion: a chain of des_kexpand instructions derives each further
# doubleword from an earlier one. The first step rewrites %f0 in place with
# selector 0; every later step uses selector 1, 2 or 3 and targets the next
# even register up to %f30.
#
# Output: sixteen std instructions write %f0, %f2, ..., %f30 to
# [$out + 0x00] ... [$out + 0x78]; the doubleword held in register %fN
# lands at byte offset 4*N, for 0x80 bytes in total. Stores are interleaved
# with the des_kexpand instructions rather than issued in address order.
44 { my ($inp,$out)=("%o0","%o1");
48 .globl des_t4_key_expand
49 .type des_t4_key_expand,#function
52 alignaddr $inp, %g0, $inp
54 ldd [$inp + 0x00], %f0
55 ldd [$inp + 0x08], %f2
56 faligndata %f0, %f2, %f0
57 1: des_kexpand %f0, 0, %f0
58 des_kexpand %f0, 1, %f2
59 std %f0, [$out + 0x00]
60 des_kexpand %f2, 3, %f6
61 std %f2, [$out + 0x08]
62 des_kexpand %f2, 2, %f4
63 des_kexpand %f6, 3, %f10
64 std %f6, [$out + 0x18]
65 des_kexpand %f6, 2, %f8
66 std %f4, [$out + 0x10]
67 des_kexpand %f10, 3, %f14
68 std %f10, [$out + 0x28]
69 des_kexpand %f10, 2, %f12
70 std %f8, [$out + 0x20]
71 des_kexpand %f14, 1, %f16
72 std %f14, [$out + 0x38]
73 des_kexpand %f16, 3, %f20
74 std %f12, [$out + 0x30]
75 des_kexpand %f16, 2, %f18
76 std %f16, [$out + 0x40]
77 des_kexpand %f20, 3, %f24
78 std %f20, [$out + 0x50]
79 des_kexpand %f20, 2, %f22
80 std %f18, [$out + 0x48]
81 des_kexpand %f24, 3, %f28
82 std %f24, [$out + 0x60]
83 des_kexpand %f24, 2, %f26
84 std %f22, [$out + 0x58]
85 des_kexpand %f28, 1, %f30
86 std %f28, [$out + 0x70]
87 std %f26, [$out + 0x68]
89 std %f30, [$out + 0x78]
90 .size des_t4_key_expand,.-des_t4_key_expand
# des_t4_cbc_encrypt / des_t4_cbc_decrypt (single-key DES in CBC mode)
#
# Register roles for the routines generated in this block:
#   $inp  = %o0, $out = %o1, $len = %o2, $key = %o3, $ivec = %o4
#   $ileft = %g1, $iright = %g2, $omask = %g3 (alignment scratch)
#
# Common shape of both routines:
#   * An early "be,pn" leaves through .Lcbc_abort; the comparison feeding
#     it is not visible here (presumably a test for zero length -- confirm).
#   * $ileft is multiplied by 8 (sll by 3) and $iright is set to -$ileft.
#     Inside the loop %g4 is shifted left by $ileft and %g5 right by
#     $iright, which is how a misaligned input doubleword is pieced
#     together from two loads.
#   * The 0x80-byte key schedule is loaded once, before the loop, into
#     %f4..%f34. Each block is then run through eight des_round
#     instructions, each consuming two consecutive schedule registers and
#     operating on %f0 in place.
#   * The loop repeats while $len is non-zero (brnz,pt $len, ...).
#   * Label "2:" starts the unaligned-output path: faligndata rotates the
#     result, and two partial stores (stda ... 0xc0) are issued with $omask
#     and then its complement (orn %g0, $omask, $omask). That path
#     re-enters the loop at "loop label + 4", i.e. one instruction past the
#     label; the ldxa at "2:" presumably stands in for the skipped
#     instruction -- confirm against the full file.
#
# Differences between the two directions:
#   * Encrypt loads the schedule in ascending order (0x00 -> %f4 ...
#     0x78 -> %f34), XORs %f2 into %f0 *before* the rounds, and stores %f0
#     back to $ivec at exit.
#   * Decrypt loads the same schedule in descending order (0x78 -> %f4 ...
#     0x00 -> %f34), XORs %f2 into %f0 *after* the rounds, and stores %f2
#     back to $ivec at exit.
93 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
94 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
97 .globl des_t4_cbc_encrypt
101 be,pn $::size_t_cc, .Lcbc_abort
103 ld [$ivec + 0], %f0 ! load ivec
108 sll $ileft, 3, $ileft
111 prefetch [$inp + 63], 20
112 sub %g0, $ileft, $iright
114 alignaddrl $out, %g0, $out
115 srl $omask, %g4, $omask
120 ldd [$key + 0x00], %f4 ! load key schedule
121 ldd [$key + 0x08], %f6
122 ldd [$key + 0x10], %f8
123 ldd [$key + 0x18], %f10
124 ldd [$key + 0x20], %f12
125 ldd [$key + 0x28], %f14
126 ldd [$key + 0x30], %f16
127 ldd [$key + 0x38], %f18
128 ldd [$key + 0x40], %f20
129 ldd [$key + 0x48], %f22
130 ldd [$key + 0x50], %f24
131 ldd [$key + 0x58], %f26
132 ldd [$key + 0x60], %f28
133 ldd [$key + 0x68], %f30
134 ldd [$key + 0x70], %f32
135 ldd [$key + 0x78], %f34
143 sllx %g4, $ileft, %g4
144 srlx %g5, $iright, %g5
148 prefetch [$inp + 8+63], 20
150 fxor %f2, %f0, %f0 ! ^= ivec
151 prefetch [$out + 63], 22
154 des_round %f4, %f6, %f0, %f0
155 des_round %f8, %f10, %f0, %f0
156 des_round %f12, %f14, %f0, %f0
157 des_round %f16, %f18, %f0, %f0
158 des_round %f20, %f22, %f0, %f0
159 des_round %f24, %f26, %f0, %f0
160 des_round %f28, %f30, %f0, %f0
161 des_round %f32, %f34, %f0, %f0
168 brnz,pt $len, .Ldes_cbc_enc_loop
171 st %f0, [$ivec + 0] ! write out ivec
179 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
180 ! and ~4x deterioration
182 faligndata %f0, %f0, %f2 ! handle unaligned output
184 stda %f2, [$out + $omask]0xc0 ! partial store
186 orn %g0, $omask, $omask
187 stda %f2, [$out + $omask]0xc0 ! partial store
189 brnz,pt $len, .Ldes_cbc_enc_loop+4
190 orn %g0, $omask, $omask
192 st %f0, [$ivec + 0] ! write out ivec
195 .type des_t4_cbc_encrypt,#function
196 .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
198 .globl des_t4_cbc_decrypt
202 be,pn $::size_t_cc, .Lcbc_abort
204 ld [$ivec + 0], %f2 ! load ivec
209 sll $ileft, 3, $ileft
212 prefetch [$inp + 63], 20
213 sub %g0, $ileft, $iright
215 alignaddrl $out, %g0, $out
216 srl $omask, %g4, $omask
221 ldd [$key + 0x78], %f4 ! load key schedule
222 ldd [$key + 0x70], %f6
223 ldd [$key + 0x68], %f8
224 ldd [$key + 0x60], %f10
225 ldd [$key + 0x58], %f12
226 ldd [$key + 0x50], %f14
227 ldd [$key + 0x48], %f16
228 ldd [$key + 0x40], %f18
229 ldd [$key + 0x38], %f20
230 ldd [$key + 0x30], %f22
231 ldd [$key + 0x28], %f24
232 ldd [$key + 0x20], %f26
233 ldd [$key + 0x18], %f28
234 ldd [$key + 0x10], %f30
235 ldd [$key + 0x08], %f32
236 ldd [$key + 0x00], %f34
244 sllx %g4, $ileft, %g4
245 srlx %g5, $iright, %g5
249 prefetch [$inp + 8+63], 20
251 prefetch [$out + 63], 22
254 des_round %f4, %f6, %f0, %f0
255 des_round %f8, %f10, %f0, %f0
256 des_round %f12, %f14, %f0, %f0
257 des_round %f16, %f18, %f0, %f0
258 des_round %f20, %f22, %f0, %f0
259 des_round %f24, %f26, %f0, %f0
260 des_round %f28, %f30, %f0, %f0
261 des_round %f32, %f34, %f0, %f0
264 fxor %f2, %f0, %f0 ! ^= ivec
271 brnz,pt $len, .Ldes_cbc_dec_loop
274 st %f2, [$ivec + 0] ! write out ivec
279 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
280 ! and ~4x deterioration
282 faligndata %f0, %f0, %f0 ! handle unaligned output
284 stda %f0, [$out + $omask]0xc0 ! partial store
286 orn %g0, $omask, $omask
287 stda %f0, [$out + $omask]0xc0 ! partial store
289 brnz,pt $len, .Ldes_cbc_dec_loop+4
290 orn %g0, $omask, $omask
292 st %f2, [$ivec + 0] ! write out ivec
295 .type des_t4_cbc_decrypt,#function
296 .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
299 # One might wonder why there are back-to-back des_iip/des_ip pairs
300 # between the EDE passes. Indeed, aren't they inverses of each other?
301 # They almost are: the net outcome of the pair is that the two 32-bit
302 # words in the target register are swapped. Consider the des_iip/des_ip
303 # pair as a way to perform the due swap; it's actually the fastest way here.
#
# des_t4_ede3_cbc_encrypt / des_t4_ede3_cbc_decrypt (three-pass DES, CBC)
#
# Key schedule layout as addressed by the code below: three 0x80-byte
# sub-schedules at $key + 0x000, $key + 0x080 and $key + 0x100 (0x180
# bytes in total). Each pass is eight des_round instructions on %f0.
#
#   Encrypt: pass 1 uses 0x000..0x078 in ascending order (preloaded into
#            %f4..%f34 before the loop); pass 2 uses 0x0f8 down to 0x080
#            (written as 0x100-0x08 ... 0x100-0x80); pass 3 uses
#            0x100..0x178 in ascending order.
#   Decrypt: pass 1 uses 0x178 down to 0x100 (preloaded into %f4..%f34);
#            pass 2 uses 0x080..0x0f8 in ascending order; pass 3 uses
#            0x078 down to 0x000 (written as 0x80-0x08 ... 0x80-0x80).
#
# Only the first pass's schedule stays resident in registers. The keys for
# passes 2 and 3 are re-loaded on every loop iteration into %f36..%f62,
# with the ldd instructions interleaved between the des_round instructions
# of the preceding pass (presumably to overlap load latency with the
# rounds). %f36..%f42 are reused within a pass, so the exact instruction
# order matters.
#
# IV handling, input/output alignment, the .Lcbc_abort exit and the
# "loop label + 4" re-entry from label "2:" follow the same pattern in both
# routines: encrypt XORs %f2 into %f0 before the first pass and stores %f0
# to $ivec at exit; decrypt XORs %f2 into %f0 after the last pass and
# stores %f2 to $ivec at exit.
306 .globl des_t4_ede3_cbc_encrypt
308 des_t4_ede3_cbc_encrypt:
310 be,pn $::size_t_cc, .Lcbc_abort
312 ld [$ivec + 0], %f0 ! load ivec
317 sll $ileft, 3, $ileft
320 prefetch [$inp + 63], 20
321 sub %g0, $ileft, $iright
323 alignaddrl $out, %g0, $out
324 srl $omask, %g4, $omask
329 ldd [$key + 0x00], %f4 ! load key schedule
330 ldd [$key + 0x08], %f6
331 ldd [$key + 0x10], %f8
332 ldd [$key + 0x18], %f10
333 ldd [$key + 0x20], %f12
334 ldd [$key + 0x28], %f14
335 ldd [$key + 0x30], %f16
336 ldd [$key + 0x38], %f18
337 ldd [$key + 0x40], %f20
338 ldd [$key + 0x48], %f22
339 ldd [$key + 0x50], %f24
340 ldd [$key + 0x58], %f26
341 ldd [$key + 0x60], %f28
342 ldd [$key + 0x68], %f30
343 ldd [$key + 0x70], %f32
344 ldd [$key + 0x78], %f34
346 .Ldes_ede3_cbc_enc_loop:
352 sllx %g4, $ileft, %g4
353 srlx %g5, $iright, %g5
357 prefetch [$inp + 8+63], 20
359 fxor %f2, %f0, %f0 ! ^= ivec
360 prefetch [$out + 63], 22
363 des_round %f4, %f6, %f0, %f0
364 des_round %f8, %f10, %f0, %f0
365 des_round %f12, %f14, %f0, %f0
366 des_round %f16, %f18, %f0, %f0
367 ldd [$key + 0x100-0x08], %f36
368 ldd [$key + 0x100-0x10], %f38
369 des_round %f20, %f22, %f0, %f0
370 ldd [$key + 0x100-0x18], %f40
371 ldd [$key + 0x100-0x20], %f42
372 des_round %f24, %f26, %f0, %f0
373 ldd [$key + 0x100-0x28], %f44
374 ldd [$key + 0x100-0x30], %f46
375 des_round %f28, %f30, %f0, %f0
376 ldd [$key + 0x100-0x38], %f48
377 ldd [$key + 0x100-0x40], %f50
378 des_round %f32, %f34, %f0, %f0
379 ldd [$key + 0x100-0x48], %f52
380 ldd [$key + 0x100-0x50], %f54
383 ldd [$key + 0x100-0x58], %f56
384 ldd [$key + 0x100-0x60], %f58
386 ldd [$key + 0x100-0x68], %f60
387 ldd [$key + 0x100-0x70], %f62
388 des_round %f36, %f38, %f0, %f0
389 ldd [$key + 0x100-0x78], %f36
390 ldd [$key + 0x100-0x80], %f38
391 des_round %f40, %f42, %f0, %f0
392 des_round %f44, %f46, %f0, %f0
393 des_round %f48, %f50, %f0, %f0
394 ldd [$key + 0x100+0x00], %f40
395 ldd [$key + 0x100+0x08], %f42
396 des_round %f52, %f54, %f0, %f0
397 ldd [$key + 0x100+0x10], %f44
398 ldd [$key + 0x100+0x18], %f46
399 des_round %f56, %f58, %f0, %f0
400 ldd [$key + 0x100+0x20], %f48
401 ldd [$key + 0x100+0x28], %f50
402 des_round %f60, %f62, %f0, %f0
403 ldd [$key + 0x100+0x30], %f52
404 ldd [$key + 0x100+0x38], %f54
405 des_round %f36, %f38, %f0, %f0
406 ldd [$key + 0x100+0x40], %f56
407 ldd [$key + 0x100+0x48], %f58
410 ldd [$key + 0x100+0x50], %f60
411 ldd [$key + 0x100+0x58], %f62
413 ldd [$key + 0x100+0x60], %f36
414 ldd [$key + 0x100+0x68], %f38
415 des_round %f40, %f42, %f0, %f0
416 ldd [$key + 0x100+0x70], %f40
417 ldd [$key + 0x100+0x78], %f42
418 des_round %f44, %f46, %f0, %f0
419 des_round %f48, %f50, %f0, %f0
420 des_round %f52, %f54, %f0, %f0
421 des_round %f56, %f58, %f0, %f0
422 des_round %f60, %f62, %f0, %f0
423 des_round %f36, %f38, %f0, %f0
424 des_round %f40, %f42, %f0, %f0
431 brnz,pt $len, .Ldes_ede3_cbc_enc_loop
434 st %f0, [$ivec + 0] ! write out ivec
439 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
440 ! and ~2x deterioration
442 faligndata %f0, %f0, %f2 ! handle unaligned output
444 stda %f2, [$out + $omask]0xc0 ! partial store
446 orn %g0, $omask, $omask
447 stda %f2, [$out + $omask]0xc0 ! partial store
449 brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
450 orn %g0, $omask, $omask
452 st %f0, [$ivec + 0] ! write out ivec
455 .type des_t4_ede3_cbc_encrypt,#function
456 .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
458 .globl des_t4_ede3_cbc_decrypt
460 des_t4_ede3_cbc_decrypt:
462 be,pn $::size_t_cc, .Lcbc_abort
464 ld [$ivec + 0], %f2 ! load ivec
469 sll $ileft, 3, $ileft
472 prefetch [$inp + 63], 20
473 sub %g0, $ileft, $iright
475 alignaddrl $out, %g0, $out
476 srl $omask, %g4, $omask
481 ldd [$key + 0x100+0x78], %f4 ! load key schedule
482 ldd [$key + 0x100+0x70], %f6
483 ldd [$key + 0x100+0x68], %f8
484 ldd [$key + 0x100+0x60], %f10
485 ldd [$key + 0x100+0x58], %f12
486 ldd [$key + 0x100+0x50], %f14
487 ldd [$key + 0x100+0x48], %f16
488 ldd [$key + 0x100+0x40], %f18
489 ldd [$key + 0x100+0x38], %f20
490 ldd [$key + 0x100+0x30], %f22
491 ldd [$key + 0x100+0x28], %f24
492 ldd [$key + 0x100+0x20], %f26
493 ldd [$key + 0x100+0x18], %f28
494 ldd [$key + 0x100+0x10], %f30
495 ldd [$key + 0x100+0x08], %f32
496 ldd [$key + 0x100+0x00], %f34
498 .Ldes_ede3_cbc_dec_loop:
504 sllx %g4, $ileft, %g4
505 srlx %g5, $iright, %g5
509 prefetch [$inp + 8+63], 20
511 prefetch [$out + 63], 22
514 des_round %f4, %f6, %f0, %f0
515 des_round %f8, %f10, %f0, %f0
516 des_round %f12, %f14, %f0, %f0
517 des_round %f16, %f18, %f0, %f0
518 ldd [$key + 0x80+0x00], %f36
519 ldd [$key + 0x80+0x08], %f38
520 des_round %f20, %f22, %f0, %f0
521 ldd [$key + 0x80+0x10], %f40
522 ldd [$key + 0x80+0x18], %f42
523 des_round %f24, %f26, %f0, %f0
524 ldd [$key + 0x80+0x20], %f44
525 ldd [$key + 0x80+0x28], %f46
526 des_round %f28, %f30, %f0, %f0
527 ldd [$key + 0x80+0x30], %f48
528 ldd [$key + 0x80+0x38], %f50
529 des_round %f32, %f34, %f0, %f0
530 ldd [$key + 0x80+0x40], %f52
531 ldd [$key + 0x80+0x48], %f54
534 ldd [$key + 0x80+0x50], %f56
535 ldd [$key + 0x80+0x58], %f58
537 ldd [$key + 0x80+0x60], %f60
538 ldd [$key + 0x80+0x68], %f62
539 des_round %f36, %f38, %f0, %f0
540 ldd [$key + 0x80+0x70], %f36
541 ldd [$key + 0x80+0x78], %f38
542 des_round %f40, %f42, %f0, %f0
543 des_round %f44, %f46, %f0, %f0
544 des_round %f48, %f50, %f0, %f0
545 ldd [$key + 0x80-0x08], %f40
546 ldd [$key + 0x80-0x10], %f42
547 des_round %f52, %f54, %f0, %f0
548 ldd [$key + 0x80-0x18], %f44
549 ldd [$key + 0x80-0x20], %f46
550 des_round %f56, %f58, %f0, %f0
551 ldd [$key + 0x80-0x28], %f48
552 ldd [$key + 0x80-0x30], %f50
553 des_round %f60, %f62, %f0, %f0
554 ldd [$key + 0x80-0x38], %f52
555 ldd [$key + 0x80-0x40], %f54
556 des_round %f36, %f38, %f0, %f0
557 ldd [$key + 0x80-0x48], %f56
558 ldd [$key + 0x80-0x50], %f58
561 ldd [$key + 0x80-0x58], %f60
562 ldd [$key + 0x80-0x60], %f62
564 ldd [$key + 0x80-0x68], %f36
565 ldd [$key + 0x80-0x70], %f38
566 des_round %f40, %f42, %f0, %f0
567 ldd [$key + 0x80-0x78], %f40
568 ldd [$key + 0x80-0x80], %f42
569 des_round %f44, %f46, %f0, %f0
570 des_round %f48, %f50, %f0, %f0
571 des_round %f52, %f54, %f0, %f0
572 des_round %f56, %f58, %f0, %f0
573 des_round %f60, %f62, %f0, %f0
574 des_round %f36, %f38, %f0, %f0
575 des_round %f40, %f42, %f0, %f0
578 fxor %f2, %f0, %f0 ! ^= ivec
585 brnz,pt $len, .Ldes_ede3_cbc_dec_loop
588 st %f2, [$ivec + 0] ! write out ivec
593 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
594 ! and ~3x deterioration
596 faligndata %f0, %f0, %f0 ! handle unaligned output
598 stda %f0, [$out + $omask]0xc0 ! partial store
600 orn %g0, $omask, $omask
601 stda %f0, [$out + $omask]0xc0 ! partial store
603 brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
604 orn %g0, $omask, $omask
606 st %f2, [$ivec + 0] ! write out ivec
609 .type des_t4_ede3_cbc_decrypt,#function
610 .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
614 .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"