2 ! des_enc.S (generated from des_enc.m4)
4 ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
6 ! Version 1.0. 32-bit version.
10 ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
15 ! Assembler version: Copyright Svend Olaf Mikkelsen.
17 ! Original C code: Copyright Eric A. Young.
19 ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
21 ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
23 ! This version can be redistributed.
25 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
27 ! Global registers 1 to 5 are used. This is the same as done by the
28 ! cc compiler. The UltraSPARC load/store little endian feature is used.
30 ! Instruction grouping often refers to one CPU cycle.
32 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
34 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
36 ! Performance improvement according to './apps/openssl speed des'
39 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
40 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
42 ! 50% faster than cc-5.2 -xarch=v9 -xO5
43 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
46 .ident "des_enc.m4 2.0"
48 #if defined(__SUNPRO_C) && defined(__sparcv9)
49 # define ABI64 /* They've said -xarch=v9 at command line */
50 #elif defined(__GNUC__) && defined(__arch64__)
51 # define ABI64 /* They've said -m64 at command line */
55 .register %g2,#scratch
56 .register %g3,#scratch
63 # ifndef OPENSSL_SYSNAME_ULTRASPARC
64 # define OPENSSL_SYSNAME_ULTRASPARC
121 ! The logic used in initial and final permutations is the same as in
122 ! the C code. The permutations are done with a clever shift, xor, and
125 ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
126 ! sbox 6 to local6, and address sbox 8 to out3.
128 ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
130 ! Loads key first round from address in parameter 5 to out0, out1.
132 ! After the original LibDES initial permutation, the resulting left
133 ! is in the variable initially used for right and vice versa. The macro
134 ! implements the possibility to keep the halves in the original registers.
138 ! parameter 3 result left (modify in first round)
139 ! parameter 4 result right (use in first round)
140 ! parameter 5 key address
141 ! parameter 6 1/2 for include encryption/decryption
142 ! parameter 7 1 for move in1 to in3
143 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
144 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
149 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
151 ld [out2+256], local1 ! mask 0x0f0f0f0f
154 xor local4, $1, local4
155 ifelse($7,1,{mov in1, in3},{nop})
157 ld [out2+260], local2 ! mask 0x0000ffff
158 and local4, local1, local4
159 ifelse($8,1,{mov in3, in4},{})
160 ifelse($8,2,{mov in4, in3},{})
162 ld [out2+280], out4 ! loop counter
163 sll local4, 4, local1
166 ld [out2+264], local3 ! mask 0x33333333
170 ifelse($9,1,{LDPTR KS3, in4},{})
171 xor local4, $2, local4
172 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
174 ifelse($9,1,{LDPTR KS2, in3},{})
175 and local4, local2, local4
176 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
178 sll local4, 16, local1
184 sethi %hi(16711680), local5 ! 0x00ff0000, upper part of mask 0x00ff00ff
185 xor local4, $1, local4
187 and local4, local3, local4
188 or local5, 255, local5 ! mask 0x00ff00ff
190 sll local4, 2, local2
196 xor local4, $2, local4
197 add global1, 768, global4 ! address sbox 4
199 and local4, local5, local4
200 add global1, 1024, global5 ! address sbox 5
202 ld [out2+272], local7 ! mask 0x55555555
203 sll local4, 8, local1
209 ld [$5], out0 ! key 7531
210 xor local4, $1, local4
211 add global1, 256, global2 ! address sbox 2
213 ld [$5+4], out1 ! key 8642
214 and local4, local7, local4
215 add global1, 512, global3 ! address sbox 3
217 sll local4, 1, local1
224 add global1, 1280, local6 ! address sbox 6
227 add global1, 1792, out3 ! address sbox 8
230 or local4, local3, $4
232 or local2, local1, $3
236 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
237 or local2, local1, $3
241 and local1, 252, local1
247 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
248 or local2, local1, $3
252 and local1, 252, local1
260 ! The logic used in the DES rounds is the same as in the C code,
261 ! except that calculations for sbox 1 and sbox 5 begin before
262 ! the previous round is finished.
264 ! In each round one half (work) is modified based on key and the
267 ! In this version we do two rounds in a loop repeated 7 times
268 ! and two rounds separately.
270 ! One half has the bits for the sboxes in the following positions:
272 ! 777777xx555555xx333333xx111111xx
274 ! 88xx666666xx444444xx222222xx8888
276 ! The bits for each sbox are xor-ed with the key bits for that box.
277 ! The above xx bits are cleared, and the result used for lookup in
278 ! the sbox table. Each sbox entry contains the 4 output bits permuted
279 ! into 32 bits according to the P permutation.
281 ! In the description of DES, left and right are switched after
282 ! each round, except after last round. In this code the original
283 ! left and right are kept in the same register in all rounds, meaning
284 ! that after the 16 rounds the result for right is in the register
285 ! originally used for left.
287 ! parameter 1 first work (left in first round)
288 ! parameter 2 first use (right in first round)
289 ! parameter 3 enc/dec 1/-1
290 ! parameter 4 loop label
291 ! parameter 5 key address register
292 ! parameter 6 optional address for key next encryption/decryption
293 ! parameter 7 not empty for include retl
295 ! also compares in2 to 8
297 define(rounds_macro, {
300 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
304 ld [out2+284], local5 ! 0x0000FC00
306 and local1, 252, local1 ! 1
311 ! local6 is address sbox 6
312 ! out3 is address sbox 8
313 ! out4 is loop counter
315 ld [global1+local1], local1 ! 1
316 xor $2, out1, out1 ! 8642
317 xor $2, out0, out0 ! 7531
318 fmovs %f0, %f0 ! fmovs of f0 onto itself, used for alignment
320 srl out1, 4, local0 ! rotate 4 right
321 and out0, local5, local3 ! 3
324 ld [$5+$3*8], local7 ! key 7531 next round
325 srl local3, 8, local3 ! 3
326 and local0, 252, local2 ! 2
329 ld [global3+local3],local3 ! 3
330 sll out1, 28, out1 ! rotate
331 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
333 ld [global2+local2], local2 ! 2
334 srl out0, 24, local1 ! 7
335 or out1, local0, out1 ! rotate
337 ldub [out2+local1], local1 ! 7 (and 0xFC)
338 srl out1, 24, local0 ! 8
339 and out1, local5, local4 ! 4
341 ldub [out2+local0], local0 ! 8 (and 0xFC)
342 srl local4, 8, local4 ! 4
343 xor $1, local2, $1 ! 2 finished local2 now sbox 6
345 ld [global4+local4],local4 ! 4
346 srl out1, 16, local2 ! 6
347 xor $1, local3, $1 ! 3 finished local3 now sbox 5
349 ld [out3+local0],local0 ! 8
350 and local2, 252, local2 ! 6
351 add global1, 1536, local5 ! address sbox 7
353 ld [local6+local2], local2 ! 6
354 srl out0, 16, local3 ! 5
355 xor $1, local4, $1 ! 4 finished
357 ld [local5+local1],local1 ! 7
358 and local3, 252, local3 ! 5
359 xor $1, local0, $1 ! 8 finished
361 ld [global5+local3],local3 ! 5
362 xor $1, local2, $1 ! 6 finished
365 ld [$5+$3*8+4], out0 ! key 8642 next round
366 xor $1, local7, local2 ! sbox 5 next round
367 xor $1, local1, $1 ! 7 finished
369 srl local2, 16, local2 ! sbox 5 next round
370 xor $1, local3, $1 ! 5 finished
372 ld [$5+$3*16+4], out1 ! key 8642 next round again
373 and local2, 252, local2 ! sbox5 next round
375 xor $1, local7, local7 ! 7531
377 ld [global5+local2], local2 ! 5
378 srl local7, 24, local3 ! 7
379 xor $1, out0, out0 ! 8642
381 ldub [out2+local3], local3 ! 7 (and 0xFC)
382 srl out0, 4, local0 ! rotate 4 right
383 and local7, 252, local1 ! 1
385 sll out0, 28, out0 ! rotate
386 xor $2, local2, $2 ! 5 finished local2 used
388 srl local0, 8, local4 ! 4
389 and local0, 252, local2 ! 2
390 ld [local5+local3], local3 ! 7
392 srl local0, 16, local5 ! 6
393 or out0, local0, out0 ! rotate
394 ld [global2+local2], local2 ! 2
397 ld [$5+$3*16], out0 ! key 7531 next round
398 and local4, 252, local4 ! 4
400 and local5, 252, local5 ! 6
401 ld [global4+local4], local4 ! 4
402 xor $2, local3, $2 ! 7 finished local3 used
404 and local0, 252, local0 ! 8
405 ld [local6+local5], local5 ! 6
406 xor $2, local2, $2 ! 2 finished local2 now sbox 3
408 srl local7, 8, local2 ! 3 start
409 ld [out3+local0], local0 ! 8
410 xor $2, local4, $2 ! 4 finished
412 and local2, 252, local2 ! 3
413 ld [global1+local1], local1 ! 1
414 xor $2, local5, $2 ! 6 finished local5 used
416 ld [global3+local2], local2 ! 3
417 xor $2, local0, $2 ! 8 finished
418 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer: two 8-byte round keys
420 ld [out2+284], local5 ! 0x0000FC00
421 xor $2, out0, local4 ! sbox 1 next round
422 xor $2, local1, $2 ! 1 finished
424 xor $2, local2, $2 ! 3 finished
425 #ifdef OPENSSL_SYSNAME_ULTRASPARC
430 and local4, 252, local1 ! sbox 1 next round
434 ld [global1+local1], local1 ! 1
438 srl out1, 4, local0 ! rotate
439 and out0, local5, local3 ! 3
441 ld [$5+$3*8], local7 ! key 7531
442 srl local3, 8, local3 ! 3
443 and local0, 252, local2 ! 2
445 ld [global3+local3],local3 ! 3
446 sll out1, 28, out1 ! rotate
447 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
449 ld [global2+local2], local2 ! 2
451 or out1, local0, out1 ! rotate
453 ldub [out2+local1], local1 ! 7 (and 0xFC)
455 and out1, local5, local4 ! 4
457 ldub [out2+local0], local0 ! 8 (and 0xFC)
458 srl local4, 8, local4 ! 4
459 xor $1, local2, $1 ! 2 finished local2 now sbox 6
461 ld [global4+local4],local4 ! 4
463 xor $1, local3, $1 ! 3 finished local3 now sbox 5
465 ld [out3+local0],local0 ! 8
466 and local2, 252, local2 ! 6
467 add global1, 1536, local5 ! address sbox 7
469 ld [local6+local2], local2 ! 6
471 xor $1, local4, $1 ! 4 finished
473 ld [local5+local1],local1 ! 7
474 and local3, 252, local3 ! 5
477 ld [global5+local3],local3 ! 5
478 xor $1, local2, $1 ! 6 finished
481 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
482 xor $1, local7, local2 ! sbox 5 next round
483 xor $1, local1, $1 ! 7 finished
486 srl local2, 16, local2 ! sbox 5 next round
487 xor $1, local3, $1 ! 5 finished
489 and local2, 252, local2 ! sbox 5 next round
490 ! next round (two rounds more)
491 xor $1, local7, local7 ! 7531
493 ld [global5+local2], local2 ! 5
494 srl local7, 24, local3 ! 7
495 xor $1, out0, out0 ! 8642
497 ldub [out2+local3], local3 ! 7 (and 0xFC)
498 srl out0, 4, local0 ! rotate
499 and local7, 252, local1 ! 1
501 sll out0, 28, out0 ! rotate
502 xor $2, local2, $2 ! 5 finished local2 used
504 srl local0, 8, local4 ! 4
505 and local0, 252, local2 ! 2
506 ld [local5+local3], local3 ! 7
508 srl local0, 16, local5 ! 6
509 or out0, local0, out0 ! rotate
510 ld [global2+local2], local2 ! 2
513 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
514 and local4, 252, local4 ! 4
516 and local5, 252, local5 ! 6
517 ld [global4+local4], local4 ! 4
518 xor $2, local3, $2 ! 7 finished local3 used
520 and local0, 252, local0 ! 8
521 ld [local6+local5], local5 ! 6
522 xor $2, local2, $2 ! 2 finished local2 now sbox 3
524 srl local7, 8, local2 ! 3 start
525 ld [out3+local0], local0 ! 8
528 and local2, 252, local2 ! 3
529 ld [global1+local1], local1 ! 1
530 xor $2, local5, $2 ! 6 finished local5 used
532 ld [global3+local2], local2 ! 3
536 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
540 ifelse($7,{}, {}, {retl})
547 ! parameter 1 right (original left)
548 ! parameter 2 left (original right)
549 ! parameter 3 1 for optional store to [in0]
550 ! parameter 4 1 for load input/output address to local5/7
552 ! The final permutation logic switches the halves, meaning that
553 ! left and right end up in the registers originally used.
558 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
560 ! initially undo the rotate 3 left done after initial permutation
561 ! original left is received shifted 3 right and 29 left in local3/4
564 or local3, local4, $1
567 sethi %hi(0x55555555), local2
570 or local2, %lo(0x55555555), local2
573 sethi %hi(0x00ff00ff), local1
574 xor local3, $1, local3
575 or local1, %lo(0x00ff00ff), local1
576 and local3, local2, local3
577 sethi %hi(0x33333333), local4
578 sll local3, 1, local2
584 xor local3, $2, local3
585 or local4, %lo(0x33333333), local4
586 and local3, local1, local3
587 sethi %hi(0x0000ffff), local1
588 sll local3, 8, local2
594 xor local3, $1, local3
595 or local1, %lo(0x0000ffff), local1
596 and local3, local4, local3
597 sethi %hi(0x0f0f0f0f), local4
598 sll local3, 2, local2
600 ifelse($4,1, {LDPTR INPUT, local5})
603 ifelse($4,1, {LDPTR OUTPUT, local7})
606 xor local3, $2, local3
607 or local4, %lo(0x0f0f0f0f), local4
608 and local3, local1, local3
609 sll local3, 16, local2
611 xor $2, local3, local1
613 srl local1, 4, local3
615 xor local3, $1, local3
616 and local3, local4, local3
617 sll local3, 4, local2
623 ifelse($3,1, {st $1, [in0]})
625 xor local1, local2, $2
627 ifelse($3,1, {st $2, [in0+4]})
634 ! Does initial permutation for next block mixed with
635 ! final permutation for current block.
637 ! parameter 1 original left
638 ! parameter 2 original right
639 ! parameter 3 left ip
640 ! parameter 4 right ip
641 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
644 ! also adds -8 to length in2 and loads loop counter to out4
646 define(fp_ip_macro, {
649 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
651 define({temp1},{out4})
652 define({temp2},{local3})
654 define({ip1},{local1})
655 define({ip2},{local2})
656 define({ip4},{local4})
657 define({ip5},{local5})
659 ! $1 in local3, local4
663 or local3, local4, $1
666 ifelse($5,2,{mov in4, in3})
675 and temp1, ip5, temp1
676 xor local0, $3, local0
681 and local0, ip1, local0
684 sll local0, 4, local7
694 xor local0, $4, local0
695 and temp1, ip4, temp1
696 and local0, ip2, local0
699 sll local0, 16, local7
705 ld [out2+264], temp2 ! ip3
709 xor local0, $3, local0
710 and temp1, temp2, temp1
711 and local0, temp2, local0
714 sll local0, 2, local7
722 xor local0, $4, local0
723 and temp1, ip2, temp1
724 and local0, ip4, local0
726 xor $2, temp1, local4
727 sll local0, 8, local7
734 xor local0, $3, local0
737 and local0, ip5, local0
739 sll local0, 1, local7
746 and temp1, ip1, temp1
751 ifelse($5,1,{LDPTR KS2, in4})
753 xor local4, temp2, $2
755 ! reload since used as temporary:
757 ld [out2+280], out4 ! loop counter
760 ifelse($5,1,{add in4, 120, in4})
762 ifelse($5,1,{LDPTR KS1, in3})
765 or local0, local5, $4
766 or local2, local7, $3
772 ! {load_little_endian}
774 ! parameter 1 address
775 ! parameter 2 destination left
776 ! parameter 3 destination right
777 ! parameter 4 temporary
780 define(load_little_endian, {
782 ! {load_little_endian}
783 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
785 ! first in memory to rightmost in register
787 #ifdef OPENSSL_SYSNAME_ULTRASPARC
833 ! {load_little_endian_inc}
835 ! parameter 1 address
836 ! parameter 2 destination left
837 ! parameter 3 destination right
838 ! parameter 4 temporary
843 define(load_little_endian_inc, {
845 ! {load_little_endian_inc}
846 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
848 ! first in memory to rightmost in register
850 #ifdef OPENSSL_SYSNAME_ULTRASPARC
899 ! Loads 1 to 7 bytes little endian
900 ! Remaining bytes are zeroed.
902 ! parameter 1 address
904 ! parameter 3 destination register left
905 ! parameter 4 destination register right
909 ! parameter 8 return label
911 define(load_n_bytes, {
914 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
919 add %o7,$7.jmp.table-$7.0,$5
971 ! {store_little_endian}
973 ! parameter 1 address
974 ! parameter 2 source left
975 ! parameter 3 source right
976 ! parameter 4 temporary
978 define(store_little_endian, {
980 ! {store_little_endian}
981 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
983 ! rightmost in register to first in memory
985 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1034 ! Stores 1 to 7 bytes little endian
1036 ! parameter 1 address
1037 ! parameter 2 length
1038 ! parameter 3 source register left
1039 ! parameter 4 source register right
1043 ! parameter 8 return label
1045 define(store_n_bytes, {
1048 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
1053 add %o7,$7.jmp.table-$7.0,$5
1106 define(testvalue,{1})
1108 define(register_init, {
1110 ! For test purposes: load the test constant into local0 and copy it to each register given as a non-empty argument
1112 sethi %hi(testvalue), local0
1113 or local0, %lo(testvalue), local0 ! local0 now holds the full test constant
1115 ifelse($1,{},{}, {mov local0, $1})
1116 ifelse($2,{},{}, {mov local0, $2})
1117 ifelse($3,{},{}, {mov local0, $3})
1118 ifelse($4,{},{}, {mov local0, $4})
1119 ifelse($5,{},{}, {mov local0, $5})
1120 ifelse($6,{},{}, {mov local0, $6})
1121 ifelse($7,{},{}, {mov local0, $7})
1122 ifelse($8,{},{}, {mov local0, $8})
1152 ! loads key next encryption/decryption first round from [in4]
1154 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1161 ! implemented with out5 as first parameter to avoid
1162 ! register exchange in ede modes
1165 ! loads key next encryption/decryption first round from [in3]
1167 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1171 ! void DES_encrypt1(data, ks, enc)
1172 ! *******************************
1175 .global DES_encrypt1
1176 .type DES_encrypt1,#function
1180 save %sp, FRAME, %sp
1183 mov .PIC.me.up-(.-4),out0
1185 ld [in0], in5 ! left
1188 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1189 be,pn %icc, .encrypt.dec ! enc/dec
1193 ld [in0+4], out5 ! right
1195 ! parameter 6 1/2 for include encryption/decryption
1196 ! parameter 7 1 for move in1 to in3
1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1201 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1203 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1210 add in1, 120, in3 ! use last subkey for first round
1212 ! parameter 6 1/2 for include encryption/decryption
1213 ! parameter 7 1 for move in1 to in3
1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1218 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1224 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1227 ! void DES_encrypt2(data, ks, enc)
1228 !*********************************
1230 ! encrypts/decrypts without initial/final permutation
1233 .global DES_encrypt2
1234 .type DES_encrypt2,#function
1238 save %sp, FRAME, %sp
1241 mov .PIC.me.up-(.-4),out0
1243 ! Set sbox address 1 to 6 and rotate halfs 3 left
1244 ! Errors caught by destest? Yes. Still? *NO*
1246 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1248 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1250 add global1, 256, global2 ! sbox 2
1251 add global1, 512, global3 ! sbox 3
1253 ld [in0], out5 ! right
1254 add global1, 768, global4 ! sbox 4
1255 add global1, 1024, global5 ! sbox 5
1257 ld [in0+4], in5 ! left
1258 add global1, 1280, local6 ! sbox 6
1259 add global1, 1792, out3 ! sbox 8
1264 mov in1, in3 ! key address to in3
1270 add in5, local5, in5
1272 add out5, local7, out5
1275 ! we use our own stackframe
1277 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1278 be,pn %icc, .encrypt2.dec ! decryption
1282 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
1284 ld [in3], out0 ! key 7531 first round
1285 mov LOOPS, out4 ! loop counter
1287 ld [in3+4], out1 ! key 8642 first round
1288 sethi %hi(0x0000FC00), local5
1299 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
1312 ld [in4], out0 ! key 7531 first round
1313 mov LOOPS, out4 ! loop counter
1315 ld [in4+4], out1 ! key 8642 first round
1316 sethi %hi(0x0000FC00), local5
1318 mov in5, local1 ! left expected in out5
1332 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
1341 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1344 ! void DES_encrypt3(data, ks1, ks2, ks3)
1345 ! **************************************
1348 .global DES_encrypt3
1349 .type DES_encrypt3,#function
1353 save %sp, FRAME, %sp
1356 mov .PIC.me.up-(.-4),out0
1358 ld [in0], in5 ! left
1359 add in2, 120, in4 ! ks2
1361 ld [in0+4], out5 ! right
1362 mov in3, in2 ! save ks3
1364 ! parameter 6 1/2 for include encryption/decryption
1365 ! parameter 7 1 for mov in1 to in3
1366 ! parameter 8 1 for mov in3 to in4
1367 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1369 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1372 mov in2, in3 ! preload ks3
1377 fp_macro(in5, out5, 1)
1383 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1386 ! void DES_decrypt3(data, ks1, ks2, ks3)
1387 ! **************************************
1390 .global DES_decrypt3
1391 .type DES_decrypt3,#function
1395 save %sp, FRAME, %sp
1398 mov .PIC.me.up-(.-4),out0
1400 ld [in0], in5 ! left
1401 add in3, 120, in4 ! ks3
1403 ld [in0+4], out5 ! right
1406 ! parameter 6 1/2 for include encryption/decryption
1407 ! parameter 7 1 for mov in1 to in3
1408 ! parameter 8 1 for mov in3 to in4
1409 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1411 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1414 add in1, 120, in4 ! preload ks1
1419 fp_macro(out5, in5, 1)
1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1427 ! input: out0 offset between .PIC.me.up and caller
1428 ! output: out0 pointer to .PIC.me.up
1429 ! out2 pointer to .des_and
1430 ! global1 pointer to DES_SPtrans
1433 add out0,%o7,out0 ! pointer to .PIC.me.up
1436 sethi %hi(DES_SPtrans),global1
1437 or global1,%lo(DES_SPtrans),global1
1438 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1439 add global1,out0,global1
1440 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1441 LDPTR [out2+global1],global1
1443 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1447 add out0,.des_and-.PIC.me.up,out2
1450 .type .des_and,#object
1455 ! Byte lookup table: entry i holds i AND 0xFC. It replaces an explicit
1456 ! AND 0xFC when it is known that register bits 8-31 are zero, which makes
1457 ! it possible to do three arithmetic operations in one cycle.
1459 .byte 0, 0, 0, 0, 4, 4, 4, 4
1460 .byte 8, 8, 8, 8, 12, 12, 12, 12
1461 .byte 16, 16, 16, 16, 20, 20, 20, 20
1462 .byte 24, 24, 24, 24, 28, 28, 28, 28
1463 .byte 32, 32, 32, 32, 36, 36, 36, 36
1464 .byte 40, 40, 40, 40, 44, 44, 44, 44
1465 .byte 48, 48, 48, 48, 52, 52, 52, 52
1466 .byte 56, 56, 56, 56, 60, 60, 60, 60
1467 .byte 64, 64, 64, 64, 68, 68, 68, 68
1468 .byte 72, 72, 72, 72, 76, 76, 76, 76
1469 .byte 80, 80, 80, 80, 84, 84, 84, 84
1470 .byte 88, 88, 88, 88, 92, 92, 92, 92
1471 .byte 96, 96, 96, 96, 100, 100, 100, 100
1472 .byte 104, 104, 104, 104, 108, 108, 108, 108
1473 .byte 112, 112, 112, 112, 116, 116, 116, 116
1474 .byte 120, 120, 120, 120, 124, 124, 124, 124
1475 .byte 128, 128, 128, 128, 132, 132, 132, 132
1476 .byte 136, 136, 136, 136, 140, 140, 140, 140
1477 .byte 144, 144, 144, 144, 148, 148, 148, 148
1478 .byte 152, 152, 152, 152, 156, 156, 156, 156
1479 .byte 160, 160, 160, 160, 164, 164, 164, 164
1480 .byte 168, 168, 168, 168, 172, 172, 172, 172
1481 .byte 176, 176, 176, 176, 180, 180, 180, 180
1482 .byte 184, 184, 184, 184, 188, 188, 188, 188
1483 .byte 192, 192, 192, 192, 196, 196, 196, 196
1484 .byte 200, 200, 200, 200, 204, 204, 204, 204
1485 .byte 208, 208, 208, 208, 212, 212, 212, 212
1486 .byte 216, 216, 216, 216, 220, 220, 220, 220
1487 .byte 224, 224, 224, 224, 228, 228, 228, 228
1488 .byte 232, 232, 232, 232, 236, 236, 236, 236
1489 .byte 240, 240, 240, 240, 244, 244, 244, 244
1490 .byte 248, 248, 248, 248, 252, 252, 252, 252
1492 ! 5 numbers for initial/final permutation, placed right after the 256 table bytes
1494 .word 0x0f0f0f0f ! offset 256
1495 .word 0x0000ffff ! offset 260
1496 .word 0x33333333 ! offset 264
1497 .word 0x00ff00ff ! offset 268
1498 .word 0x55555555 ! offset 272
1502 .word 0x0000FC00 ! offset 284, mask used in the rounds
1504 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1505 ! *****************************************************************
1509 .global DES_ncbc_encrypt
1510 .type DES_ncbc_encrypt,#function
1514 save %sp, FRAME, %sp
1516 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1517 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1518 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1521 mov .PIC.me.up-(.-4),out0
1525 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1526 be,pn %icc, .ncbc.dec
1532 ! addr left right temp label
1533 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1535 addcc in2, -8, in2 ! bytes missing when first block done
1537 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1538 bl,pn %icc, .ncbc.enc.seven.or.less
1540 bl .ncbc.enc.seven.or.less
1542 mov in3, in4 ! schedule
1544 .ncbc.enc.next.block:
1546 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1548 .ncbc.enc.next.block_1:
1550 xor in5, out4, in5 ! iv xor
1551 xor out5, global4, out5 ! iv xor
1553 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1554 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1556 .ncbc.enc.next.block_2:
1558 !// call .des_enc ! compares in2 to 8
1559 ! rounds inlined for alignment purposes
1561 add global1, 768, global4 ! address sbox 4 since register used below
1563 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1565 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1566 bl,pn %icc, .ncbc.enc.next.block_fp
1568 bl .ncbc.enc.next.block_fp
1570 add in0, 8, in0 ! input address
1572 ! If 8 or more bytes are to be encrypted after this block,
1573 ! we combine final permutation for this block with initial
1574 ! permutation for next block. Load next block:
1576 load_little_endian(in0, global3, global4, local5, .LLE12)
1578 ! parameter 1 original left
1579 ! parameter 2 original right
1580 ! parameter 3 left ip
1581 ! parameter 4 right ip
1582 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1585 ! also adds -8 to length in2 and loads loop counter to out4
1587 fp_ip_macro(out0, out1, global3, global4, 2)
1589 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1591 ld [in3], out0 ! key 7531 first round next block
1593 xor global3, out5, in5 ! iv xor next block
1595 ld [in3+4], out1 ! key 8642
1596 add global1, 512, global3 ! address sbox 3 since register used
1597 xor global4, local1, out5 ! iv xor next block
1599 ba .ncbc.enc.next.block_2
1600 add in1, 8, in1 ! output address
1602 .ncbc.enc.next.block_fp:
1606 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1608 addcc in2, -8, in2 ! bytes missing when next block done
1610 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1611 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1613 bpos .ncbc.enc.next.block
1617 .ncbc.enc.seven.or.less:
1621 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1622 ble,pt %icc, .ncbc.enc.finish
1624 ble .ncbc.enc.finish
1628 add in2, 8, local1 ! bytes to load
1630 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1631 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1633 ! Loads 1 to 7 bytes little endian to global4, out4
1639 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1651 LDPTR IVEC, local7 ! ivec
1652 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1653 ble,pn %icc, .ncbc.dec.finish
1655 ble .ncbc.dec.finish
1657 mov in3, in4 ! schedule
1660 mov in0, local5 ! input
1662 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1664 .ncbc.dec.next.block:
1666 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1668 ! parameter 6 1/2 for include encryption/decryption
1669 ! parameter 7 1 for mov in1 to in3
1670 ! parameter 8 1 for mov in3 to in4
1672 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1674 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1676 ! in2 is bytes left to be stored
1677 ! in2 is compared to 8 in the rounds
1679 xor out5, in0, out4 ! iv xor
1680 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1681 bl,pn %icc, .ncbc.dec.seven.or.less
1683 bl .ncbc.dec.seven.or.less
1685 xor in5, in1, global4 ! iv xor
1687 ! Load ivec next block now, since input and output address might be the same.
1689 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1691 store_little_endian(local7, out4, global4, local3, .SLE3)
1694 add local7, 8, local7
1697 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1698 bg,pt %icc, .ncbc.dec.next.block
1700 bg .ncbc.dec.next.block
1702 STPTR local7, OUTPUT
1707 LDPTR IVEC, local4 ! ivec
1708 store_little_endian(local4, in0, in1, local5, .SLE4)
1715 .ncbc.dec.seven.or.less:
1717 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1719 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1722 .DES_ncbc_encrypt.end:
1723 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1726 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1727 ! **************************************************************************
1731 .global DES_ede3_cbc_encrypt
1732 .type DES_ede3_cbc_encrypt,#function
1734 DES_ede3_cbc_encrypt:
1736 save %sp, FRAME, %sp
1738 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1739 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1740 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1743 mov .PIC.me.up-(.-4),out0
1745 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1746 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1749 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1750 be,pn %icc, .ede3.dec
1758 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1760 addcc in2, -8, in2 ! bytes missing after next block
1762 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1763 bl,pn %icc, .ede3.enc.seven.or.less
1765 bl .ede3.enc.seven.or.less
1769 .ede3.enc.next.block:
1771 load_little_endian(in0, out4, global4, local3, .LLE7)
1773 .ede3.enc.next.block_1:
1776 xor in5, out4, in5 ! iv xor
1777 xor out5, global4, out5 ! iv xor
1780 add in4, 120, in4 ! for decryption we use last subkey first
1783 ip_macro(in5, out5, in5, out5, in3)
1785 .ede3.enc.next.block_2:
1787 call .des_enc ! ks1 in3
1790 call .des_dec ! ks2 in4
1793 call .des_enc ! ks3 in3 compares in2 to 8
1796 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1797 bl,pn %icc, .ede3.enc.next.block_fp
1799 bl .ede3.enc.next.block_fp
1803 ! If 8 or more bytes are to be encrypted after this block,
1804 ! we combine final permutation for this block with initial
1805 ! permutation for next block. Load next block:
1807 load_little_endian(in0, global3, global4, local5, .LLE11)
1809 ! parameter 1 original left
1810 ! parameter 2 original right
1811 ! parameter 3 left ip
1812 ! parameter 4 right ip
1813 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1816 ! also adds -8 to length in2 and loads loop counter to out4
1818 fp_ip_macro(out0, out1, global3, global4, 1)
1820 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1823 xor global3, out5, in5 ! iv xor next block
1825 ld [in3], out0 ! key 7531
1826 add global1, 512, global3 ! address sbox 3
1827 xor global4, local1, out5 ! iv xor next block
1829 ld [in3+4], out1 ! key 8642
1830 add global1, 768, global4 ! address sbox 4
1831 ba .ede3.enc.next.block_2
1834 .ede3.enc.next.block_fp:
1838 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1840 addcc in2, -8, in2 ! bytes missing when next block done
1842 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1843 bpos,pt %icc, .ede3.enc.next.block
1845 bpos .ede3.enc.next.block
1849 .ede3.enc.seven.or.less:
1853 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1854 ble,pt %icc, .ede3.enc.finish
1856 ble .ede3.enc.finish
1860 add in2, 8, local1 ! bytes to load
1862 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1863 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1867 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1868 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1885 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1886 ble %icc, .ede3.dec.finish
1888 ble .ede3.dec.finish
1892 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1893 load_little_endian(local7, in0, in1, local3, .LLE8)
1895 .ede3.dec.next.block:
1897 load_little_endian(local5, in5, out5, local3, .LLE9)
1899 ! parameter 6 1/2 for include encryption/decryption
1900 ! parameter 7 1 for mov in1 to in3
1901 ! parameter 8 1 for mov in3 to in4
1902 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1904 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1906 call .des_enc ! ks2 in3
1909 call .des_dec ! ks1 in4
1912 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1914 ! in2 is bytes left to be stored
1915 ! in2 is compared to 8 in the rounds
1918 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1919 bl,pn %icc, .ede3.dec.seven.or.less
1921 bl .ede3.dec.seven.or.less
1923 xor in5, in1, global4
1925 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1927 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1931 add local7, 8, local7
1933 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1934 bg,pt %icc, .ede3.dec.next.block
1936 bg .ede3.dec.next.block
1938 STPTR local7, OUTPUT
1942 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1943 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1950 .ede3.dec.seven.or.less:
1952 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1954 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1957 .DES_ede3_cbc_encrypt.end:
1958 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt