2 ! des_enc.S (generated from des_enc.m4)
4 ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
6 ! Version 1.0. 32-bit version.
10 ! Assembler version: Copyright Svend Olaf Mikkelsen.
12 ! Original C code: Copyright Eric A. Young.
14 ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
16 ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
18 ! This version can be redistributed.
20 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
22 ! Global registers 1 to 5 are used. This is the same as done by the
23 ! cc compiler. The UltraSPARC load/store little endian feature is used.
25 ! Instruction grouping often refers to one CPU cycle.
27 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
29 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
32 .ident "des_enc.m4 1.0"
79 ! The logic used in initial and final permutations is the same as in
80 ! the C code. The permutations are done with a clever shift, xor, and
83 ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
84 ! sbox 6 to local6, and address sbox 8 to out3.
86 ! Rotates the halves 3 left to bring the sbox bits into convenient positions.
88 ! Loads key first round from address in parameter 5 to out0, out1.
90 ! After the original LibDES initial permutation, the resulting left
91 ! is in the variable initially used for right and vice versa. The macro
92 ! implements the possibility to keep the halves in the original registers.
96 ! parameter 3 result left (modify in first round)
97 ! parameter 4 result right (use in first round)
98 ! parameter 5 key address
99 ! parameter 6 1/2 for include encryption/decryption
100 ! parameter 7 1 for move in1 to in3
101 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
102 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
107 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
109 ld [out2+256], local1 ! 0x0f0f0f0f mask at table offset 256, assumes out2 holds the .des_and address
112 xor local4, $1, local4
113 ifelse($7,1,{mov in1, in3},{nop})
115 ld [out2+260], local2 ! 0x0000ffff mask at table offset 260
116 and local4, local1, local4
117 ifelse($8,1,{mov in3, in4},{})
118 ifelse($8,2,{mov in4, in3},{})
120 ld [out2+280], out4 ! loop counter
121 sll local4, 4, local1
124 ld [out2+264], local3 ! 0x33333333 mask at table offset 264
128 ifelse($9,1,{ld KS3, in4},{})
129 xor local4, $2, local4
130 sethi %hi(des_SPtrans), global1 ! sbox addr
132 ifelse($9,1,{ld KS2, in3},{})
133 and local4, local2, local4
134 or global1, %lo(des_SPtrans), global1 ! sbox addr
136 sll local4, 16, local1
142 sethi %hi(16711680), local5 ! 16711680 is 0x00ff0000, upper part of the 0x00ff00ff mask
143 xor local4, $1, local4
145 and local4, local3, local4
146 or local5, 255, local5 ! completes the 0x00ff00ff mask
148 sll local4, 2, local2
154 xor local4, $2, local4
155 add global1, 768, global4 ! address sbox 4
157 and local4, local5, local4
158 add global1, 1024, global5 ! address sbox 5
160 ld [out2+272], local7 ! 0x55555555 mask at table offset 272
161 sll local4, 8, local1
167 ld [$5], out0 ! key 7531
168 xor local4, $1, local4
169 add global1, 256, global2 ! address sbox 2
171 ld [$5+4], out1 ! key 8642
172 and local4, local7, local4
173 add global1, 512, global3 ! address sbox 3
175 sll local4, 1, local1
182 add global1, 1280, local6 ! address sbox 6
185 add global1, 1792, out3 ! address sbox 8
188 or local4, local3, $4
190 or local2, local1, $3
194 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
195 or local2, local1, $3
199 and local1, 252, local1
205 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
206 or local2, local1, $3
210 and local1, 252, local1
218 ! The logic used in the DES rounds is the same as in the C code,
219 ! except that calculations for sbox 1 and sbox 5 begin before
220 ! the previous round is finished.
222 ! In each round one half (work) is modified based on key and the
225 ! In this version we do two rounds in a loop repeated 7 times
226 ! and two rounds separately.
228 ! One half has the bits for the sboxes in the following positions:
230 ! 777777xx555555xx333333xx111111xx
232 ! 88xx666666xx444444xx222222xx8888
234 ! The bits for each sbox are xor-ed with the key bits for that box.
235 ! The above xx bits are cleared, and the result used for lookup in
236 ! the sbox table. Each sbox entry contains the 4 output bits permuted
237 ! into 32 bits according to the P permutation.
239 ! In the description of DES, left and right are switched after
240 ! each round, except after last round. In this code the original
241 ! left and right are kept in the same register in all rounds, meaning
242 ! that after the 16 rounds the result for right is in the register
243 ! originally used for left.
245 ! parameter 1 first work (left in first round)
246 ! parameter 2 first use (right in first round)
247 ! parameter 3 enc/dec 1/-1
248 ! parameter 4 loop label
249 ! parameter 5 key address register
250 ! parameter 6 optional address for key next encryption/decryption
251 ! parameter 7 not empty for include retl
253 ! also compares in2 to 8
255 define(rounds_macro, {
258 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
262 ld [out2+284], local5 ! 0x0000FC00
264 and local1, 252, local1
269 ! local6 is address sbox 6
270 ! out3 is address sbox 8
271 ! out4 is loop counter
273 ld [global1+local1], local1 ! 1
274 xor $2, out1, out1 ! 8642
275 xor $2, out0, out0 ! 7531
276 fxor %f0, %f0, %f0 ! fxor used for alignment
278 srl out1, 4, local0 ! rotate 4 right
279 and out0, local5, local3 ! 3
282 ld [$5+$3*8], local7 ! key 7531 next round
283 srl local3, 8, local3 ! 3
284 and local0, 252, local2 ! 2
287 ld [global3+local3],local3 ! 3
288 sll out1, 28, out1 ! rotate
289 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
291 ld [global2+local2], local2 ! 2
292 srl out0, 24, local1 ! 7
293 or out1, local0, out1 ! rotate
295 ldub [out2+local1], local1 ! 7 (and 0xFC)
296 srl out1, 24, local0 ! 8
297 and out1, local5, local4 ! 4
299 ldub [out2+local0], local0 ! 8 (and 0xFC)
300 srl local4, 8, local4 ! 4
301 xor $1, local2, $1 ! 2 finished local2 now sbox 6
303 ld [global4+local4],local4 ! 4
304 srl out1, 16, local2 ! 6
305 xor $1, local3, $1 ! 3 finished local3 now sbox 5
307 ld [out3+local0],local0 ! 8
308 and local2, 252, local2 ! 6
309 add global1, 1536, local5 ! address sbox 7
311 ld [local6+local2], local2 ! 6
312 srl out0, 16, local3 ! 5
313 xor $1, local4, $1 ! 4 finished
315 ld [local5+local1],local1 ! 7
316 and local3, 252, local3 ! 5
317 xor $1, local0, $1 ! 8 finished
319 ld [global5+local3],local3 ! 5
320 xor $1, local2, $1 ! 6 finished
323 ld [$5+$3*8+4], out0 ! key 8642 next round
324 xor $1, local7, local2 ! sbox 5 next round
325 xor $1, local1, $1 ! 7 finished
327 srl local2, 16, local2 ! sbox 5 next round
328 xor $1, local3, $1 ! 5 finished
330 ld [$5+$3*16+4], out1 ! key 8642 next round again
331 and local2, 252, local2 ! sbox5 next round
333 xor $1, local7, local7 ! 7531
335 ld [global5+local2], local2 ! 5
336 srl local7, 24, local3 ! 7
337 xor $1, out0, out0 ! 8642
339 ldub [out2+local3], local3 ! 7 (and 0xFC)
340 srl out0, 4, local0 ! rotate 4 right
341 and local7, 252, local1 ! 1
343 sll out0, 28, out0 ! rotate
344 xor $2, local2, $2 ! 5 finished local2 used
346 srl local0, 8, local4 ! 4
347 and local0, 252, local2 ! 2
348 ld [local5+local3], local3 ! 7
350 srl local0, 16, local5 ! 6
351 or out0, local0, out0 ! rotate
352 ld [global2+local2], local2 ! 2
355 ld [$5+$3*16], out0 ! key 7531 next round
356 and local4, 252, local4 ! 4
358 and local5, 252, local5 ! 6
359 ld [global4+local4], local4 ! 4
360 xor $2, local3, $2 ! 7 finished local3 used
362 and local0, 252, local0 ! 8
363 ld [local6+local5], local5 ! 6
364 xor $2, local2, $2 ! 2 finished local2 now sbox 3
366 srl local7, 8, local2 ! 3 start
367 ld [out3+local0], local0 ! 8
368 xor $2, local4, $2 ! 4 finished
370 and local2, 252, local2 ! 3
371 ld [global1+local1], local1 ! 1
372 xor $2, local5, $2 ! 6 finished local5 used
374 ld [global3+local2], local2 ! 3
375 xor $2, local0, $2 ! 8 finished
376 add $5, $3*16, $5 ! advance key pointer past two 8-byte round keys: enc adds 16, dec adds -16
378 ld [out2+284], local5 ! 0x0000FC00
379 xor $2, out0, local4 ! sbox 1 next round
380 xor $2, local1, $2 ! 1 finished
382 xor $2, local2, $2 ! 3 finished
384 and local4, 252, local1 ! sbox 1 next round
388 ld [global1+local1], local1 ! 1
392 srl out1, 4, local0 ! rotate
393 and out0, local5, local3
395 ld [$5+$3*8], local7 ! key 7531
396 srl local3, 8, local3
397 and local0, 252, local2
399 ld [global3+local3],local3 ! 3
400 sll out1, 28, out1 ! rotate
401 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
403 ld [global2+local2], local2 ! 2
405 or out1, local0, out1 ! rotate
407 ldub [out2+local1], local1 ! 7 (and 0xFC)
409 and out1, local5, local4
411 ldub [out2+local0], local0 ! 8 (and 0xFC)
412 srl local4, 8, local4
413 xor $1, local2, $1 ! 2 finished local2 now sbox 6
415 ld [global4+local4],local4 ! 4
417 xor $1, local3, $1 ! 3 finished local3 now sbox 5
419 ld [out3+local0],local0 ! 8
420 and local2, 252, local2
421 add global1, 1536, local5 ! address sbox 7
423 ld [local6+local2], local2 ! 6
425 xor $1, local4, $1 ! 4 finished
427 ld [local5+local1],local1 ! 7
428 and local3, 252, local3
431 ld [global5+local3],local3 ! 5
432 xor $1, local2, $1 ! 6 finished
435 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
436 xor $1, local7, local2 ! sbox 5 next round
437 xor $1, local1, $1 ! 7 finished
440 srl local2, 16, local2 ! sbox 5 next round
441 xor $1, local3, $1 ! 5 finished
443 and local2, 252, local2
444 ! next round (two rounds more)
445 xor $1, local7, local7 ! 7531
447 ld [global5+local2], local2 ! 5
448 srl local7, 24, local3
449 xor $1, out0, out0 ! 8642
451 ldub [out2+local3], local3 ! 7 (and 0xFC)
452 srl out0, 4, local0 ! rotate
453 and local7, 252, local1
455 sll out0, 28, out0 ! rotate
456 xor $2, local2, $2 ! 5 finished local2 used
458 srl local0, 8, local4
459 and local0, 252, local2
460 ld [local5+local3], local3 ! 7
462 srl local0, 16, local5
463 or out0, local0, out0 ! rotate
464 ld [global2+local2], local2 ! 2
467 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
468 and local4, 252, local4
470 and local5, 252, local5
471 ld [global4+local4], local4 ! 4
472 xor $2, local3, $2 ! 7 finished local3 used
474 and local0, 252, local0
475 ld [local6+local5], local5 ! 6
476 xor $2, local2, $2 ! 2 finished local2 now sbox 3
478 srl local7, 8, local2 ! 3 start
479 ld [out3+local0], local0 ! 8
482 and local2, 252, local2
483 ld [global1+local1], local1 ! 1
484 xor $2, local5, $2 ! 6 finished local5 used
486 ld [global3+local2], local2 ! 3
490 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
494 ifelse($7,{}, {}, {retl})
501 ! parameter 1 right (original left)
502 ! parameter 2 left (original right)
503 ! parameter 3 1 for optional store to [in0]
504 ! parameter 4 1 for load input/output address to local5/7
506 ! The final permutation logic switches the halves, meaning that
507 ! left and right end up in the registers originally used.
512 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
514 ! initially undo the rotate 3 left done after initial permutation
515 ! original left is received shifted 3 right and 29 left in local3/4
518 or local3, local4, $1
521 sethi %hi(0x55555555), local2
524 or local2, %lo(0x55555555), local2
527 sethi %hi(0x00ff00ff), local1
528 xor local3, $1, local3
529 or local1, %lo(0x00ff00ff), local1
530 and local3, local2, local3
531 sethi %hi(0x33333333), local4
532 sll local3, 1, local2
538 xor local3, $2, local3
539 or local4, %lo(0x33333333), local4
540 and local3, local1, local3
541 sethi %hi(0x0000ffff), local1
542 sll local3, 8, local2
548 xor local3, $1, local3
549 or local1, %lo(0x0000ffff), local1
550 and local3, local4, local3
551 sethi %hi(0x0f0f0f0f), local4
552 sll local3, 2, local2
554 ifelse($4,1, {ld INPUT, local5})
557 ifelse($4,1, {ld OUTPUT, local7})
560 xor local3, $2, local3
561 or local4, %lo(0x0f0f0f0f), local4
562 and local3, local1, local3
563 sll local3, 16, local2
565 xor $2, local3, local1
567 srl local1, 4, local3
569 xor local3, $1, local3
570 and local3, local4, local3
571 sll local3, 4, local2
577 ifelse($3,1, {st $1, [in0]})
579 xor local1, local2, $2
581 ifelse($3,1, {st $2, [in0+4]})
588 ! Does initial permutation for next block mixed with
589 ! final permutation for current block.
591 ! parameter 1 original left
592 ! parameter 2 original right
593 ! parameter 3 left ip
594 ! parameter 4 right ip
595 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
598 ! also adds -8 to length in2 and loads loop counter to out4
600 define(fp_ip_macro, {
603 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
605 define({temp1},{out4})
606 define({temp2},{local3})
608 define({ip1},{local1})
609 define({ip2},{local2})
610 define({ip4},{local4})
611 define({ip5},{local5})
613 ! $1 in local3, local4
617 or local3, local4, $1
620 ifelse($5,2,{mov in4, in3})
629 and temp1, ip5, temp1
630 xor local0, $3, local0
635 and local0, ip1, local0
638 sll local0, 4, local7
648 xor local0, $4, local0
649 and temp1, ip4, temp1
650 and local0, ip2, local0
653 sll local0, 16, local7
659 ld [out2+264], temp2 ! ip3
663 xor local0, $3, local0
664 and temp1, temp2, temp1
665 and local0, temp2, local0
668 sll local0, 2, local7
676 xor local0, $4, local0
677 and temp1, ip2, temp1
678 and local0, ip4, local0
680 xor $2, temp1, local4
681 sll local0, 8, local7
688 xor local0, $3, local0
691 and local0, ip5, local0
693 sll local0, 1, local7
700 and temp1, ip1, temp1
705 ifelse($5,1,{ld KS2, in4})
707 xor local4, temp2, $2
709 ! reload since used as temporary:
711 ld [out2+280], out4 ! loop counter
714 ifelse($5,1,{add in4, 120, in4})
716 ifelse($5,1,{ld KS1, in3})
719 or local0, local5, $4
720 or local2, local7, $3
726 ! {load_little_endian}
728 ! parameter 1 address
729 ! parameter 2 destination left
730 ! parameter 3 destination right
731 ! parameter 4 temporary
734 define(load_little_endian, {
736 ! {load_little_endian}
737 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
739 ! first in memory to rightmost in register
785 ! {load_little_endian_inc}
787 ! parameter 1 address
788 ! parameter 2 destination left
789 ! parameter 3 destination right
790 ! parameter 4 temporary
795 define(load_little_endian_inc, {
797 ! {load_little_endian_inc}
798 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
800 ! first in memory to rightmost in register
849 ! Loads 1 to 7 bytes little endian
850 ! Remaining bytes are zeroed.
852 ! parameter 1 address
854 ! parameter 3 destination register left
855 ! parameter 4 destination register right
859 ! parameter 8 return label
861 define(load_n_bytes, {
864 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
868 sethi %hi($7.jmp.table), $5
869 or $5, %lo($7.jmp.table), $5
921 ! {store_little_endian}
923 ! parameter 1 address
924 ! parameter 2 source left
925 ! parameter 3 source right
926 ! parameter 4 temporary
928 define(store_little_endian, {
930 ! {store_little_endian}
931 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
933 ! rightmost in register to first in memory
982 ! Stores 1 to 7 bytes little endian
984 ! parameter 1 address
986 ! parameter 3 source register left
987 ! parameter 4 source register right
991 ! parameter 8 return label
993 define(store_n_bytes, {
996 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
1000 sethi %hi($7.jmp.table), $5
1001 or $5, %lo($7.jmp.table), $5
1053 define(testvalue,{1})
1055 define(register_init, {
1057 ! For test purposes:
1059 sethi %hi(testvalue), local0
1060 or local0, %lo(testvalue), local0
1062 ifelse($1,{},{}, {mov local0, $1})
1063 ifelse($2,{},{}, {mov local0, $2})
1064 ifelse($3,{},{}, {mov local0, $3})
1065 ifelse($4,{},{}, {mov local0, $4})
1066 ifelse($5,{},{}, {mov local0, $5})
1067 ifelse($6,{},{}, {mov local0, $6})
1068 ifelse($7,{},{}, {mov local0, $7})
1069 ifelse($8,{},{}, {mov local0, $8})
1095 .type .des_and,#object
1100 ! This table is used for AND 0xFC when it is known that register
1101 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1102 ! operations in one cycle.
1104 .byte 0, 0, 0, 0, 4, 4, 4, 4
1105 .byte 8, 8, 8, 8, 12, 12, 12, 12
1106 .byte 16, 16, 16, 16, 20, 20, 20, 20
1107 .byte 24, 24, 24, 24, 28, 28, 28, 28
1108 .byte 32, 32, 32, 32, 36, 36, 36, 36
1109 .byte 40, 40, 40, 40, 44, 44, 44, 44
1110 .byte 48, 48, 48, 48, 52, 52, 52, 52
1111 .byte 56, 56, 56, 56, 60, 60, 60, 60
1112 .byte 64, 64, 64, 64, 68, 68, 68, 68
1113 .byte 72, 72, 72, 72, 76, 76, 76, 76
1114 .byte 80, 80, 80, 80, 84, 84, 84, 84
1115 .byte 88, 88, 88, 88, 92, 92, 92, 92
1116 .byte 96, 96, 96, 96, 100, 100, 100, 100
1117 .byte 104, 104, 104, 104, 108, 108, 108, 108
1118 .byte 112, 112, 112, 112, 116, 116, 116, 116
1119 .byte 120, 120, 120, 120, 124, 124, 124, 124
1120 .byte 128, 128, 128, 128, 132, 132, 132, 132
1121 .byte 136, 136, 136, 136, 140, 140, 140, 140
1122 .byte 144, 144, 144, 144, 148, 148, 148, 148
1123 .byte 152, 152, 152, 152, 156, 156, 156, 156
1124 .byte 160, 160, 160, 160, 164, 164, 164, 164
1125 .byte 168, 168, 168, 168, 172, 172, 172, 172
1126 .byte 176, 176, 176, 176, 180, 180, 180, 180
1127 .byte 184, 184, 184, 184, 188, 188, 188, 188
1128 .byte 192, 192, 192, 192, 196, 196, 196, 196
1129 .byte 200, 200, 200, 200, 204, 204, 204, 204
1130 .byte 208, 208, 208, 208, 212, 212, 212, 212
1131 .byte 216, 216, 216, 216, 220, 220, 220, 220
1132 .byte 224, 224, 224, 224, 228, 228, 228, 228
1133 .byte 232, 232, 232, 232, 236, 236, 236, 236
1134 .byte 240, 240, 240, 240, 244, 244, 244, 244
1135 .byte 248, 248, 248, 248, 252, 252, 252, 252
1137 ! 5 numbers for initial/final permutation
1139 .word 0x0f0f0f0f ! offset 256
1140 .word 0x0000ffff ! 260
1141 .word 0x33333333 ! 264
1142 .word 0x00ff00ff ! 268
1143 .word 0x55555555 ! 272
1147 .word 0x0000FC00 ! 284
1157 ! loads key next encryption/decryption first round from [in4]
1159 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1166 ! implemented with out5 as first parameter to avoid
1167 ! register exchange in ede modes
1170 ! loads key next encryption/decryption first round from [in3]
1172 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1176 ! void des_encrypt(data, ks, enc)
1177 ! *******************************
1181 .type des_encrypt,#function
1187 ld [in0], in5 ! left
1188 sethi %hi(.des_and), out2 ! address constants
1191 ld [in0+4], out5 ! right
1192 be,pn %icc, .encrypt.dec ! enc/dec
1193 or out2, %lo(.des_and), out2 ! address constants
1195 ! parameter 6 1/2 for include encryption/decryption
1196 ! parameter 7 1 for move in1 to in3
1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1201 rounds_macro(in5, out5, 1, .des_encrypt.1, in3, in4) ! in4 not used
1203 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1210 add in1, 120, in3 ! use last subkey for first round
1212 ! parameter 6 1/2 for include encryption/decryption
1213 ! parameter 7 1 for move in1 to in3
1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1218 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1224 .size des_encrypt,.des_encrypt.end-des_encrypt
1227 ! void des_encrypt2(data, ks, enc)
1228 !*********************************
1230 ! encrypts/decrypts without initial/final permutation
1233 .global des_encrypt2
1234 .type des_encrypt2,#function
1240 ! Set sbox address 1 to 6 and 8 and rotate halves 3 left
1241 ! Errors caught by destest? Yes. Still? *NO*
1243 sethi %hi(des_SPtrans), global1 ! address sbox 1
1244 sethi %hi(.des_and), out2 ! address constants
1246 or global1, %lo(des_SPtrans), global1 ! sbox 1
1247 or out2, %lo(.des_and), out2 ! address constants
1249 add global1, 256, global2 ! sbox 2
1250 add global1, 512, global3 ! sbox 3
1252 ld [in0], out5 ! right
1253 add global1, 768, global4 ! sbox 4
1254 add global1, 1024, global5 ! sbox 5
1256 ld [in0+4], in5 ! left
1257 add global1, 1280, local6 ! sbox 6
1258 add global1, 1792, out3 ! sbox 8
1263 mov in1, in3 ! key address to in3
1269 add in5, local5, in5
1271 add out5, local7, out5
1274 ! we use our own stackframe
1276 be,pn %icc, .encrypt2.dec ! decryption
1279 ld [in3], out0 ! key 7531 first round
1280 mov LOOPS, out4 ! loop counter
1282 ld [in3+4], out1 ! key 8642 first round
1283 sethi %hi(0x0000FC00), local5 ! low 10 bits of 0x0000FC00 are zero, so sethi alone gives the full mask
1307 ld [in4], out0 ! key 7531 first round
1308 mov LOOPS, out4 ! loop counter
1310 ld [in4+4], out1 ! key 8642 first round
1311 sethi %hi(0x0000FC00), local5 ! low 10 bits of 0x0000FC00 are zero, so sethi alone gives the full mask
1313 mov in5, local1 ! left expected in out5
1336 .size des_encrypt2, .des_encrypt2.end-des_encrypt2
1339 ! void des_encrypt3(data, ks1, ks2, ks3)
1340 ! **************************************
1343 .global des_encrypt3
1344 .type des_encrypt3,#function
1350 ld [in0], in5 ! left
1351 add in2, 120, in4 ! ks2
1352 sethi %hi(.des_and), out2 ! address constants
1354 ld [in0+4], out5 ! right
1355 mov in3, in2 ! save ks3
1356 or out2, %lo(.des_and), out2 ! address constants
1358 ! parameter 6 1/2 for include encryption/decryption
1359 ! parameter 7 1 for mov in1 to in3
1360 ! parameter 8 1 for mov in3 to in4
1361 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1363 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1366 mov in2, in3 ! preload ks3
1371 fp_macro(in5, out5, 1)
1377 .size des_encrypt3,.des_encrypt3.end-des_encrypt3
1380 ! void des_decrypt3(data, ks1, ks2, ks3)
1381 ! **************************************
1384 .global des_decrypt3
1385 .type des_decrypt3,#function
1391 ld [in0], in5 ! left
1392 add in3, 120, in4 ! ks3
1393 sethi %hi(.des_and), out2
1395 ld [in0+4], out5 ! right
1397 or out2, %lo(.des_and), out2
1399 ! parameter 6 1/2 for include encryption/decryption
1400 ! parameter 7 1 for mov in1 to in3
1401 ! parameter 8 1 for mov in3 to in4
1402 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1404 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1407 add in1, 120, in4 ! preload ks1
1412 fp_macro(out5, in5, 1)
1418 .size des_decrypt3,.des_decrypt3.end-des_decrypt3
1422 ! void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1423 ! *****************************************************************
1427 .global des_ncbc_encrypt
1428 .type des_ncbc_encrypt,#function
1434 define({INPUT}, { [%sp+68] })
1435 define({OUTPUT}, { [%sp+72] })
1436 define({IVEC}, { [%sp+84] })
1440 sethi %hi(.des_and), out2 ! address constants
1441 be,pn %icc, .ncbc.dec
1444 ! addr left right temp label
1445 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1447 addcc in2, -8, in2 ! bytes missing when first block done
1449 mov in3, in4 ! schedule
1450 bl,pn %icc, .ncbc.enc.seven.or.less
1451 or out2, %lo(.des_and), out2
1453 .ncbc.enc.next.block:
1455 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1457 .ncbc.enc.next.block_1:
1459 xor in5, out4, in5 ! iv xor
1460 xor out5, global4, out5 ! iv xor
1462 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1463 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1465 .ncbc.enc.next.block_2:
1467 !// call .des_enc ! compares in2 to 8
1468 ! rounds inlined for alignment purposes
1470 add global1, 768, global4 ! address sbox 4 since register used below
1472 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1474 bl,pn %icc, .ncbc.enc.next.block_fp
1475 add in0, 8, in0 ! input address
1477 ! If 8 or more bytes are to be encrypted after this block,
1478 ! we combine final permutation for this block with initial
1479 ! permutation for next block. Load next block:
1481 load_little_endian(in0, global3, global4, local5, .LLE12)
1483 ! parameter 1 original left
1484 ! parameter 2 original right
1485 ! parameter 3 left ip
1486 ! parameter 4 right ip
1487 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1490 ! also adds -8 to length in2 and loads loop counter to out4
1492 fp_ip_macro(out0, out1, global3, global4, 2)
1494 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1496 ld [in3], out0 ! key 7531 first round next block
1498 xor global3, out5, in5 ! iv xor next block
1500 ld [in3+4], out1 ! key 8642
1501 add global1, 512, global3 ! address sbox 3 since register used
1502 xor global4, local1, out5 ! iv xor next block
1504 ba,pt %icc, .ncbc.enc.next.block_2
1505 add in1, 8, in1 ! output address, advanced in the branch delay slot
1507 .ncbc.enc.next.block_fp:
1511 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1513 addcc in2, -8, in2 ! bytes missing when next block done
1515 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1518 .ncbc.enc.seven.or.less:
1522 ble,pt %icc, .ncbc.enc.finish
1525 add in2, 8, local1 ! bytes to load
1527 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1528 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1530 ! Loads 1 to 7 bytes little endian to global4, out4
1536 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1548 ld IVEC, local7 ! ivec
1549 ble,pn %icc, .ncbc.dec.finish
1550 mov in3, in4 ! schedule
1553 or out2, %lo(.des_and), out2 ! address constants low part
1554 mov in0, local5 ! input
1556 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1558 .ncbc.dec.next.block:
1560 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1562 ! parameter 6 1/2 for include encryption/decryption
1563 ! parameter 7 1 for mov in1 to in3
1564 ! parameter 8 1 for mov in3 to in4
1566 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1568 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1570 ! in2 is bytes left to be stored
1571 ! in2 is compared to 8 in the rounds
1573 xor out5, in0, out4 ! iv xor
1574 bl,pn %icc, .ncbc.dec.seven.or.less
1575 xor in5, in1, global4 ! iv xor
1577 ! Load ivec next block now, since input and output address might be the same.
1579 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1581 store_little_endian(local7, out4, global4, local3, .SLE3) ! block
1584 add local7, 8, local7 ! advance output address by one 8-byte block
1587 bg,pt %icc, .ncbc.dec.next.block
1593 ld IVEC, local4 ! ivec
1594 store_little_endian(local4, in0, in1, local5, .SLE4)
1601 .ncbc.dec.seven.or.less:
1603 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1605 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1608 .des_ncbc_encrypt.end:
1609 .size des_ncbc_encrypt, .des_ncbc_encrypt.end-des_ncbc_encrypt
1612 ! void des_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1613 ! **************************************************************************
1617 .global des_ede3_cbc_encrypt
1618 .type des_ede3_cbc_encrypt,#function
1620 des_ede3_cbc_encrypt:
1624 define({LENGTH},{ [%sp+76] })
1625 define({KS1}, { [%sp+80] })
1626 define({KS2}, { [%sp+84] })
1627 define({KS3}, { [%sp+88] })
1629 ld [%fp+96], local3 ! enc
1630 sethi %hi(.des_and), out2
1632 ld [%fp+92], local4 ! ivec
1633 or out2, %lo(.des_and), out2
1636 be,pn %icc, .ede3.dec
1641 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1643 addcc in2, -8, in2 ! bytes missing after next block
1645 bl,pn %icc, .ede3.enc.seven.or.less
1648 .ede3.enc.next.block:
1650 load_little_endian(in0, out4, global4, local3, .LLE7)
1652 .ede3.enc.next.block_1:
1655 xor in5, out4, in5 ! iv xor
1656 xor out5, global4, out5 ! iv xor
1659 add in4, 120, in4 ! for decryption we use last subkey first
1662 ip_macro(in5, out5, in5, out5, in3)
1664 .ede3.enc.next.block_2:
1666 call .des_enc ! ks1 in3
1669 call .des_dec ! ks2 in4
1672 call .des_enc ! ks3 in3 compares in2 to 8
1675 bl,pn %icc, .ede3.enc.next.block_fp
1678 ! If 8 or more bytes are to be encrypted after this block,
1679 ! we combine final permutation for this block with initial
1680 ! permutation for next block. Load next block:
1682 load_little_endian(in0, global3, global4, local5, .LLE11)
1684 ! parameter 1 original left
1685 ! parameter 2 original right
1686 ! parameter 3 left ip
1687 ! parameter 4 right ip
1688 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1691 ! also adds -8 to length in2 and loads loop counter to out4
1693 fp_ip_macro(out0, out1, global3, global4, 1)
1695 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1698 xor global3, out5, in5 ! iv xor next block
1700 ld [in3], out0 ! key 7531
1701 add global1, 512, global3 ! address sbox 3
1702 xor global4, local1, out5 ! iv xor next block
1704 ld [in3+4], out1 ! key 8642
1705 add global1, 768, global4 ! address sbox 4
1706 ba,pt %icc, .ede3.enc.next.block_2
1709 .ede3.enc.next.block_fp:
1713 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1715 addcc in2, -8, in2 ! bytes missing when next block done
1717 bpos,pt %icc, .ede3.enc.next.block
1720 .ede3.enc.seven.or.less:
1724 ble,pt %icc, .ede3.enc.finish
1727 add in2, 8, local1 ! bytes to load
1729 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1730 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1734 ld [%fp+92], local4 ! ivec
1735 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1753 ble %icc, .ede3.dec.finish
1756 ld [%fp+92], local7 ! iv
1757 load_little_endian(local7, in0, in1, local3, .LLE8)
1759 .ede3.dec.next.block:
1761 load_little_endian(local5, in5, out5, local3, .LLE9)
1763 ! parameter 6 1/2 for include encryption/decryption
1764 ! parameter 7 1 for mov in1 to in3
1765 ! parameter 8 1 for mov in3 to in4
1766 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1768 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1770 call .des_enc ! ks2 in3
1773 call .des_dec ! ks1 in4
1776 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1778 ! in2 is bytes left to be stored
1779 ! in2 is compared to 8 in the rounds
1782 bl,pn %icc, .ede3.dec.seven.or.less
1783 xor in5, in1, global4
1785 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1787 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1791 add local7, 8, local7
1793 bg,pt %icc, .ede3.dec.next.block
1798 ld [%fp+92], local4 ! ivec
1799 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1807 .ede3.dec.seven.or.less:
1809 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1811 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1814 .des_ede3_cbc_encrypt.end:
1815 .size des_ede3_cbc_encrypt,.des_ede3_cbc_encrypt.end-des_ede3_cbc_encrypt