From: Andy Polyakov Date: Sun, 31 Mar 2013 12:32:05 +0000 (+0200) Subject: Add support for SPARC T4 DES opcode. X-Git-Tag: master-post-reformat~1362 X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=c5d975a74313268a36b6a6103cd37221724137c2 Add support for SPARC T4 DES opcode. --- diff --git a/Configure b/Configure index 24c8dc2665..e5b8c3049b 100755 --- a/Configure +++ b/Configure @@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:e_padlock-x86_64.o"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void"; my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; diff --git a/TABLE b/TABLE index a3ca775a61..a3a58a4c0a 100644 --- a/TABLE +++ b/TABLE @@ -175,7 +175,7 @@ $lflags = $bn_ops = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -2683,7 +2683,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -2716,7 +2716,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -4432,7 +4432,7 @@ $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -4663,7 +4663,7 @@ $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -5521,7 +5521,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -5554,7 +5554,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -5653,7 +5653,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o @@ -5686,7 +5686,7 @@ $lflags = -lsocket -lnsl -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR $cpuid_obj = sparcv9cap.o sparccpuid.o $bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o -$des_obj = des_enc-sparc.o fcrypt_b.o +$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o $aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o $bf_obj = $md5_obj = md5-sparcv9.o diff --git a/crypto/des/Makefile b/crypto/des/Makefile index 868f6087e8..1c061a6879 100644 --- a/crypto/des/Makefile +++ b/crypto/des/Makefile @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib des_enc-sparc.S: asm/des_enc.m4 m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S +dest4-sparcv9.s: asm/dest4-sparcv9.pl + $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@ des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ diff --git a/crypto/des/asm/dest4-sparcv9.pl b/crypto/des/asm/dest4-sparcv9.pl new file mode 100644 index 0000000000..5936a658e5 --- /dev/null +++ b/crypto/des/asm/dest4-sparcv9.pl @@ -0,0 +1,594 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by David S. Miller and Andy Polyakov +# . The module is licensed under 2-clause BSD +# license. March 2013. All rights reserved. +# ==================================================================== + +###################################################################### +# DES for SPARC T4. +# +# As with other hardware-assisted ciphers CBC encrypt results [for +# aligned data] are virtually identical to critical path lengths: +# +# DES Triple-DES +# CBC encrypt 4.14/4.15(*) 11.7/11.7 +# CBC decrypt 1.77/4.11(**) 6.42/7.47 +# +# (*) numbers after slash are for +# misaligned data; +# (**) this is result for largest +# block size, unlike all other +# cases smaller blocks results +# are better[?]; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +&asm_init(@ARGV); + +$code.=<<___ if ($::abibits==64); +.register %g2,#scratch +.register %g3,#scratch +___ + +$code.=<<___; +.text +___ + +{ my ($inp,$out)=("%o0","%o1"); + +$code.=<<___; +.align 32 +.globl des_t4_key_expand +.type des_t4_key_expand,#function +des_t4_key_expand: + andcc $inp, 0x7, %g0 + alignaddr $inp, %g0, $inp + bz,pt %icc, 1f + ldd [$inp + 0x00], %f0 + ldd [$inp + 0x08], %f2 + faligndata %f0, %f2, %f0 +1: des_kexpand %f0, 0, %f0 + des_kexpand %f0, 1, %f2 + std %f0, [$out + 0x00] + des_kexpand %f2, 3, %f6 + std %f2, [$out + 0x08] + des_kexpand %f2, 2, %f4 + des_kexpand %f6, 3, %f10 + std %f6, [$out + 0x18] + des_kexpand %f6, 2, %f8 + std %f4, [$out + 0x10] + des_kexpand %f10, 3, %f14 + std %f10, [$out + 0x28] + des_kexpand %f10, 2, %f12 + std %f8, [$out + 0x20] + des_kexpand %f14, 1, %f16 + std %f14, [$out + 0x38] + des_kexpand %f16, 3, %f20 + std %f12, [$out + 0x30] + des_kexpand %f16, 2, %f18 + std %f16, [$out + 0x40] + des_kexpand %f20, 3, %f24 + std %f20, [$out + 0x50] + des_kexpand %f20, 2, %f22 + std %f18, [$out + 0x48] + des_kexpand %f24, 3, %f28 + std %f24, [$out + 0x60] + des_kexpand %f24, 2, %f26 + std %f22, [$out + 0x58] + des_kexpand %f28, 1, %f30 + std %f28, [$out + 0x70] + std %f26, [$out + 0x68] + retl + std %f30, [$out + 0x78] +.size des_t4_key_expand,.-des_t4_key_expand +___ +} +{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4)); + my ($ileft,$iright,$omask) = map("%g$_",(1..3)); + +$code.=<<___; +.globl des_t4_cbc_encrypt +.align 32 +des_t4_cbc_encrypt: + ld [$ivec + 0], %f0 ! load ivec + ld [$ivec + 4], %f1 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x00], %f4 ! load key schedule + ldd [$key + 0x08], %f6 + ldd [$key + 0x10], %f8 + ldd [$key + 0x18], %f10 + ldd [$key + 0x20], %f12 + ldd [$key + 0x28], %f14 + ldd [$key + 0x30], %f16 + ldd [$key + 0x38], %f18 + ldd [$key + 0x40], %f20 + ldd [$key + 0x48], %f22 + ldd [$key + 0x50], %f24 + ldd [$key + 0x58], %f26 + ldd [$key + 0x60], %f28 + ldd [$key + 0x68], %f30 + ldd [$key + 0x70], %f32 + ldd [$key + 0x78], %f34 + +.Ldes_cbc_enc_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f2 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + fxor %f2, %f0, %f0 ! ^= ivec + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + des_round %f20, %f22, %f0, %f0 + des_round %f24, %f26, %f0, %f0 + des_round %f28, %f30, %f0, %f0 + des_round %f32, %f34, %f0, %f0 + des_iip %f0, %f0 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_cbc_enc_loop + add $out, 8, $out + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~4x deterioration + ! in inp==out case + faligndata %f0, %f0, %f2 ! handle unaligned output + + stda %f8, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_cbc_enc_loop+4 + orn %g0, $omask, $omask + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] +.type des_t4_cbc_encrypt,#function +.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt + +.globl des_t4_cbc_decrypt +.align 32 +des_t4_cbc_decrypt: + ld [$ivec + 0], %f2 ! load ivec + ld [$ivec + 4], %f3 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x78], %f4 ! load key schedule + ldd [$key + 0x70], %f6 + ldd [$key + 0x68], %f8 + ldd [$key + 0x60], %f10 + ldd [$key + 0x58], %f12 + ldd [$key + 0x50], %f14 + ldd [$key + 0x48], %f16 + ldd [$key + 0x40], %f18 + ldd [$key + 0x38], %f20 + ldd [$key + 0x30], %f22 + ldd [$key + 0x28], %f24 + ldd [$key + 0x20], %f26 + ldd [$key + 0x18], %f28 + ldd [$key + 0x10], %f30 + ldd [$key + 0x08], %f32 + ldd [$key + 0x00], %f34 + +.Ldes_cbc_dec_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f0 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + des_round %f20, %f22, %f0, %f0 + des_round %f24, %f26, %f0, %f0 + des_round %f28, %f30, %f0, %f0 + des_round %f32, %f34, %f0, %f0 + des_iip %f0, %f0 + + fxor %f2, %f0, %f0 ! ^= ivec + movxtod %g4, %f2 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_cbc_dec_loop + add $out, 8, $out + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~4x deterioration + ! in inp==out case + faligndata %f0, %f0, %f0 ! handle unaligned output + + stda %f0, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_cbc_dec_loop+4 + orn %g0, $omask, $omask + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] +.type des_t4_cbc_decrypt,#function +.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt + +.globl des_t4_ede3_cbc_encrypt +.align 32 +des_t4_ede3_cbc_encrypt: + ld [$ivec + 0], %f0 ! load ivec + ld [$ivec + 4], %f1 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x00], %f4 ! load key schedule + ldd [$key + 0x08], %f6 + ldd [$key + 0x10], %f8 + ldd [$key + 0x18], %f10 + ldd [$key + 0x20], %f12 + ldd [$key + 0x28], %f14 + ldd [$key + 0x30], %f16 + ldd [$key + 0x38], %f18 + ldd [$key + 0x40], %f20 + ldd [$key + 0x48], %f22 + ldd [$key + 0x50], %f24 + ldd [$key + 0x58], %f26 + ldd [$key + 0x60], %f28 + ldd [$key + 0x68], %f30 + ldd [$key + 0x70], %f32 + ldd [$key + 0x78], %f34 + +.Ldes_ede3_cbc_enc_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f2 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + fxor %f2, %f0, %f0 ! ^= ivec + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + ldd [$key + 0x100-0x08], %f36 + ldd [$key + 0x100-0x10], %f38 + des_round %f20, %f22, %f0, %f0 + ldd [$key + 0x100-0x18], %f40 + ldd [$key + 0x100-0x20], %f42 + des_round %f24, %f26, %f0, %f0 + ldd [$key + 0x100-0x28], %f44 + ldd [$key + 0x100-0x30], %f46 + des_round %f28, %f30, %f0, %f0 + ldd [$key + 0x100-0x38], %f48 + ldd [$key + 0x100-0x40], %f50 + des_round %f32, %f34, %f0, %f0 + ldd [$key + 0x100-0x48], %f52 + ldd [$key + 0x100-0x50], %f54 + des_iip %f0, %f0 + + ldd [$key + 0x100-0x58], %f56 + ldd [$key + 0x100-0x60], %f58 + des_ip %f0, %f0 + ldd [$key + 0x100-0x68], %f60 + ldd [$key + 0x100-0x70], %f62 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x100-0x78], %f36 + ldd [$key + 0x100-0x80], %f38 + des_round %f40, %f42, %f0, %f0 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + ldd [$key + 0x100+0x00], %f40 + ldd [$key + 0x100+0x08], %f42 + des_round %f52, %f54, %f0, %f0 + ldd [$key + 0x100+0x10], %f44 + ldd [$key + 0x100+0x18], %f46 + des_round %f56, %f58, %f0, %f0 + ldd [$key + 0x100+0x20], %f48 + ldd [$key + 0x100+0x28], %f50 + des_round %f60, %f62, %f0, %f0 + ldd [$key + 0x100+0x30], %f52 + ldd [$key + 0x100+0x38], %f54 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x100+0x40], %f56 + ldd [$key + 0x100+0x48], %f58 + des_iip %f0, %f0 + + ldd [$key + 0x100+0x50], %f60 + ldd [$key + 0x100+0x58], %f62 + des_ip %f0, %f0 + ldd [$key + 0x100+0x60], %f36 + ldd [$key + 0x100+0x68], %f38 + des_round %f40, %f42, %f0, %f0 + ldd [$key + 0x100+0x70], %f40 + ldd [$key + 0x100+0x78], %f42 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + des_round %f52, %f54, %f0, %f0 + des_round %f56, %f58, %f0, %f0 + des_round %f60, %f62, %f0, %f0 + des_round %f36, %f38, %f0, %f0 + des_round %f40, %f42, %f0, %f0 + des_iip %f0, %f0 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_ede3_cbc_enc_loop + add $out, 8, $out + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~2x deterioration + ! in inp==out case + faligndata %f0, %f0, %f2 ! handle unaligned output + + stda %f2, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f2, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4 + orn %g0, $omask, $omask + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] +.type des_t4_ede3_cbc_encrypt,#function +.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt + +.globl des_t4_ede3_cbc_decrypt +.align 32 +des_t4_ede3_cbc_decrypt: + ld [$ivec + 0], %f2 ! load ivec + ld [$ivec + 4], %f3 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x100+0x78], %f4 ! load key schedule + ldd [$key + 0x100+0x70], %f6 + ldd [$key + 0x100+0x68], %f8 + ldd [$key + 0x100+0x60], %f10 + ldd [$key + 0x100+0x58], %f12 + ldd [$key + 0x100+0x50], %f14 + ldd [$key + 0x100+0x48], %f16 + ldd [$key + 0x100+0x40], %f18 + ldd [$key + 0x100+0x38], %f20 + ldd [$key + 0x100+0x30], %f22 + ldd [$key + 0x100+0x28], %f24 + ldd [$key + 0x100+0x20], %f26 + ldd [$key + 0x100+0x18], %f28 + ldd [$key + 0x100+0x10], %f30 + ldd [$key + 0x100+0x08], %f32 + ldd [$key + 0x100+0x00], %f34 + +.Ldes_ede3_cbc_dec_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f0 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + ldd [$key + 0x80+0x00], %f36 + ldd [$key + 0x80+0x08], %f38 + des_round %f20, %f22, %f0, %f0 + ldd [$key + 0x80+0x10], %f40 + ldd [$key + 0x80+0x18], %f42 + des_round %f24, %f26, %f0, %f0 + ldd [$key + 0x80+0x20], %f44 + ldd [$key + 0x80+0x28], %f46 + des_round %f28, %f30, %f0, %f0 + ldd [$key + 0x80+0x30], %f48 + ldd [$key + 0x80+0x38], %f50 + des_round %f32, %f34, %f0, %f0 + ldd [$key + 0x80+0x40], %f52 + ldd [$key + 0x80+0x48], %f54 + des_iip %f0, %f0 + + ldd [$key + 0x80+0x50], %f56 + ldd [$key + 0x80+0x58], %f58 + des_ip %f0, %f0 + ldd [$key + 0x80+0x60], %f60 + ldd [$key + 0x80+0x68], %f62 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x80+0x70], %f36 + ldd [$key + 0x80+0x78], %f38 + des_round %f40, %f42, %f0, %f0 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + ldd [$key + 0x80-0x08], %f40 + ldd [$key + 0x80-0x10], %f42 + des_round %f52, %f54, %f0, %f0 + ldd [$key + 0x80-0x18], %f44 + ldd [$key + 0x80-0x20], %f46 + des_round %f56, %f58, %f0, %f0 + ldd [$key + 0x80-0x28], %f48 + ldd [$key + 0x80-0x30], %f50 + des_round %f60, %f62, %f0, %f0 + ldd [$key + 0x80-0x38], %f52 + ldd [$key + 0x80-0x40], %f54 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x80-0x48], %f56 + ldd [$key + 0x80-0x50], %f58 + des_iip %f0, %f0 + + ldd [$key + 0x80-0x58], %f60 + ldd [$key + 0x80-0x60], %f62 + des_ip %f0, %f0 + ldd [$key + 0x80-0x68], %f36 + ldd [$key + 0x80-0x70], %f38 + des_round %f40, %f42, %f0, %f0 + ldd [$key + 0x80-0x78], %f40 + ldd [$key + 0x80-0x80], %f42 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + des_round %f52, %f54, %f0, %f0 + des_round %f56, %f58, %f0, %f0 + des_round %f60, %f62, %f0, %f0 + des_round %f36, %f38, %f0, %f0 + des_round %f40, %f42, %f0, %f0 + des_iip %f0, %f0 + + fxor %f2, %f0, %f0 ! ^= ivec + movxtod %g4, %f2 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_ede3_cbc_dec_loop + add $out, 8, $out + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f0 ! handle unaligned output + + stda %f0, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4 + orn %g0, $omask, $omask + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] +.type des_t4_ede3_cbc_decrypt,#function +.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt +___ +} +$code.=<<___; +.asciz "DES for SPARC T4, David S. Miller, Andy Polyakov" +.align 4 +___ + +&emit_assembler(); + +close STDOUT; diff --git a/crypto/evp/e_des.c b/crypto/evp/e_des.c index ca009f2c52..bf4366590f 100644 --- a/crypto/evp/e_des.c +++ b/crypto/evp/e_des.c @@ -65,6 +65,30 @@ #include #include +typedef struct + { + union { double align; DES_key_schedule ks; } ks; + union { + void (*cbc)(const void *,void *,size_t,const void *,void *); + } stream; + } EVP_DES_KEY; + +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +#include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_cbc_encrypt(const void *inp,void *out,size_t len, + DES_key_schedule *ks,unsigned char iv[8]); +void des_t4_cbc_decrypt(const void *inp,void *out,size_t len, + DES_key_schedule *ks,unsigned char iv[8]); +#endif + static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); @@ -99,6 +123,13 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; + + if (dat->stream.cbc) + { + (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv); + return 1; + } while(inl>=EVP_MAXCHUNK) { DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, @@ -176,18 +207,18 @@ static int des_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, EVP_CIPH_RAND_KEY, des_init_key, NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,1, +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,1, EVP_CIPH_RAND_KEY, des_init_key,NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,des_ctrl) -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,8, +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,8, EVP_CIPH_RAND_KEY,des_init_key,NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,des_ctrl) @@ -196,8 +227,25 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; + + dat->stream.cbc = NULL; +#if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) + { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) + { + des_t4_key_expand(key,&dat->ks.ks); + dat->stream.cbc = enc ? des_t4_cbc_encrypt : + des_t4_cbc_decrypt; + return 1; + } + } +#endif #ifdef EVP_CHECK_DES_KEY - if(DES_set_key_checked(deskey,ctx->cipher_data) != 0) + if(DES_set_key_checked(deskey,dat->ks.ks) != 0) return 0; #else DES_set_key_unchecked(deskey,ctx->cipher_data); diff --git a/crypto/evp/e_des3.c b/crypto/evp/e_des3.c index 2f016f0431..6b0bd2b367 100644 --- a/crypto/evp/e_des3.c +++ b/crypto/evp/e_des3.c @@ -67,6 +67,33 @@ #include #include +typedef struct + { + union { double align; DES_key_schedule ks[3]; } ks; + union { + void (*cbc)(const void *,void *,size_t,const void *,void *); + } stream; + } DES_EDE_KEY; +#define ks1 ks.ks[0] +#define ks2 ks.ks[1] +#define ks3 ks.ks[2] + +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +#include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len, + DES_key_schedule *ks,unsigned char iv[8]); +void des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len, + DES_key_schedule *ks,unsigned char iv[8]); +#endif + static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv,int enc); @@ -75,13 +102,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); -typedef struct - { - DES_key_schedule ks1;/* key schedule */ - DES_key_schedule ks2;/* key schedule (for ede) */ - DES_key_schedule ks3;/* key schedule (for ede3) */ - } DES_EDE_KEY; - #define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) /* Because of various casts and different args can't use IMPLEMENT_BLOCK_CIPHER */ @@ -121,6 +141,8 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { + DES_EDE_KEY *dat = data(ctx); + #ifdef KSSL_DEBUG { int i; @@ -132,10 +154,16 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, printf("\n"); } #endif /* KSSL_DEBUG */ + if (dat->stream.cbc) + { + (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv); + return 1; + } + if (inl>=EVP_MAXCHUNK) { DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, - &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, + &dat->ks1, &dat->ks2, &dat->ks3, (DES_cblock *)ctx->iv, ctx->encrypt); inl-=EVP_MAXCHUNK; in +=EVP_MAXCHUNK; @@ -143,7 +171,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } if (inl) DES_ede3_cbc_encrypt(in, out, (long)inl, - &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, + &dat->ks1, &dat->ks2, &dat->ks3, (DES_cblock *)ctx->iv, ctx->encrypt); return 1; } @@ -236,16 +264,35 @@ static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; + DES_EDE_KEY *dat = data(ctx); + + dat->stream.cbc = NULL; +#if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) + { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) + { + des_t4_key_expand(&deskey[0],&dat->ks1); + des_t4_key_expand(&deskey[1],&dat->ks2); + memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1)); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +#endif #ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1) - !! DES_set_key_checked(&deskey[1],&data(ctx)->ks2)) + if (DES_set_key_checked(&deskey[0],&dat->ks1) + !! DES_set_key_checked(&deskey[1],&dat->ks2)) return 0; #else - DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2); + DES_set_key_unchecked(&deskey[0],&dat->ks1); + DES_set_key_unchecked(&deskey[1],&dat->ks2); #endif - memcpy(&data(ctx)->ks3,&data(ctx)->ks1, - sizeof(data(ctx)->ks1)); + memcpy(&dat->ks3,&dat->ks1, + sizeof(dat->ks1)); return 1; } @@ -253,6 +300,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; + DES_EDE_KEY *dat = data(ctx); + #ifdef KSSL_DEBUG { int i; @@ -264,15 +313,32 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, } #endif /* KSSL_DEBUG */ + dat->stream.cbc = NULL; +#if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) + { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) + { + des_t4_key_expand(&deskey[0],&dat->ks1); + des_t4_key_expand(&deskey[1],&dat->ks2); + des_t4_key_expand(&deskey[2],&dat->ks3); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +#endif #ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1) - || DES_set_key_checked(&deskey[1],&data(ctx)->ks2) - || DES_set_key_checked(&deskey[2],&data(ctx)->ks3)) + if (DES_set_key_checked(&deskey[0],&dat->ks1) + || DES_set_key_checked(&deskey[1],&dat->ks2) + || DES_set_key_checked(&deskey[2],&dat->ks3)) return 0; #else - DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2); - DES_set_key_unchecked(&deskey[2],&data(ctx)->ks3); + DES_set_key_unchecked(&deskey[0],&dat->ks1); + DES_set_key_unchecked(&deskey[1],&dat->ks2); + DES_set_key_unchecked(&deskey[2],&dat->ks3); #endif return 1; } diff --git a/crypto/perlasm/sparcv9_modes.pl b/crypto/perlasm/sparcv9_modes.pl index 363da32d59..a13ffcec2d 100644 --- a/crypto/perlasm/sparcv9_modes.pl +++ b/crypto/perlasm/sparcv9_modes.pl @@ -1582,6 +1582,63 @@ my %movxopf = ( "movdtox" => 0x110, } } +sub undes { +my ($mnemonic)=shift; +my @args=@_; +my ($ref,$opf); +my %desopf = ( "des_round" => 0b1001, + "des_ip" => 0b100110100, + "des_iip" => 0b100110101, + "des_kexpand" => 0b100110110 ); + + $ref = "$mnemonic\t".join(",",@_); + + if (defined($opf=$desopf{$mnemonic})) { # 4-arg + if ($mnemonic eq "des_round") { + foreach (@args[0..3]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25, + $ref; + } elsif ($mnemonic eq "des_kexpand") { # 3-arg + foreach (@args[0..2]) { + return $ref if (!/(%f)?([0-9]{1,2})/); + $_=$2; + if ($2>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($2|$2>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25, + $ref; + } else { # 2-arg + foreach (@args[0..1]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25, + $ref; + } + } else { + return $ref; + } +} + sub emit_assembler { foreach (split("\n",$::code)) { s/\`([^\`]*)\`/eval $1/ge; @@ -1600,6 +1657,9 @@ sub emit_assembler { s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ &uncamellia3($1,$2,$3,$4) /geo or + s/\b(des_\w+)\s+(?%f[0-9]{1,2}),\s*(?[%fx0-9]+)(,\s*(?%f[0-9]{1,2})(,\s*(?%f[0-9]{1,2}))?)?/ + &undes($1,$+{rs1},$+{rs2},$+{rs3},$+{rs4}) + /geo or s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/ &unmovxtox($1,$2,$3) /geo or diff --git a/test/test_t4 b/test/test_t4 index 826e1fd779..5cecb56394 100755 --- a/test/test_t4 +++ b/test/test_t4 @@ -18,7 +18,8 @@ if [ 1 ]; then HASH=`cat $PROG | $PROG dgst -hex` - AES_ALGS=" camellia-128-cbc camellia-128-cfb \ + AES_ALGS=" des-cbc des-ede-cbc des-ede3-cbc \ + camellia-128-cbc camellia-128-cfb \ camellia-192-cbc camellia-192-cfb \ camellia-256-cbc camellia-256-cfb \ aes-128-ctr aes-128-cbc aes-128-cfb aes-128-ofb \