RC4 for s390x.
author Andy Polyakov <appro@openssl.org>
Wed, 11 Feb 2009 10:01:36 +0000 (10:01 +0000)
committer Andy Polyakov <appro@openssl.org>
Wed, 11 Feb 2009 10:01:36 +0000 (10:01 +0000)
Configure
TABLE
crypto/rc4/Makefile
crypto/rc4/asm/rc4-s390x.pl [new file with mode: 0644]

index 31f06dcb4cb09e42b454ee5e8cd87274456f4359..700982af550294d09acc1a314f7f0b19b916b5a3 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -131,7 +131,7 @@ my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-
 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::::::::void";
 my $mips3_asm=":bn-mips3.o::::::::::::void";
-my $s390x_asm="s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o:::::::void";
+my $s390x_asm="s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::void";
 my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::void";
 my $ppc32_asm="ppccpuid.o:bn-ppc.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
 my $ppc64_asm="ppccpuid.o:bn-ppc.o ppc-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
diff --git a/TABLE b/TABLE
index e3caccaa012b65b61adc13d64cd6c1321633f0be..eedbecced9c8e0c300796a007b6e693051a5b477 100644 (file)
--- a/TABLE
+++ b/TABLE
@@ -3550,7 +3550,7 @@ $bf_obj       =
 $md5_obj      = 
 $sha1_obj     = sha1-s390x.o sha256-s390x.o sha512-s390x.o
 $cast_obj     = 
-$rc4_obj      = 
+$rc4_obj      = rc4-s390x.o
 $rmd160_obj   = 
 $rc5_obj      = 
 $wp_obj       = 
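diff --git a/crypto/rc4/Makefile b/crypto/rc4/Makefile
index f932a8903567cf400b6471722448b1642d8bd5bc..264451a213f9db1b53a3b4ddd3611042049c236a 100644 (file)
--- a/crypto/rc4/Makefile
+++ b/crypto/rc4/Makefile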
@@ -50,6 +50,9 @@ rc4-x86_64.s: asm/rc4-x86_64.pl
 rc4-ia64.S: asm/rc4-ia64.pl
        $(PERL) asm/rc4-ia64.pl $(CFLAGS) > $@
 
+rc4-s390x.s:   asm/rc4-s390x.pl
+       $(PERL) asm/rc4-s390x.pl > $@
+
 rc4-ia64.s: rc4-ia64.S
        @case `awk '/^#define RC4_INT/{print$$NF}' $(TOP)/include/openssl/opensslconf.h` in \
        int)    set -x; $(CC) $(CFLAGS) -DSZ=4 -E rc4-ia64.S > $@ ;; \
diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl
new file mode 100644 (file)
index 0000000..4366c4f
--- /dev/null
@@ -0,0 +1,205 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# February 2009
+#
+# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to avoid
+# arithmetic instructions, but adhere to load and load address in
+# order to minimize Address Generation Interlock.
+
+# Register aliases shared by the routines below: $rp is the register
+# branched to on return, $sp is the base of the area where the
+# routines save and restore the registers they modify.
+($rp,$sp)=("%r14","%r15");
+
+# The generated assembly accumulates in $code; it opens with the
+# .text directive followed by one blank line.
+$code=".text\n\n";
+
+# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
+#
+# Emits the RC4 routine. The key structure is accessed as bytes: x at
+# offset 0, y at offset 1 and the state table S (indexed modulo 256)
+# starting at offset 2. Input is processed by .Loop8, eight bytes per
+# iteration (the key stream bytes are gathered in $acc and applied
+# with a single 64-bit load/xor/store), and the remaining len&7 bytes
+# by the byte-wise .Loop1. x is kept pre-incremented in $XX[0]
+# throughout, hence the decrement before it is stored back at .Lexit.
+{
+# $acc collects key stream bytes, $cnt counts 8-byte iterations; the
+# remaining four are the arguments in the order of the prototype above.
+$acc="%r0";
+$cnt="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$out="%r5";
+
+# The x index and S[x] are double-buffered: while a round works with
+# $XX[0]/$TX[0] it already prepares $XX[1]/$TX[1] for the next round.
+# $YY is the y index, $TY holds S[y] and then the key stream index.
+@XX=("%r6","%r7");
+@TX=("%r8","%r9");
+$YY="%r10";
+$TY="%r11";
+
+# Prologue: save %r6-%r11, load x and y, pre-increment x (mod 256),
+# fetch S[x], and skip the 8-byte loop when len>>3 is zero.
+$code.=<<___;
+.globl RC4
+.type  RC4,\@function
+.align 64
+RC4:
+       stmg    %r6,%r11,48($sp)
+       llgc    $XX[0],0($key)
+       llgc    $YY,1($key)
+       la      $XX[0],1($XX[0])
+       llgcr   $XX[0],$XX[0]
+       llgc    $TX[0],2($XX[0],$key)
+       srlg    $cnt,$len,3
+       ltgr    $cnt,$cnt
+       jz      .Lshort
+       j       .Loop8
+
+.align 64
+.Loop8:
+___
+# Eight unrolled rounds. Each round advances y by S[x], computes the
+# next x index, swaps S[x] and S[y], and leaves (S[x]+S[y]) mod 256 in
+# $TY as the index of this round's key stream byte.
+for ($i=0;$i<8;$i++) {
+$code.=<<___;
+       la      $YY,0($YY,$TX[0])       # $i
+       llgcr   $YY,$YY
+       la      $XX[1],1($XX[0])
+       llgcr   $XX[1],$XX[1]
+___
+# The key stream byte indexed by the previous round's $TY is fetched
+# one round late. Round 0 has nothing to fetch, round 1 starts $acc
+# afresh with a zero-extending load, and later rounds shift $acc left
+# by one byte and insert the new byte at the bottom.
+$code.=<<___ if ($i>1);
+       sllg    $acc,$acc,8
+       ic      $acc,2($TY,$key)
+___
+$code.=<<___ if ($i==1);
+       llgc    $acc,2($TY,$key)
+___
+# Swap S[x] and S[y] while prefetching S[x+1] for the next round. When
+# the next x index equals y, the prefetched value is replaced by the
+# value that was just stored into that slot ($TX[0]).
+$code.=<<___;
+       llgc    $TY,2($YY,$key)
+       stc     $TX[0],2($YY,$key)
+       llgc    $TX[1],2($XX[1],$key)
+       stc     $TY,2($XX[0],$key)
+       cr      $XX[1],$YY
+       jne     .Lcmov$i
+       la      $TX[1],0($TX[0])
+.Lcmov$i:
+       la      $TY,0($TY,$TX[0])
+       llgcr   $TY,$TY
+___
+# Swap the roles of the two x/S[x] register pairs for the next round.
+# After the eight rounds the arrays are back in their initial order.
+push(@TX,shift(@TX)); push(@XX,shift(@XX));     # "rotate" registers
+}
+
+# Tail of .Loop8: insert the eighth key stream byte, xor the 8-byte
+# input word with $acc and store it. $cnt holds the full 64-bit value
+# of len>>3 (srlg/ltgr above), so the loop is closed with the 64-bit
+# brctg; the 32-bit brct would decrement and test only the low word
+# and miscount for lengths of 2^35 bytes and more.
+# .Loop1 then handles the remaining len&7 bytes one at a time, and
+# .Lexit writes x (undoing the pre-increment) and y back to the key
+# structure and restores %r6-%r11.
+$code.=<<___;
+       lg      $TX[1],0($inp)
+       sllg    $acc,$acc,8
+       la      $inp,8($inp)
+       ic      $acc,2($TY,$key)
+       xgr     $acc,$TX[1]
+       stg     $acc,0($out)
+       la      $out,8($out)
+       brctg   $cnt,.Loop8
+
+.Lshort:
+       lghi    $acc,7
+       ngr     $len,$acc
+       jz      .Lexit
+       j       .Loop1
+
+.align 16
+.Loop1:
+       la      $YY,0($YY,$TX[0])
+       llgcr   $YY,$YY
+       llgc    $TY,2($YY,$key)
+       stc     $TX[0],2($YY,$key)
+       stc     $TY,2($XX[0],$key)
+       la      $TY,0($TY,$TX[0])
+       llgcr   $TY,$TY
+       la      $XX[0],1($XX[0])
+       llgcr   $XX[0],$XX[0]
+       llgc    $TY,2($TY,$key)
+       llgc    $TX[0],2($XX[0],$key)
+       llgc    $acc,0($inp)
+       la      $inp,1($inp)
+       xr      $acc,$TY
+       stc     $acc,0($out)
+       la      $out,1($out)
+       brct    $len,.Loop1
+
+.Lexit:
+       ahi     $XX[0],-1
+       stc     $XX[0],0($key)
+       stc     $YY,1($key)
+       lmg     %r6,%r11,48($sp)
+       br      $rp
+.size  RC4,.-RC4
+.string        "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+
+___
+}
+
+# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
+#
+# Emits the key schedule. The generated routine works in two passes:
+#
+# .L1stloop  clears the two bytes at offset 0 of the key structure
+#            (the bytes RC4 reads as x and y) and fills the table at
+#            offset 2 with the identity permutation 0..255.
+# .L2ndloop  for every table entry S[i], i=0..255, accumulates
+#            idx = (idx + S[i] + inp[iinp]) mod 256 and swaps S[i]
+#            with S[idx].
+#
+# $ikey runs from -256 up to 0, so 2+256($ikey,$key) addresses S[i].
+# It is incremented before the swap is stored, hence the 2+256-1
+# displacement on the store. The loop ends when the low byte of $ikey
+# becomes zero (tml), which first happens once it has counted up to 0;
+# the jz that consumes this condition is placed after the two stores,
+# so the final swap is still performed and the intervening llgc/stc
+# are relied on to leave the condition code untouched.
+#
+# $cnt counts the key bytes consumed (len is handled as a 32-bit
+# value); when it runs out it is reloaded from $len and $iinp is reset
+# to 0, so key material shorter than 256 bytes is reused cyclically.
+# NOTE(review): len == 0 is not special-cased - confirm that callers
+# never pass it.
+{
+# %r2-%r4 carry the arguments in prototype order; %r6-%r8 are saved on
+# entry and restored at .Ldone.
+$cnt="%r0";
+$idx="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$acc="%r5";
+$dat="%r6";
+$ikey="%r7";
+$iinp="%r8";
+
+$code.=<<___;
+.globl RC4_set_key
+.type  RC4_set_key,\@function
+.align 64
+RC4_set_key:
+       stmg    %r6,%r8,48($sp)
+       lhi     $cnt,256
+       la      $idx,0(%r0)
+       sth     $idx,0($key)
+.align 4
+.L1stloop:
+       stc     $idx,2($idx,$key)
+       la      $idx,1($idx)
+       brct    $cnt,.L1stloop
+
+       lghi    $ikey,-256
+       lr      $cnt,$len
+       la      $iinp,0(%r0)
+       la      $idx,0(%r0)
+.align 16
+.L2ndloop:
+       llgc    $acc,2+256($ikey,$key)
+       llgc    $dat,0($iinp,$inp)
+       la      $idx,0($idx,$acc)
+       la      $ikey,1($ikey)
+       la      $idx,0($idx,$dat)
+       la      $iinp,1($iinp)
+       llgcr   $idx,$idx
+       tml     $ikey,255
+       llgc    $dat,2($idx,$key)
+       stc     $dat,2+256-1($ikey,$key)
+       stc     $acc,2($idx,$key)
+       jz      .Ldone
+       brct    $cnt,.L2ndloop
+       lr      $cnt,$len
+       la      $iinp,0(%r0)
+       j       .L2ndloop
+.Ldone:
+       lmg     %r6,%r8,48($sp)
+       br      $rp
+.size  RC4_set_key,.-RC4_set_key
+
+___
+}
+
+# const char *RC4_options()
+#
+# Emits RC4_options, which loads the address of the constant string at
+# .Loptions (placed in .rodata) into %r2 and returns via %r14. Once
+# this last routine has been appended, the accumulated assembly is
+# printed.
+$code.=join("",map { "$_\n" } (
+       '.globl RC4_options',
+       '.type  RC4_options,@function',
+       '.align 16',
+       'RC4_options:',
+       '       larl    %r2,.Loptions',
+       '       br      %r14',
+       '.size  RC4_options,.-RC4_options',
+       '.section       .rodata',
+       '.Loptions:',
+       '.align 8',
+       '.string        "rc4(8x,char)"',
+));
+
+print $code;