From: Andy Polyakov Date: Sun, 26 Sep 1999 12:47:17 +0000 (+0000) Subject: RC4 tune-up. X-Git-Tag: OpenSSL_0_9_5beta1~500 X-Git-Url: https://git.openssl.org/?p=openssl.git;a=commitdiff_plain;h=6cc4ee03df4be25f3be44ae20bc8750b90c15705;ds=sidebyside RC4 tune-up. See comments in the code (after #if defined(RC4_CHUNK)) for more details. --- diff --git a/crypto/rc4/rc4_enc.c b/crypto/rc4/rc4_enc.c index 3256bea8cc..93a75cd8f9 100644 --- a/crypto/rc4/rc4_enc.c +++ b/crypto/rc4/rc4_enc.c @@ -78,6 +78,103 @@ void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, y=key->y; d=key->data; +#if defined(RC4_CHUNK) && (defined(L_ENDIAN) || defined(B_ENDIAN)) + /* + * The original reason for implementing this(*) was the fact that + * pre-21164a Alpha CPUs don't have byte load/store instructions + * and e.g. a byte store has to be done with 64-bit load, shift, + * and, or and finally 64-bit store. Peeking data and operating + * at natural word size made it possible to reduce the number of + * instructions as well as to perform early read-ahead without + * suffering from RAW (read-after-write) hazard. This resulted + * in >40%(**) performance improvement (on 21064 box with gcc). + * But it's not only Alpha users who win here:-) Thanks to the + * early-n-wide read-ahead this implementation also exhibits + * >40% speed-up on SPARC and almost 20% on MIPS. + * + * (*) "this" means code which recognizes the case when input + * and output pointers appear to be aligned at natural CPU + * word boundary. + * (**) i.e. according to 'apps/openssl speed rc4' benchmark, + * crypto/rc4/rc4speed.c exhibits almost 70% speed-up. 
+ * + * + */ + +#define RC4_STEP ( \ + x=(x+1) &0xff, \ + tx=d[x], \ + y=(tx+y)&0xff, \ + ty=d[y], \ + d[y]=tx, \ + d[x]=ty, \ + (RC4_CHUNK)d[(tx+ty)&0xff]\ + ) + +#if defined(L_ENDIAN) +# define SHFT(c) ((c)*8) +# define MASK(i) (((RC4_CHUNK)-1)>>((sizeof(RC4_CHUNK)-(i))<<3)) +# define SHINC 8 +#elif defined(B_ENDIAN) +# define SHFT(c) ((sizeof(RC4_CHUNK)-(c)-1)*8) +# define MASK(i) (((RC4_CHUNK)-1)<<((sizeof(RC4_CHUNK)-(i))<<3)) +# define SHINC -8 +#else +# error "L_ENDIAN or B_ENDIAN *must* be defined!" +#endif + + if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | + ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 + ) { + RC4_CHUNK ichunk,cipher; + + for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) { + ichunk = *(RC4_CHUNK *)indata; + cipher = RC4_STEP<x=x; key->y=y; }