X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Frc4%2Frc4_enc.c;h=93a75cd8f9f0305cf7b600a01cb5d5608d621538;hp=de57a970bf81013015bdab6b576fbe207b4e7a22;hb=6cc4ee03df4be25f3be44ae20bc8750b90c15705;hpb=d02b48c63a58ea4367a0e905979f140b7d090f86 diff --git a/crypto/rc4/rc4_enc.c b/crypto/rc4/rc4_enc.c index de57a970bf..93a75cd8f9 100644 --- a/crypto/rc4/rc4_enc.c +++ b/crypto/rc4/rc4_enc.c @@ -1,5 +1,5 @@ -/* crypto/rc4/rc4_enc.org */ -/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) +/* crypto/rc4/rc4_enc.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * * This package is an SSL implementation written @@ -56,35 +56,8 @@ * [including the GNU Public Licence.] */ -/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - * - * Always modify rc4_enc.org since rc4_enc.c is automatically generated from - * it during SSLeay configuration. - * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - */ - -#include "rc4.h" - -/* if this is defined data[i] is used instead of *data, this is a %20 - * speedup on x86 */ -#undef RC4_INDEX - -char *RC4_version="RC4 part of SSLeay 0.8.1a 24-Jun-1998"; - -char *RC4_options() - { -#ifdef RC4_INDEX - if (sizeof(RC4_INT) == 1) - return("rc4(idx,char)"); - else - return("rc4(idx,int)"); -#else - if (sizeof(RC4_INT) == 1) - return("rc4(ptr,char)"); - else - return("rc4(ptr,int)"); -#endif - } +#include +#include "rc4_locl.h" /* RC4 as implemented from a posting from * Newsgroups: sci.crypt @@ -94,44 +67,8 @@ char *RC4_options() * Date: Wed, 14 Sep 1994 06:35:31 GMT */ -void RC4_set_key(key, len, data) -RC4_KEY *key; -int len; -register unsigned char *data; - { - register RC4_INT tmp; - register int id1,id2; - register RC4_INT *d; - unsigned int i; - - d= &(key->data[0]); - for (i=0; i<256; i++) - d[i]=i; - key->x = 0; - key->y = 0; - id1=id2=0; - -#define SK_LOOP(n) { \ - tmp=d[(n)]; \ - id2 = (data[id1] + tmp + id2) & 0xff; \ - if (++id1 == len) id1=0; \ - d[(n)]=d[id2]; \ - d[id2]=tmp; } - - for (i=0; i < 256; i+=4) - { - SK_LOOP(i+0); - SK_LOOP(i+1); - SK_LOOP(i+2); - SK_LOOP(i+3); - } - } - -void RC4(key, len, indata, outdata) -RC4_KEY *key; -unsigned long len; -unsigned char *indata; -unsigned char *outdata; +void RC4(RC4_KEY *key, unsigned long len, unsigned char *indata, + unsigned char *outdata) { register RC4_INT *d; register RC4_INT x,y,tx,ty; @@ -141,6 +78,103 @@ unsigned char *outdata; y=key->y; d=key->data; +#if defined(RC4_CHUNK) && (defined(L_ENDIAN) || defined(B_ENDIAN)) + /* + * The original reason for implementing this(*) was the fact that + * pre-21164a Alpha CPUs don't have byte load/store instructions + * and e.g. a byte store has to be done with 64-bit load, shift, + * and, or and finally 64-bit store. Peaking data and operating + * at natural word size made it possible to reduce amount of + * instructions as well as to perform early read-ahead without + * suffering from RAW (read-after-write) hazard. This resulted + * in >40%(**) performance improvement (on 21064 box with gcc). + * But it's not only Alpha users who win here:-) Thanks to the + * early-n-wide read-ahead this implementation also exhibits + * >40% speed-up on SPARC and almost 20% on MIPS. + * + * (*) "this" means code which recognizes the case when input + * and output pointers appear to be aligned at natural CPU + * word boundary. + * (**) i.e. according to 'apps/openssl speed rc4' benchmark, + * crypto/rc4/rc4speed.c exhibits almost 70% speed-up. + * + * + */ + +#define RC4_STEP ( \ + x=(x+1) &0xff, \ + tx=d[x], \ + y=(tx+y)&0xff, \ + ty=d[y], \ + d[y]=tx, \ + d[x]=ty, \ + (RC4_CHUNK)d[(tx+ty)&0xff]\ + ) + +#if defined(L_ENDIAN) +# define SHFT(c) ((c)*8) +# define MASK(i) (((RC4_CHUNK)-1)>>((sizeof(RC4_CHUNK)-(i))<<3)) +# define SHINC 8 +#elif defined(B_ENDIAN) +# define SHFT(c) ((sizeof(RC4_CHUNK)-(c)-1)*8) +# define MASK(i) (((RC4_CHUNK)-1)<<((sizeof(RC4_CHUNK)-(i))<<3)) +# define SHINC -8 +#else +# error "L_ENDIAN or B_ENDIAN *must* be defined!" +#endif + + if ( ( ((unsigned long)indata & (sizeof(RC4_CHUNK)-1)) | + ((unsigned long)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 + ) { + RC4_CHUNK ichunk,cipher; + + for (;len&-sizeof(RC4_CHUNK);len-=sizeof(RC4_CHUNK)) { + ichunk = *(RC4_CHUNK *)indata; + cipher = RC4_STEP<>3L); if (i) { @@ -190,6 +223,7 @@ unsigned char *outdata; RC4_LOOP(indata,outdata,6); if (--i == 0) break; } } + } key->x=x; key->y=y; }