From: Andy Polyakov
Date: Sun, 15 May 2005 22:43:00 +0000 (+0000)
Subject: +20% performance improvement of P4-specific RC4_CHAR loop.
X-Git-Tag: OpenSSL_0_9_8-beta1~13^2~19
X-Git-Url: https://git.openssl.org/?p=openssl.git;a=commitdiff_plain;h=804515425aa520a186c4d1b919739d1a04d782e5;hp=81a86fcf1729dea3c62db53af8890d701870c5ce

+20% performance improvement of P4-specific RC4_CHAR loop.
---

diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl
index d6e98f0811..22bda4b451 100644
--- a/crypto/rc4/asm/rc4-586.pl
+++ b/crypto/rc4/asm/rc4-586.pl
@@ -200,22 +200,23 @@ sub RC4
 &lea ($ty,&DWP(0,$in,$ty));
 &mov (&swtmp(2),$ty);
+ &movz ($tx,&BP(0,$d,$x));
 # strangely enough unrolled loop performs over 20% slower...
 &set_label("RC4_CHAR_loop");
- &movz ($tx,&BP(0,$d,$x));
 &add (&LB($y),&LB($tx));
 &movz ($ty,&BP(0,$d,$y));
 &movb (&BP(0,$d,$y),&LB($tx));
 &movb (&BP(0,$d,$x),&LB($ty));
 &add (&LB($ty),&LB($tx));
 &movz ($ty,&BP(0,$d,$ty));
+ &add (&LB($x),1);
 &xorb (&LB($ty),&BP(0,$in));
- &movb (&BP(0,$out),&LB($ty));
- &inc (&LB($x));
- &inc ($in);
- &inc ($out);
+ &lea ($in,&BP(1,$in));
+ &movz ($tx,&BP(0,$d,$x));
 &cmp ($in,&swtmp(2));
+ &movb (&BP(0,$out),&LB($ty));
+ &lea ($out,&BP(1,$out));
 &jb (&label("RC4_CHAR_loop"));
 &set_label("finished");