Two extra instructions in RC4 character loop give an 80% performance improvement
[openssl.git] / crypto / rc4 / asm / rc4-x86_64.pl
index 4b990cba077e21ac5233724af0e1f1d9d1045dcf..5236afec12b173a642274caf34169b5d04a057eb 100755 (executable)
@@ -221,6 +221,8 @@ $code.=<<___;
        movb    $TY#b,($dat,$XX[0])
        add     $TX[0]#b,$TY#b
        add     \$1,$XX[0]#b
+       movzb   $TY#b,$TY#d
+       movzb   $XX[0]#b,$XX[0]#d
        movzb   ($dat,$TY),$TY#d
        movzb   ($dat,$XX[0]),$TX[0]#d
        xorb    ($inp),$TY#b