+45% RC4 performance boost on Intel EM64T core. Unrolled loop providing
author Andy Polyakov <appro@openssl.org>
Wed, 6 Apr 2005 09:45:42 +0000 (09:45 +0000)
committer Andy Polyakov <appro@openssl.org>
Wed, 6 Apr 2005 09:45:42 +0000 (09:45 +0000)
further +35% will follow...

Submitted by: Zou Nanhai

crypto/rc4/asm/rc4-amd64.pl

index 9e0da8af995604f1f5343fe3b5034b15d0d9657d..2d3dedde0351131848dd48a1287dae67e21877a0 100755 (executable)
@@ -181,7 +181,7 @@ $code.=<<___;
 
 .align 16
 .LRC4_CHAR:
-       inc     $XX#b
+       add     \$1,$XX#b
        movzb   `&PTR("BYTE:[$dat+$XX]")`,$TX#d
        add     $TX#b,$YY#b
        movzb   `&PTR("BYTE:[$dat+$YY]")`,$TY#d
@@ -191,9 +191,9 @@ $code.=<<___;
        movzb   `&PTR("BYTE:[$dat+$TY]")`,$TY#d
        xorb    `&PTR("BYTE:[$inp]")`,$TY#b
        movb    $TY#b,`&PTR("BYTE:[$out]")`
-       inc     $inp
-       inc     $out
-       dec     $len
+       lea     1($inp),$inp
+       lea     1($out),$out
+       sub     \$1,$len
        jnz     .LRC4_CHAR
        jmp     .Lexit
 ___