bn/asm/bn-c64xplus.asm: update commentary.
author Andy Polyakov <appro@openssl.org>
Mon, 21 Dec 2015 13:26:12 +0000 (14:26 +0100)
committer Andy Polyakov <appro@openssl.org>
Tue, 22 Dec 2015 09:30:03 +0000 (10:30 +0100)
Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/bn/asm/bn-c64xplus.asm

index 7cd58817e72790b6df43bec01c5c07260922c16f..7b72bff4acda60849be7102be55bed81b1fd42d3 100644 (file)
@@ -284,8 +284,9 @@ _bn_mul_comba4:
        .if     0
        BNOP    sploopNxM?,3
        ;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
-       ;; because of read-after-write penalties, it's rather
-       ;; n*2*(n+3)+10, or 66 cycles [plus various overheads]...
+       ;; because of the low-counter effect, when the prologue phase
+       ;; finishes before the SPKERNEL instruction is reached. As a
+       ;; result it's 25% slower than expected...
        MVK     4,B0            ; N, RILC
 ||     MVK     4,A0            ; M, outer loop counter
 ||     MV      ARG1,A5         ; copy ap