ec/asm/x25519-x86_64.pl: remove redundant carry chain.
author: Andy Polyakov <appro@openssl.org>
Tue, 27 Feb 2018 18:02:21 +0000 (19:02 +0100)
committer: Andy Polyakov <appro@openssl.org>
Thu, 1 Mar 2018 12:59:28 +0000 (13:59 +0100)
Why is it redundant? We're looking at the carry from the addition of a
small, 11-bit number to a 256-bit one. A carry can mean only one thing:
the resulting first limb is a small number and the remaining ones are zeros.
Hence adding 38 to the first limb can't carry.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/5476)

crypto/ec/asm/x25519-x86_64.pl

index 9a5693965efcf1809c26bd867081cdb4640d5697..930d7bdba1007d4d868faf46ee378c8d4c387fe5 100755 (executable)
 # P4                   +22%            +40%
 # Sandy Bridge         -3%             +11%
 # Haswell              -1%             +13%
-# Broadwell(***)       +26%            +30%
-# Skylake(***)         +30%            +47%
+# Broadwell(***)       +30%            +35%
+# Skylake(***)         +33%            +47%
 # Silvermont           +20%            +26%
 # Goldmont             +40%            +50%
 # Bulldozer            +20%            +9%
-# Ryzen(***)           +35%            +32%
+# Ryzen(***)           +43%            +40%
 # VIA                  +170%           +120%
 #
 # (*)  amd64-51 is popular assembly implementation with 2^51 radix,
@@ -631,13 +631,10 @@ x25519_fe64_sqr:
        and     \$38,%rax
 
        add     %rax,$acc0
-       adc     \$0,$acc1
-       mov     $acc0,8*0(%rdi)
-       adc     \$0,$acc2
        mov     $acc1,8*1(%rdi)
-       adc     \$0,$acc3
        mov     $acc2,8*2(%rdi)
        mov     $acc3,8*3(%rdi)
+       mov     $acc0,8*0(%rdi)
 
        mov     8*3(%rsp),%r15
        mov     8*4(%rsp),%r14
@@ -674,13 +671,10 @@ x25519_fe64_mul121666:
        and     \$38,%rax
 
        add     %rax,$acc0
-       adc     \$0,$acc1
-       mov     $acc0,8*0(%rdi)
-       adc     \$0,$acc2
        mov     $acc1,8*1(%rdi)
-       adc     \$0,$acc3
        mov     $acc2,8*2(%rdi)
        mov     $acc3,8*3(%rdi)
+       mov     $acc0,8*0(%rdi)
 
        ret
 .size  x25519_fe64_mul121666,.-x25519_fe64_mul121666
@@ -769,14 +763,11 @@ x25519_fe64_tobytes:
        and     \$19,%rax
 
        add     %rax,$acc0
-       adc     \$0,$acc1
-       adc     \$0,$acc2
-       adc     \$0,$acc3
 
-       mov     $acc0,8*0(%rdi)
        mov     $acc1,8*1(%rdi)
        mov     $acc2,8*2(%rdi)
        mov     $acc3,8*3(%rdi)
+       mov     $acc0,8*0(%rdi)
 
        ret
 .size  x25519_fe64_tobytes,.-x25519_fe64_tobytes