ec/asm/x25519-x86_64.pl: fix base 2^64 add/sub and final reduction.
author Andy Polyakov <appro@openssl.org>
Wed, 11 Jul 2018 20:08:02 +0000 (22:08 +0200)
committer Andy Polyakov <appro@openssl.org>
Sun, 15 Jul 2018 17:04:48 +0000 (19:04 +0200)
Base 2^64 addition/subtraction and final reduction failed to treat
partially reduced values correctly.

Thanks to the Wycheproof Project for the vectors and to Paul Kehrer for the report.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)

crypto/ec/asm/x25519-x86_64.pl

index 930d7bdba1007d4d868faf46ee378c8d4c387fe5..0936d5bc6da83f9b7ebb9abcc11977b467ee311c 100755 (executable)
@@ -698,12 +698,16 @@ x25519_fe64_add:
 
        add     %rax,$acc0
        adc     \$0,$acc1
-       mov     $acc0,8*0(%rdi)
        adc     \$0,$acc2
        mov     $acc1,8*1(%rdi)
        adc     \$0,$acc3
        mov     $acc2,8*2(%rdi)
+       sbb     %rax,%rax               # cf -> mask
        mov     $acc3,8*3(%rdi)
+       and     \$38,%rax
+
+       add     %rax,$acc0
+       mov     $acc0,8*0(%rdi)
 
        ret
 .size  x25519_fe64_add,.-x25519_fe64_add
@@ -727,12 +731,16 @@ x25519_fe64_sub:
 
        sub     %rax,$acc0
        sbb     \$0,$acc1
-       mov     $acc0,8*0(%rdi)
        sbb     \$0,$acc2
        mov     $acc1,8*1(%rdi)
        sbb     \$0,$acc3
        mov     $acc2,8*2(%rdi)
+       sbb     %rax,%rax               # cf -> mask
        mov     $acc3,8*3(%rdi)
+       and     \$38,%rax
+
+       sub     %rax,$acc0
+       mov     $acc0,8*0(%rdi)
 
        ret
 .size  x25519_fe64_sub,.-x25519_fe64_sub
@@ -751,6 +759,7 @@ x25519_fe64_tobytes:
        sar     \$63,$acc3              # most significant bit -> mask
        shr     \$1,%rax                # most significant bit cleared
        and     \$19,$acc3
+       add     \$19,$acc3              # compare to modulus in the same go
 
        add     $acc3,$acc0
        adc     \$0,$acc1
@@ -760,14 +769,18 @@ x25519_fe64_tobytes:
        lea     (%rax,%rax),$acc3
        sar     \$63,%rax               # most significant bit -> mask
        shr     \$1,$acc3               # most significant bit cleared
+       not     %rax
        and     \$19,%rax
 
-       add     %rax,$acc0
+       sub     %rax,$acc0
+       sbb     \$0,$acc1
+       sbb     \$0,$acc2
+       sbb     \$0,$acc3
 
+       mov     $acc0,8*0(%rdi)
        mov     $acc1,8*1(%rdi)
        mov     $acc2,8*2(%rdi)
        mov     $acc3,8*3(%rdi)
-       mov     $acc0,8*0(%rdi)
 
        ret
 .size  x25519_fe64_tobytes,.-x25519_fe64_tobytes