ec/asm/ecp_nistz256-x86_64.pl: get corner case logic right.
authorAndy Polyakov <appro@openssl.org>
Wed, 3 Feb 2016 10:26:11 +0000 (11:26 +0100)
committerAndy Polyakov <appro@openssl.org>
Tue, 23 Feb 2016 20:22:01 +0000 (21:22 +0100)
RT#4284

Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/ec/asm/ecp_nistz256-x86_64.pl

index c2621c2..824d7b5 100755 (executable)
@@ -2044,6 +2044,7 @@ $code.=<<___;
        push    %r15
        sub     \$32*5+8, %rsp
 
+.Lpoint_double_shortcut$x:
        movdqu  0x00($a_ptr), %xmm0             # copy  *(P256_POINT *)$a_ptr.x
        mov     $a_ptr, $b_ptr                  # backup copy
        movdqu  0x10($a_ptr), %xmm1
@@ -2334,6 +2335,7 @@ $code.=<<___;
         mov    0x40+8*1($b_ptr), $acc6
         mov    0x40+8*2($b_ptr), $acc7
         mov    0x40+8*3($b_ptr), $acc0
+       movq    $b_ptr, %xmm1
 
        lea     0x40-$bias($b_ptr), $a_ptr
        lea     $Z1sqr(%rsp), $r_ptr            # Z1^2
@@ -2389,7 +2391,7 @@ $code.=<<___;
        test    $acc0, $acc0
        jnz     .Ladd_proceed$x                 # (in1infty || in2infty)?
        test    $acc1, $acc1
-       jz      .Ladd_proceed$x                 # is_equal(S1,S2)?
+       jz      .Ladd_double$x                  # is_equal(S1,S2)?
 
        movq    %xmm0, $r_ptr                   # restore $r_ptr
        pxor    %xmm0, %xmm0
@@ -2401,6 +2403,13 @@ $code.=<<___;
        movdqu  %xmm0, 0x50($r_ptr)
        jmp     .Ladd_done$x
 
+.align 32
+.Ladd_double$x:
+       movq    %xmm1, $a_ptr                   # restore $a_ptr
+       movq    %xmm0, $r_ptr                   # restore $r_ptr
+       add     \$`32*(18-5)`, %rsp             # difference in frame sizes
+       jmp     .Lpoint_double_shortcut$x
+
 .align 32
 .Ladd_proceed$x:
        `&load_for_sqr("$R(%rsp)", "$src0")`