ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function.
author Andy Polyakov <appro@openssl.org>
Wed, 18 Jul 2018 13:14:44 +0000 (15:14 +0200)
committer Andy Polyakov <appro@openssl.org>
Sun, 22 Jul 2018 13:21:44 +0000 (15:21 +0200)
The ecp_nistz256_scatter_w7 function is called when an application
attempts to use a custom generator, i.e. rarely. Even though the
non-x86_64 versions were wrong, this didn't affect point operations;
they were just not as fast as expected.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6738)

crypto/ec/asm/ecp_nistz256-armv4.pl
crypto/ec/asm/ecp_nistz256-armv8.pl
crypto/ec/asm/ecp_nistz256-ppc64.pl
crypto/ec/asm/ecp_nistz256-sparcv9.pl
crypto/ec/asm/ecp_nistz256-x86.pl

index 84dcb6e17f59c8c714230d9209b0f83335427d33..83abbdd895780d157f6dd71c02712d6dc8d7a687 100755 (executable)
@@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7:
 .Loop_scatter_w7:
        ldr     $mask,[$inp],#4
        subs    $index,$index,#1
-       strb    $mask,[$out,#64*0-1]
+       strb    $mask,[$out,#64*0]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*1-1]
+       strb    $mask,[$out,#64*1]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*2-1]
+       strb    $mask,[$out,#64*2]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*3-1]
+       strb    $mask,[$out,#64*3]
        add     $out,$out,#64*4
        bne     .Loop_scatter_w7
 
index 2705d7cf063d43f9030562662f44fe534ac23a91..1361cb395ffb207925a8db93909b0cc60198c758 100644 (file)
@@ -1776,21 +1776,21 @@ ecp_nistz256_scatter_w7:
        prfm    pstl1strm,[$out,#4096+64*5]
        prfm    pstl1strm,[$out,#4096+64*6]
        prfm    pstl1strm,[$out,#4096+64*7]
-       strb    w3,[$out,#64*0-1]
+       strb    w3,[$out,#64*0]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*1-1]
+       strb    w3,[$out,#64*1]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*2-1]
+       strb    w3,[$out,#64*2]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*3-1]
+       strb    w3,[$out,#64*3]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*4-1]
+       strb    w3,[$out,#64*4]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*5-1]
+       strb    w3,[$out,#64*5]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*6-1]
+       strb    w3,[$out,#64*6]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*7-1]
+       strb    w3,[$out,#64*7]
        add     $out,$out,#64*8
        b.ne    .Loop_scatter_w7
 
index 0c3c186b3152511042e30187203219f35b6c4e0e..116792f7d2ce8a8c6a0ecbf4e2530e5037452055 100755 (executable)
@@ -2297,21 +2297,21 @@ ecp_nistz256_scatter_w7:
 
 .Loop_scatter_w7:
        ldu     r0,8($inp)
-       stb     r0,64*0-1($out)
+       stb     r0,64*0($out)
        srdi    r0,r0,8
-       stb     r0,64*1-1($out)
+       stb     r0,64*1($out)
        srdi    r0,r0,8
-       stb     r0,64*2-1($out)
+       stb     r0,64*2($out)
        srdi    r0,r0,8
-       stb     r0,64*3-1($out)
+       stb     r0,64*3($out)
        srdi    r0,r0,8
-       stb     r0,64*4-1($out)
+       stb     r0,64*4($out)
        srdi    r0,r0,8
-       stb     r0,64*5-1($out)
+       stb     r0,64*5($out)
        srdi    r0,r0,8
-       stb     r0,64*6-1($out)
+       stb     r0,64*6($out)
        srdi    r0,r0,8
-       stb     r0,64*7-1($out)
+       stb     r0,64*7($out)
        addi    $out,$out,64*8
        bdnz    .Loop_scatter_w7
 
index 9af1fae8538d0f6ceb2506afb811d04d46a0281f..59df0f73b61d9cfea10775f51b369065d25332fe 100755 (executable)
@@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7:
        ld      [$inp],%l0
        add     $inp,4,$inp
        subcc   $index,1,$index
-       stb     %l0,[$out+64*0-1]
+       stb     %l0,[$out+64*0]
        srl     %l0,8,%l1
-       stb     %l1,[$out+64*1-1]
+       stb     %l1,[$out+64*1]
        srl     %l0,16,%l2
-       stb     %l2,[$out+64*2-1]
+       stb     %l2,[$out+64*2]
        srl     %l0,24,%l3
-       stb     %l3,[$out+64*3-1]
+       stb     %l3,[$out+64*3]
        bne     .Loop_scatter_w7
        add     $out,64*4,$out
 
index a89a9f93bc6e3a2bef6fcdf35334a4a792a47095..8fdd76b84fdb3f52ca73dffb3416092729b52b09 100755 (executable)
@@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) {
        &mov    ("esi",&wparam(1));
        &mov    ("ebp",&wparam(2));
 
-       &lea    ("edi",&DWP(-1,"edi","ebp"));
+       &lea    ("edi",&DWP(0,"edi","ebp"));
        &mov    ("ebp",64/4);
 &set_label("scatter_w7_loop");
        &mov    ("eax",&DWP(0,"esi"));