projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
bn/asm/x86_64-mont5.pl: fix carry bug in bn_sqr8x_internal.
[openssl.git]
/
crypto
/
bn
/
asm
/
x86_64-mont5.pl
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 8f49391727f5f3a894fbd9756a82dde30a23563e..d041d738cfd5e3f49eae6d16274aa7af2b315de1 100755
(executable)
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -1934,6 +1934,7 @@ __bn_sqr8x_reduction:
.align 32
.L8x_tail_done:
.align 32
.L8x_tail_done:
+ xor %rax,%rax
add (%rdx),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
add (%rdx),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
@@ -1941,10 +1942,8 @@ __bn_sqr8x_reduction:
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
- adc \$0,%r15 # can't overflow, because we
- # started with "overhung" part
- # of multiplication
- xor %rax,%rax
+ adc \$0,%r15
+ adc \$0,%rax
neg $carry
.L8x_no_tail:
neg $carry
.L8x_no_tail:
@@ -3384,6 +3383,7 @@ __bn_sqrx8x_reduction:
.align 32
.Lsqrx8x_tail_done:
.align 32
.Lsqrx8x_tail_done:
+ xor %rax,%rax
add 24+8(%rsp),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
add 24+8(%rsp),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
@@ -3391,10 +3391,8 @@ __bn_sqrx8x_reduction:
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
- adc \$0,%r15 # can't overflow, because we
- # started with "overhung" part
- # of multiplication
- mov $carry,%rax # xor %rax,%rax
+ adc \$0,%r15
+ adc \$0,%rax
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
.Lsqrx8x_no_tail: # %cf is 0 if jumped here
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
.Lsqrx8x_no_tail: # %cf is 0 if jumped here
@@ -3409,7 +3407,7 @@ __bn_sqrx8x_reduction:
adc 8*5($tptr),%r13
adc 8*6($tptr),%r14
adc 8*7($tptr),%r15
adc 8*5($tptr),%r13
adc 8*6($tptr),%r14
adc 8*7($tptr),%r15
- adc %rax,%rax # top-most carry
+ adc \$0,%rax # top-most carry
mov 32+8(%rsp),%rbx # n0
mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"
mov 32+8(%rsp),%rbx # n0
mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"