projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Update copyright year
[openssl.git]
/
crypto
/
bn
/
asm
/
x86_64-mont5.pl
diff --git
a/crypto/bn/asm/x86_64-mont5.pl
b/crypto/bn/asm/x86_64-mont5.pl
index 8f49391727f5f3a894fbd9756a82dde30a23563e..f43e13d11643a58fde41a069b400bf4b010c09a2 100755
(executable)
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@
-1,5
+1,5
@@
#! /usr/bin/env perl
#! /usr/bin/env perl
-# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@
-93,8
+93,10
@@
$code=<<___;
.type bn_mul_mont_gather5,\@function,6
.align 64
bn_mul_mont_gather5:
.type bn_mul_mont_gather5,\@function,6
.align 64
bn_mul_mont_gather5:
+.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
mov ${num}d,${num}d
mov %rsp,%rax
+.cfi_def_cfa_register %rax
test \$7,${num}d
jnz .Lmul_enter
___
test \$7,${num}d
jnz .Lmul_enter
___
@@
-108,11
+110,17
@@
$code.=<<___;
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
neg $num
mov %rsp,%r11
neg $num
mov %rsp,%r11
@@
-145,6
+153,7
@@
$code.=<<___;
lea .Linc(%rip),%r10
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
lea .Linc(%rip),%r10
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
+.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body:
lea 128($bp),%r12 # reassign $bp (+size optimization)
.Lmul_body:
lea 128($bp),%r12 # reassign $bp (+size optimization)
@@
-410,38
+419,48
@@
$code.=<<___;
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
mov 8($ap,$i,8),%rax # tp[i+1]
lea 1($i),$i # i++
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
mov 8($ap,$i,8),%rax # tp[i+1]
lea 1($i),$i # i++
-	dec	$j			# doesn't affect CF!
+	dec	$j			# doesn't affect CF!
jnz .Lsub
sbb \$0,%rax # handle upmost overflow bit
jnz .Lsub
sbb \$0,%rax # handle upmost overflow bit
+ mov \$-1,%rbx
+ xor %rax,%rbx
xor $i,$i
xor $i,$i
- and %rax,$ap
- not %rax
- mov $rp,$np
- and %rax,$np
mov $num,$j # j=num
mov $num,$j # j=num
- or $np,$ap # ap=borrow?tp:rp
-.align 16
-.Lcopy: # copy or in-place refresh
- mov ($ap,$i,8),%rax
+
+.Lcopy: # conditional copy
+ mov ($rp,$i,8),%rcx
+ mov (%rsp,$i,8),%rdx
+ and %rbx,%rcx
+ and %rax,%rdx
mov $i,(%rsp,$i,8) # zap temporary vector
mov $i,(%rsp,$i,8) # zap temporary vector
- mov %rax,($rp,$i,8) # rp[i]=tp[i]
+ or %rcx,%rdx
+ mov %rdx,($rp,$i,8) # rp[i]=tp[i]
lea 1($i),$i
sub \$1,$j
jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp
lea 1($i),$i
sub \$1,$j
jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
.Lmul_epilogue:
ret
+.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
___
{{{
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
___
{{{
@@
-451,8
+470,10
@@
$code.=<<___;
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
+.cfi_startproc
.byte 0x67
mov %rsp,%rax
.byte 0x67
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
@@
-462,11
+483,17
@@
$code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
___
$code.=<<___;
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
.Lmul4x_prologue:
.byte 0x67
.Lmul4x_prologue:
.byte 0x67
@@
-522,22
+549,32
@@
$code.=<<___;
neg $num
mov %rax,40(%rsp)
neg $num
mov %rax,40(%rsp)
+.cfi_cfa_expression %rsp+40,deref,+8
.Lmul4x_body:
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
.Lmul4x_body:
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
ret
.Lmul4x_epilogue:
ret
+.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,\@abi-omnipotent
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,\@abi-omnipotent
@@
-1061,7
+1098,9
@@
$code.=<<___;
.type bn_power5,\@function,6
.align 32
bn_power5:
.type bn_power5,\@function,6
.align 32
bn_power5:
+.cfi_startproc
mov %rsp,%rax
mov %rsp,%rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
@@
-1071,11
+1110,17
@@
$code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
___
$code.=<<___;
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
.Lpower5_prologue:
shl \$3,${num}d # convert $num to bytes
.Lpower5_prologue:
shl \$3,${num}d # convert $num to bytes
@@
-1140,6
+1185,7
@@
$code.=<<___;
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lpower5_body:
movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr
.Lpower5_body:
movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr
@@
-1166,16
+1212,25
@@
$code.=<<___;
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
ret
.Lpower5_epilogue:
ret
+.cfi_endproc
.size bn_power5,.-bn_power5
.globl bn_sqr8x_internal
.size bn_power5,.-bn_power5
.globl bn_sqr8x_internal
@@
-1934,6
+1989,7
@@
__bn_sqr8x_reduction:
.align 32
.L8x_tail_done:
.align 32
.L8x_tail_done:
+ xor %rax,%rax
add (%rdx),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
add (%rdx),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
@@
-1941,10
+1997,8
@@
__bn_sqr8x_reduction:
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
- adc \$0,%r15 # can't overflow, because we
- # started with "overhung" part
- # of multiplication
- xor %rax,%rax
+ adc \$0,%r15
+ adc \$0,%rax
neg $carry
.L8x_no_tail:
neg $carry
.L8x_no_tail:
@@
-2056,14
+2110,22
@@
bn_from_montgomery:
.type bn_from_mont8x,\@function,6
.align 32
bn_from_mont8x:
.type bn_from_mont8x,\@function,6
.align 32
bn_from_mont8x:
+.cfi_startproc
.byte 0x67
mov %rsp,%rax
.byte 0x67
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
.Lfrom_prologue:
shl \$3,${num}d # convert $num to bytes
.Lfrom_prologue:
shl \$3,${num}d # convert $num to bytes
@@
-2128,6
+2190,7
@@
bn_from_mont8x:
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lfrom_body:
mov $num,%r11
lea 48(%rsp),%rax
.Lfrom_body:
mov $num,%r11
lea 48(%rsp),%rax
@@
-2171,7
+2234,6
@@
$code.=<<___ if ($addx);
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
- mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero
.align 32
jmp .Lfrom_mont_zero
.align 32
@@
-2183,11
+2245,12
@@
$code.=<<___;
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
- mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
+ mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax)
movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax)
@@
-2198,14
+2261,22
@@
$code.=<<___;
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
ret
.Lfrom_epilogue:
ret
+.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
___
}
.size bn_from_mont8x,.-bn_from_mont8x
___
}
@@
-2218,14
+2289,22
@@
$code.=<<___;
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
+.cfi_startproc
mov %rsp,%rax
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.Lmulx4x_enter:
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
.Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes
.Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes
@@
-2291,21
+2370,31
@@
bn_mulx4x_mont_gather5:
#
mov $n0, 32(%rsp) # save *n0
mov %rax,40(%rsp) # save original %rsp
#
mov $n0, 32(%rsp) # save *n0
mov %rax,40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lmulx4x_body:
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
.Lmulx4x_body:
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
ret
.Lmulx4x_epilogue:
ret
+.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,\@abi-omnipotent
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,\@abi-omnipotent
@@
-2333,7
+2422,7
@@
my $N=$STRIDE/4; # should match cache line size
$code.=<<___;
movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
$code.=<<___;
movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
-	lea	88-112(%rsp,%r10),%r10	# place the mask after tp[num+1] (+ICache optimizaton)
+	lea	88-112(%rsp,%r10),%r10	# place the mask after tp[num+1] (+ICache optimization)
lea 128($bp),$bptr # size optimization
pshufd \$0,%xmm5,%xmm5 # broadcast index
lea 128($bp),$bptr # size optimization
pshufd \$0,%xmm5,%xmm5 # broadcast index
@@
-2683,14
+2772,22
@@
$code.=<<___;
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
+.cfi_startproc
mov %rsp,%rax
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lpowerx5_enter:
push %rbx
.Lpowerx5_enter:
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
.Lpowerx5_prologue:
shl \$3,${num}d # convert $num to bytes
.Lpowerx5_prologue:
shl \$3,${num}d # convert $num to bytes
@@
-2762,6
+2859,7
@@
bn_powerx5:
movq $bptr,%xmm4
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
movq $bptr,%xmm4
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lpowerx5_body:
call __bn_sqrx8x_internal
.Lpowerx5_body:
call __bn_sqrx8x_internal
@@
-2784,17
+2882,26
@@
bn_powerx5:
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
ret
.Lpowerx5_epilogue:
ret
+.cfi_endproc
.size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal
.size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal
@@
-2803,6
+2910,7
@@
bn_powerx5:
.align 32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
.align 32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
+.cfi_startproc
##################################################################
# Squaring part:
#
##################################################################
# Squaring part:
#
@@
-3100,11
+3208,19
@@
$code.=<<___;
.align 32
.Lsqrx8x_break:
.align 32
.Lsqrx8x_break:
- sub 16+8(%rsp),%r8 # consume last carry
+ xor $zero,$zero
+ sub 16+8(%rsp),%rbx # mov 16(%rsp),%cf
+ adcx $zero,%r8
mov 24+8(%rsp),$carry # initial $tptr, borrow $carry
mov 24+8(%rsp),$carry # initial $tptr, borrow $carry
+ adcx $zero,%r9
mov 0*8($aptr),%rdx # a[8], modulo-scheduled
mov 0*8($aptr),%rdx # a[8], modulo-scheduled
- xor %ebp,%ebp # xor $zero,$zero
+ adc \$0,%r10
mov %r8,0*8($tptr)
mov %r8,0*8($tptr)
+ adc \$0,%r11
+ adc \$0,%r12
+ adc \$0,%r13
+ adc \$0,%r14
+ adc \$0,%r15
cmp $carry,$tptr # cf=0, of=0
je .Lsqrx8x_outer_loop
cmp $carry,$tptr # cf=0, of=0
je .Lsqrx8x_outer_loop
@@
-3384,6
+3500,7
@@
__bn_sqrx8x_reduction:
.align 32
.Lsqrx8x_tail_done:
.align 32
.Lsqrx8x_tail_done:
+ xor %rax,%rax
add 24+8(%rsp),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
add 24+8(%rsp),%r8 # can this overflow?
adc \$0,%r9
adc \$0,%r10
@@
-3391,10
+3508,8
@@
__bn_sqrx8x_reduction:
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
adc \$0,%r12
adc \$0,%r13
adc \$0,%r14
- adc \$0,%r15 # can't overflow, because we
- # started with "overhung" part
- # of multiplication
- mov $carry,%rax # xor %rax,%rax
+ adc \$0,%r15
+ adc \$0,%rax
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
.Lsqrx8x_no_tail: # %cf is 0 if jumped here
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
.Lsqrx8x_no_tail: # %cf is 0 if jumped here
@@
-3409,7
+3524,7
@@
__bn_sqrx8x_reduction:
adc 8*5($tptr),%r13
adc 8*6($tptr),%r14
adc 8*7($tptr),%r15
adc 8*5($tptr),%r13
adc 8*6($tptr),%r14
adc 8*7($tptr),%r15
-	adc	%rax,%rax		# top-most carry
+	adc	\$0,%rax		# top-most carry
mov 32+8(%rsp),%rbx # n0
mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"
mov 32+8(%rsp),%rbx # n0
mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"
@@
-3428,6
+3543,7
@@
__bn_sqrx8x_reduction:
cmp 8+8(%rsp),%r8 # end of t[]?
jb .Lsqrx8x_reduction_loop
ret
cmp 8+8(%rsp),%r8 # end of t[]?
jb .Lsqrx8x_reduction_loop
ret
+.cfi_endproc
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
___
}
\f
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
___
}
\f
@@
-3671,8
+3787,8
@@
mul_handler:
jb .Lcommon_seh_tail
mov 4(%r11),%r10d # HandlerData[1]
jb .Lcommon_seh_tail
mov 4(%r11),%r10d # HandlerData[1]
-	lea	(%rsi,%r10),%r10	# epilogue label
-	cmp	%r10,%rbx		# context->Rip>=epilogue label
+	lea	(%rsi,%r10),%r10	# beginning of body label
+	cmp	%r10,%rbx		# context->Rip<body label
jb .Lcommon_pop_regs
mov 152($context),%rax # pull context->Rsp
jb .Lcommon_pop_regs
mov 152($context),%rax # pull context->Rsp