X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fx86_64-mont.pl;h=84fea1a059bd5f5eec03fa19c427a1d0912ebb32;hp=7f3a43672a5aee21cb29eaf606e582f2571b1531;hb=98939a05b6884538ba40fae2606291140f9e5839;hpb=5ac7bde7c93bccc83b1c75eea967d6f966f01d8d;ds=sidebyside diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 7f3a43672a..84fea1a059 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -2,8 +2,9 @@ # ==================================================================== # Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # October 2005. @@ -22,7 +23,7 @@ $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, $np="%rcx"; # const BN_ULONG *np, -$n0="%r8"; # BN_ULONG n0, +$n0="%r8"; # const BN_ULONG *n0, $num="%r9"; # int num); $lo0="%r10"; $hi0="%r11"; @@ -55,6 +56,8 @@ bn_mul_mont: mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rdx,$bp # $bp reassigned, remember? + mov ($n0),$n0 # pull n0[0] value + xor $i,$i # i=0 xor $j,$j # j=0 @@ -79,26 +82,25 @@ bn_mul_mont: add $hi0,%rax adc \$0,%rdx mov %rax,$lo0 + mov ($np,$j,8),%rax mov %rdx,$hi0 - mov ($np,$j,8),%rax mulq $m1 # np[j]*m1 add $hi1,%rax + lea 1($j),$j # j++ adc \$0,%rdx add $lo0,%rax # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov %rax,-8(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - - lea 1($j),$j # j++ + mov %rax,-16(%rsp,$j,8) # tp[j-1] cmp $num,$j + mov %rdx,$hi1 jl .L1st xor %rdx,%rdx add $hi0,$hi1 adc \$0,%rdx - mov $hi1,-8(%rsp,$j,8) - mov %rdx,(%rsp,$j,8) + mov $hi1,-8(%rsp,$num,8) + mov %rdx,(%rsp,$num,8) # store upmost overflow bit lea 1($i),$i # i++ .align 4 @@ -118,6 +120,7 @@ bn_mul_mont: mulq ($np,$j,8) # np[0]*m1 add $lo0,%rax # discarded + mov 8(%rsp),$lo0 # tp[1] adc \$0,%rdx mov %rdx,$hi1 @@ -128,44 +131,42 @@ bn_mul_mont: mulq $m0 # ap[j]*bp[i] add $hi0,%rax adc \$0,%rdx - add (%rsp,$j,8),%rax # ap[j]*bp[i]+tp[j] + add %rax,$lo0 # ap[j]*bp[i]+tp[j] + mov ($np,$j,8),%rax adc \$0,%rdx - mov %rax,$lo0 mov %rdx,$hi0 - mov ($np,$j,8),%rax mulq $m1 # np[j]*m1 add $hi1,%rax + lea 1($j),$j # j++ adc \$0,%rdx add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j] adc \$0,%rdx - mov %rax,-8(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - - lea 1($j),$j # j++ + mov (%rsp,$j,8),$lo0 cmp $num,$j + mov %rax,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 jl .Linner - xor %rdx,%rdx # $j equals to num here... + xor %rdx,%rdx add $hi0,$hi1 adc \$0,%rdx - add (%rsp,$j,8),$hi1 # pull upmost overflow bit + add $lo0,$hi1 # pull upmost overflow bit adc \$0,%rdx - mov $hi1,-8(%rsp,$j,8) - mov %rdx,(%rsp,$j,8) # store upmost overflow bit + mov $hi1,-8(%rsp,$num,8) + mov %rdx,(%rsp,$num,8) # store upmost overflow bit lea 1($i),$i # i++ cmp $num,$i jl .Louter - sub $i,$i # clear CF at once - cmp \$0,%rdx # %rdx still holds upmost overflow bit - jnz .Lsub # ... and $j still equals to num - mov -8(%rsp,$num,8),%rax - cmp -8($np,$num,8),%rax # tp[num-1]-np[num-1] - jae .Lsub - + xor $i,$i # i=0 lea -1($num),$j # j=num-1 + cmp \$0,%rdx # %rdx still holds upmost overflow bit + jnz .Lsub # CF is cleared by compare with 0 + mov (%rsp,$j,8),%rax + cmp ($np,$j,8),%rax # tp[num-1]-np[num-1] + jae .Lsub # if taken CF was cleared by above cmp .align 4 .Lcopy: mov (%rsp,$j,8),%rax @@ -190,8 +191,8 @@ bn_mul_mont: sbb ($np,$i,8),%rax mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[j] lea 1($i),$i # i++ - dec $j # doesn't affect cf! - jg .Lsub + dec $j # doesn't affect CF! + jge .Lsub lea -1($num),$j # j=num-1 sbb \$0,%rdx jc .Lcopy # tp was less than np @@ -201,6 +202,7 @@ bn_mul_mont: jge .Lzap jmp .Lexit .size bn_mul_mont,.-bn_mul_mont +.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by " ___ print $code;