From 240f5169397272eb9ab4145b4a54ce6c65fc9b35 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ulf=20M=C3=B6ller?= Date: Wed, 6 Dec 2000 04:16:38 +0000 Subject: [PATCH] Intel assembler version for bn_sub_part_words(). I haven't got reliable timings yet, please try it out! --- crypto/bn/asm/bn-586.pl | 215 +++++++++++++++++++++++++++++++++++++++- crypto/bn/bn_mul.c | 2 + 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/crypto/bn/asm/bn-586.pl b/crypto/bn/asm/bn-586.pl index 5191bed273..2a074effe2 100644 --- a/crypto/bn/asm/bn-586.pl +++ b/crypto/bn/asm/bn-586.pl @@ -11,6 +11,7 @@ require "x86asm.pl"; &bn_div_words("bn_div_words"); &bn_add_words("bn_add_words"); &bn_sub_words("bn_sub_words"); +&bn_sub_part_words("bn_sub_part_words"); &asm_finish(); @@ -300,7 +301,7 @@ sub bn_add_words &add($tmp1,$tmp2); &adc($c,0); &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jz(&label("aw_end")) if ($i != 6); } &set_label("aw_end",0); @@ -372,7 +373,7 @@ sub bn_sub_words &sub($tmp1,$tmp2); &adc($c,0); &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jz(&label("aw_end")) if ($i != 6); } &set_label("aw_end",0); @@ -382,3 +383,213 @@ sub bn_sub_words &function_end($name); } +sub bn_sub_part_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $a="esi"; + $b="edi"; + $c="eax"; + $r="ebx"; + $tmp1="ecx"; + $tmp2="edx"; + $num="ebp"; + + &mov($r,&wparam(0)); # get r + &mov($a,&wparam(1)); # get a + &mov($b,&wparam(2)); # get b + &mov($num,&wparam(3)); # get num + &xor($c,$c); # clear carry + &and($num,0xfffffff8); # num / 8 + + &jz(&label("aw_finish")); + + &set_label("aw_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($a,32); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("aw_loop")); + + &set_label("aw_finish",0); + &mov($num,&wparam(3)); # get num + &and($num,7); + &jz(&label("aw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov($tmp1,&DWP(0,$a,"",0)); # *a + &mov($tmp2,&DWP(0,$b,"",0));# *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP(0,$r,"",0),$tmp1); # *r + &add($a, 4); + &add($b, 4); + &add($r, 4); + &dec($num) if ($i != 6); + &jz(&label("aw_end")) if ($i != 6); + } + &set_label("aw_end",0); + + &cmp(&wparam(4),0); + &je(&label("pw_end")); + + &mov($num,&wparam(3)); # get num + + &mov($num,&wparam(4)); # get dl + &cmp($num,0); + &je(&label("pw_end")); # unnoetig + &jge(&label("pw_pos")); + + &comment("pw_neg"); + &mov($tmp2,0); + &sub($tmp2,$num); + &mov($num,$tmp2); + &and($num,0xfffffff8); # num / 8 + &jz(&label("pw_neg_finish")); + + &set_label("pw_neg_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("dl<0 Round $i"); + + &mov($tmp1,0); + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_neg_loop")); + + &set_label("pw_neg_finish",0); + &mov($tmp2,&wparam(4)); # get dl + &mov($num,0); + &sub($num,$tmp2); + &and($num,7); + &jz(&label("pw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("dl<0 Tail Round $i"); + &mov($tmp1,0); + &mov($tmp2,&DWP($i*4,$b,"",0));# *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &dec($num) if ($i != 6); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jz(&label("pw_end")) if ($i != 6); + } + + &jmp(&label("pw_end")); + + &set_label("pw_pos",0); + + &and($num,0xfffffff8); # num / 8 + &jz(&label("pw_pos_finish")); + + &set_label("pw_pos_loop",0); + + for ($i=0; $i<8; $i++) + { + &comment("dl>0 Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &sub($tmp1,$c); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jnc(&label("pw_nc".$i)); + } + + &comment(""); + &add($a,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_pos_loop")); + + &set_label("pw_pos_finish",0); + &mov($num,&wparam(4)); # get dl + &and($num,7); + &jz(&label("pw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("dl>0 Tail Round $i"); + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &sub($tmp1,$c); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jnc(&label("pw_tail_nc".$i)); + &dec($num) if ($i != 6); + &jz(&label("pw_end")) if ($i != 6); + } + &mov($c,1); + &jmp(&label("pw_end")); + + &set_label("pw_nc_loop",0); + for ($i=0; $i<8; $i++) + { + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &set_label("pw_nc".$i,0); + } + + &comment(""); + &add($a,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_nc_loop")); + + &mov($num,&wparam(4)); # get dl + &and($num,7); + &jz(&label("pw_nc_end")); + + for ($i=0; $i<7; $i++) + { + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &set_label("pw_tail_nc".$i,0); + &dec($num) if ($i != 6); + &jz(&label("pw_nc_end")) if ($i != 6); + } + + &set_label("pw_nc_end",0); + &mov($c,0); + + &set_label("pw_end",0); + +# &mov("eax",$c); # $c is "eax" + + &function_end($name); + } + diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c index eb5d525613..2810115c2b 100644 --- a/crypto/bn/bn_mul.c +++ b/crypto/bn/bn_mul.c @@ -66,6 +66,7 @@ #include "cryptlib.h" #include "bn_lcl.h" +#if defined(NO_ASM) || !defined(i386) /* Here follows specialised variants of bn_add_words() and bn_sub_words(). They have the property performing operations on arrays of different sizes. The sizes of those arrays is expressed through @@ -201,6 +202,7 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, } return c; } +#endif BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, -- 2.34.1