From: Andy Polyakov Date: Tue, 24 Aug 1999 16:02:16 +0000 (+0000) Subject: Minor MIPS III/IV tune-up. X-Git-Tag: OpenSSL_0_9_5beta1~566 X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=0bbd03525ef2c0b0b5e4a53a43d05a0a0f0d49db Minor MIPS III/IV tune-up. --- diff --git a/Configure b/Configure index 838f3cdbe9..fcd7b39cf7 100755 --- a/Configure +++ b/Configure @@ -154,11 +154,11 @@ my %table=( # Only N32 and N64 ABIs are supported. If you need O32 ABI build, invoke # './Configure irix-[g]cc' manually. # -mips4 flag is added by ./config when appropriate. -"irix-mips3-gcc","gcc:-mabi=n32 -mmips-as -O3 -DTERMIOS -DB_ENDIAN::(unknown)::MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR SIXTY_FOUR_BIT:asm/mips3.o::", -"irix-mips3-cc", "cc:-n32 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN::(unknown)::DES_PTR DES_RISC2 DES_UNROLL BF_PTR SIXTY_FOUR_BIT:asm/mips3.o::", +"irix-mips3-gcc","gcc:-mabi=n32 -mmips-as -O3 -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR SIXTY_FOUR_BIT:asm/mips3.o::", +"irix-mips3-cc", "cc:-n32 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::DES_PTR DES_RISC2 DES_UNROLL BF_PTR SIXTY_FOUR_BIT:asm/mips3.o::", # N64 ABI builds. -"irix64-mips4-gcc","gcc:-mabi=64 -mips4 -mmips-as -O3 -DTERMIOS -DB_ENDIAN::(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", -"irix64-mips4-cc", "cc:-64 -mips4 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN::(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", +"irix64-mips4-gcc","gcc:-mabi=64 -mips4 -mmips-as -O3 -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", +"irix64-mips4-cc", "cc:-64 -mips4 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN -DBN_DIV3W::(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT_LONG:asm/mips3.o::", # HPUX 9.X config. # Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s index 191345d920..2df4dcd4b0 100644 --- a/crypto/bn/asm/mips3.s +++ b/crypto/bn/asm/mips3.s @@ -395,32 +395,32 @@ LEAF(bn_add_words) .L_bn_add_words_loop: ld ta0,0(a2) + subu a3,4 ld t1,8(a1) - ld ta1,8(a2) + and AT,a3,MINUS4 ld t2,16(a1) - ld ta2,16(a2) + PTR_ADD a2,32 ld t3,24(a1) - ld ta3,24(a2) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) daddu ta0,t0 - subu a3,4 sltu t8,ta0,t0 daddu t0,ta0,v0 - PTR_ADD a0,32 sltu v0,t0,ta0 sd t0,-32(a0) daddu v0,t8 daddu ta1,t1 - PTR_ADD a1,32 sltu t9,ta1,t1 daddu t1,ta1,v0 - PTR_ADD a2,32 sltu v0,t1,ta1 sd t1,-24(a0) daddu v0,t9 daddu ta2,t2 - and AT,a3,MINUS4 sltu t8,ta2,t2 daddu t2,ta2,v0 sltu v0,t2,ta2 @@ -495,25 +495,26 @@ LEAF(bn_sub_words) .L_bn_sub_words_loop: ld ta0,0(a2) + subu a3,4 ld t1,8(a1) - ld ta1,8(a2) + and AT,a3,MINUS4 ld t2,16(a1) - ld ta2,16(a2) + PTR_ADD a2,32 ld t3,24(a1) - ld ta3,24(a2) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) sltu t8,t0,ta0 dsubu t0,ta0 - subu a3,4 dsubu ta0,t0,v0 - and AT,a3,MINUS4 - sd ta0,0(a0) + sd ta0,-32(a0) MOVNZ (t0,v0,t8) sltu t9,t1,ta1 dsubu t1,ta1 - PTR_ADD a0,32 dsubu ta1,t1,v0 - PTR_ADD a1,32 sd ta1,-24(a0) MOVNZ (t1,v0,t9) @@ -521,7 +522,6 @@ LEAF(bn_sub_words) sltu t8,t2,ta2 dsubu t2,ta2 dsubu ta2,t2,v0 - PTR_ADD a2,32 sd ta2,-16(a0) MOVNZ (t2,v0,t8) @@ -574,6 +574,51 @@ END(bn_sub_words) #undef MINUS4 +.align 5 +LEAF(bn_div_3_words) + .set reorder + move a3,a0 /* we know that bn_div_words doesn't + * touch a3, ta2, ta3 and preserves a2 + * so that we can save two arguments + * and return address in registers + * instead of stack:-) + */ + ld a0,(a3) + move ta2,a1 + ld a1,-8(a3) + move ta3,ra + move v1,zero + li v0,-1 + beq a0,a2,.L_bn_div_3_words_skip_div + bal bn_div_words + move ra,ta3 +.L_bn_div_3_words_skip_div: + dmultu ta2,v0 + ld t2,-16(a3) + move ta0,zero + mfhi t1 + mflo t0 + sltu t8,t1,v1 +.L_bn_div_3_words_inner_loop: + bnez t8,.L_bn_div_3_words_inner_loop_done + sgeu AT,t2,t0 + seq t9,t1,v1 + and AT,t9 + sltu t3,t0,ta2 + daddu v1,a2 + dsubu t1,t3 + dsubu t0,ta2 + sltu t8,t1,v1 + sltu ta0,v1,a2 + or t8,ta0 + .set noreorder + beqzl AT,.L_bn_div_3_words_inner_loop + dsubu v0,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + jr ra +END(bn_div_3_words) + .align 5 LEAF(bn_div_words) .set noreorder @@ -633,16 +678,16 @@ LEAF(bn_div_words) seq t8,HH,t1 sltu AT,HH,t1 and t2,t8 + sltu v0,t0,a2 or AT,t2 .set noreorder beqz AT,.L_bn_div_words_inner_loop1_done - sltu t2,t0,a2 - .set reorder - dsubu QT,1 + dsubu t1,v0 dsubu t0,a2 - dsubu t1,t2 b .L_bn_div_words_inner_loop1 -.L_bn_div_words_inner_loop1_done: + dsubu QT,1 + .set reorder +.L_bn_div_words_inner_loop1_done: dsll a1,32 dsubu a0,t3,t0 @@ -655,6 +700,7 @@ LEAF(bn_div_words) ddivu zero,a0,DH mflo QT .L_bn_div_words_skip_div2: +#undef DH dmultu a2,QT dsll t3,a0,32 dsrl AT,a1,32 @@ -666,69 +712,26 @@ LEAF(bn_div_words) seq t8,HH,t1 sltu AT,HH,t1 and t2,t8 + sltu v1,t0,a2 or AT,t2 .set noreorder beqz AT,.L_bn_div_words_inner_loop2_done - sltu t2,t0,a2 - .set reorder - dsubu QT,1 + dsubu t1,v1 dsubu t0,a2 - dsubu t1,t2 b .L_bn_div_words_inner_loop2 + dsubu QT,1 + .set reorder .L_bn_div_words_inner_loop2_done: +#undef HH dsubu a0,t3,t0 or v0,QT dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ dsrl a2,t9 /* restore a2 */ jr ra -#undef HH -#undef DH #undef QT END(bn_div_words) -.align 5 -LEAF(bn_div_3_words) - .set reorder - move a3,a0 /* we know that bn_div_words doesn't - * touch a3, ta2, ta3 and preserves a2 - * so that we can save two arguments - * and return address in registers - * instead of stack:-) - */ - ld a0,(a3) - move ta2,a2 - move a2,a1 - ld a1,-8(a3) - move ta3,ra - move v1,zero - li v0,-1 - beq a0,a2,.L_bn_div_3_words_skip_div - jal bn_div_words - move ra,ta3 -.L_bn_div_3_words_skip_div: - dmultu ta2,v0 - ld t2,-16(a3) - mflo t0 - mfhi t1 -.L_bn_div_3_words_inner_loop: - sgeu AT,t2,t0 - seq t9,t1,v1 - sltu t8,t1,v1 - and AT,t9 - or AT,t8 - bnez AT,.L_bn_div_3_words_inner_loop_done - daddu v1,a2 - sltu t3,t0,ta2 - sltu AT,v1,a2 - dsubu v0,1 - dsubu t0,ta2 - dsubu t1,t3 - beqz AT,.L_bn_div_3_words_inner_loop -.L_bn_div_3_words_inner_loop_done: - jr ra -END(bn_div_3_words) - #define a_0 t0 #define a_1 t1 #define a_2 t2 diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c index 150dd289a5..6dd5d99ec4 100644 --- a/crypto/bn/bn_div.c +++ b/crypto/bn/bn_div.c @@ -202,7 +202,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, { BN_ULONG q,l0; #ifdef BN_DIV3W - q=bn_div_3_words(wnump,d0,d1); + q=bn_div_3_words(wnump,d1,d0); #else #if !defined(NO_ASM) && !defined(PEDANTIC) @@ -291,8 +291,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, #endif } #endif /* !BN_DIV3W */ - wnum.d--; wnum.top++; l0=bn_mul_words(tmp->d,sdiv->d,div_n,q); + wnum.d--; wnum.top++; tmp->d[div_n]=l0; for (j=div_n+1; j>0; j--) if (tmp->d[j-1]) break;