crypto/bn/asm/mips3.s is moved to crypto/bn/asm/obsolete/
authorAndy Polyakov <appro@openssl.org>
Tue, 20 Jul 1999 13:43:26 +0000 (13:43 +0000)
committerAndy Polyakov <appro@openssl.org>
Tue, 20 Jul 1999 13:43:26 +0000 (13:43 +0000)
crypto/bn/asm/obsolete/mips3.s [new file with mode: 0644]

diff --git a/crypto/bn/asm/obsolete/mips3.s b/crypto/bn/asm/obsolete/mips3.s
new file mode 100644 (file)
index 0000000..e8fdd50
--- /dev/null
@@ -0,0 +1,544 @@
+/* This assember is for R4000 and above machines.  It takes advantage
+ * of the 64 bit registers present on these CPUs.
+ * Make sure that the SSLeay bignum library is compiled with
+ * SIXTY_FOUR_BIT set and BN_LLONG undefined.
+ * This must either be compiled with the system CC, or, if you use GNU gas,
+ * cc -E mips3.s|gas -o mips3.o
+ */
+       .set    reorder
+       .set    noat
+
+#define R1     $1
+#define CC     $2
+#define        R2      $3
+#define R3     $8
+#define R4     $9
+#define L1     $10
+#define L2     $11
+#define L3     $12
+#define L4     $13
+#define H1     $14
+#define H2     $15
+#define H3     $24
+#define H4     $25
+
+#define P1     $4
+#define P2     $5
+#define P3     $6
+#define P4     $7
+
+       .align  2
+       .ent    bn_mul_add_words
+       .globl  bn_mul_add_words
+.text
+bn_mul_add_words:
+       .frame  $sp,0,$31
+       .mask   0x00000000,0
+       .fmask  0x00000000,0
+
+       #blt    P3,4,$lab34
+       
+       subu    R1,P3,4
+       move    CC,$0
+       bltz    R1,$lab34
+$lab2: 
+       ld      R1,0(P1)
+        ld     L1,0(P2)
+       ld      R2,8(P1)
+        ld     L2,8(P2)
+       ld      R3,16(P1)
+        ld     L3,16(P2)
+       ld      R4,24(P1)
+        ld     L4,24(P2)
+       dmultu  L1,P4
+        daddu  R1,R1,CC
+       mflo    L1
+        sltu   CC,R1,CC
+       daddu   R1,R1,L1
+        mfhi   H1
+       sltu    L1,R1,L1
+        sd     R1,0(P1)
+       daddu   CC,CC,L1
+        dmultu L2,P4
+       daddu   CC,H1,CC
+       mflo    L2
+        daddu  R2,R2,CC
+       sltu    CC,R2,CC
+        mfhi   H2
+       daddu   R2,R2,L2
+        daddu  P2,P2,32
+       sltu    L2,R2,L2
+        sd     R2,8(P1)
+       daddu   CC,CC,L2
+        dmultu L3,P4
+       daddu   CC,H2,CC
+       mflo    L3
+        daddu  R3,R3,CC
+       sltu    CC,R3,CC
+        mfhi   H3
+       daddu   R3,R3,L3
+        daddu  P1,P1,32
+       sltu    L3,R3,L3
+        sd     R3,-16(P1)
+       daddu   CC,CC,L3
+        dmultu L4,P4
+       daddu   CC,H3,CC
+       mflo    L4
+        daddu  R4,R4,CC
+       sltu    CC,R4,CC
+        mfhi   H4
+       daddu   R4,R4,L4
+        subu   P3,P3,4
+       sltu    L4,R4,L4
+       daddu   CC,CC,L4
+       daddu   CC,H4,CC
+
+       subu    R1,P3,4
+       sd      R4,-8(P1)       # delay slot
+       bgez    R1,$lab2
+
+       bleu    P3,0,$lab3
+       .align  2
+$lab33: 
+       ld      L1,0(P2)
+        ld     R1,0(P1)
+       dmultu  L1,P4
+        daddu  R1,R1,CC
+       sltu    CC,R1,CC
+        daddu  P1,P1,8
+       mflo    L1
+        mfhi   H1
+       daddu   R1,R1,L1
+        daddu  P2,P2,8
+       sltu    L1,R1,L1
+        subu   P3,P3,1
+       daddu   CC,CC,L1
+        sd     R1,-8(P1)
+       daddu   CC,H1,CC
+        bgtz   P3,$lab33
+       j       $31
+       .align  2
+$lab3:
+       j       $31
+       .align  2
+$lab34:
+       bgt     P3,0,$lab33
+       j       $31
+       .end    bn_mul_add_words
+
+       .align  2
+       # Program Unit: bn_mul_words
+       .ent    bn_mul_words
+       .globl  bn_mul_words
+.text
+bn_mul_words:
+       .frame  $sp,0,$31
+       .mask   0x00000000,0
+       .fmask  0x00000000,0
+       
+       subu    P3,P3,4
+       move    CC,$0
+       bltz    P3,$lab45
+$lab44:        
+       ld      L1,0(P2)
+        ld     L2,8(P2)
+       ld      L3,16(P2)
+        ld     L4,24(P2)
+       dmultu  L1,P4
+        subu   P3,P3,4
+       mflo    L1
+        mfhi   H1
+       daddu   L1,L1,CC
+        dmultu L2,P4
+       sltu    CC,L1,CC
+        sd     L1,0(P1)
+       daddu   CC,H1,CC
+        mflo   L2
+       mfhi    H2
+        daddu  L2,L2,CC
+       dmultu  L3,P4
+        sltu   CC,L2,CC
+       sd      L2,8(P1)
+        daddu  CC,H2,CC
+       mflo    L3
+        mfhi   H3
+       daddu   L3,L3,CC
+        dmultu L4,P4
+       sltu    CC,L3,CC
+        sd     L3,16(P1)
+       daddu   CC,H3,CC
+        mflo   L4
+       mfhi    H4
+        daddu  L4,L4,CC
+       daddu   P1,P1,32
+        sltu   CC,L4,CC
+       daddu   P2,P2,32
+        daddu  CC,H4,CC
+       sd      L4,-8(P1)
+
+       bgez    P3,$lab44
+       b       $lab45
+$lab46:
+       ld      L1,0(P2)
+        daddu  P1,P1,8
+       dmultu  L1,P4
+        daddu  P2,P2,8
+       mflo    L1
+        mfhi   H1
+       daddu   L1,L1,CC
+        subu   P3,P3,1
+       sltu    CC,L1,CC
+        sd     L1,-8(P1)
+       daddu   CC,H1,CC
+        bgtz   P3,$lab46
+       j       $31
+$lab45:
+       addu    P3,P3,4
+       bgtz    P3,$lab46
+       j       $31
+       .align  2
+       .end    bn_mul_words
+
+       # Program Unit: bn_sqr_words
+       .ent    bn_sqr_words
+       .globl  bn_sqr_words
+.text
+bn_sqr_words:
+       .frame  $sp,0,$31
+       .mask   0x00000000,0
+       .fmask  0x00000000,0
+       
+       subu    P3,P3,4
+ b $lab55
+       bltz    P3,$lab55
+$lab54:
+       ld      L1,0(P2)
+        ld     L2,8(P2)
+       ld      L3,16(P2)
+        ld     L4,24(P2)
+
+       dmultu  L1,L1
+        subu   P3,P3,4
+       mflo    L1
+        mfhi   H1
+       sd      L1,0(P1)
+        sd     H1,8(P1)
+
+       dmultu  L2,L2
+        daddu  P1,P1,32
+       mflo    L2
+        mfhi   H2
+       sd      L2,-48(P1)
+        sd     H2,-40(P1)
+
+       dmultu  L3,L3
+        daddu  P2,P2,32
+       mflo    L3
+        mfhi   H3
+       sd      L3,-32(P1)
+        sd     H3,-24(P1)
+
+       dmultu  L4,L4
+
+       mflo    L4
+        mfhi   H4
+       sd      L4,-16(P1)
+        sd     H4,-8(P1)
+
+       bgtz    P3,$lab54
+       b       $lab55
+$lab56:        
+       ld      L1,0(P2)
+       daddu   P1,P1,16
+       dmultu  L1,L1
+       daddu   P2,P2,8
+       subu    P3,P3,1
+       mflo    L1
+       mfhi    H1
+       sd      L1,-16(P1)
+       sd      H1,-8(P1)
+
+       bgtz    P3,$lab56
+       j       $31
+$lab55:
+       daddu   P3,P3,4
+       bgtz    P3,$lab56
+       j       $31
+       .align  2
+       .end    bn_sqr_words
+
+       # Program Unit: bn_add_words
+       .ent    bn_add_words
+       .globl  bn_add_words
+.text
+bn_add_words:   # 0x590
+       .frame  $sp,0,$31
+       .mask   0x00000000,0
+       .fmask  0x00000000,0
+       
+       subu    P4,P4,4
+       move    CC,$0
+       bltz    P4,$lab65
+$lab64:        
+       ld      L1,0(P2)
+       ld      R1,0(P3)
+       ld      L2,8(P2)
+       ld      R2,8(P3)
+
+       daddu   L1,L1,CC
+        ld     L3,16(P2)
+       sltu    CC,L1,CC
+        daddu  L1,L1,R1
+       sltu    R1,L1,R1
+        ld     R3,16(P3)
+       daddu   CC,CC,R1
+        ld     L4,24(P2)
+
+       daddu   L2,L2,CC
+        ld     R4,24(P3)
+       sltu    CC,L2,CC
+        daddu  L2,L2,R2
+       sltu    R2,L2,R2
+        sd     L1,0(P1)
+       daddu   CC,CC,R2
+        daddu  P1,P1,32
+       daddu   L3,L3,CC
+        sd     L2,-24(P1)
+
+       sltu    CC,L3,CC
+        daddu  L3,L3,R3
+       sltu    R3,L3,R3
+        daddu  P2,P2,32
+       daddu   CC,CC,R3
+
+       daddu   L4,L4,CC
+        daddu  P3,P3,32
+       sltu    CC,L4,CC
+        daddu  L4,L4,R4
+       sltu    R4,L4,R4
+        subu   P4,P4,4
+       sd      L3,-16(P1)
+        daddu  CC,CC,R4
+       sd      L4,-8(P1)
+
+       bgtz    P4,$lab64
+       b       $lab65
+$lab66:
+       ld      L1,0(P2)
+        ld     R1,0(P3)
+       daddu   L1,L1,CC
+        daddu  P1,P1,8
+       sltu    CC,L1,CC
+        daddu  P2,P2,8
+       daddu   P3,P3,8
+        daddu  L1,L1,R1
+       subu    P4,P4,1
+        sltu   R1,L1,R1
+       sd      L1,-8(P1)
+        daddu  CC,CC,R1
+
+       bgtz    P4,$lab66
+       j       $31
+$lab65:
+       addu    P4,P4,4
+       bgtz    P4,$lab66
+       j       $31
+       .end    bn_add_words
+
+#if 1
+       # Program Unit: bn_div64
+       .set    at
+       .set    reorder
+       .text   
+       .align  2
+       .globl  bn_div64
+ # 321         {
+       .ent    bn_div64
+bn_div64:
+       dsubu   $sp, 64
+       sd      $31, 56($sp)
+       sd      $16, 48($sp)
+       .mask   0x80010000, -56
+       .frame  $sp, 64, $31
+       move    $9, $4
+       move    $12, $5
+       move    $16, $6
+ # 322         BN_ULONG dh,dl,q,ret=0,th,tl,t;
+       move    $31, $0
+ # 323         int i,count=2;
+       li      $13, 2
+ # 324 
+ # 325         if (d == 0) return(BN_MASK2);
+       bne     $16, 0, $80
+       dli     $2, -1
+       b       $93
+$80:
+ # 326 
+ # 327         i=BN_num_bits_word(d);
+       move    $4, $16
+       sd      $31, 16($sp)
+       sd      $9, 24($sp)
+       sd      $12, 32($sp)
+       sd      $13, 40($sp)
+       .livereg        0x800ff0e,0xfff
+       jal     BN_num_bits_word
+       dli     $4, 64
+       ld      $31, 16($sp)
+       ld      $9, 24($sp)
+       ld      $12, 32($sp)
+       ld      $13, 40($sp)
+       move    $3, $2
+ # 328         if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
+       beq     $2, $4, $81
+       dli     $14, 1
+       dsll    $15, $14, $2
+       bleu    $9, $15, $81
+ # 329                 {
+ # 330 #if !defined(NO_STDIO) && !defined(WIN16)
+ # 331                 fprintf(stderr,"Division would overflow (%d)\n",i);
+ # 332 #endif
+ # 333                 abort();
+       sd      $3, 8($sp)
+       sd      $31, 16($sp)
+       sd      $9, 24($sp)
+       sd      $12, 32($sp)
+       sd      $13, 40($sp)
+       .livereg        0xff0e,0xfff
+       jal     abort
+       dli     $4, 64
+       ld      $3, 8($sp)
+       ld      $31, 16($sp)
+       ld      $9, 24($sp)
+       ld      $12, 32($sp)
+       ld      $13, 40($sp)
+ # 334                 }
+$81:
+ # 335         i=BN_BITS2-i;
+       dsubu   $3, $4, $3
+ # 336         if (h >= d) h-=d;
+       bltu    $9, $16, $82
+       dsubu   $9, $9, $16
+$82:
+ # 337 
+ # 338         if (i)
+       beq     $3, 0, $83
+ # 339                 {
+ # 340                 d<<=i;
+       dsll    $16, $16, $3
+ # 341                 h=(h<<i)|(l>>(BN_BITS2-i));
+       dsll    $24, $9, $3
+       dsubu   $25, $4, $3
+       dsrl    $14, $12, $25
+       or      $9, $24, $14
+ # 342                 l<<=i;
+       dsll    $12, $12, $3
+ # 343                 }
+$83:
+ # 344         dh=(d&BN_MASK2h)>>BN_BITS4;
+ # 345         dl=(d&BN_MASK2l);
+       and     $8, $16,0xFFFFFFFF00000000
+       dsrl    $8, $8, 32
+       # dli   $10,0xFFFFFFFF # Is this needed?
+       # and   $10, $16, $10
+       dsll    $10, $16, 32
+       dsrl    $10, $10, 32
+       dli     $6,0xFFFFFFFF00000000
+$84:
+ # 346         for (;;)
+ # 347                 {
+ # 348                 if ((h>>BN_BITS4) == dh)
+       dsrl    $15, $9, 32
+       bne     $8, $15, $85
+ # 349                         q=BN_MASK2l;
+       dli     $5, 0xFFFFFFFF
+       b       $86
+$85:
+ # 350                 else
+ # 351                         q=h/dh;
+       ddivu   $5, $9, $8
+$86:
+ # 352 
+ # 353                 for (;;)
+ # 354                         {
+ # 355                         t=(h-q*dh);
+       dmul    $4, $5, $8
+       dsubu   $2, $9, $4
+       move    $3, $2
+ # 356                         if ((t&BN_MASK2h) ||
+ # 357                                 ((dl*q) <= (
+ # 358                                         (t<<BN_BITS4)+
+ # 359                                         ((l&BN_MASK2h)>>BN_BITS4))))
+       and     $25, $2, $6
+       bne     $25, $0, $87
+       dmul    $24, $10, $5
+       dsll    $14, $3, 32
+       and     $15, $12, $6
+       dsrl    $25, $15, 32
+       daddu   $15, $14, $25
+       bgtu    $24, $15, $88
+$87:
+ # 360                                 break;
+       dmul    $3, $10, $5
+       b       $89
+$88:
+ # 361                         q--;
+       daddu   $5, $5, -1
+ # 362                         }
+       b       $86
+$89:
+ # 363                 th=q*dh;
+ # 364                 tl=q*dl;
+ # 365                 t=(tl>>BN_BITS4);
+ # 366                 tl=(tl<<BN_BITS4)&BN_MASK2h;
+       dsll    $14, $3, 32
+       and     $2, $14, $6
+       move    $11, $2
+ # 367                 th+=t;
+       dsrl    $25, $3, 32
+       daddu   $7, $4, $25
+ # 368 
+ # 369                 if (l < tl) th++;
+       bgeu    $12, $2, $90
+       daddu   $7, $7, 1
+$90:
+ # 370                 l-=tl;
+       dsubu   $12, $12, $11
+ # 371                 if (h < th)
+       bgeu    $9, $7, $91
+ # 372                         {
+ # 373                         h+=d;
+       daddu   $9, $9, $16
+ # 374                         q--;
+       daddu   $5, $5, -1
+ # 375                         }
+$91:
+ # 376                 h-=th;
+       dsubu   $9, $9, $7
+ # 377 
+ # 378                 if (--count == 0) break;
+       addu    $13, $13, -1
+       beq     $13, 0, $92
+ # 379 
+ # 380                 ret=q<<BN_BITS4;
+       dsll    $31, $5, 32
+ # 381                 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+       dsll    $24, $9, 32
+       dsrl    $15, $12, 32
+       or      $9, $24, $15
+ # 382                 l=(l&BN_MASK2l)<<BN_BITS4;
+       and     $12, $12, 0xFFFFFFFF
+       dsll    $12, $12, 32
+ # 383                 }
+       b       $84
+$92:
+ # 384         ret|=q;
+       or      $31, $31, $5
+ # 385         return(ret);
+       move    $2, $31
+$93:
+       ld      $16, 48($sp)
+       ld      $31, 56($sp)
+       daddu   $sp, 64
+       j       $31
+       .end    bn_div64
+#endif