Import of old SSLeay release: SSLeay 0.9.1b (unreleased)
[openssl.git] / crypto / bn / asm / mips1.s
diff --git a/crypto/bn/asm/mips1.s b/crypto/bn/asm/mips1.s
new file mode 100644 (file)
index 0000000..44fa125
--- /dev/null
@@ -0,0 +1,539 @@
+/* This assembler is for R2000/R3000 machines, or higher ones that do
+ * not want to do any 64 bit arithmetic.
+ * Make sure that the SSLeay bignum library is compiled with 
+ * THIRTY_TWO_BIT set.
+ * This must either be compiled with the system CC, or, if you use GNU gas,
+ * cc -E mips1.s|gas -o mips1.o
+ */
+       .set    reorder                 /* assembler may reorder or pad instructions to cover delay slots and hazards */
+       .set    noat                    /* no implicit use of $1 by assembler macros: R1 below is $1 itself */
+
+#define R1     $1                      /* R1-R4: scratch words; loaded from the array at P1 (mul_add) or P3 (add) */
+#define CC     $2                      /* running carry; it is still in $2 when the carry-producing routines return */
+#define        R2      $3
+#define R3     $8
+#define R4     $9
+#define L1     $10                     /* L1-L4: words loaded from the array at P2, then reused for low product halves */
+#define L2     $11
+#define L3     $12
+#define L4     $13
+#define H1     $14                     /* H1-H4: high product halves read with mfhi */
+#define H2     $15
+#define H3     $24
+#define H4     $25
+
+#define P1     $4                      /* P1-P4: the four incoming argument registers $4-$7 */
+#define P2     $5
+#define P3     $6
+#define P4     $7
+
+       .align  2                       /* bn_mul_add_words: multiply-accumulate a word array by a single word */
+       .ent    bn_mul_add_words        /* in:  P1 ($4) = array updated in place, P2 ($5) = array that is multiplied */
+       .globl  bn_mul_add_words        /*      P3 ($6) = number of words, P4 ($7) = multiplier word */
+.text                                  /* out: CC ($2) = carry word left over after the last element */
+bn_mul_add_words:                      /* per element: word at P1 = low 32 bits of (word at P1 + word at P2 * P4 + carry) */
+       .frame  $sp,0,$31               /* frame size 0, returns through $31 */
+       .mask   0x00000000,0            /* no integer registers saved */
+       .fmask  0x00000000,0            /* no floating point registers saved */
+
+       #blt    P3,4,$lab34
+       
+       subu    R1,P3,4                 /* R1 = count - 4 */
+       move    CC,$0                   /* carry starts at zero */
+       bltz    R1,$lab34               /* fewer than 4 words: skip the unrolled loop */
+$lab2:                                 /* unrolled loop, 4 words per pass */
+       lw      R1,0(P1)                /* R1..R4 = the four words to be updated */
+        lw     L1,0(P2)                /* L1..L4 = the four words to be multiplied */
+       lw      R2,4(P1)
+        lw     L2,4(P2)
+       lw      R3,8(P1)
+        lw     L3,8(P2)
+       lw      R4,12(P1)
+        lw     L4,12(P2)
+       multu   L1,P4                   /* word 0: start L1 * P4 */
+        addu   R1,R1,CC                /* add the incoming carry */
+       mflo    L1                      /* low half of the product */
+        sltu   CC,R1,CC                /* CC = 1 if adding the carry wrapped */
+       addu    R1,R1,L1                /* add the low half */
+        mfhi   H1                      /* high half of the product */
+       sltu    L1,R1,L1                /* L1 = 1 if adding the low half wrapped */
+        sw     R1,0(P1)                /* store updated word 0 */
+       addu    CC,CC,L1                /* sum of the two wrap bits */
+        multu  L2,P4                   /* word 1: start L2 * P4 */
+       addu    CC,H1,CC                /* carry out of word 0 = high half + wrap bits */
+       mflo    L2
+        addu   R2,R2,CC                /* same add/compare sequence as word 0 */
+       sltu    CC,R2,CC
+        mfhi   H2
+       addu    R2,R2,L2
+        addu   P2,P2,16                /* advance the multiplied array by 4 words */
+       sltu    L2,R2,L2
+        sw     R2,4(P1)                /* store updated word 1 */
+       addu    CC,CC,L2
+        multu  L3,P4                   /* word 2: start L3 * P4 */
+       addu    CC,H2,CC                /* carry out of word 1 */
+       mflo    L3
+        addu   R3,R3,CC
+       sltu    CC,R3,CC
+        mfhi   H3
+       addu    R3,R3,L3
+        addu   P1,P1,16                /* advance the updated array; later stores use negative offsets */
+       sltu    L3,R3,L3
+        sw     R3,-8(P1)               /* store updated word 2 */
+       addu    CC,CC,L3
+        multu  L4,P4                   /* word 3: start L4 * P4 */
+       addu    CC,H3,CC                /* carry out of word 2 */
+       mflo    L4
+        addu   R4,R4,CC
+       sltu    CC,R4,CC
+        mfhi   H4
+       addu    R4,R4,L4
+        subu   P3,P3,4                 /* four words consumed */
+       sltu    L4,R4,L4
+       addu    CC,CC,L4
+       addu    CC,H4,CC                /* carry out of word 3, input to the next pass */
+
+       subu    R1,P3,4                 /* R1 = remaining - 4 */
+       sw      R4,-4(P1)       # delay slot
+       bgez    R1,$lab2                /* at least 4 words left: another unrolled pass */
+
+       bleu    P3,0,$lab3              /* nothing left: return */
+       .align  2
+$lab33:                                /* tail loop, one word per pass */
+       lw      L1,0(P2)                /* word to multiply */
+        lw     R1,0(P1)                /* word to update */
+       multu   L1,P4
+        addu   R1,R1,CC                /* add the incoming carry */
+       sltu    CC,R1,CC                /* wrap bit from the carry add */
+        addu   P1,P1,4
+       mflo    L1
+        mfhi   H1
+       addu    R1,R1,L1                /* add the low half */
+        addu   P2,P2,4
+       sltu    L1,R1,L1                /* wrap bit from the low-half add */
+        subu   P3,P3,1
+       addu    CC,CC,L1
+        sw     R1,-4(P1)               /* P1 was already advanced, hence -4 */
+       addu    CC,H1,CC                /* carry out = high half + wrap bits */
+        bgtz   P3,$lab33
+       j       $31                     /* return with the carry in CC */
+       .align  2
+$lab3:
+       j       $31                     /* return with the carry in CC */
+       .align  2
+$lab34:                                /* entry path for counts below 4 */
+       bgt     P3,0,$lab33             /* 1..3 words: tail loop only */
+       j       $31                     /* count <= 0: return with CC = 0 */
+       .end    bn_mul_add_words
+
+       .align  2
+       # Program Unit: bn_mul_words
+       .ent    bn_mul_words            /* bn_mul_words: multiply a word array by a single word */
+       .globl  bn_mul_words            /* in:  P1 ($4) = destination array, P2 ($5) = source array */
+.text                                  /*      P3 ($6) = number of words, P4 ($7) = multiplier word */
+bn_mul_words:                          /* out: CC ($2) = carry word left over after the last element */
+       .frame  $sp,0,$31               /* frame size 0, returns through $31 */
+       .mask   0x00000000,0            /* no integer registers saved */
+       .fmask  0x00000000,0            /* no floating point registers saved */
+       
+       subu    P3,P3,4                 /* P3 = count - 4 */
+       move    CC,$0                   /* carry starts at zero */
+       bltz    P3,$lab45               /* fewer than 4 words: tail loop only */
+$lab44:                                /* unrolled loop, 4 words per pass */
+       lw      L1,0(P2)                /* L1..L4 = the four source words */
+        lw     L2,4(P2)
+       lw      L3,8(P2)
+        lw     L4,12(P2)
+       multu   L1,P4                   /* word 0: start L1 * P4 */
+        subu   P3,P3,4                 /* P3 = words left after this pass, minus 4 */
+       mflo    L1                      /* low half */
+        mfhi   H1                      /* high half */
+       addu    L1,L1,CC                /* add the incoming carry to the low half */
+        multu  L2,P4                   /* word 1: start L2 * P4 */
+       sltu    CC,L1,CC                /* CC = 1 if that add wrapped */
+        sw     L1,0(P1)                /* store result word 0 */
+       addu    CC,H1,CC                /* carry out = high half + wrap bit */
+        mflo   L2
+       mfhi    H2
+        addu   L2,L2,CC
+       multu   L3,P4                   /* word 2: start L3 * P4 */
+        sltu   CC,L2,CC
+       sw      L2,4(P1)                /* store result word 1 */
+        addu   CC,H2,CC
+       mflo    L3
+        mfhi   H3
+       addu    L3,L3,CC
+        multu  L4,P4                   /* word 3: start L4 * P4 */
+       sltu    CC,L3,CC
+        sw     L3,8(P1)                /* store result word 2 */
+       addu    CC,H3,CC
+        mflo   L4
+       mfhi    H4
+        addu   L4,L4,CC
+       addu    P1,P1,16                /* advance the destination by 4 words */
+        sltu   CC,L4,CC
+       addu    P2,P2,16                /* advance the source by 4 words */
+        addu   CC,H4,CC                /* carry into the next pass */
+       sw      L4,-4(P1)               /* store result word 3 (P1 already advanced) */
+
+       bgez    P3,$lab44               /* at least 4 words left: another unrolled pass */
+       b       $lab45                  /* otherwise restore the count and finish in the tail */
+$lab46:                                /* tail loop, one word per pass */
+       lw      L1,0(P2)
+        addu   P1,P1,4
+       multu   L1,P4
+        addu   P2,P2,4
+       mflo    L1
+        mfhi   H1
+       addu    L1,L1,CC                /* low half + incoming carry */
+        subu   P3,P3,1
+       sltu    CC,L1,CC                /* wrap bit */
+        sw     L1,-4(P1)               /* P1 was already advanced, hence -4 */
+       addu    CC,H1,CC                /* carry out = high half + wrap bit */
+        bgtz   P3,$lab46
+       j       $31                     /* return with the carry in CC */
+$lab45:
+       addu    P3,P3,4                 /* undo the -4 bias: P3 = words still to do */
+       bgtz    P3,$lab46
+       j       $31                     /* return with the carry in CC */
+       .align  2
+       .end    bn_mul_words
+
+       # Program Unit: bn_sqr_words
+       .ent    bn_sqr_words            /* bn_sqr_words: square every word of an array */
+       .globl  bn_sqr_words            /* in:  P1 ($4) = destination, receives two words per source word (low, then high) */
+.text                                  /*      P2 ($5) = source array, P3 ($6) = number of source words */
+bn_sqr_words:                          /* no carry is propagated and CC is not written */
+       .frame  $sp,0,$31               /* frame size 0, returns through $31 */
+       .mask   0x00000000,0            /* no integer registers saved */
+       .fmask  0x00000000,0            /* no floating point registers saved */
+       
+       subu    P3,P3,4                 /* P3 = count - 4 */
+       bltz    P3,$lab55               /* fewer than 4 words: tail loop only */
+$lab54:                                /* unrolled loop, 4 source words per pass */
+       lw      L1,0(P2)
+        lw     L2,4(P2)
+       lw      L3,8(P2)
+        lw     L4,12(P2)
+
+       multu   L1,L1                   /* word 0 squared */
+        subu   P3,P3,4                 /* P3 = words left after this pass, minus 4 */
+       mflo    L1
+        mfhi   H1
+       sw      L1,0(P1)                /* low half */
+        sw     H1,4(P1)                /* high half */
+
+       multu   L2,L2                   /* word 1 squared */
+        addu   P1,P1,32                /* 4 source words produce 8 result words */
+       mflo    L2
+        mfhi   H2
+       sw      L2,-24(P1)              /* offsets are relative to the advanced P1 */
+        sw     H2,-20(P1)
+
+       multu   L3,L3                   /* word 2 squared */
+        addu   P2,P2,16
+       mflo    L3
+        mfhi   H3
+       sw      L3,-16(P1)
+        sw     H3,-12(P1)
+
+       multu   L4,L4                   /* word 3 squared */
+
+       mflo    L4
+        mfhi   H4
+       sw      L4,-8(P1)
+        sw     H4,-4(P1)
+
+       bgez    P3,$lab54               /* P3 >= 0 means at least 4 words left; was bgtz, which sent a last group of exactly 4 to the tail */
+       b       $lab55
+$lab56:                                /* tail loop, one source word per pass */
+       lw      L1,0(P2)
+       addu    P1,P1,8
+       multu   L1,L1
+       addu    P2,P2,4
+       subu    P3,P3,1
+       mflo    L1
+       mfhi    H1
+       sw      L1,-8(P1)               /* low half */
+       sw      H1,-4(P1)               /* high half */
+
+       bgtz    P3,$lab56
+       j       $31
+$lab55:
+       addu    P3,P3,4                 /* undo the -4 bias: P3 = words still to do (0..3) */
+       bgtz    P3,$lab56
+       j       $31
+       .align  2
+       .end    bn_sqr_words
+
+       # Program Unit: bn_add_words
+       .ent    bn_add_words            /* bn_add_words: add two word arrays with carry propagation */
+       .globl  bn_add_words            /* in:  P1 ($4) = destination, P2 ($5) and P3 ($6) = the two source arrays */
+.text                                  /*      P4 ($7) = number of words */
+bn_add_words:                          /* out: CC ($2) = carry out of the last word */
+       .frame  $sp,0,$31               /* frame size 0, returns through $31 */
+       .mask   0x00000000,0            /* no integer registers saved */
+       .fmask  0x00000000,0            /* no floating point registers saved */
+       
+       subu    P4,P4,4                 /* P4 = count - 4 */
+       move    CC,$0                   /* carry starts at zero */
+       bltz    P4,$lab65               /* fewer than 4 words: tail loop only */
+$lab64:                                /* unrolled loop, 4 words per pass; all loads precede the first store */
+       lw      L1,0(P2)
+       lw      R1,0(P3)
+       lw      L2,4(P2)
+       lw      R2,4(P3)
+
+       addu    L1,L1,CC                /* word 0: add the incoming carry */
+        lw     L3,8(P2)
+       sltu    CC,L1,CC                /* CC = 1 if that add wrapped */
+        addu   L1,L1,R1                /* add the other operand */
+       sltu    R1,L1,R1                /* R1 = 1 if that add wrapped */
+        lw     R3,8(P3)
+       addu    CC,CC,R1                /* carry out of word 0 */
+        lw     L4,12(P2)
+
+       addu    L2,L2,CC                /* word 1, same sequence */
+        lw     R4,12(P3)
+       sltu    CC,L2,CC
+        addu   L2,L2,R2
+       sltu    R2,L2,R2
+        sw     L1,0(P1)                /* store word 0 */
+       addu    CC,CC,R2
+        addu   P1,P1,16                /* advance the destination; later stores use negative offsets */
+       addu    L3,L3,CC                /* word 2 */
+        sw     L2,-12(P1)              /* store word 1 */
+       sltu    CC,L3,CC
+        addu   L3,L3,R3
+       sltu    R3,L3,R3
+        addu   P2,P2,16
+       addu    CC,CC,R3
+
+       addu    L4,L4,CC                /* word 3 */
+        addu   P3,P3,16
+       sltu    CC,L4,CC
+        addu   L4,L4,R4
+       subu    P4,P4,4                 /* P4 = words left after this pass, minus 4 */
+        sltu   R4,L4,R4
+       sw      L3,-8(P1)               /* store word 2 */
+        addu   CC,CC,R4                /* carry into the next pass */
+       sw      L4,-4(P1)               /* store word 3 */
+
+       bgez    P4,$lab64               /* P4 >= 0 means at least 4 words left; was bgtz, which sent a last group of exactly 4 to the tail */
+       b       $lab65
+$lab66:                                /* tail loop, one word per pass */
+       lw      L1,0(P2)
+        lw     R1,0(P3)
+       addu    L1,L1,CC                /* add the incoming carry */
+        addu   P1,P1,4
+       sltu    CC,L1,CC
+        addu   P2,P2,4
+       addu    P3,P3,4
+        addu   L1,L1,R1                /* add the other operand */
+       subu    P4,P4,1
+        sltu   R1,L1,R1
+       sw      L1,-4(P1)               /* P1 was already advanced, hence -4 */
+        addu   CC,CC,R1                /* carry out of this word */
+
+       bgtz    P4,$lab66
+       j       $31                     /* return with the carry in CC */
+$lab65:
+       addu    P4,P4,4                 /* undo the -4 bias: P4 = words still to do (0..3) */
+       bgtz    P4,$lab66
+       j       $31                     /* return with the carry in CC */
+       .end    bn_add_words
+
+       # Program Unit: bn_div64
+       .set    at                      /* the macros below (bleu, bgtu, divu, mul, ...) may use $1 again */
+       .set    reorder
+       .text   
+       .align  2
+       .globl  bn_div64                /* bn_div64: divide the two-word value h:l by d, see the interleaved C lines */
+ # 321         {
+       .ent    bn_div64 2              /* in: $4 = h, $5 = l, $6 = d; out: $2 = quotient, or -1 when d is 0 */
+bn_div64:
+       subu    $sp, 64                 /* 64-byte frame */
+       sw      $31, 56($sp)            /* return address; $31 is reused below for ret */
+       sw      $16, 48($sp)            /* $16 is reused below for d */
+       .mask   0x80010000, -56
+       .frame  $sp, 64, $31
+       move    $9, $4                  /* $9 = h */
+       move    $12, $5                 /* $12 = l */
+       move    $16, $6                 /* $16 = d */
+ # 322         BN_ULONG dh,dl,q,ret=0,th,tl,t;
+       move    $31, $0                 /* $31 = ret */
+ # 323         int i,count=2;
+       li      $13, 2                  /* $13 = count */
+ # 324 
+ # 325         if (d == 0) return(BN_MASK2);
+       bne     $16, 0, $80
+       li      $2, -1
+       b       $93
+$80:
+ # 326 
+ # 327         i=BN_num_bits_word(d);
+       move    $4, $16
+       sw      $31, 16($sp)            /* spill ret, h, l and count around the call */
+       sw      $9, 24($sp)
+       sw      $12, 32($sp)
+       sw      $13, 40($sp)
+       .livereg        0x800ff0e,0xfff
+       jal     BN_num_bits_word
+       li      $4, 32                  /* $4 = BN_BITS2 */
+       lw      $31, 16($sp)
+       lw      $9, 24($sp)
+       lw      $12, 32($sp)
+       lw      $13, 40($sp)
+       move    $3, $2                  /* $3 = i */
+ # 328         if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
+       beq     $2, $4, $81
+       li      $14, 1
+       sll     $15, $14, $2
+       bleu    $9, $15, $81
+ # 329                 {
+ # 330 #if !defined(NO_STDIO) && !defined(WIN16)
+ # 331                 fprintf(stderr,"Division would overflow (%d)\n",i);
+ # 332 #endif
+ # 333                 abort();
+       sw      $3, 8($sp)
+       sw      $9, 24($sp)
+       sw      $12, 32($sp)
+       sw      $13, 40($sp)
+       sw      $31, 16($sp)            /* was 26($sp): not word aligned; 16 is the slot used for $31 around the call above */
+       .livereg        0xff0e,0xfff
+       jal     abort
+       lw      $3, 8($sp)
+       li      $4, 32
+       lw      $9, 24($sp)
+       lw      $12, 32($sp)
+       lw      $13, 40($sp)
+       lw      $31, 16($sp)            /* matches the corrected store above */
+ # 334                 }
+$81:
+ # 335         i=BN_BITS2-i;
+       subu    $3, $4, $3
+ # 336         if (h >= d) h-=d;
+       bltu    $9, $16, $82
+       subu    $9, $9, $16
+$82:
+ # 337 
+ # 338         if (i)
+       beq     $3, 0, $83
+ # 339                 {
+ # 340                 d<<=i;
+       sll     $16, $16, $3
+ # 341                 h=(h<<i)|(l>>(BN_BITS2-i));
+       sll     $24, $9, $3
+       subu    $25, $4, $3
+       srl     $14, $12, $25
+       or      $9, $24, $14
+ # 342                 l<<=i;
+       sll     $12, $12, $3
+ # 343                 }
+$83:
+ # 344         dh=(d&BN_MASK2h)>>BN_BITS4;
+ # 345         dl=(d&BN_MASK2l);
+       and     $8, $16, -65536
+       srl     $8, $8, 16              /* $8 = dh */
+       and     $10, $16, 65535         /* $10 = dl */
+       li      $6, -65536              /* $6 = BN_MASK2h, kept for the loop */
+$84:
+ # 346         for (;;)
+ # 347                 {
+ # 348                 if ((h>>BN_BITS4) == dh)
+       srl     $15, $9, 16
+       bne     $8, $15, $85
+ # 349                         q=BN_MASK2l;
+       li      $5, 65535               /* $5 = q */
+       b       $86
+$85:
+ # 350                 else
+ # 351                         q=h/dh;
+       divu    $5, $9, $8
+$86:
+ # 352 
+ # 353                 for (;;)
+ # 354                         {
+ # 355                         t=(h-q*dh);
+       mul     $4, $5, $8              /* $4 = q*dh, reused as th after the loop */
+       subu    $2, $9, $4
+       move    $3, $2                  /* $3 = t */
+ # 356                         if ((t&BN_MASK2h) ||
+ # 357                                 ((dl*q) <= (
+ # 358                                         (t<<BN_BITS4)+
+ # 359                                         ((l&BN_MASK2h)>>BN_BITS4))))
+       and     $25, $2, $6
+       bne     $25, $0, $87
+       mul     $24, $10, $5
+       sll     $14, $3, 16
+       and     $15, $12, $6
+       srl     $25, $15, 16
+       addu    $15, $14, $25
+       bgtu    $24, $15, $88           /* dl*q too large: decrement q and retry */
+$87:
+ # 360                                 break;
+       mul     $3, $10, $5             /* $3 = q*dl, consumed at $89 */
+       b       $89
+$88:
+ # 361                         q--;
+       addu    $5, $5, -1
+ # 362                         }
+       b       $86
+$89:
+ # 363                 th=q*dh;
+ # 364                 tl=q*dl;
+ # 365                 t=(tl>>BN_BITS4);
+ # 366                 tl=(tl<<BN_BITS4)&BN_MASK2h;
+       sll     $14, $3, 16
+       and     $2, $14, $6
+       move    $11, $2                 /* $11 = tl */
+ # 367                 th+=t;
+       srl     $25, $3, 16
+       addu    $7, $4, $25             /* $7 = th */
+ # 368 
+ # 369                 if (l < tl) th++;
+       bgeu    $12, $2, $90
+       addu    $7, $7, 1
+$90:
+ # 370                 l-=tl;
+       subu    $12, $12, $11
+ # 371                 if (h < th)
+       bgeu    $9, $7, $91
+ # 372                         {
+ # 373                         h+=d;
+       addu    $9, $9, $16
+ # 374                         q--;
+       addu    $5, $5, -1
+ # 375                         }
+$91:
+ # 376                 h-=th;
+       subu    $9, $9, $7
+ # 377 
+ # 378                 if (--count == 0) break;
+       addu    $13, $13, -1
+       beq     $13, 0, $92
+ # 379 
+ # 380                 ret=q<<BN_BITS4;
+       sll     $31, $5, 16
+ # 381                 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+       sll     $24, $9, 16
+       srl     $15, $12, 16
+       or      $9, $24, $15
+ # 382                 l=(l&BN_MASK2l)<<BN_BITS4;
+       and     $12, $12, 65535
+       sll     $12, $12, 16
+ # 383                 }
+       b       $84
+$92:
+ # 384         ret|=q;
+       or      $31, $31, $5
+ # 385         return(ret);
+       move    $2, $31
+$93:
+       lw      $16, 48($sp)            /* restore the saved $16 */
+       lw      $31, 56($sp)            /* restore the return address */
+       addu    $sp, 64
+       j       $31
+       .end    bn_div64
+