Import of old SSLeay release: SSLeay 0.9.1b (unreleased)
[openssl.git] / crypto / bn / asm / mips3.s
1 /* This assembler is for R4000 and above machines.  It takes advantage
2  * of the 64 bit registers present on these CPUs.
3  * Make sure that the SSLeay bignum library is compiled with
4  * SIXTY_FOUR_BIT set and BN_LLONG undefined.
5  * This must either be compiled with the system CC, or, if you use GNU gas,
6  * cc -E mips3.s|gas -o mips3.o
7  */
8         .set    reorder
9         .set    noat
10
11 #define R1      $1
12 #define CC      $2
13 #define R2      $3
14 #define R3      $8
15 #define R4      $9
16 #define L1      $10
17 #define L2      $11
18 #define L3      $12
19 #define L4      $13
20 #define H1      $14
21 #define H2      $15
22 #define H3      $24
23 #define H4      $25
24
25 #define P1      $4
26 #define P2      $5
27 #define P3      $6
28 #define P4      $7
29
30         .align  2
31         .ent    bn_mul_add_words
32         .globl  bn_mul_add_words
33 .text
34 bn_mul_add_words:
35         .frame  $sp,0,$31
36         .mask   0x00000000,0
37         .fmask  0x00000000,0
38
39         #blt    P3,4,$lab34
40         
41         subu    R1,P3,4
42         move    CC,$0
43         bltz    R1,$lab34
44 $lab2:  
45         ld      R1,0(P1)
46          ld     L1,0(P2)
47         ld      R2,8(P1)
48          ld     L2,8(P2)
49         ld      R3,16(P1)
50          ld     L3,16(P2)
51         ld      R4,24(P1)
52          ld     L4,24(P2)
53         dmultu  L1,P4
54          daddu  R1,R1,CC
55         mflo    L1
56          sltu   CC,R1,CC
57         daddu   R1,R1,L1
58          mfhi   H1
59         sltu    L1,R1,L1
60          sd     R1,0(P1)
61         daddu   CC,CC,L1
62          dmultu L2,P4
63         daddu   CC,H1,CC
64         mflo    L2
65          daddu  R2,R2,CC
66         sltu    CC,R2,CC
67          mfhi   H2
68         daddu   R2,R2,L2
69          daddu  P2,P2,32
70         sltu    L2,R2,L2
71          sd     R2,8(P1)
72         daddu   CC,CC,L2
73          dmultu L3,P4
74         daddu   CC,H2,CC
75         mflo    L3
76          daddu  R3,R3,CC
77         sltu    CC,R3,CC
78          mfhi   H3
79         daddu   R3,R3,L3
80          daddu  P1,P1,32
81         sltu    L3,R3,L3
82          sd     R3,-16(P1)
83         daddu   CC,CC,L3
84          dmultu L4,P4
85         daddu   CC,H3,CC
86         mflo    L4
87          daddu  R4,R4,CC
88         sltu    CC,R4,CC
89          mfhi   H4
90         daddu   R4,R4,L4
91          subu   P3,P3,4
92         sltu    L4,R4,L4
93         daddu   CC,CC,L4
94         daddu   CC,H4,CC
95
96         subu    R1,P3,4
97         sd      R4,-8(P1)       # delay slot
98         bgez    R1,$lab2
99
100         bleu    P3,0,$lab3
101         .align  2
102 $lab33: 
103         ld      L1,0(P2)
104          ld     R1,0(P1)
105         dmultu  L1,P4
106          daddu  R1,R1,CC
107         sltu    CC,R1,CC
108          daddu  P1,P1,8
109         mflo    L1
110          mfhi   H1
111         daddu   R1,R1,L1
112          daddu  P2,P2,8
113         sltu    L1,R1,L1
114          subu   P3,P3,1
115         daddu   CC,CC,L1
116          sd     R1,-8(P1)
117         daddu   CC,H1,CC
118          bgtz   P3,$lab33
119         j       $31
120         .align  2
121 $lab3:
122         j       $31
123         .align  2
124 $lab34:
125         bgt     P3,0,$lab33
126         j       $31
127         .end    bn_mul_add_words
128
129         .align  2
130         # Program Unit: bn_mul_words
131         .ent    bn_mul_words
132         .globl  bn_mul_words
133 .text
134 bn_mul_words:
135         .frame  $sp,0,$31
136         .mask   0x00000000,0
137         .fmask  0x00000000,0
138         
139         subu    P3,P3,4
140         move    CC,$0
141         bltz    P3,$lab45
142 $lab44: 
143         ld      L1,0(P2)
144          ld     L2,8(P2)
145         ld      L3,16(P2)
146          ld     L4,24(P2)
147         dmultu  L1,P4
148          subu   P3,P3,4
149         mflo    L1
150          mfhi   H1
151         daddu   L1,L1,CC
152          dmultu L2,P4
153         sltu    CC,L1,CC
154          sd     L1,0(P1)
155         daddu   CC,H1,CC
156          mflo   L2
157         mfhi    H2
158          daddu  L2,L2,CC
159         dmultu  L3,P4
160          sltu   CC,L2,CC
161         sd      L2,8(P1)
162          daddu  CC,H2,CC
163         mflo    L3
164          mfhi   H3
165         daddu   L3,L3,CC
166          dmultu L4,P4
167         sltu    CC,L3,CC
168          sd     L3,16(P1)
169         daddu   CC,H3,CC
170          mflo   L4
171         mfhi    H4
172          daddu  L4,L4,CC
173         daddu   P1,P1,32
174          sltu   CC,L4,CC
175         daddu   P2,P2,32
176          daddu  CC,H4,CC
177         sd      L4,-8(P1)
178
179         bgez    P3,$lab44
180         b       $lab45
181 $lab46:
182         ld      L1,0(P2)
183          daddu  P1,P1,8
184         dmultu  L1,P4
185          daddu  P2,P2,8
186         mflo    L1
187          mfhi   H1
188         daddu   L1,L1,CC
189          subu   P3,P3,1
190         sltu    CC,L1,CC
191          sd     L1,-8(P1)
192         daddu   CC,H1,CC
193          bgtz   P3,$lab46
194         j       $31
195 $lab45:
196         addu    P3,P3,4
197         bgtz    P3,$lab46
198         j       $31
199         .align  2
200         .end    bn_mul_words
201
202         # Program Unit: bn_sqr_words
203         .ent    bn_sqr_words
204         .globl  bn_sqr_words
205 .text
206 bn_sqr_words:
207         .frame  $sp,0,$31
208         .mask   0x00000000,0
209         .fmask  0x00000000,0
210         
211         subu    P3,P3,4
212  b $lab55
213         bltz    P3,$lab55
214 $lab54:
215         ld      L1,0(P2)
216          ld     L2,8(P2)
217         ld      L3,16(P2)
218          ld     L4,24(P2)
219
220         dmultu  L1,L1
221          subu   P3,P3,4
222         mflo    L1
223          mfhi   H1
224         sd      L1,0(P1)
225          sd     H1,8(P1)
226
227         dmultu  L2,L2
228          daddu  P1,P1,32
229         mflo    L2
230          mfhi   H2
231         sd      L2,-48(P1)
232          sd     H2,-40(P1)
233
234         dmultu  L3,L3
235          daddu  P2,P2,32
236         mflo    L3
237          mfhi   H3
238         sd      L3,-32(P1)
239          sd     H3,-24(P1)
240
241         dmultu  L4,L4
242
243         mflo    L4
244          mfhi   H4
245         sd      L4,-16(P1)
246          sd     H4,-8(P1)
247
248         bgtz    P3,$lab54
249         b       $lab55
250 $lab56: 
251         ld      L1,0(P2)
252         daddu   P1,P1,16
253         dmultu  L1,L1
254         daddu   P2,P2,8
255         subu    P3,P3,1
256         mflo    L1
257         mfhi    H1
258         sd      L1,-16(P1)
259         sd      H1,-8(P1)
260
261         bgtz    P3,$lab56
262         j       $31
263 $lab55:
264         daddu   P3,P3,4
265         bgtz    P3,$lab56
266         j       $31
267         .align  2
268         .end    bn_sqr_words
269
270         # Program Unit: bn_add_words
271         .ent    bn_add_words
272         .globl  bn_add_words
273 .text
274 bn_add_words:    # 0x590
275         .frame  $sp,0,$31
276         .mask   0x00000000,0
277         .fmask  0x00000000,0
278         
279         subu    P4,P4,4
280         move    CC,$0
281         bltz    P4,$lab65
282 $lab64: 
283         ld      L1,0(P2)
284         ld      R1,0(P3)
285         ld      L2,8(P2)
286         ld      R2,8(P3)
287
288         daddu   L1,L1,CC
289          ld     L3,16(P2)
290         sltu    CC,L1,CC
291          daddu  L1,L1,R1
292         sltu    R1,L1,R1
293          ld     R3,16(P3)
294         daddu   CC,CC,R1
295          ld     L4,24(P2)
296
297         daddu   L2,L2,CC
298          ld     R4,24(P3)
299         sltu    CC,L2,CC
300          daddu  L2,L2,R2
301         sltu    R2,L2,R2
302          sd     L1,0(P1)
303         daddu   CC,CC,R2
304          daddu  P1,P1,32
305         daddu   L3,L3,CC
306          sd     L2,-24(P1)
307
308         sltu    CC,L3,CC
309          daddu  L3,L3,R3
310         sltu    R3,L3,R3
311          daddu  P2,P2,32
312         daddu   CC,CC,R3
313
314         daddu   L4,L4,CC
315          daddu  P3,P3,32
316         sltu    CC,L4,CC
317          daddu  L4,L4,R4
318         sltu    R4,L4,R4
319          subu   P4,P4,4
320         sd      L3,-16(P1)
321          daddu  CC,CC,R4
322         sd      L4,-8(P1)
323
324         bgtz    P4,$lab64
325         b       $lab65
326 $lab66:
327         ld      L1,0(P2)
328          ld     R1,0(P3)
329         daddu   L1,L1,CC
330          daddu  P1,P1,8
331         sltu    CC,L1,CC
332          daddu  P2,P2,8
333         daddu   P3,P3,8
334          daddu  L1,L1,R1
335         subu    P4,P4,1
336          sltu   R1,L1,R1
337         sd      L1,-8(P1)
338          daddu  CC,CC,R1
339
340         bgtz    P4,$lab66
341         j       $31
342 $lab65:
343         addu    P4,P4,4
344         bgtz    P4,$lab66
345         j       $31
346         .end    bn_add_words
347
348 #if 1
349         # Program Unit: bn_div64
350         .set    at
351         .set    reorder
352         .text   
353         .align  2
354         .globl  bn_div64
355  # 321          {
356         .ent    bn_div64
357 bn_div64:
358         dsubu   $sp, 64
359         sd      $31, 56($sp)
360         sd      $16, 48($sp)
361         .mask   0x80010000, -56
362         .frame  $sp, 64, $31
363         move    $9, $4
364         move    $12, $5
365         move    $16, $6
366  # 322          BN_ULONG dh,dl,q,ret=0,th,tl,t;
367         move    $31, $0
368  # 323          int i,count=2;
369         li      $13, 2
370  # 324  
371  # 325          if (d == 0) return(BN_MASK2);
372         bne     $16, 0, $80
373         dli     $2, -1
374         b       $93
375 $80:
376  # 326  
377  # 327          i=BN_num_bits_word(d);
378         move    $4, $16
379         sd      $31, 16($sp)
380         sd      $9, 24($sp)
381         sd      $12, 32($sp)
382         sd      $13, 40($sp)
383         .livereg        0x800ff0e,0xfff
384         jal     BN_num_bits_word
385         dli     $4, 64
386         ld      $31, 16($sp)
387         ld      $9, 24($sp)
388         ld      $12, 32($sp)
389         ld      $13, 40($sp)
390         move    $3, $2
391  # 328          if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
392         beq     $2, $4, $81
393         dli     $14, 1
394         dsll    $15, $14, $2
395         bleu    $9, $15, $81
396  # 329                  {
397  # 330  #if !defined(NO_STDIO) && !defined(WIN16)
398  # 331                  fprintf(stderr,"Division would overflow (%d)\n",i);
399  # 332  #endif
400  # 333                  abort();
401         sd      $3, 8($sp)
402         sd      $31, 16($sp)
403         sd      $9, 24($sp)
404         sd      $12, 32($sp)
405         sd      $13, 40($sp)
406         .livereg        0xff0e,0xfff
407         jal     abort
408         dli     $4, 64
409         ld      $3, 8($sp)
410         ld      $31, 16($sp)
411         ld      $9, 24($sp)
412         ld      $12, 32($sp)
413         ld      $13, 40($sp)
414  # 334                  }
415 $81:
416  # 335          i=BN_BITS2-i;
417         dsubu   $3, $4, $3
418  # 336          if (h >= d) h-=d;
419         bltu    $9, $16, $82
420         dsubu   $9, $9, $16
421 $82:
422  # 337  
423  # 338          if (i)
424         beq     $3, 0, $83
425  # 339                  {
426  # 340                  d<<=i;
427         dsll    $16, $16, $3
428  # 341                  h=(h<<i)|(l>>(BN_BITS2-i));
429         dsll    $24, $9, $3
430         dsubu   $25, $4, $3
431         dsrl    $14, $12, $25
432         or      $9, $24, $14
433  # 342                  l<<=i;
434         dsll    $12, $12, $3
435  # 343                  }
436 $83:
437  # 344          dh=(d&BN_MASK2h)>>BN_BITS4;
438  # 345          dl=(d&BN_MASK2l);
439         and     $8, $16,0xFFFFFFFF00000000
440         dsrl    $8, $8, 32
441         # dli   $10,0xFFFFFFFF # Is this needed?
442         # and   $10, $16, $10
443         dsll    $10, $16, 32
444         dsrl    $10, $10, 32
445         dli     $6,0xFFFFFFFF00000000
446 $84:
447  # 346          for (;;)
448  # 347                  {
449  # 348                  if ((h>>BN_BITS4) == dh)
450         dsrl    $15, $9, 32
451         bne     $8, $15, $85
452  # 349                          q=BN_MASK2l;
453         dli     $5, 0xFFFFFFFF
454         b       $86
455 $85:
456  # 350                  else
457  # 351                          q=h/dh;
458         ddivu   $5, $9, $8
459 $86:
460  # 352  
461  # 353                  for (;;)
462  # 354                          {
463  # 355                          t=(h-q*dh);
464         dmul    $4, $5, $8
465         dsubu   $2, $9, $4
466         move    $3, $2
467  # 356                          if ((t&BN_MASK2h) ||
468  # 357                                  ((dl*q) <= (
469  # 358                                          (t<<BN_BITS4)+
470  # 359                                          ((l&BN_MASK2h)>>BN_BITS4))))
471         and     $25, $2, $6
472         bne     $25, $0, $87
473         dmul    $24, $10, $5
474         dsll    $14, $3, 32
475         and     $15, $12, $6
476         dsrl    $25, $15, 32
477         daddu   $15, $14, $25
478         bgtu    $24, $15, $88
479 $87:
480  # 360                                  break;
481         dmul    $3, $10, $5
482         b       $89
483 $88:
484  # 361                          q--;
485         daddu   $5, $5, -1
486  # 362                          }
487         b       $86
488 $89:
489  # 363                  th=q*dh;
490  # 364                  tl=q*dl;
491  # 365                  t=(tl>>BN_BITS4);
492  # 366                  tl=(tl<<BN_BITS4)&BN_MASK2h;
493         dsll    $14, $3, 32
494         and     $2, $14, $6
495         move    $11, $2
496  # 367                  th+=t;
497         dsrl    $25, $3, 32
498         daddu   $7, $4, $25
499  # 368  
500  # 369                  if (l < tl) th++;
501         bgeu    $12, $2, $90
502         daddu   $7, $7, 1
503 $90:
504  # 370                  l-=tl;
505         dsubu   $12, $12, $11
506  # 371                  if (h < th)
507         bgeu    $9, $7, $91
508  # 372                          {
509  # 373                          h+=d;
510         daddu   $9, $9, $16
511  # 374                          q--;
512         daddu   $5, $5, -1
513  # 375                          }
514 $91:
515  # 376                  h-=th;
516         dsubu   $9, $9, $7
517  # 377  
518  # 378                  if (--count == 0) break;
519         addu    $13, $13, -1
520         beq     $13, 0, $92
521  # 379  
522  # 380                  ret=q<<BN_BITS4;
523         dsll    $31, $5, 32
524  # 381                  h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
525         dsll    $24, $9, 32
526         dsrl    $15, $12, 32
527         or      $9, $24, $15
528  # 382                  l=(l&BN_MASK2l)<<BN_BITS4;
529         and     $12, $12, 0xFFFFFFFF
530         dsll    $12, $12, 32
531  # 383                  }
532         b       $84
533 $92:
534  # 384          ret|=q;
535         or      $31, $31, $5
536  # 385          return(ret);
537         move    $2, $31
538 $93:
539         ld      $16, 48($sp)
540         ld      $31, 56($sp)
541         daddu   $sp, 64
542         j       $31
543         .end    bn_div64
544 #endif