Minor MIPS III/IV tune-up.
[openssl.git] / crypto / bn / asm / alpha.s.works
1
2  # DEC Alpha assembler
3  # The bn_div64 is actually gcc output but the other parts are hand done.
4  # Thanks to tzeruch@ceddec.com for sending me the gcc output for
5  # bn_div64.
6  # I've gone back and re-done most of the routines.
7  # The key thing to remember for the 164 CPU is that while a
8  # multiply operation takes 8 cycles, another one can only be issued
9  # after 4 cycles have elapsed.  I've made modifications to help
10  # improve this.  Also, normally, the result of a ld instruction will not
11  # be available for about 3 cycles.
# File preamble: names the originating C file for debug info, tells the
# assembler not to use $at ($28) on its own (the hand-written routines below
# use $28 as an ordinary scratch register), and defines the two marker
# labels of the kind gcc places at the start of its output.
12         .file   1 "bn_asm.c"
13         .set noat
14 gcc2_compiled.:
15 __gnu_compiled_c:
# bn_mul_add_words
#   For each word i:  r[i] = low64(a[i]*w + r[i] + carry), and the new carry
#   is high64(a[i]*w) plus the carries produced by the two additions.
#   In:   $16 = r  (64-bit words, read and written)
#         $17 = a  (64-bit words, read only)
#         $18 = number of words to process
#         $19 = multiplier w
#   Out:  $0  = carry out of the last word; 0 if $18 <= 0 (nothing is touched)
#   Uses: $1-$8, $20-$25, $27, $28 as scratch and advances $16/$17; leaf
#         routine with no stack frame.
#   NOTE(review): presumably called from C as bn_mul_add_words(r, a, num, w);
#   confirm the prototype on the C side.
#   The leading digit of the trailing "# n ..." comments appears to be the
#   word number (1-4) inside the unrolled group -- TODO confirm the rest of
#   the notation with the author.
16         .text
17         .align 3
18         .globl bn_mul_add_words
19         .ent bn_mul_add_words
20 bn_mul_add_words:
21 bn_mul_add_words..ng:
22         .frame $30,0,$26,0
23         .prologue 0
24         .align 5
25         subq    $18,4,$18       # bias the count by -4: >= 0 means a full group of 4 remains
26         bis     $31,$31,$0      # carry = 0
27         blt     $18,$43         # if we are -1, -2, -3 or -4 goto tail code
28         ldq     $20,0($17)      # 1 1
29         ldq     $1,0($16)       # 1 1
30         .align 3
# Main loop: four words per iteration.  Loads, multiplies and carry
# arithmetic of the four words are interleaved; $0 carries between words.
31 $42:
32         mulq    $20,$19,$5      # 1 2 1 ######
33         ldq     $21,8($17)      # 2 1
34         ldq     $2,8($16)       # 2 1
35         umulh   $20,$19,$20     # 1 2   ######
36         ldq     $27,16($17)     # 3 1
37         ldq     $3,16($16)      # 3 1
38         mulq    $21,$19,$6      # 2 2 1 ######
39          ldq    $28,24($17)     # 4 1
40         addq    $1,$5,$1        # 1 2 2
41          ldq    $4,24($16)      # 4 1
42         umulh   $21,$19,$21     # 2 2   ######
43          cmpult $1,$5,$22       # 1 2 3 1
44         addq    $20,$22,$20     # 1 3 1
45          addq   $1,$0,$1        # 1 2 3 1
46         mulq    $27,$19,$7      # 3 2 1 ######
47          cmpult $1,$0,$0        # 1 2 3 2
48         addq    $2,$6,$2        # 2 2 2
49          addq   $20,$0,$0       # 1 3 2
50         cmpult  $2,$6,$23       # 2 2 3 1
51          addq   $21,$23,$21     # 2 3 1
52         umulh   $27,$19,$27     # 3 2   ######
53          addq   $2,$0,$2        # 2 2 3 1
54         cmpult  $2,$0,$0        # 2 2 3 2
55          subq   $18,4,$18       # four more words consumed
56         mulq    $28,$19,$8      # 4 2 1 ######
57          addq   $21,$0,$0       # 2 3 2
58         addq    $3,$7,$3        # 3 2 2
59          addq   $16,32,$16      # r += 4 words (all four r loads are already issued)
60         cmpult  $3,$7,$24       # 3 2 3 1
61          stq    $1,-32($16)     # 1 2 4
62         umulh   $28,$19,$28     # 4 2   ######
63          addq   $27,$24,$27     # 3 3 1
64         addq    $3,$0,$3        # 3 2 3 1
65          stq    $2,-24($16)     # 2 2 4
66         cmpult  $3,$0,$0        # 3 2 3 2
67          stq    $3,-16($16)     # 3 2 4
68         addq    $4,$8,$4        # 4 2 2
69          addq   $27,$0,$0       # 3 3 2
70         cmpult  $4,$8,$25       # 4 2 3 1
71          addq   $17,32,$17      # a += 4 words
72         addq    $28,$25,$28     # 4 3 1
73          addq   $4,$0,$4        # 4 2 3 1
74         cmpult  $4,$0,$0        # 4 2 3 2
75          stq    $4,-8($16)      # 4 2 4
76         addq    $28,$0,$0       # 4 3 2
77          blt    $18,$43         # fewer than 4 words left: go to the tail
78
79         ldq     $20,0($17)      # 1 1
80         ldq     $1,0($16)       # 1 1
81
82         br      $42
83
84         .align 4
# Tail loop: one word per iteration while the count is > 0.
85 $45:
86         ldq     $20,0($17)      # 4 1
87         ldq     $1,0($16)       # 4 1
88         mulq    $20,$19,$5      # 4 2 1
89         subq    $18,1,$18
90         addq    $16,8,$16
91         addq    $17,8,$17
92         umulh   $20,$19,$20     # 4 2
93         addq    $1,$5,$1        # 4 2 2
94         cmpult  $1,$5,$22       # 4 2 3 1
95         addq    $20,$22,$20     # 4 3 1
96         addq    $1,$0,$1        # 4 2 3 1
97         cmpult  $1,$0,$0        # 4 2 3 2
98         addq    $20,$0,$0       # 4 3 2
99         stq     $1,-8($16)      # 4 2 4
100         bgt     $18,$45
101         ret     $31,($26),1     # else exit
102
103         .align 4
104 $43:
105         addq    $18,4,$18       # undo the -4 bias: $18 = words still to do (0..3, or <= 0 on entry)
106         bgt     $18,$45         # goto tail code
107         ret     $31,($26),1     # else exit
108
109         .end bn_mul_add_words
# bn_mul_words
#   For each word i:  r[i] = low64(a[i]*w) + carry, and the new carry is
#   high64(a[i]*w) plus the carry out of that addition.
#   In:   $16 = r  (64-bit words, written only)
#         $17 = a  (64-bit words, read only)
#         $18 = number of words to process
#         $19 = multiplier w
#   Out:  $0  = carry out of the last word; 0 if $18 <= 0 (nothing is touched)
#   Uses: $5-$8, $20, $21, $27, $28 as scratch and advances $16/$17; leaf
#         routine with no stack frame.
#   NOTE(review): presumably called from C as bn_mul_words(r, a, num, w);
#   confirm the prototype on the C side.
110         .align 3
111         .globl bn_mul_words
112         .ent bn_mul_words
113 bn_mul_words:
114 bn_mul_words..ng:
115         .frame $30,0,$26,0
116         .prologue 0
117         .align 5
118         subq    $18,4,$18       # bias the count by -4: >= 0 means a full group of 4 remains
119         bis     $31,$31,$0      # carry = 0
120         blt     $18,$143        # if we are -1, -2, -3 or -4 goto tail code
121         ldq     $20,0($17)      # 1 1
122         .align 3
# Main loop: four words per iteration, with the multiplies spaced out between
# the carry arithmetic of the previous word.
123 $142:
124
125         mulq    $20,$19,$5      # 1 2 1 #####
126          ldq    $21,8($17)      # 2 1
127          ldq    $27,16($17)     # 3 1
128         umulh   $20,$19,$20     # 1 2   #####
129          ldq    $28,24($17)     # 4 1
130         mulq    $21,$19,$6      # 2 2 1 #####
131          addq   $5,$0,$5        # 1 2 3 1
132         subq    $18,4,$18       # four more words consumed
133          cmpult $5,$0,$0        # 1 2 3 2
134         umulh   $21,$19,$21     # 2 2   #####
135          addq   $20,$0,$0       # 1 3 2
136         addq    $17,32,$17      # a += 4 words (all four a loads are already issued)
137          addq   $6,$0,$6        # 2 2 3 1
138         mulq    $27,$19,$7      # 3 2 1 #####
139          cmpult $6,$0,$0        # 2 2 3 2
140         addq    $21,$0,$0       # 2 3 2
141          addq   $16,32,$16      # r += 4 words; the stores below use negative offsets
142         umulh   $27,$19,$27     # 3 2   #####
143          stq    $5,-32($16)     # 1 2 4
144         mulq    $28,$19,$8      # 4 2 1 #####
145          addq   $7,$0,$7        # 3 2 3 1
146         stq     $6,-24($16)     # 2 2 4
147          cmpult $7,$0,$0        # 3 2 3 2
148         umulh   $28,$19,$28     # 4 2   #####
149          addq   $27,$0,$0       # 3 3 2
150         stq     $7,-16($16)     # 3 2 4
151          addq   $8,$0,$8        # 4 2 3 1
152         cmpult  $8,$0,$0        # 4 2 3 2
153
154         addq    $28,$0,$0       # 4 3 2
155
156         stq     $8,-8($16)      # 4 2 4
157
158         blt     $18,$143        # fewer than 4 words left: go to the tail
159
160         ldq     $20,0($17)      # 1 1
161
162         br      $142
163
164         .align 4
# Tail loop: one word per iteration while the count is > 0.
165 $145:
166         ldq     $20,0($17)      # 4 1
167         mulq    $20,$19,$5      # 4 2 1
168         subq    $18,1,$18
169         umulh   $20,$19,$20     # 4 2
170         addq    $5,$0,$5        # 4 2 3 1
171          addq   $16,8,$16
172         cmpult  $5,$0,$0        # 4 2 3 2
173          addq   $17,8,$17
174         addq    $20,$0,$0       # 4 3 2
175         stq     $5,-8($16)      # 4 2 4
176
177         bgt     $18,$145
178         ret     $31,($26),1     # else exit
179
180         .align 4
181 $143:
182         addq    $18,4,$18       # undo the -4 bias: $18 = words still to do
183         bgt     $18,$145        # goto tail code
184         ret     $31,($26),1     # else exit
185
186         .end bn_mul_words
# bn_sqr_words
#   For each input word i, stores the 128-bit square of a[i] as two words:
#   r[2*i] = low64(a[i]*a[i]), r[2*i+1] = high64(a[i]*a[i]).
#   In:   $16 = r  (written only; two 64-bit words per input word)
#         $17 = a  (64-bit words, read only)
#         $18 = number of input words
#   Out:  nothing; $0 is never written by this routine.
#   Uses: $1-$8, $20, $21, $27, $28 as scratch and advances $16/$17; leaf
#         routine with no stack frame.  Does nothing when $18 <= 0.
#   NOTE(review): presumably called from C as bn_sqr_words(r, a, n); confirm
#   the prototype on the C side.
187         .align 3
188         .globl bn_sqr_words
189         .ent bn_sqr_words
190 bn_sqr_words:
191 bn_sqr_words..ng:
192         .frame $30,0,$26,0
193         .prologue 0
194
195         subq    $18,4,$18       # bias the count by -4: >= 0 means a full group of 4 remains
196         blt     $18,$543        # if we are -1, -2, -3 or -4 goto tail code
197         ldq     $20,0($17)      # a[0]
198         .align 3
# Main loop: four input words (eight result words) per iteration.
199 $542:
200         mulq    $20,$20,$5              ######
201          ldq    $21,8($17)      # a[1]
202         subq    $18,4,$18       # four more words consumed (destination spelled out like every other count update)
203         umulh   $20,$20,$1              ######
204         ldq     $27,16($17)     # a[2]
205         mulq    $21,$21,$6              ######
206         ldq     $28,24($17)     # a[3]
207         stq     $5,0($16)       # r[0] = low(a[0]^2)
208         umulh   $21,$21,$2              ######
209         stq     $1,8($16)       # r[1] = high(a[0]^2)
210         mulq    $27,$27,$7              ######
211         stq     $6,16($16)      # r[2] = low(a[1]^2)
212         umulh   $27,$27,$3              ######
213         stq     $2,24($16)      # r[3] = high(a[1]^2)
214         mulq    $28,$28,$8              ######
215         stq     $7,32($16)      # r[4] = low(a[2]^2)
216         umulh   $28,$28,$4              ######
217         stq     $3,40($16)      # r[5] = high(a[2]^2)
218
219         addq    $16,64,$16      # r += 8 words; the last two stores use negative offsets
220         addq    $17,32,$17      # a += 4 words
221         stq     $8,-16($16)     # r[6] = low(a[3]^2)
222         stq     $4,-8($16)      # r[7] = high(a[3]^2)
223
224         blt     $18,$543        # fewer than 4 words left: go to the tail
225         ldq     $20,0($17)      # a[0] of the next group
226         br      $542
227
# Tail loop: one input word per iteration while the count is > 0.
228 $442:
229         ldq     $20,0($17)   # a[0]
230         mulq    $20,$20,$5  # low 64 bits of a[0]^2
231         addq    $16,16,$16
232         addq    $17,8,$17
233         subq    $18,1,$18
234         umulh   $20,$20,$1  # high 64 bits of a[0]^2
235         stq     $5,-16($16)   # r[0]
236         stq     $1,-8($16)   # r[1]
237
238         bgt     $18,$442
239         ret     $31,($26),1     # else exit
240
241         .align 4
242 $543:
243         addq    $18,4,$18       # undo the -4 bias: $18 = words still to do
244         bgt     $18,$442        # goto tail code
245         ret     $31,($26),1     # else exit
246         .end bn_sqr_words
247
# bn_add_words
#   For each word i:  r[i] = a[i] + b[i] + carry (mod 2^64), where carry is
#   the carry out of word i-1 (0 for the first word).
#   In:   $16 = r  (64-bit words, written only)
#         $17 = a  (64-bit words, read only)
#         $18 = b  (64-bit words, read only)
#         $19 = number of words to process
#   Out:  $0  = carry out of the last word (0 or 1); 0 if $19 <= 0
#   Uses: $1-$8, $22-$25 as scratch and advances $16/$17/$18; leaf routine
#         with no stack frame.
#   Carry rule used for every word: c1 = (a+b < a) must be taken BEFORE the
#   carry-in is added, c2 = (sum+carry_in < carry_in) after it; the new carry
#   is c1 + c2, which can never exceed 1.
#   NOTE(review): presumably called from C as bn_add_words(r, a, b, n);
#   confirm the prototype on the C side.
248         .align 3
249         .globl bn_add_words
250         .ent bn_add_words
251 bn_add_words:
252 bn_add_words..ng:
253         .frame $30,0,$26,0
254         .prologue 0
255
256         subq    $19,4,$19       # bias the count by -4: >= 0 means a full group of 4 remains
257         bis     $31,$31,$0      # carry = 0
258         blt     $19,$900
259         ldq     $5,0($17)       # a[0]
260         ldq     $1,0($18)       # b[0]
261         .align 3
# Main loop: four words per iteration.
262 $901:
263         addq    $1,$5,$1        # r=a+b;
264          ldq    $6,8($17)       # a[1]
265         cmpult  $1,$5,$22       # did we overflow?
266          ldq    $2,8($18)       # b[1]
267         addq    $1,$0,$1        # c+= overflow
268          ldq    $7,16($17)      # a[2]
269         cmpult  $1,$0,$0        # overflow?
270          ldq    $3,16($18)      # b[2]
271         addq    $0,$22,$0
272          ldq    $8,24($17)      # a[3]
273         addq    $2,$6,$2        # r=a+b;
274          ldq    $4,24($18)      # b[3]
275         cmpult  $2,$6,$23       # did we overflow?
276          addq   $3,$7,$3        # r=a+b;
277         addq    $2,$0,$2        # c+= overflow
278          cmpult $3,$7,$24       # did we overflow?
279         cmpult  $2,$0,$0        # overflow?
280          addq   $4,$8,$4        # r=a+b;
281         addq    $0,$23,$0
282          cmpult $4,$8,$25       # did we overflow?
283         addq    $3,$0,$3        # c+= overflow
284          stq    $1,0($16)       # r[0]=c
285         cmpult  $3,$0,$0        # overflow?
286          stq    $2,8($16)       # r[1]=c
287         addq    $0,$24,$0
288          stq    $3,16($16)      # r[2]=c
289         addq    $4,$0,$4        # c+= overflow
290          subq   $19,4,$19       # loop--
291         cmpult  $4,$0,$0        # overflow?
292          addq   $17,32,$17      # a++
293         addq    $0,$25,$0
294          stq    $4,24($16)      # r[3]=c
295         addq    $18,32,$18      # b++
296          addq   $16,32,$16      # r++
297
298         blt     $19,$900
299          ldq    $5,0($17)       # a[0]
300         ldq     $1,0($18)       # b[0]
301          br     $901
302         .align 4
# Tail loop: one word per iteration while the count is > 0.  The a+b carry
# test has to come before the carry-in is folded into $1; testing afterwards
# loses the carry for inputs such as a=1, b=2^64-1, carry-in=1.
303 $945:
304         ldq     $5,0($17)       # a[0]
305          ldq    $1,0($18)       # b[0]
306         addq    $1,$5,$1        # t = a + b
307          subq   $19,1,$19       # loop--
308         cmpult  $1,$5,$22       # c1: did a + b overflow? (tested before the carry-in is added)
309          addq   $17,8,$17       # a++
310         addq    $1,$0,$1        # t += carry-in
311          cmpult $1,$0,$0        # c2: did adding the carry-in overflow?
312         addq    $18,8,$18       # b++
313          stq    $1,0($16)       # r[0]=t
314         addq    $0,$22,$0       # carry = c1 + c2
315          addq   $16,8,$16       # r++
316
317         bgt     $19,$945
318         ret     $31,($26),1     # else exit
319
320 $900:
321         addq    $19,4,$19       # undo the -4 bias: $19 = words still to do
322         bgt     $19,$945        # goto tail code
323         ret     $31,($26),1     # else exit
324         .end bn_add_words
325
326  #
327  # What follows was taken directly from the C compiler with a few
328  # hacks to redo the labels.
329  #
# bn_div64
#   Compiler-generated (gcc) division of the 128-bit value h:l by the 64-bit
#   divisor d, producing a 64-bit quotient built from two 32-bit digits.
#   In:   $16 = h (high word), $17 = l (low word), $18 = d (divisor)
#   Out:  $0  = quotient, or -1 when d == 0
#   Calls BN_num_bits_word(d) and, if the range check below fails, abort().
#   Saves and restores $26 and $9-$13 in a 48-byte stack frame.
#   Register roles after the prologue: $9 = h, $10 = l, $11 = d,
#   $12 = digits still to produce (starts at 2), $13 = first digit << 32,
#   $5 = d >> 32 (dh), $6 = low 32 bits of d (dl), $27 = current digit q.
330 .text
331         .align 3
332         .globl bn_div64
333         .ent bn_div64
334 bn_div64:
335         ldgp $29,0($27)         # set up the global pointer from the procedure value
336 bn_div64..ng:
337         lda $30,-48($30)        # allocate a 48-byte frame
338         .frame $30,48,$26,0
339         stq $26,0($30)          # save return address and $9-$13
340         stq $9,8($30)
341         stq $10,16($30)
342         stq $11,24($30)
343         stq $12,32($30)
344         stq $13,40($30)
345         .mask 0x4003e00,-48
346         .prologue 1
347         bis $16,$16,$9          # $9  = h
348         bis $17,$17,$10         # $10 = l
349         bis $18,$18,$11         # $11 = d
350         bis $31,$31,$13         # $13 = 0 (accumulated quotient)
351         bis $31,2,$12           # $12 = 2 digits to produce
352         bne $11,$119
353         lda $0,-1               # d == 0: return -1
354         br $31,$136
355         .align 4
356 $119:
357         bis $11,$11,$16
358         jsr $26,BN_num_bits_word        # $0 = BN_num_bits_word(d)
359         ldgp $29,0($26)
360         subq $0,64,$1
361         beq $1,$120             # result == 64: skip the range check
362         bis $31,1,$1
363         sll $1,$0,$1            # $1 = 1 << result
364         cmpule $9,$1,$1
365         bne $1,$120             # h <= (1 << result): acceptable
366  #      lda $16,_IO_stderr_
367  #      lda $17,$C32
368  #      bis $0,$0,$18
369  #      jsr $26,fprintf
370  #      ldgp $29,0($26)
371         jsr $26,abort           # range check failed (the diagnostic fprintf above is commented out)
372         ldgp $29,0($26)         # NOTE(review): execution falls into $120 if abort ever returns
373         .align 4
374 $120:
375         bis $31,64,$3
376         cmpult $9,$11,$2        # $2 = (h < d)
377         subq $3,$0,$1
378         addl $1,$31,$0          # $0 = i = 64 - result, as a sign-extended 32-bit value
379         subq $9,$11,$1
380         cmoveq $2,$1,$9         # if (h >= d) h -= d
381         beq $0,$122             # i == 0: no shifting needed
382         zapnot $0,15,$2         # $2 = low 32 bits of i
383         subq $3,$0,$1           # $1 = 64 - i
384         sll $11,$2,$11          # d <<= i
385         sll $9,$2,$3
386         srl $10,$1,$1
387         sll $10,$2,$10          # l <<= i
388         bis $3,$1,$9            # h = (h << i) | (l >> (64 - i)), using the old l
389 $122:
390         srl $11,32,$5           # dh = d >> 32
391         zapnot $11,15,$6        # dl = low 32 bits of d
392         lda $7,-1
393         .align 5
# Per-digit loop: estimate a 32-bit digit q, correct it, subtract q*d.
394 $123:
395         srl $9,32,$1
396         subq $1,$5,$1
397         bne $1,$126
398         zapnot $7,15,$27        # (h >> 32) == dh: q = 0xffffffff
399         br $31,$127
400         .align 4
401 $126:
402         bis $9,$9,$24
403         bis $5,$5,$25
404         divqu $24,$25,$27       # q = h / dh, unsigned (presumably an assembler divide macro -- confirm)
405 $127:
406         srl $10,32,$4           # $4 = l >> 32
407         .align 5
# Correction loop: decrement q while the estimate is too large.
408 $128:
409         mulq $27,$5,$1
410         subq $9,$1,$3           # t = h - q*dh
411         zapnot $3,240,$1
412         bne $1,$129             # upper 32 bits of t non-zero: q is accepted
413         mulq $6,$27,$2          # dl * q
414         sll $3,32,$1
415         addq $1,$4,$1           # (t << 32) + (l >> 32)
416         cmpule $2,$1,$2
417         bne $2,$129             # dl*q <= that value: q is accepted
418         subq $27,1,$27          # otherwise q-- and test again
419         br $31,$128
420         .align 4
421 $129:
422         mulq $27,$6,$1          # tl = q * dl
423         mulq $27,$5,$4          # th = q * dh
424         srl $1,32,$3
425         sll $1,32,$1            # tl <<= 32
426         addq $4,$3,$4           # th += (old tl >> 32)
427         cmpult $10,$1,$2        # borrow out of l - tl
428         subq $10,$1,$10         # l -= tl
429         addq $2,$4,$2           # th += borrow
430         cmpult $9,$2,$1
431         bis $2,$2,$4
432         beq $1,$134
433         addq $9,$11,$9          # h < th: add d back and
434         subq $27,1,$27          # use q - 1
435 $134:
436         subl $12,1,$12          # one digit done
437         subq $9,$4,$9           # h -= th
438         beq $12,$124            # both digits done
439         sll $27,32,$13          # keep the first digit in the upper half
440         sll $9,32,$2
441         srl $10,32,$1
442         sll $10,32,$10          # l <<= 32
443         bis $2,$1,$9            # h = (h << 32) | (l >> 32), using the old l
444         br $31,$123
445         .align 4
446 $124:
447         bis $13,$27,$0          # result = (first digit << 32) | second digit
448 $136:
449         ldq $26,0($30)          # restore saved registers and pop the frame
450         ldq $9,8($30)
451         ldq $10,16($30)
452         ldq $11,24($30)
453         ldq $12,32($30)
454         ldq $13,40($30)
455         addq $30,48,$30
456         ret $31,($26),1
457         .end bn_div64
458
# bn_sub_words
#   For each word i:  r[i] = a[i] - b[i] - borrow (mod 2^64), where borrow is
#   the borrow out of word i-1 (0 for the first word).
#   In:   $16 = r  (64-bit words, written only)
#         $17 = a  (64-bit words, read only)
#         $18 = b  (64-bit words, read only)
#         $19 = number of words to process
#   Out:  $0  = borrow out of the last word; 0 if $19 <= 0
#   Uses: $1-$8, $20-$25, $27, $28 as scratch and advances $16/$17/$18; leaf
#         routine with no stack frame.
#   Borrow rule used for every word: b1 = (a < b) before the subtraction,
#   b2 = (a-b < borrow_in) before the borrow is subtracted; new borrow = b1+b2.
#   NOTE(review): presumably called from C as bn_sub_words(r, a, b, n);
#   confirm the prototype on the C side.
459         .set noat
460         .text
461         .align 3
462         .globl bn_sub_words
463         .ent bn_sub_words
464 bn_sub_words:
465 bn_sub_words..ng:
466         .frame $30,0,$26,0
467         .prologue 0
468
469         subq    $19,    4,      $19     # bias the count by -4
470         bis     $31,    $31,    $0      # borrow = 0
471         blt     $19,    $100            # fewer than 4 words: go to the tail
472         ldq     $1,     0($17)          # a[0]
473         ldq     $2,     0($18)          # b[0]
# Main loop: four words per iteration.
474 $101:
475         ldq     $3,     8($17)          # a[1]
476         cmpult  $1,     $2,     $4      # word 0: b1 = (a < b)
477         ldq     $5,     8($18)          # b[1]
478         subq    $1,     $2,     $1      # word 0: t = a - b
479         ldq     $6,     16($17)         # a[2]
480         cmpult  $1,     $0,     $2      # word 0: b2 = (t < borrow)
481         ldq     $7,     16($18)         # b[2]
482         subq    $1,     $0,     $23     # word 0: result = t - borrow
483         ldq     $8,     24($17)         # a[3]
484         addq    $2,     $4,     $0      # word 0: borrow = b1 + b2
485         cmpult  $3,     $5,     $24     # word 1: b1
486         subq    $3,     $5,     $3      # word 1: t
487         ldq     $22,    24($18)         # b[3]
488         cmpult  $3,     $0,     $5      # word 1: b2
489         subq    $3,     $0,     $25     # word 1: result
490         addq    $5,     $24,    $0      # word 1: borrow
491         cmpult  $6,     $7,     $27     # word 2: b1
492         subq    $6,     $7,     $6      # word 2: t
493         stq     $23,    0($16)          # r[0]
494         cmpult  $6,     $0,     $7      # word 2: b2
495         subq    $6,     $0,     $28     # word 2: result
496         addq    $7,     $27,    $0      # word 2: borrow
497         cmpult  $8,     $22,    $21     # word 3: b1
498         subq    $8,     $22,    $8      # word 3: t
499         stq     $25,    8($16)          # r[1]
500         cmpult  $8,     $0,     $22     # word 3: b2
501         subq    $8,     $0,     $20     # word 3: result
502         addq    $22,    $21,    $0      # word 3: borrow
503         stq     $28,    16($16)         # r[2]
504         subq    $19,    4,      $19     # four more words consumed
505         stq     $20,    24($16)         # r[3]
506         addq    $17,    32,     $17     # a += 4 words
507         addq    $18,    32,     $18     # b += 4 words
508         addq    $16,    32,     $16     # r += 4 words
509         blt     $19,    $100            # fewer than 4 words left: go to the tail
510         ldq     $1,     0($17)          # a[0] of the next group
511         ldq     $2,     0($18)          # b[0] of the next group
512         br      $101
# Tail loop: one word per iteration while the count is > 0.
513 $102:
514         ldq     $1,     0($17)          # a[0]
515         ldq     $2,     0($18)          # b[0]
516         cmpult  $1,     $2,     $27     # b1 = (a < b)
517         subq    $1,     $2,     $1      # t = a - b
518         cmpult  $1,     $0,     $2      # b2 = (t < borrow)
519         subq    $1,     $0,     $1      # result = t - borrow
520         stq     $1,     0($16)          # r[0]
521         addq    $2,     $27,    $0      # borrow = b1 + b2
522         addq    $17,    8,      $17
523         addq    $18,    8,      $18
524         addq    $16,    8,      $16
525         subq    $19,    1,      $19
526         bgt     $19,    $102
527         ret     $31,($26),1
528 $100:
529         addq    $19,    4,      $19     # undo the -4 bias: $19 = words still to do
530         bgt     $19,    $102
531 $103:
532         ret     $31,($26),1
533         .end bn_sub_words