This patch was "ignited" by OpenBSD 3>=4 support. They've switched to ELF
[openssl.git] / crypto / bn / asm / alpha.works / mul_c4.pl
1 #!/usr/local/bin/perl
2 # alpha assembler
3
# mul_add_c(a, b, c0, c1, c2)
#
# Emits the instruction sequence that adds the double-word product a*b
# into the three-word accumulator (c2:c1:c0).  Every argument is a
# register name as handed out by the allocator.
#
# The emitter helpers (&mul, &muh, &add, &cmpult) and the register
# allocator (&NR, &FR) are defined outside this file section.  This
# routine assumes that &mul/&muh produce the low/high word of the
# product and that &cmpult sets its third operand to (first < second),
# i.e. to the carry out of the preceding &add -- confirm against the
# perlasm support code.
sub mul_add_c
        {
        my($x,$y,$c0,$c1,$c2)=@_;

        # Both halves of the product, each in a freshly allocated register.
        my($lo)=&NR(1);
        &mul($x,$y,$lo);
        my($hi)=&NR(1);
        &muh($x,$y,$hi);

        # c0 += lo; the carry out of that addition is folded into hi.
        &add($c0,$lo,$c0);
        my($carry)=&NR(1);
        &cmpult($c0,$lo,$carry);
        &FR($lo);
        &add($carry,$hi,$hi);
        &FR($carry);

        # c1 += hi; the carry out of that addition goes into c2.
        &add($c1,$hi,$c1);
        ($carry)=&NR(1);
        &cmpult($c1,$hi,$carry);
        &FR($hi);
        &add($c2,$carry,$c2);
        &FR($carry);
        }
18
# bn_mul_comba4(name)
#
# Emits a function called `name` that multiplies two 4-word operands and
# stores the 8-word product.  The generated function takes three
# parameters, fetched with &wparam(0..2): the result pointer, then the
# two operand pointers.
#
# The product is built column by column (Comba / product scanning).
# Result word k is the sum of the high words of the column k-1 products,
# the low words of the column k products, and the carries that fell out
# of column k-1:
#
#   $R        running sum for the current column
#   $H1,$H2   carry counters for the current column; their sum seeds $R
#             for the next column
#   $r[n]     the 32 partial-product words, numbered in issue order
#   $t[n]     carry flag produced while adding $r[n] into $R
#
# Multiplies are issued a few steps ahead of the adds that consume them
# (the statements indented by one extra space are the accumulate chain).
# The statement order is therefore deliberate and should not be
# "tidied".
#
# The emitter helpers (&ld, &st, &mov, &mul, &muh, &add, &cmpult, ...)
# and the register allocator (&NR, &FR, &init_pool, &fin_pool) are
# defined outside this file section.  This routine assumes &mul/&muh
# yield the low/high product word and &cmpult sets its third operand to
# (first < second) -- confirm against the perlasm support code.
sub bn_mul_comba4
        {
        my($name)=@_;
        my(@a,@b);              # registers holding a[0..3] and b[0..3]
        my(@r,@t);              # partial products and carry flags
        my($rp,$ap,$bp);        # parameter registers
        my($R,$H1,$H2);         # column sum and the two carry counters

        # Package global; nothing in this section reads it.
        # NOTE(review): presumably consumed by the support code -- confirm.
        $cnt=1;
        &init_pool(3);

        $rp=&wparam(0);
        $ap=&wparam(1);
        $bp=&wparam(2);

        &function_begin($name,"");

        &comment("");

        &ld(($a[0])=&NR(1),&QWPw(0,$ap));
        &ld(($b[0])=&NR(1),&QWPw(0,$bp));
        &ld(($a[1])=&NR(1),&QWPw(1,$ap));
        &ld(($b[1])=&NR(1),&QWPw(1,$bp));
        &mul($a[0],$b[0],($r[0])=&NR(1));
        &ld(($a[2])=&NR(1),&QWPw(2,$ap));
        &ld(($b[2])=&NR(1),&QWPw(2,$bp));
        &muh($a[0],$b[0],($r[1])=&NR(1));
        # $ap/$bp are released before their final load so that the
        # allocator is free to hand the same register back as the
        # destination of that load.
        &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
        &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
        &mul($a[0],$b[1],($r[2])=&NR(1));

        ($R,$H1,$H2)=&NR(3);

        # Result word 0 is just the low word of a[0]*b[0].
        &st($r[0],&QWPw(0,$rp)); &FR($r[0]);

        # ---- result word 1: r1 + r2 + r3 ----
        &mov("zero",$R);
        &mul($a[1],$b[0],($r[3])=&NR(1));

        &mov("zero",$H1);
        # $H2 has to start from zero as well: it collects $t[2] below
        # and is summed into the next column.
        &mov("zero",$H2);
         &add($R,$r[1],$R);
        &muh($a[0],$b[1],($r[4])=&NR(1));
         &cmpult($R,$r[1],($t[1])=&NR(1));      &FR($r[1]);
         &add($R,$r[2],$R);
         &add($H1,$t[1],$H1);                   &FR($t[1]);
        &muh($a[1],$b[0],($r[5])=&NR(1));
         &cmpult($R,$r[2],($t[2])=&NR(1));      &FR($r[2]);
         &add($R,$r[3],$R);
         &add($H2,$t[2],$H2);                   &FR($t[2]);
        &mul($a[0],$b[2],($r[6])=&NR(1));
         &cmpult($R,$r[3],($t[3])=&NR(1));      &FR($r[3]);
         &add($H1,$t[3],$H1);                   &FR($t[3]);
        &st($R,&QWPw(1,$rp));
        &add($H1,$H2,$R);

        # ---- result word 2: carries + r4 .. r8 ----
        &mov("zero",$H1);
         &add($R,$r[4],$R);
        &mov("zero",$H2);
        &mul($a[1],$b[1],($r[7])=&NR(1));
         &cmpult($R,$r[4],($t[4])=&NR(1));      &FR($r[4]);
         &add($R,$r[5],$R);
         &add($H1,$t[4],$H1);                   &FR($t[4]);
        &mul($a[2],$b[0],($r[8])=&NR(1));
         &cmpult($R,$r[5],($t[5])=&NR(1));      &FR($r[5]);
         # $r[6] (low word of a[0]*b[2]) is the operand the following
         # &cmpult tests and releases.
         &add($R,$r[6],$R);
         &add($H2,$t[5],$H2);                   &FR($t[5]);
        &muh($a[0],$b[2],($r[9])=&NR(1));
         &cmpult($R,$r[6],($t[6])=&NR(1));      &FR($r[6]);
         &add($R,$r[7],$R);
         &add($H1,$t[6],$H1);                   &FR($t[6]);
        &muh($a[1],$b[1],($r[10])=&NR(1));
         &cmpult($R,$r[7],($t[7])=&NR(1));      &FR($r[7]);
         &add($R,$r[8],$R);
         &add($H2,$t[7],$H2);                   &FR($t[7]);
        &muh($a[2],$b[0],($r[11])=&NR(1));
         &cmpult($R,$r[8],($t[8])=&NR(1));      &FR($r[8]);
         &add($H1,$t[8],$H1);                   &FR($t[8]);
        &st($R,&QWPw(2,$rp));
        &add($H1,$H2,$R);

        # ---- result word 3: carries + r9 .. r15 ----
        # From here on every carry is counted in $H1; $H2 stays zero.
        &mov("zero",$H1);
         &add($R,$r[9],$R);
        &mov("zero",$H2);
        &mul($a[0],$b[3],($r[12])=&NR(1));
         &cmpult($R,$r[9],($t[9])=&NR(1));      &FR($r[9]);
         &add($R,$r[10],$R);
         &add($H1,$t[9],$H1);                   &FR($t[9]);
        &mul($a[1],$b[2],($r[13])=&NR(1));
         &cmpult($R,$r[10],($t[10])=&NR(1));    &FR($r[10]);
         &add($R,$r[11],$R);
         &add($H1,$t[10],$H1);                  &FR($t[10]);
        &mul($a[2],$b[1],($r[14])=&NR(1));
         &cmpult($R,$r[11],($t[11])=&NR(1));    &FR($r[11]);
         &add($R,$r[12],$R);
         &add($H1,$t[11],$H1);                  &FR($t[11]);
        &mul($a[3],$b[0],($r[15])=&NR(1));
         &cmpult($R,$r[12],($t[12])=&NR(1));    &FR($r[12]);
         &add($R,$r[13],$R);
         &add($H1,$t[12],$H1);                  &FR($t[12]);
        &muh($a[0],$b[3],($r[16])=&NR(1));
         &cmpult($R,$r[13],($t[13])=&NR(1));    &FR($r[13]);
         &add($R,$r[14],$R);
         &add($H1,$t[13],$H1);                  &FR($t[13]);
        &muh($a[1],$b[2],($r[17])=&NR(1));
         &cmpult($R,$r[14],($t[14])=&NR(1));    &FR($r[14]);
         &add($R,$r[15],$R);
         &add($H1,$t[14],$H1);                  &FR($t[14]);
        &muh($a[2],$b[1],($r[18])=&NR(1));
         &cmpult($R,$r[15],($t[15])=&NR(1));    &FR($r[15]);
         &add($H1,$t[15],$H1);                  &FR($t[15]);
        &st($R,&QWPw(3,$rp));
        &add($H1,$H2,$R);

        # ---- result word 4: carries + r16 .. r22 ----
        &mov("zero",$H1);
         &add($R,$r[16],$R);
        &mov("zero",$H2);
        &muh($a[3],$b[0],($r[19])=&NR(1));
         &cmpult($R,$r[16],($t[16])=&NR(1));    &FR($r[16]);
         &add($R,$r[17],$R);
         &add($H1,$t[16],$H1);                  &FR($t[16]);
        &mul($a[1],$b[3],($r[20])=&NR(1));
         &cmpult($R,$r[17],($t[17])=&NR(1));    &FR($r[17]);
         &add($R,$r[18],$R);
         &add($H1,$t[17],$H1);                  &FR($t[17]);
        &mul($a[2],$b[2],($r[21])=&NR(1));
         &cmpult($R,$r[18],($t[18])=&NR(1));    &FR($r[18]);
         &add($R,$r[19],$R);
         &add($H1,$t[18],$H1);                  &FR($t[18]);
        &mul($a[3],$b[1],($r[22])=&NR(1));
         &cmpult($R,$r[19],($t[19])=&NR(1));    &FR($r[19]);
         &add($R,$r[20],$R);
         &add($H1,$t[19],$H1);                  &FR($t[19]);
        &muh($a[1],$b[3],($r[23])=&NR(1));
         &cmpult($R,$r[20],($t[20])=&NR(1));    &FR($r[20]);
         &add($R,$r[21],$R);
         &add($H1,$t[20],$H1);                  &FR($t[20]);
        &muh($a[2],$b[2],($r[24])=&NR(1));
         &cmpult($R,$r[21],($t[21])=&NR(1));    &FR($r[21]);
         &add($R,$r[22],$R);
         &add($H1,$t[21],$H1);                  &FR($t[21]);
        &muh($a[3],$b[1],($r[25])=&NR(1));
         &cmpult($R,$r[22],($t[22])=&NR(1));    &FR($r[22]);
         &add($H1,$t[22],$H1);                  &FR($t[22]);
        &st($R,&QWPw(4,$rp));
        &add($H1,$H2,$R);

        # ---- result word 5: carries + r23 .. r27 ----
        &mov("zero",$H1);
         &add($R,$r[23],$R);
        &mov("zero",$H2);
        &mul($a[2],$b[3],($r[26])=&NR(1));
         &cmpult($R,$r[23],($t[23])=&NR(1));    &FR($r[23]);
         &add($R,$r[24],$R);
         &add($H1,$t[23],$H1);                  &FR($t[23]);
        &mul($a[3],$b[2],($r[27])=&NR(1));
         &cmpult($R,$r[24],($t[24])=&NR(1));    &FR($r[24]);
         &add($R,$r[25],$R);
         &add($H1,$t[24],$H1);                  &FR($t[24]);
        &muh($a[2],$b[3],($r[28])=&NR(1));
         &cmpult($R,$r[25],($t[25])=&NR(1));    &FR($r[25]);
         &add($R,$r[26],$R);
         &add($H1,$t[25],$H1);                  &FR($t[25]);
        &muh($a[3],$b[2],($r[29])=&NR(1));
         &cmpult($R,$r[26],($t[26])=&NR(1));    &FR($r[26]);
         &add($R,$r[27],$R);
         &add($H1,$t[26],$H1);                  &FR($t[26]);
        &mul($a[3],$b[3],($r[30])=&NR(1));
         &cmpult($R,$r[27],($t[27])=&NR(1));    &FR($r[27]);
         &add($H1,$t[27],$H1);                  &FR($t[27]);
        &st($R,&QWPw(5,$rp));
        &add($H1,$H2,$R);

        # ---- result word 6: carries + r28 .. r30 ----
        &mov("zero",$H1);
         &add($R,$r[28],$R);
        &mov("zero",$H2);
        &muh($a[3],$b[3],($r[31])=&NR(1));
         &cmpult($R,$r[28],($t[28])=&NR(1));    &FR($r[28]);
         &add($R,$r[29],$R);
         &add($H1,$t[28],$H1);                  &FR($t[28]);
        ############
         &cmpult($R,$r[29],($t[29])=&NR(1));    &FR($r[29]);
         &add($R,$r[30],$R);
         &add($H1,$t[29],$H1);                  &FR($t[29]);
        ############
         &cmpult($R,$r[30],($t[30])=&NR(1));    &FR($r[30]);
         &add($H1,$t[30],$H1);                  &FR($t[30]);
        &st($R,&QWPw(6,$rp));
        &add($H1,$H2,$R);

        # ---- result word 7: carries + high word of a[3]*b[3] ----
        # No carry is captured here: this is the top word of the result.
         &add($R,$r[31],$R);                    &FR($r[31]);
        &st($R,&QWPw(7,$rp));

        &FR($R,$H1,$H2);
        &function_end($name);

        &fin_pool;
        }

1;
212
213 1;