525ca7494b731df36cfb269f2a0c8c876d5b97b6
[openssl.git] / crypto / bn / asm / alpha.works / mul_c8.pl
1 #!/usr/local/bin/perl
2 # alpha assembler
3
# bn_mul_comba8(name)
#
# Drives the assembler-generator helpers (&ld, &st, &mul, &muh,
# &mul_add_c, &NR, &FR, ... - all defined outside this file) to produce a
# function called $name.  The generated code loads eight words from each
# of two input operands, forms all 64 word products column by column
# (column k sums every a[i]*b[j] with i+j == k) and stores sixteen result
# words.
#
# Word parameters of the generated function, as requested via &wparam:
#   0 - result pointer ($rp), receives words 0..15
#   1 - first operand  ($ap), words 0..7 are read
#   2 - second operand ($bp), words 0..7 are read
#
# NOTE(review): &NR/&FR look like "new register"/"free register" on the
# pool set up by &init_pool, and &QWPw(i,p) like "i-th quad word at p";
# confirm against their definitions.  Because allocation and release are
# interleaved with the emitted instructions, the order of the calls below
# is significant and must not be rearranged casually.
sub bn_mul_comba8
        {
        local($name)=@_;
        local(@a,@b,$r,$c0,$c1,$c2);

        $cnt=1;
        &init_pool(3);

        $rp=&wparam(0);
        $ap=&wparam(1);
        $bp=&wparam(2);

        &function_begin($name,"");

        &comment("");

        # Two temporary slots are used to preserve $reg_s0/$reg_s1; once
        # saved, both are handed to &FR so they can be used as scratch.
        &stack_push(2);
        &ld(($a[0])=&NR(1),&QWPw(0,$ap));
        &ld(($b[0])=&NR(1),&QWPw(0,$bp));
        &st($reg_s0,&swtmp(0)); &FR($reg_s0);
        &st($reg_s1,&swtmp(1)); &FR($reg_s1);

        # Load the remaining operand words.  Each a[i]/b[i] must come
        # from word i of its operand; the pointers are released right
        # after their last use.
        &ld(($a[1])=&NR(1),&QWPw(1,$ap));
        &ld(($b[1])=&NR(1),&QWPw(1,$bp));
        &ld(($a[2])=&NR(1),&QWPw(2,$ap));
        &ld(($b[2])=&NR(1),&QWPw(2,$bp));
        &ld(($a[3])=&NR(1),&QWPw(3,$ap));
        &ld(($b[3])=&NR(1),&QWPw(3,$bp));
        &ld(($a[4])=&NR(1),&QWPw(4,$ap));
        &ld(($b[4])=&NR(1),&QWPw(4,$bp));
        &ld(($a[5])=&NR(1),&QWPw(5,$ap));
        &ld(($b[5])=&NR(1),&QWPw(5,$bp));
        &ld(($a[6])=&NR(1),&QWPw(6,$ap));
        &ld(($b[6])=&NR(1),&QWPw(6,$bp));
        &ld(($a[7])=&NR(1),&QWPw(7,$ap));       &FR($ap);
        &ld(($b[7])=&NR(1),&QWPw(7,$bp));       &FR($bp);

        # ($c0,$c1,$c2) is the three-word column accumulator.  After a
        # column is stored, the stored register is released, a fresh one
        # is obtained, the names are rotated so the next column starts in
        # the old $c1, and the new top word is cleared.

        # column 0: a0*b0
        # (mul/muh presumably yield the low/high product words - confirm)
        ($c0,$c1,$c2)=&NR(3);
        &mov("zero",$c2);
        &mul($a[0],$b[0],$c0);
        &muh($a[0],$b[0],$c1);
        &st($c0,&QWPw(0,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 1
        &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(1,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 2
        &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(2,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 3
        &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(3,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 4
        &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(4,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 5
        &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(5,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 6
        &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
        &st($c0,&QWPw(6,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 7: from here on each column contains the last use of
        # one a[] word (paired with b7) and one b[] word (paired with
        # a7); each is released immediately after that use.
        &mul_add_c($a[0],$b[7],$c0,$c1,$c2);    &FR($a[0]);
        &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[0],$c0,$c1,$c2);    &FR($b[0]);
        &st($c0,&QWPw(7,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 8
        &mul_add_c($a[1],$b[7],$c0,$c1,$c2);    &FR($a[1]);
        &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[1],$c0,$c1,$c2);    &FR($b[1]);
        &st($c0,&QWPw(8,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 9
        &mul_add_c($a[2],$b[7],$c0,$c1,$c2);    &FR($a[2]);
        &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[2],$c0,$c1,$c2);    &FR($b[2]);
        &st($c0,&QWPw(9,$rp));                  &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 10
        &mul_add_c($a[3],$b[7],$c0,$c1,$c2);    &FR($a[3]);
        &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[3],$c0,$c1,$c2);    &FR($b[3]);
        &st($c0,&QWPw(10,$rp));                 &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 11
        &mul_add_c($a[4],$b[7],$c0,$c1,$c2);    &FR($a[4]);
        &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[4],$c0,$c1,$c2);    &FR($b[4]);
        &st($c0,&QWPw(11,$rp));                 &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 12
        &mul_add_c($a[5],$b[7],$c0,$c1,$c2);    &FR($a[5]);
        &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
        &mul_add_c($a[7],$b[5],$c0,$c1,$c2);    &FR($b[5]);
        &st($c0,&QWPw(12,$rp));                 &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 13
        &mul_add_c($a[6],$b[7],$c0,$c1,$c2);    &FR($a[6]);
        &mul_add_c($a[7],$b[6],$c0,$c1,$c2);    &FR($b[6]);
        &st($c0,&QWPw(13,$rp));                 &FR($c0); ($c0)=&NR(1);
        ($c0,$c1,$c2)=($c1,$c2,$c0);
        &mov("zero",$c2);

        # column 14: the last product; its two accumulator words become
        # result words 14 and 15.
        &mul_add_c($a[7],$b[7],$c0,$c1,$c2);    &FR($a[7],$b[7]);
        &st($c0,&QWPw(14,$rp));
        &st($c1,&QWPw(15,$rp));

        &FR($c0,$c1,$c2);

        # Restore the two registers saved at entry and drop the slots.
        &ld($reg_s0,&swtmp(0));
        &ld($reg_s1,&swtmp(1));
        &stack_pop(2);

        &function_end($name);

        &fin_pool;
        }
176
177 1;