79d86dd25cd1d6828c9e9ebf436aaccd1d3d7daa
[openssl.git] / crypto / bn / asm / alpha.works / mul_c4.works.pl
1 #!/usr/local/bin/perl
2 # alpha assembler
3
# mul_add_c($a,$b,$c0,$c1,$c2)
#
# Emits, via the perlasm helpers defined elsewhere (mul, muh, add, cmpult),
# one multiply-accumulate step: the product of the registers $a and $b is
# folded into the three-register accumulator ($c0,$c1,$c2).
#
# NOTE(review): the comments below assume that mul/muh produce the low/high
# halves of the product and that every helper takes (src, src, dest) --
# confirm against the helper definitions.
#
# Scratch registers are obtained one at a time with NR(1) and handed back
# with FR right after their last use.  Every call also prints a "count=N"
# trace line on STDERR and increments the package global $cnt.
sub mul_add_c
        {
        local($a,$b,$c0,$c1,$c2)=@_;
        local($lo,$hi,$carry_lo,$carry_hi);

        # Progress trace; $cnt is a package global, not a local.
        print STDERR "count=$cnt\n";
        $cnt++;

        # Both halves of the product, each in a fresh scratch register.
        ($lo)=&NR(1);
        &mul($a,$b,$lo);
        ($hi)=&NR(1);
        &muh($a,$b,$hi);

        # Accumulate the low half into $c0; the cmpult result (presumably
        # the carry out of that addition) is added to the high half.
        &add($c0,$lo,$c0);
        ($carry_lo)=&NR(1);
        &cmpult($c0,$lo,$carry_lo);
        &FR($lo);
        &add($carry_lo,$hi,$hi);
        &FR($carry_lo);

        # Accumulate the high half into $c1; the second cmpult result is
        # added to $c2.
        &add($c1,$hi,$c1);
        ($carry_hi)=&NR(1);
        &cmpult($c1,$hi,$carry_hi);
        &FR($hi);
        &add($c2,$carry_hi,$c2);
        &FR($carry_hi);
        }
19
# bn_mul_comba4($name)
#
# Emits the assembler function called $name.  The generated code loads four
# words through each of the second and third parameters ($ap, $bp), forms
# the products column by column with mul_add_c, and stores eight result
# words through the first parameter ($rp).
#
# NOTE(review): wparam, QWPw, ld, st, mov, NR, FR, init_pool, fin_pool,
# function_begin, function_end and comment are defined elsewhere; what is
# said about them here is inferred from how they are used -- confirm
# against their definitions.
#
# The order of the helper calls (and therefore the register allocation
# order) is significant and follows the column schedule in @products.
sub bn_mul_comba4
        {
        local($name)=@_;
        # NOTE(review): $r is localized but never used below, whereas $rp,
        # $ap and $bp are assigned as package globals; kept unchanged.
        local(@a,@b,$r,$c0,$c1,$c2);

        # $products[$word] lists, in emission order, the (index into @a,
        # index into @b) pairs accumulated for result word $word.  Word 0
        # is emitted specially and word 6 is handled after the loop.
        my @products=(
                [],
                [[0,1],[1,0]],
                [[1,1],[0,2],[2,0]],
                [[0,3],[1,2],[2,1],[3,0]],
                [[1,3],[2,2],[3,1]],
                [[2,3],[3,2]],
                );

        $cnt=1;
        &init_pool(3);

        $rp=&wparam(0);
        $ap=&wparam(1);
        $bp=&wparam(2);

        &function_begin($name,"");

        &comment("");

        # Load a[0..3] and b[0..3], interleaved, each into a fresh register.
        # The pointer registers are released right after their last load.
        # The index is lexical so that no helper can clobber it.
        for my $i (0 .. 3)
                {
                ($a[$i])=&NR(1);
                &ld($a[$i],&QWPw($i,$ap));
                &FR($ap) if ($i == 3);

                ($b[$i])=&NR(1);
                &ld($b[$i],&QWPw($i,$bp));
                &FR($bp) if ($i == 3);
                }

        ($c0,$c1,$c2)=&NR(3);
        &mov("zero",$c2);

        for my $word (0 .. 5)
                {
                if ($word == 0)
                        {
                        # First column: a single product written straight
                        # into $c0/$c1, nothing to accumulate yet.
                        &mul($a[0],$b[0],$c0);
                        &muh($a[0],$b[0],$c1);
                        }
                else
                        {
                        foreach my $pair (@{$products[$word]})
                                {
                                my($i,$j)=@$pair;

                                &mul_add_c($a[$i],$b[$j],$c0,$c1,$c2);

                                # An input register is released right after
                                # its final product: a[i] is last used with
                                # b[3], b[j] is last used with a[3].
                                &FR($a[$i]) if ($j == 3);
                                &FR($b[$j]) if ($i == 3);
                                }
                        }

                # Store the finished word, then rotate the accumulator
                # names and clear the new top register.
                &st($c0,&QWPw($word,$rp));
                &FR($c0);
                # NOTE(review): other NR calls pass a count; this one passes
                # the register just freed.  Reproduced verbatim -- confirm
                # against NR's definition.
                ($c0)=&NR($c0);
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                &mov("zero",$c2);
                }

        # Last column: one product, after which the remaining two
        # accumulator words are stored as result words 6 and 7.
        &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
        &FR($a[3],$b[3]);
        &st($c0,&QWPw(6,$rp));
        &st($c1,&QWPw(7,$rp));

        &FR($c0,$c1,$c2);

        &function_end($name);

        # Called without parentheses, so fin_pool sees this sub's @_
        # (kept exactly as in the original).
        &fin_pool;
        }
97
98 1;