Add md5-sparcv9.pl.
[openssl.git] / crypto / md5 / asm / md5-sparcv9.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # MD5 for SPARCv9, 7.5 cycles per byte on UltraSPARC, >40% faster than
11 # code generated by Sun C 5.2.
12
# ABI selection: default to the 32-bit ABI; switch to 64-bit as soon as any
# command-line argument looks like a 64-bit compiler flag.
$bits=32;
for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }

# $frame is the amount subtracted from %sp by the `save` in the prologue.
# $bias is set together with it but is not referenced by the code emitted
# in this file.
if ($bits==64)  { $bias=2047; $frame=192; }
else            { $bias=0;    $frame=112; }

# The first command-line argument names the output file.  Redirect STDOUT
# to it with a checked 3-argument open so that a bad path aborts the build
# instead of letting the generator run on without the requested file.
# When no name is given, STDOUT is left untouched.
$output=shift;
if (defined($output) && length($output)) {
    open(STDOUT,'>',$output)
        or die "$0: cannot open $output for writing: $!\n";
}

# Integer arithmetic from here on: the round helpers rely on it for
# ($i+1)/2 and $j/2 when they pick an input register.
use integer;

($ctx,$inp,$len)=("%i0","%i1","%i2");   # input arguments

# 64-bit values
# @X receives the input block (loaded with ldxa in .Loop, up to nine
# doublewords), $tx is a scratch register, $AB/$CD hold packed copies of
# A,B and C,D for the duration of one block.
@X=("%o0","%o1","%o2","%o3","%o4","%o5","%o7","%g1","%g2");
$tx="%g3";
($AB,$CD)=("%g4","%g5");

# 32-bit values
# @V is the rotating list of working variables; $t1..$t3 are temporaries;
# $saved_asi keeps the caller's %asi; $shr/$shl1/$shl2 are the shift counts
# used to realign misaligned input.
@V=($A,$B,$C,$D)=map("%l$_",(0..3));
($t1,$t2,$t3,$saved_asi)=map("%l$_",(4..7));
($shr,$shl1,$shl2)=("%i3","%i4","%i5");
34
# Additive constants, one per step, in groups of sixteen.  They are kept as
# numeric literals because the round helpers interpolate them straight into
# %hi()/%lo() operands.  Every helper fetches @K[$i+1] for the step that
# follows it, so step 63 reads index 64: hence the terminating 0.
my @K = (
        # steps 0..15
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

        # steps 16..31
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

        # steps 32..47
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

        # steps 48..63
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,

        # sentinel read by step 63
        0,
);
54
# R0 - append the assembly for step $i of the first sixteen-step group to
# $code (the caller uses it for steps 0..14).
#
#   $i            step number
#   $a,$b,$c,$d   names of the registers acting as a, b, c, d in this step
#
# The emitted code expects $t2 to already hold X[$i]+K[$i] and $t1 to hold
# c^d; it leaves X[$i+1]+K[$i+1] in $t2 and b^c in $t1 for the next step.
# Instructions indented by one extra column belong to that preparation and
# are interleaved with the step proper.
#
# Two 32-bit input words share one 64-bit @X register:
#   even $i - the next word is the upper half of @X[$j]; it is shifted down
#             into $tx.
#   odd  $i - the next word starts a fresh register, which is first aligned
#             by combining it with its successor via $shr/$shl1/$shl2.
sub R0 {
  my ($i,$a,$b,$c,$d) = @_;
  my @shifts = (7,12,17,22);
  my $rot = $shifts[$i%4];
  my $j   = ($i+1)/2;           # integer division (file-level `use integer`)

  if (($i&1) == 0) {
    $code .= <<___;
         srlx   @X[$j],32,$tx           ! extract X[`2*$j+1`]
        and     $b,$t1,$t1              ! round $i
        add     $t2,$a,$a
        xor     $d,$t1,$t1
         sethi  %hi(@K[$i+1]),$t2
        add     $t1,$a,$a
         or     $t2,%lo(@K[$i+1]),$t2
        sll     $a,$rot,$t3
         add    $tx,$t2,$t2             ! X[`2*$j+1`]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
         xor     $b,$c,$t1
        add     $t3,$a,$a
___
  } else {
    $code .= <<___;
         srlx   @X[$j],$shr,@X[$j]      ! align X[`$i+1`]
        and     $b,$t1,$t1              ! round $i
         sllx   @X[$j+1],$shl1,$tx
        add     $t2,$a,$a
         sllx   $tx,$shl2,$tx
        xor     $d,$t1,$t1
         or     $tx,@X[$j],@X[$j]
         sethi  %hi(@K[$i+1]),$t2
        add     $t1,$a,$a
         or     $t2,%lo(@K[$i+1]),$t2
        sll     $a,$rot,$t3
         add    @X[$j],$t2,$t2          ! X[`$i+1`]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
         xor     $b,$c,$t1
        add     $t3,$a,$a
___
  }
}
97
# R0_1 - append the assembly for the last step of the first sixteen-step
# group to $code (the caller uses it for step 15 only).
#
#   $i            step number
#   $a,$b,$c,$d   names of the registers acting as a, b, c, d in this step
#
# Same step body as the even branch of R0, with two differences that set up
# the following group: the word fetched for the next step is always X[1]
# (upper half of @X[0]), and $t1 is left holding b&~c (andn) rather than
# b^c, which is the form R1 consumes.
sub R0_1 {
  my ($i,$a,$b,$c,$d) = @_;
  my @shifts = (7,12,17,22);
  my $rot    = $shifts[$i%4];

  $code .= <<___;
         srlx   @X[0],32,$tx            ! extract X[1]
        and     $b,$t1,$t1              ! round $i
        add     $t2,$a,$a
        xor     $d,$t1,$t1
         sethi  %hi(@K[$i+1]),$t2
        add     $t1,$a,$a
         or     $t2,%lo(@K[$i+1]),$t2
        sll     $a,$rot,$t3
         add    $tx,$t2,$t2             ! X[1]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
         andn    $b,$c,$t1
        add     $t3,$a,$a
___
}
118
# R1 - append the assembly for step $i of the second sixteen-step group to
# $code (the caller uses it for steps 16..31).
#
#   $i            step number
#   $a,$b,$c,$d   names of the registers acting as a, b, c, d in this step
#
# On entry $t2 holds this step's X+K sum and $t1 holds c&~d from the
# previous step; the step ORs in b&d.  $j is the index of the input word
# needed by step $i+1: the second group's schedule, except for step 31,
# which already follows the third group's schedule.  The last instruction
# prepared for the next step is andn, or xor for step 31.
sub R1 {
  my ($i,$a,$b,$c,$d) = @_;
  my $rot = (5,9,14,20)[$i%4];

  my $j;
  if ($i<31)    { $j = (1+5*($i+1))%16; }
  else          { $j = (5+3*($i+1))%16; }

  # Even words are the low half of their register and are used in place;
  # odd words are the high half and must be shifted down into $tx first.
  # ($tx is a register name, hence always true, so this matches a
  # `$j&1 && ($xi=$tx)` test.)
  my $xi = $X[$j/2];
  if ($j&1) {
    $xi = $tx;
    $code .= <<___;
         srlx   @X[$j/2],32,$xi         ! extract X[$j]
___
  }

  $code .= <<___;
        and     $b,$d,$t3               ! round $i
        add     $t2,$a,$a
        or      $t3,$t1,$t1
         sethi  %hi(@K[$i+1]),$t2
        add     $t1,$a,$a
         or     $t2,%lo(@K[$i+1]),$t2
        sll     $a,$rot,$t3
         add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
         `$i<31?"andn":"xor"`    $b,$c,$t1
        add     $t3,$a,$a
___
}
143
# R2 - append the assembly for step $i of the third sixteen-step group to
# $code (the caller uses it for steps 32..47).
#
#   $i            step number
#   $a,$b,$c,$d   names of the registers acting as a, b, c, d in this step
#
# On entry $t2 holds this step's X+K sum and $t1 holds c^d from the
# previous step; the step XORs in b.  $j is the index of the input word
# needed by step $i+1: the third group's schedule, except for step 47,
# which already follows the fourth group's schedule.
sub R2 {
  my ($i,$a,$b,$c,$d) = @_;
  my $rot = (4,11,16,23)[$i%4];

  my $j;
  if ($i<47)    { $j = (5+3*($i+1))%16; }
  else          { $j = (0+7*($i+1))%16; }

  # Low-half (even) words are added straight from their register; high-half
  # (odd) words are shifted down into $tx first.  $tx is a register name and
  # therefore always true.
  my $xi = $X[$j/2];
  if ($j&1) {
    $xi = $tx;
    $code .= <<___;
         srlx   @X[$j/2],32,$xi         ! extract X[$j]
___
  }

  $code .= <<___;
        add     $t2,$a,$a               ! round $i
        xor     $b,$t1,$t1
         sethi  %hi(@K[$i+1]),$t2
        add     $t1,$a,$a
         or     $t2,%lo(@K[$i+1]),$t2
        sll     $a,$rot,$t3
         add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
         xor     $b,$c,$t1
        add     $t3,$a,$a
___
}
167
# R3 - append the assembly for step $i of the fourth sixteen-step group to
# $code (the caller uses it for steps 48..63).
#
#   $i            step number
#   $a,$b,$c,$d   names of the registers acting as a, b, c, d in this step
#
# On entry $t2 holds this step's X+K sum.  Unlike the earlier groups the
# boolean function is computed entirely inside the step (orn, then xor),
# so nothing is handed over in $t1.  $j is the index of the input word
# needed by step $i+1; for step 63 that is X[0] paired with the sentinel at
# the end of @K.
sub R3 {
  my ($i,$a,$b,$c,$d) = @_;
  my $rot = (6,10,15,21)[$i%4];
  my $j   = (0+7*($i+1))%16;
  my $xi  = $X[$j/2];

  $code .= <<___;
        add     $t2,$a,$a               ! round $i
___

  # High-half (odd) words are shifted down into $tx right after the first
  # add; low-half words are used from their register as they are.  $tx is a
  # register name and therefore always true.
  if ($j&1) {
    $xi = $tx;
    $code .= <<___;
         srlx   @X[$j/2],32,$xi         ! extract X[$j]
___
  }

  $code .= <<___;
        orn     $b,$d,$t1
         sethi  %hi(@K[$i+1]),$t2
        xor     $c,$t1,$t1
         or     $t2,%lo(@K[$i+1]),$t2
        add     $t1,$a,$a
        sll     $a,$rot,$t3
         add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
        srl     $a,32-$rot,$a
        add     $b,$t3,$t3
        add     $t3,$a,$a
___
}
193
# Assemble the text of md5_block_asm_data_order in $code.
#
# Layout of the generated routine:
#   prologue - save a register window, remember the caller's %asi and
#              select ASI 0x88 (ASI_PRIMARY_LITTLE) for the input loads;
#              round $inp down to a multiple of 8 and derive the shift
#              counts $shr/$shl1/$shl2 (shr+shl1+shl2==64) that are used to
#              realign the input; load A..D from $ctx.
#   .Loop    - one iteration per 64-byte block: load the input into @X
#              (the ninth ldxa sits in the delay slot of an annulled branch
#              that tests whether the input was misaligned), pack A,B and
#              C,D into $AB/$CD, prime $t1/$t2 for step 0, run the 64
#              steps, add the packed values back in and count $len down.
#   epilogue - store A..D back to $ctx, restore %asi and return.

# The 64-bit ABI needs %g2 and %g3 declared as scratch registers.
if ($bits==64) {
$code.=<<___;
.register       %g2,#scratch
.register       %g3,#scratch
___
}

$code.=<<___;
.section        ".text",#alloc,#execinstr

.globl  md5_block_asm_data_order
.align  32
md5_block_asm_data_order:
        save    %sp,-$frame,%sp

        rd      %asi,$saved_asi
        wr      %g0,0x88,%asi           ! ASI_PRIMARY_LITTLE
        and     $inp,7,$shr
        andn    $inp,7,$inp

        sll     $shr,3,$shr             ! *=8
        mov     56,$shl2
        ld      [$ctx+0],$A
        sub     $shl2,$shr,$shl2
        ld      [$ctx+4],$B
        and     $shl2,32,$shl1
        add     $shl2,8,$shl2
        ld      [$ctx+8],$C
        sub     $shl2,$shl1,$shl2       ! shr+shl1+shl2==64
        ld      [$ctx+12],$D
        nop

.Loop:
         cmp    $shr,0                  ! was inp aligned?
        ldxa    [$inp+0]%asi,@X[0]      ! load little-endian input
        ldxa    [$inp+8]%asi,@X[1]
        ldxa    [$inp+16]%asi,@X[2]
        ldxa    [$inp+24]%asi,@X[3]
        ldxa    [$inp+32]%asi,@X[4]
         sllx   $A,32,$AB               ! pack A,B
        ldxa    [$inp+40]%asi,@X[5]
         sllx   $C,32,$CD               ! pack C,D
        ldxa    [$inp+48]%asi,@X[6]
         or     $B,$AB,$AB
        ldxa    [$inp+56]%asi,@X[7]
         or     $D,$CD,$CD
        bnz,a,pn        %icc,.+8
        ldxa    [$inp+64]%asi,@X[8]

        srlx    @X[0],$shr,@X[0]        ! align X[0]
        sllx    @X[1],$shl1,$tx
         sethi  %hi(@K[0]),$t2
        sllx    $tx,$shl2,$tx
         or     $t2,%lo(@K[0]),$t2
        or      $tx,@X[0],@X[0]
         xor    $C,$D,$t1
         add    @X[0],$t2,$t2           ! X[0]+K[0]
___

# Emit the 64 steps.  Each step gets the working registers in their current
# roles; rotating @V afterwards turns (a,b,c,d) into (d,a,b,c) for the next
# step.  $i stays a file-level variable and ends up at 64.
for ($i=0; $i<64; $i++) {
        my $step = $i<15 ? \&R0
                 : $i<16 ? \&R0_1
                 : $i<32 ? \&R1
                 : $i<48 ? \&R2
                 :         \&R3;
        $step->($i,@V);
        unshift(@V,pop(@V));
}

$code.=<<___;
        srlx    $AB,32,$t1              ! unpack A,B,C,D and accumulate
        add     $inp,64,$inp            ! advance inp
        srlx    $CD,32,$t2
        add     $t1,$A,$A
        subcc   $len,1,$len             ! done yet?
        add     $AB,$B,$B
        add     $t2,$C,$C
        add     $CD,$D,$D
        srl     $B,0,$B                 ! clruw $B
        bne     `$bits==64?"%xcc":"%icc"`,.Loop
        srl     $D,0,$D                 ! clruw $D

        st      $A,[$ctx+0]             ! write out ctx
        st      $B,[$ctx+4]
        st      $C,[$ctx+8]
        st      $D,[$ctx+12]

        wr      %g0,$saved_asi,%asi
        ret
        restore
.type   md5_block_asm_data_order,#function
.size   md5_block_asm_data_order,(.-md5_block_asm_data_order)

.asciz  "MD5 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align  4
___
281
# Post-process and emit the generated assembly.
#
# Every `...` span left in $code is a deferred Perl expression (step
# numbers such as `$i+1`, opcode or condition-code selectors); replace each
# with the result of evaluating it.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Output is buffered, so a failed write may only surface at close time.
# Check both calls so that a truncated assembly file is never produced
# with a successful exit status.
print STDOUT $code or die "$0: error writing assembly output: $!\n";
close STDOUT       or die "$0: error closing assembly output: $!\n";