- performance retunes, v8plus bn_*_comba routines are reimplemented;
[openssl.git] / crypto / md5 / asm / md5-sparcv9.S
1 .ident  "md5-sparcv9.S, Version 1.0"
2 .ident  "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3 .file   "md5-sparcv9.S"
4
5 /*
6  * ====================================================================
7  * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
8  *
9  * Rights for redistribution and usage in source and binary forms are
10  * granted as long as above copyright notices are retained. Warranty
11  * of any kind is (of course:-) disclaimed.
12  * ====================================================================
13  */
14
15 /*
16  * This is my modest contribution to the OpenSSL project (see
17  * http://www.openssl.org/ for more information about it) and is an
18  * assembler implementation of the MD5 block hash function. I hand-coded
19  * it for the sole purpose of reaching the UltraSPARC-specific "load in
20  * little-endian byte order" instruction. This gives up to a 15%
21  * performance improvement when the input message is aligned on a
22  * 32-bit boundary. The module was tested under both 32 *and* 64 bit
23  * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
24  *
25  * To compile with SC4.x/SC5.x:
26  *
27  *      cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
28  *              -c md5-sparcv9.S
29  *
30  * and with gcc:
31  *
32  *      gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
33  *              -c md5-sparcv9.S
34  *
35  * or if above fails (it does if you have gas):
36  *
37  *      gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5-sparcv9.S | \
38  *              as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
39  */
40
/*
 * Working registers.  A..D carry the four 32-bit MD5 state words while
 * a block is being processed.  T1 accumulates the per-step sum; T2
 * alternately holds the per-step constant (built with sethi/or) and the
 * left-shifted half of the rotate.
 */
41 #define A       %o0
42 #define B       %o1
43 #define C       %o2
44 #define D       %o3
45 #define T1      %o4
46 #define T2      %o5
47
/*
 * R0..R13 receive input words X(0)..X(13).  RX is shared by X(14) and
 * X(15), which is why the code re-LOADs RX before each use of either
 * of those two words.
 */
48 #define R0      %l0
49 #define R1      %l1
50 #define R2      %l2
51 #define R3      %l3
52 #define R4      %l4
53 #define R5      %l5
54 #define R6      %l6
55 #define R7      %l7
56 #define R8      %i3
57 #define R9      %i4
58 #define R10     %i5
59 #define R11     %g1
60 #define R12     %g2
61 #define R13     %g3
62 #define RX      %g4
63
/* The four state words sit at consecutive 4-byte offsets from %i0. */
64 #define Aptr    %i0+0
65 #define Bptr    %i0+4
66 #define Cptr    %i0+8
67 #define Dptr    %i0+12
68
/*
 * Aval..Dval alias R5..R8.  Near the end of round 3 these registers are
 * reused to hold the state words re-read from memory, so that the new
 * A..D can be added to them and stored back.
 */
69 #define Aval    R5      /* those not used at the end of the last round */
70 #define Bval    R6
71 #define Cval    R7
72 #define Dval    R8
73
/*
 * Select how input words are fetched and under which symbol the routine
 * is exported:
 *
 *   MD5_BLOCK_DATA_ORDER + ULTRASPARC
 *       LOAD is lda, X(i) addresses %i1+i*4 through %asi, and the
 *       routine is named md5_block_asm_data_order_aligned.  Defining
 *       ASI_PRIMARY_LITTLE (0x88) also enables the %asi save/set/restore
 *       code in the routine itself.
 *   MD5_BLOCK_DATA_ORDER without ULTRASPARC
 *       compilation is stopped with #error.
 *   otherwise
 *       LOAD is a plain ld from %i1+i*4 and the routine is named
 *       md5_block_asm_host_order.
 */
74 #if defined(MD5_BLOCK_DATA_ORDER)
75 # if defined(ULTRASPARC)
76 #  define       LOAD                    lda
77 #  define       X(i)                    [%i1+i*4]%asi
78 #  define       md5_block               md5_block_asm_data_order_aligned
79 #  define       ASI_PRIMARY_LITTLE      0x88
80 # else
81 #  error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
82 # endif
83 #else
84 # define        LOAD                    ld
85 # define        X(i)                    [%i1+i*4]
86 # define        md5_block               md5_block_asm_host_order
87 #endif
88
/*
 * Code section and stack frame size.  When built by the Sun compiler
 * for sparcv9, %g2 and %g3 (used as R12 and R13) are declared as
 * scratch registers and the frame passed to save is 192 bytes; in every
 * other configuration it is 96 bytes.
 */
89 .section        ".text",#alloc,#execinstr
90 #if defined(__SUNPRO_C) && defined(__sparcv9)
91   /* They've said -xarch=v9 at command line */
92   .register     %g2,#scratch
93   .register     %g3,#scratch
94 # define        FRAME   -192
95 #else
96 # define        FRAME   -96
97 #endif
98
/*
 * md5_block -- entry point (the md5_block macro expands to the exported
 * symbol name chosen by the preprocessor section earlier in this file).
 *
 * Registers as used by the code after the save:
 *   %i0  base address of the four 32-bit state words (Aptr..Dptr)
 *   %i1  address of the current 64-byte input block; X(i) is %i1+i*4
 *   %i2  loop counter, decremented once per processed block
 *
 * The prologue opens a register window, loads the state into D, C, B, A,
 * and pre-loads X(0) into R0 before entering the loop.  The loop is
 * tested at the bottom, so one block is always processed, even if %i2
 * is zero or negative on entry.
 *
 * NOTE(review): the nops interleaved with the loads look like
 * scheduling/alignment padding -- confirm before removing any of them.
 */
99 .align  32
100
101 .global md5_block
102 md5_block:
103         save    %sp,FRAME,%sp
104
105         ld      [Dptr],D
106 #ifdef ASI_PRIMARY_LITTLE
/* Keep the incoming %asi in %o7; it is written back just before ret. */
107         mov     %asi,%o7        ! How dare I? Well, I just do:-)
108 #else
109         nop
110 #endif
111         ld      [Cptr],C
112 #ifdef ASI_PRIMARY_LITTLE
/* From here on every LOAD (lda ... %asi) uses ASI_PRIMARY_LITTLE. */
113         mov     ASI_PRIMARY_LITTLE,%asi
114 #else
115         nop
116 #endif
117         ld      [Bptr],B
118         nop
119         ld      [Aptr],A
120         nop
/* First input word; later words are loaded inside round 0. */
121         LOAD    X(0),R0
122         nop
/* Jump to the 32-byte aligned loop head (nop fills the delay slot). */
123         ba      .Lmd5_block_loop
124         nop
125
/*
 * Loop head and round 0 (16 steps, one per input word X(0)..X(15)).
 *
 * Each step has the same shape, with the roles of A, D, C, B rotating:
 *     T1  = F(x,y,z) = ((y ^ z) & x) ^ z
 *     T2  = 32-bit constant K, built with sethi %hi / or %lo
 *     reg = reg + T1 + X(i) + K
 *     reg = rotate-left(reg, s)     (sll / srl by 32-s / or)
 *     reg = reg + x
 * The rotate counts cycle through 7, 12, 17, 22.
 *
 * Scheduling conventions visible in the code:
 *   - the word for step i+1 is LOADed during step i; X(14) and X(15)
 *     both go through RX;
 *   - the instruction indented by one extra space is the first xor of
 *     the NEXT step, issued early;
 *   - every fourth instruction carries a "!=" tag.  NOTE(review):
 *     presumably these mark instruction-group boundaries -- confirm.
 */
126 .align  32
127 .Lmd5_block_loop:
128
129 !!!!!!!!Round 0
130
/* step 0: A = B + rotl(A + F(B,C,D) + X(0) + K, 7) */
131         xor     C,D,T1
132         sethi   %hi(0xd76aa478),T2
133         and     T1,B,T1
134         or      T2,%lo(0xd76aa478),T2   !=
135         xor     T1,D,T1
136         add     T1,R0,T1
137         LOAD    X(1),R1
138         add     T1,T2,T1                !=
139         add     A,T1,A
140         sll     A,7,T2
141         srl     A,32-7,A
142         or      A,T2,A                  !=
143          xor     B,C,T1
144         add     A,B,A
145
/* step 1: D = A + rotl(D + F(A,B,C) + X(1) + K, 12) */
146         sethi   %hi(0xe8c7b756),T2
147         and     T1,A,T1                 !=
148         or      T2,%lo(0xe8c7b756),T2
149         xor     T1,C,T1
150         LOAD    X(2),R2
151         add     T1,R1,T1                !=
152         add     T1,T2,T1
153         add     D,T1,D
154         sll     D,12,T2
155         srl     D,32-12,D               !=
156         or      D,T2,D
157          xor     A,B,T1
158         add     D,A,D
159
/* step 2: C = D + rotl(C + F(D,A,B) + X(2) + K, 17) */
160         sethi   %hi(0x242070db),T2      !=
161         and     T1,D,T1
162         or      T2,%lo(0x242070db),T2
163         xor     T1,B,T1
164         add     T1,R2,T1                !=
165         LOAD    X(3),R3
166         add     T1,T2,T1
167         add     C,T1,C
168         sll     C,17,T2                 !=
169         srl     C,32-17,C
170         or      C,T2,C
171          xor     D,A,T1
172         add     C,D,C                   !=
173
/* step 3: B = C + rotl(B + F(C,D,A) + X(3) + K, 22) */
174         sethi   %hi(0xc1bdceee),T2
175         and     T1,C,T1
176         or      T2,%lo(0xc1bdceee),T2
177         xor     T1,A,T1                 !=
178         add     T1,R3,T1
179         LOAD    X(4),R4
180         add     T1,T2,T1
181         add     B,T1,B                  !=
182         sll     B,22,T2
183         srl     B,32-22,B
184         or      B,T2,B
185          xor     C,D,T1                 !=
186         add     B,C,B
187
/* step 4: A = B + rotl(A + F(B,C,D) + X(4) + K, 7) */
188         sethi   %hi(0xf57c0faf),T2
189         and     T1,B,T1
190         or      T2,%lo(0xf57c0faf),T2   !=
191         xor     T1,D,T1
192         add     T1,R4,T1
193         LOAD    X(5),R5
194         add     T1,T2,T1                !=
195         add     A,T1,A
196         sll     A,7,T2
197         srl     A,32-7,A
198         or      A,T2,A                  !=
199          xor     B,C,T1
200         add     A,B,A
201
/* step 5: D = A + rotl(D + F(A,B,C) + X(5) + K, 12) */
202         sethi   %hi(0x4787c62a),T2
203         and     T1,A,T1                 !=
204         or      T2,%lo(0x4787c62a),T2
205         xor     T1,C,T1
206         LOAD    X(6),R6
207         add     T1,R5,T1                !=
208         add     T1,T2,T1
209         add     D,T1,D
210         sll     D,12,T2
211         srl     D,32-12,D               !=
212         or      D,T2,D
213          xor     A,B,T1
214         add     D,A,D
215
/* step 6: C = D + rotl(C + F(D,A,B) + X(6) + K, 17) */
216         sethi   %hi(0xa8304613),T2      !=
217         and     T1,D,T1
218         or      T2,%lo(0xa8304613),T2
219         xor     T1,B,T1
220         add     T1,R6,T1                !=
221         LOAD    X(7),R7
222         add     T1,T2,T1
223         add     C,T1,C
224         sll     C,17,T2                 !=
225         srl     C,32-17,C
226         or      C,T2,C
227          xor     D,A,T1
228         add     C,D,C                   !=
229
/* step 7: B = C + rotl(B + F(C,D,A) + X(7) + K, 22) */
230         sethi   %hi(0xfd469501),T2
231         and     T1,C,T1
232         or      T2,%lo(0xfd469501),T2
233         xor     T1,A,T1                 !=
234         add     T1,R7,T1
235         LOAD    X(8),R8
236         add     T1,T2,T1
237         add     B,T1,B                  !=
238         sll     B,22,T2
239         srl     B,32-22,B
240         or      B,T2,B
241          xor     C,D,T1                 !=
242         add     B,C,B
243
/* step 8: A = B + rotl(A + F(B,C,D) + X(8) + K, 7) */
244         sethi   %hi(0x698098d8),T2
245         and     T1,B,T1
246         or      T2,%lo(0x698098d8),T2   !=
247         xor     T1,D,T1
248         add     T1,R8,T1
249         LOAD    X(9),R9
250         add     T1,T2,T1                !=
251         add     A,T1,A
252         sll     A,7,T2
253         srl     A,32-7,A
254         or      A,T2,A                  !=
255          xor     B,C,T1
256         add     A,B,A
257
/* step 9: D = A + rotl(D + F(A,B,C) + X(9) + K, 12) */
258         sethi   %hi(0x8b44f7af),T2
259         and     T1,A,T1                 !=
260         or      T2,%lo(0x8b44f7af),T2
261         xor     T1,C,T1
262         LOAD    X(10),R10
263         add     T1,R9,T1                !=
264         add     T1,T2,T1
265         add     D,T1,D
266         sll     D,12,T2
267         srl     D,32-12,D               !=
268         or      D,T2,D
269          xor     A,B,T1
270         add     D,A,D
271
/* step 10: C = D + rotl(C + F(D,A,B) + X(10) + K, 17) */
272         sethi   %hi(0xffff5bb1),T2      !=
273         and     T1,D,T1
274         or      T2,%lo(0xffff5bb1),T2
275         xor     T1,B,T1
276         add     T1,R10,T1               !=
277         LOAD    X(11),R11
278         add     T1,T2,T1
279         add     C,T1,C
280         sll     C,17,T2                 !=
281         srl     C,32-17,C
282         or      C,T2,C
283          xor     D,A,T1
284         add     C,D,C                   !=
285
/* step 11: B = C + rotl(B + F(C,D,A) + X(11) + K, 22) */
286         sethi   %hi(0x895cd7be),T2
287         and     T1,C,T1
288         or      T2,%lo(0x895cd7be),T2
289         xor     T1,A,T1                 !=
290         add     T1,R11,T1
291         LOAD    X(12),R12
292         add     T1,T2,T1
293         add     B,T1,B                  !=
294         sll     B,22,T2
295         srl     B,32-22,B
296         or      B,T2,B
297          xor     C,D,T1                 !=
298         add     B,C,B
299
/* step 12: A = B + rotl(A + F(B,C,D) + X(12) + K, 7) */
300         sethi   %hi(0x6b901122),T2
301         and     T1,B,T1
302         or      T2,%lo(0x6b901122),T2   !=
303         xor     T1,D,T1
304         add     T1,R12,T1
305         LOAD    X(13),R13
306         add     T1,T2,T1                !=
307         add     A,T1,A
308         sll     A,7,T2
309         srl     A,32-7,A
310         or      A,T2,A                  !=
311          xor     B,C,T1
312         add     A,B,A
313
/* step 13: D = A + rotl(D + F(A,B,C) + X(13) + K, 12) */
314         sethi   %hi(0xfd987193),T2
315         and     T1,A,T1                 !=
316         or      T2,%lo(0xfd987193),T2
317         xor     T1,C,T1
318         LOAD    X(14),RX
319         add     T1,R13,T1               !=
320         add     T1,T2,T1
321         add     D,T1,D
322         sll     D,12,T2
323         srl     D,32-12,D               !=
324         or      D,T2,D
325          xor     A,B,T1
326         add     D,A,D
327
/* step 14: C = D + rotl(C + F(D,A,B) + X(14) + K, 17); RX is consumed
 * as X(14) and then immediately reloaded with X(15) */
328         sethi   %hi(0xa679438e),T2      !=
329         and     T1,D,T1
330         or      T2,%lo(0xa679438e),T2
331         xor     T1,B,T1
332         add     T1,RX,T1                !=
333         LOAD    X(15),RX
334         add     T1,T2,T1
335         add     C,T1,C
336         sll     C,17,T2                 !=
337         srl     C,32-17,C
338         or      C,T2,C
339          xor     D,A,T1
340         add     C,D,C                   !=
341
/* step 15: B = C + rotl(B + F(C,D,A) + X(15) + K, 22) */
342         sethi   %hi(0x49b40821),T2
343         and     T1,C,T1
344         or      T2,%lo(0x49b40821),T2
345         xor     T1,A,T1                 !=
346         add     T1,RX,T1
347         !pre-LOADed     X(1),R1
348         add     T1,T2,T1
349         add     B,T1,B
350         sll     B,22,T2                 !=
351         srl     B,32-22,B
352         or      B,T2,B
353         add     B,C,B
354
/*
 * Round 1 (16 steps).  Same step shape as round 0, with
 *     T1 = G(x,y,z) = ((x ^ y) & z) ^ y
 * and rotate counts cycling through 5, 9, 14, 20.
 *
 * Input words are consumed in the order
 *     1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12.
 * Words 0..13 are still in R0..R13 from round 0; each "!pre-LOADed"
 * remark names the register the FOLLOWING step will read.  X(15) and
 * X(14) are re-LOADed into RX one step before they are needed.
 */
355 !!!!!!!!Round 1
356
/* step 0: A = B + rotl(A + G(B,C,D) + X(1) + K, 5) */
357         xor     B,C,T1                  !=
358         sethi   %hi(0xf61e2562),T2
359         and     T1,D,T1
360         or      T2,%lo(0xf61e2562),T2
361         xor     T1,C,T1                 !=
362         add     T1,R1,T1
363         !pre-LOADed     X(6),R6
364         add     T1,T2,T1
365         add     A,T1,A
366         sll     A,5,T2                  !=
367         srl     A,32-5,A
368         or      A,T2,A
369         add     A,B,A
370
/* step 1: D = A + rotl(D + G(A,B,C) + X(6) + K, 9) */
371         xor     A,B,T1                  !=
372         sethi   %hi(0xc040b340),T2
373         and     T1,C,T1
374         or      T2,%lo(0xc040b340),T2
375         xor     T1,B,T1                 !=
376         add     T1,R6,T1
377         !pre-LOADed     X(11),R11
378         add     T1,T2,T1
379         add     D,T1,D
380         sll     D,9,T2                  !=
381         srl     D,32-9,D
382         or      D,T2,D
383         add     D,A,D
384
/* step 2: C = D + rotl(C + G(D,A,B) + X(11) + K, 14) */
385         xor     D,A,T1                  !=
386         sethi   %hi(0x265e5a51),T2
387         and     T1,B,T1
388         or      T2,%lo(0x265e5a51),T2
389         xor     T1,A,T1                 !=
390         add     T1,R11,T1
391         !pre-LOADed     X(0),R0
392         add     T1,T2,T1
393         add     C,T1,C
394         sll     C,14,T2                 !=
395         srl     C,32-14,C
396         or      C,T2,C
397         add     C,D,C
398
/* step 3: B = C + rotl(B + G(C,D,A) + X(0) + K, 20) */
399         xor     C,D,T1                  !=
400         sethi   %hi(0xe9b6c7aa),T2
401         and     T1,A,T1
402         or      T2,%lo(0xe9b6c7aa),T2
403         xor     T1,D,T1                 !=
404         add     T1,R0,T1
405         !pre-LOADed     X(5),R5
406         add     T1,T2,T1
407         add     B,T1,B
408         sll     B,20,T2                 !=
409         srl     B,32-20,B
410         or      B,T2,B
411         add     B,C,B
412
/* step 4: A = B + rotl(A + G(B,C,D) + X(5) + K, 5) */
413         xor     B,C,T1                  !=
414         sethi   %hi(0xd62f105d),T2
415         and     T1,D,T1
416         or      T2,%lo(0xd62f105d),T2
417         xor     T1,C,T1                 !=
418         add     T1,R5,T1
419         !pre-LOADed     X(10),R10
420         add     T1,T2,T1
421         add     A,T1,A
422         sll     A,5,T2                  !=
423         srl     A,32-5,A
424         or      A,T2,A
425         add     A,B,A
426
/* step 5: D = A + rotl(D + G(A,B,C) + X(10) + K, 9); reloads RX with
 * X(15) for the next step */
427         xor     A,B,T1                  !=
428         sethi   %hi(0x02441453),T2
429         and     T1,C,T1
430         or      T2,%lo(0x02441453),T2
431         xor     T1,B,T1                 !=
432         add     T1,R10,T1
433         LOAD    X(15),RX
434         add     T1,T2,T1
435         add     D,T1,D                  !=
436         sll     D,9,T2
437         srl     D,32-9,D
438         or      D,T2,D
439         add     D,A,D                   !=
440
/* step 6: C = D + rotl(C + G(D,A,B) + X(15) + K, 14) */
441         xor     D,A,T1
442         sethi   %hi(0xd8a1e681),T2
443         and     T1,B,T1
444         or      T2,%lo(0xd8a1e681),T2   !=
445         xor     T1,A,T1
446         add     T1,RX,T1
447         !pre-LOADed     X(4),R4
448         add     T1,T2,T1
449         add     C,T1,C                  !=
450         sll     C,14,T2
451         srl     C,32-14,C
452         or      C,T2,C
453         add     C,D,C                   !=
454
/* step 7: B = C + rotl(B + G(C,D,A) + X(4) + K, 20) */
455         xor     C,D,T1
456         sethi   %hi(0xe7d3fbc8),T2
457         and     T1,A,T1
458         or      T2,%lo(0xe7d3fbc8),T2   !=
459         xor     T1,D,T1
460         add     T1,R4,T1
461         !pre-LOADed     X(9),R9
462         add     T1,T2,T1
463         add     B,T1,B                  !=
464         sll     B,20,T2
465         srl     B,32-20,B
466         or      B,T2,B
467         add     B,C,B                   !=
468
/* step 8: A = B + rotl(A + G(B,C,D) + X(9) + K, 5); reloads RX with
 * X(14) for the next step */
469         xor     B,C,T1
470         sethi   %hi(0x21e1cde6),T2
471         and     T1,D,T1
472         or      T2,%lo(0x21e1cde6),T2   !=
473         xor     T1,C,T1
474         add     T1,R9,T1
475         LOAD    X(14),RX
476         add     T1,T2,T1                !=
477         add     A,T1,A
478         sll     A,5,T2
479         srl     A,32-5,A
480         or      A,T2,A                  !=
481         add     A,B,A
482
/* step 9: D = A + rotl(D + G(A,B,C) + X(14) + K, 9) */
483         xor     A,B,T1
484         sethi   %hi(0xc33707d6),T2
485         and     T1,C,T1                 !=
486         or      T2,%lo(0xc33707d6),T2
487         xor     T1,B,T1
488         add     T1,RX,T1
489         !pre-LOADed     X(3),R3
490         add     T1,T2,T1                !=
491         add     D,T1,D
492         sll     D,9,T2
493         srl     D,32-9,D
494         or      D,T2,D                  !=
495         add     D,A,D
496
/* step 10: C = D + rotl(C + G(D,A,B) + X(3) + K, 14) */
497         xor     D,A,T1
498         sethi   %hi(0xf4d50d87),T2
499         and     T1,B,T1                 !=
500         or      T2,%lo(0xf4d50d87),T2
501         xor     T1,A,T1
502         add     T1,R3,T1
503         !pre-LOADed     X(8),R8
504         add     T1,T2,T1                !=
505         add     C,T1,C
506         sll     C,14,T2
507         srl     C,32-14,C
508         or      C,T2,C                  !=
509         add     C,D,C
510
/* step 11: B = C + rotl(B + G(C,D,A) + X(8) + K, 20) */
511         xor     C,D,T1
512         sethi   %hi(0x455a14ed),T2
513         and     T1,A,T1                 !=
514         or      T2,%lo(0x455a14ed),T2
515         xor     T1,D,T1
516         add     T1,R8,T1
517         !pre-LOADed     X(13),R13
518         add     T1,T2,T1                !=
519         add     B,T1,B
520         sll     B,20,T2
521         srl     B,32-20,B
522         or      B,T2,B                  !=
523         add     B,C,B
524
/* step 12: A = B + rotl(A + G(B,C,D) + X(13) + K, 5) */
525         xor     B,C,T1
526         sethi   %hi(0xa9e3e905),T2
527         and     T1,D,T1                 !=
528         or      T2,%lo(0xa9e3e905),T2
529         xor     T1,C,T1
530         add     T1,R13,T1
531         !pre-LOADed     X(2),R2
532         add     T1,T2,T1                !=
533         add     A,T1,A
534         sll     A,5,T2
535         srl     A,32-5,A
536         or      A,T2,A                  !=
537         add     A,B,A
538
/* step 13: D = A + rotl(D + G(A,B,C) + X(2) + K, 9) */
539         xor     A,B,T1
540         sethi   %hi(0xfcefa3f8),T2
541         and     T1,C,T1                 !=
542         or      T2,%lo(0xfcefa3f8),T2
543         xor     T1,B,T1
544         add     T1,R2,T1
545         !pre-LOADed     X(7),R7
546         add     T1,T2,T1                !=
547         add     D,T1,D
548         sll     D,9,T2
549         srl     D,32-9,D
550         or      D,T2,D                  !=
551         add     D,A,D
552
/* step 14: C = D + rotl(C + G(D,A,B) + X(7) + K, 14) */
553         xor     D,A,T1
554         sethi   %hi(0x676f02d9),T2
555         and     T1,B,T1                 !=
556         or      T2,%lo(0x676f02d9),T2
557         xor     T1,A,T1
558         add     T1,R7,T1
559         !pre-LOADed     X(12),R12
560         add     T1,T2,T1                !=
561         add     C,T1,C
562         sll     C,14,T2
563         srl     C,32-14,C
564         or      C,T2,C                  !=
565         add     C,D,C
566
/* step 15: B = C + rotl(B + G(C,D,A) + X(12) + K, 20) */
567         xor     C,D,T1
568         sethi   %hi(0x8d2a4c8a),T2
569         and     T1,A,T1                 !=
570         or      T2,%lo(0x8d2a4c8a),T2
571         xor     T1,D,T1
572         add     T1,R12,T1
573         !pre-LOADed     X(5),R5
574         add     T1,T2,T1                !=
575         add     B,T1,B
576         sll     B,20,T2
577         srl     B,32-20,B
578         or      B,T2,B                  !=
579         add     B,C,B
580
/*
 * Round 2 (16 steps).  Same step shape as the earlier rounds, with
 *     T1 = H(x,y,z) = x ^ y ^ z
 * and rotate counts cycling through 4, 11, 16, 23.
 *
 * Input words are consumed in the order
 *     5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2.
 * Each "!pre-LOADed" remark names the register the FOLLOWING step will
 * read; X(14) and X(15) are re-LOADed into RX one step ahead of use.
 */
581 !!!!!!!!Round 2
582
/* step 0: A = B + rotl(A + H(B,C,D) + X(5) + K, 4) */
583         xor     B,C,T1
584         sethi   %hi(0xfffa3942),T2
585         xor     T1,D,T1                 !=
586         or      T2,%lo(0xfffa3942),T2
587         add     T1,R5,T1
588         !pre-LOADed     X(8),R8
589         add     T1,T2,T1
590         add     A,T1,A                  !=
591         sll     A,4,T2
592         srl     A,32-4,A
593         or      A,T2,A
594         add     A,B,A                   !=
595
/* step 1: D = A + rotl(D + H(A,B,C) + X(8) + K, 11) */
596         xor     A,B,T1
597         sethi   %hi(0x8771f681),T2
598         xor     T1,C,T1
599         or      T2,%lo(0x8771f681),T2   !=
600         add     T1,R8,T1
601         !pre-LOADed     X(11),R11
602         add     T1,T2,T1
603         add     D,T1,D
604         sll     D,11,T2                 !=
605         srl     D,32-11,D
606         or      D,T2,D
607         add     D,A,D
608
/* step 2: C = D + rotl(C + H(D,A,B) + X(11) + K, 16); reloads RX with
 * X(14) for the next step */
609         xor     D,A,T1                  !=
610         sethi   %hi(0x6d9d6122),T2
611         xor     T1,B,T1
612         or      T2,%lo(0x6d9d6122),T2
613         add     T1,R11,T1               !=
614         LOAD    X(14),RX
615         add     T1,T2,T1
616         add     C,T1,C
617         sll     C,16,T2                 !=
618         srl     C,32-16,C
619         or      C,T2,C
620         add     C,D,C
621
/* step 3: B = C + rotl(B + H(C,D,A) + X(14) + K, 23) */
622         xor     C,D,T1                  !=
623         sethi   %hi(0xfde5380c),T2
624         xor     T1,A,T1
625         or      T2,%lo(0xfde5380c),T2
626         add     T1,RX,T1                !=
627         !pre-LOADed     X(1),R1
628         add     T1,T2,T1
629         add     B,T1,B
630         sll     B,23,T2
631         srl     B,32-23,B               !=
632         or      B,T2,B
633         add     B,C,B
634
/* step 4: A = B + rotl(A + H(B,C,D) + X(1) + K, 4) */
635         xor     B,C,T1
636         sethi   %hi(0xa4beea44),T2      !=
637         xor     T1,D,T1
638         or      T2,%lo(0xa4beea44),T2
639         add     T1,R1,T1
640         !pre-LOADed     X(4),R4
641         add     T1,T2,T1                !=
642         add     A,T1,A
643         sll     A,4,T2
644         srl     A,32-4,A
645         or      A,T2,A                  !=
646         add     A,B,A
647
/* step 5: D = A + rotl(D + H(A,B,C) + X(4) + K, 11) */
648         xor     A,B,T1
649         sethi   %hi(0x4bdecfa9),T2
650         xor     T1,C,T1                 !=
651         or      T2,%lo(0x4bdecfa9),T2
652         add     T1,R4,T1
653         !pre-LOADed     X(7),R7
654         add     T1,T2,T1
655         add     D,T1,D                  !=
656         sll     D,11,T2
657         srl     D,32-11,D
658         or      D,T2,D
659         add     D,A,D                   !=
660
/* step 6: C = D + rotl(C + H(D,A,B) + X(7) + K, 16) */
661         xor     D,A,T1
662         sethi   %hi(0xf6bb4b60),T2
663         xor     T1,B,T1
664         or      T2,%lo(0xf6bb4b60),T2   !=
665         add     T1,R7,T1
666         !pre-LOADed     X(10),R10
667         add     T1,T2,T1
668         add     C,T1,C
669         sll     C,16,T2                 !=
670         srl     C,32-16,C
671         or      C,T2,C
672         add     C,D,C
673
/* step 7: B = C + rotl(B + H(C,D,A) + X(10) + K, 23) */
674         xor     C,D,T1                  !=
675         sethi   %hi(0xbebfbc70),T2
676         xor     T1,A,T1
677         or      T2,%lo(0xbebfbc70),T2
678         add     T1,R10,T1               !=
679         !pre-LOADed     X(13),R13
680         add     T1,T2,T1
681         add     B,T1,B
682         sll     B,23,T2
683         srl     B,32-23,B               !=
684         or      B,T2,B
685         add     B,C,B
686
/* step 8: A = B + rotl(A + H(B,C,D) + X(13) + K, 4) */
687         xor     B,C,T1
688         sethi   %hi(0x289b7ec6),T2      !=
689         xor     T1,D,T1
690         or      T2,%lo(0x289b7ec6),T2
691         add     T1,R13,T1
692         !pre-LOADed     X(0),R0
693         add     T1,T2,T1                !=
694         add     A,T1,A
695         sll     A,4,T2
696         srl     A,32-4,A
697         or      A,T2,A                  !=
698         add     A,B,A
699
/* step 9: D = A + rotl(D + H(A,B,C) + X(0) + K, 11) */
700         xor     A,B,T1
701         sethi   %hi(0xeaa127fa),T2
702         xor     T1,C,T1                 !=
703         or      T2,%lo(0xeaa127fa),T2
704         add     T1,R0,T1
705         !pre-LOADed     X(3),R3
706         add     T1,T2,T1
707         add     D,T1,D                  !=
708         sll     D,11,T2
709         srl     D,32-11,D
710         or      D,T2,D
711         add     D,A,D                   !=
712
/* step 10: C = D + rotl(C + H(D,A,B) + X(3) + K, 16) */
713         xor     D,A,T1
714         sethi   %hi(0xd4ef3085),T2
715         xor     T1,B,T1
716         or      T2,%lo(0xd4ef3085),T2   !=
717         add     T1,R3,T1
718         !pre-LOADed     X(6),R6
719         add     T1,T2,T1
720         add     C,T1,C
721         sll     C,16,T2                 !=
722         srl     C,32-16,C
723         or      C,T2,C
724         add     C,D,C
725
/* step 11: B = C + rotl(B + H(C,D,A) + X(6) + K, 23) */
726         xor     C,D,T1                  !=
727         sethi   %hi(0x04881d05),T2
728         xor     T1,A,T1
729         or      T2,%lo(0x04881d05),T2
730         add     T1,R6,T1                !=
731         !pre-LOADed     X(9),R9
732         add     T1,T2,T1
733         add     B,T1,B
734         sll     B,23,T2
735         srl     B,32-23,B               !=
736         or      B,T2,B
737         add     B,C,B
738
/* step 12: A = B + rotl(A + H(B,C,D) + X(9) + K, 4) */
739         xor     B,C,T1
740         sethi   %hi(0xd9d4d039),T2      !=
741         xor     T1,D,T1
742         or      T2,%lo(0xd9d4d039),T2
743         add     T1,R9,T1
744         !pre-LOADed     X(12),R12
745         add     T1,T2,T1                !=
746         add     A,T1,A
747         sll     A,4,T2
748         srl     A,32-4,A
749         or      A,T2,A                  !=
750         add     A,B,A
751
/* step 13: D = A + rotl(D + H(A,B,C) + X(12) + K, 11); reloads RX with
 * X(15) for the next step */
752         xor     A,B,T1
753         sethi   %hi(0xe6db99e5),T2
754         xor     T1,C,T1                 !=
755         or      T2,%lo(0xe6db99e5),T2
756         add     T1,R12,T1
757         LOAD    X(15),RX
758         add     T1,T2,T1                !=
759         add     D,T1,D
760         sll     D,11,T2
761         srl     D,32-11,D
762         or      D,T2,D                  !=
763         add     D,A,D
764
/* step 14: C = D + rotl(C + H(D,A,B) + X(15) + K, 16) */
765         xor     D,A,T1
766         sethi   %hi(0x1fa27cf8),T2
767         xor     T1,B,T1                 !=
768         or      T2,%lo(0x1fa27cf8),T2
769         add     T1,RX,T1
770         !pre-LOADed     X(2),R2
771         add     T1,T2,T1
772         add     C,T1,C                  !=
773         sll     C,16,T2
774         srl     C,32-16,C
775         or      C,T2,C
776         add     C,D,C                   !=
777
/* step 15: B = C + rotl(B + H(C,D,A) + X(2) + K, 23) */
778         xor     C,D,T1
779         sethi   %hi(0xc4ac5665),T2
780         xor     T1,A,T1
781         or      T2,%lo(0xc4ac5665),T2   !=
782         add     T1,R2,T1
783         !pre-LOADed     X(0),R0
784         add     T1,T2,T1
785         add     B,T1,B
786         sll     B,23,T2                 !=
787         srl     B,32-23,B
788         or      B,T2,B
789         add     B,C,B
790
/*
 * Round 3 (16 steps).  Same step shape as the earlier rounds, with
 *     T1 = I(x,y,z) = (x | ~z) ^ y        (orn followed by xor)
 * and rotate counts cycling through 6, 10, 15, 21.
 *
 * Input words are consumed in the order
 *     0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9.
 *
 * The last five steps are interleaved with the state update: the old
 * state words are re-read from memory into Aval/Dval/Cval/Bval (aliases
 * of R5/R8/R7/R6, whose input words have already been consumed in this
 * round), the finished A, D and C are added to them and stored back,
 * and A and D are replaced by the sums.  C and B are finalised by the
 * code that follows this round.  The extra-indented instructions are
 * the ones belonging to that state update.
 */
791 !!!!!!!!Round 3
792
/* step 0: A = B + rotl(A + I(B,C,D) + X(0) + K, 6) */
793         orn     B,D,T1                  !=
794         sethi   %hi(0xf4292244),T2
795         xor     T1,C,T1
796         or      T2,%lo(0xf4292244),T2
797         add     T1,R0,T1                !=
798         !pre-LOADed     X(7),R7
799         add     T1,T2,T1
800         add     A,T1,A
801         sll     A,6,T2
802         srl     A,32-6,A                !=
803         or      A,T2,A
804         add     A,B,A
805
/* step 1: D = A + rotl(D + I(A,B,C) + X(7) + K, 10); reloads RX with
 * X(14) for the next step */
806         orn     A,C,T1
807         sethi   %hi(0x432aff97),T2      !=
808         xor     T1,B,T1
809         or      T2,%lo(0x432aff97),T2
810         LOAD    X(14),RX
811         add     T1,R7,T1                !=
812         add     T1,T2,T1
813         add     D,T1,D
814         sll     D,10,T2
815         srl     D,32-10,D               !=
816         or      D,T2,D
817         add     D,A,D
818
/* step 2: C = D + rotl(C + I(D,A,B) + X(14) + K, 15) */
819         orn     D,B,T1
820         sethi   %hi(0xab9423a7),T2      !=
821         xor     T1,A,T1
822         or      T2,%lo(0xab9423a7),T2
823         add     T1,RX,T1
824         !pre-LOADed     X(5),R5
825         add     T1,T2,T1                !=
826         add     C,T1,C
827         sll     C,15,T2
828         srl     C,32-15,C
829         or      C,T2,C                  !=
830         add     C,D,C
831
/* step 3: B = C + rotl(B + I(C,D,A) + X(5) + K, 21) */
832         orn     C,A,T1
833         sethi   %hi(0xfc93a039),T2
834         xor     T1,D,T1                 !=
835         or      T2,%lo(0xfc93a039),T2
836         add     T1,R5,T1
837         !pre-LOADed     X(12),R12
838         add     T1,T2,T1
839         add     B,T1,B                  !=
840         sll     B,21,T2
841         srl     B,32-21,B
842         or      B,T2,B
843         add     B,C,B                   !=
844
/* step 4: A = B + rotl(A + I(B,C,D) + X(12) + K, 6) */
845         orn     B,D,T1
846         sethi   %hi(0x655b59c3),T2
847         xor     T1,C,T1
848         or      T2,%lo(0x655b59c3),T2   !=
849         add     T1,R12,T1
850         !pre-LOADed     X(3),R3
851         add     T1,T2,T1
852         add     A,T1,A
853         sll     A,6,T2                  !=
854         srl     A,32-6,A
855         or      A,T2,A
856         add     A,B,A
857
/* step 5: D = A + rotl(D + I(A,B,C) + X(3) + K, 10) */
858         orn     A,C,T1                  !=
859         sethi   %hi(0x8f0ccc92),T2
860         xor     T1,B,T1
861         or      T2,%lo(0x8f0ccc92),T2
862         add     T1,R3,T1                !=
863         !pre-LOADed     X(10),R10
864         add     T1,T2,T1
865         add     D,T1,D
866         sll     D,10,T2
867         srl     D,32-10,D               !=
868         or      D,T2,D
869         add     D,A,D
870
/* step 6: C = D + rotl(C + I(D,A,B) + X(10) + K, 15) */
871         orn     D,B,T1
872         sethi   %hi(0xffeff47d),T2      !=
873         xor     T1,A,T1
874         or      T2,%lo(0xffeff47d),T2
875         add     T1,R10,T1
876         !pre-LOADed     X(1),R1
877         add     T1,T2,T1                !=
878         add     C,T1,C
879         sll     C,15,T2
880         srl     C,32-15,C
881         or      C,T2,C                  !=
882         add     C,D,C
883
/* step 7: B = C + rotl(B + I(C,D,A) + X(1) + K, 21) */
884         orn     C,A,T1
885         sethi   %hi(0x85845dd1),T2
886         xor     T1,D,T1                 !=
887         or      T2,%lo(0x85845dd1),T2
888         add     T1,R1,T1
889         !pre-LOADed     X(8),R8
890         add     T1,T2,T1
891         add     B,T1,B                  !=
892         sll     B,21,T2
893         srl     B,32-21,B
894         or      B,T2,B
895         add     B,C,B                   !=
896
/* step 8: A = B + rotl(A + I(B,C,D) + X(8) + K, 6); reloads RX with
 * X(15) for the next step */
897         orn     B,D,T1
898         sethi   %hi(0x6fa87e4f),T2
899         xor     T1,C,T1
900         or      T2,%lo(0x6fa87e4f),T2   !=
901         add     T1,R8,T1
902         LOAD    X(15),RX
903         add     T1,T2,T1
904         add     A,T1,A                  !=
905         sll     A,6,T2
906         srl     A,32-6,A
907         or      A,T2,A
908         add     A,B,A                   !=
909
/* step 9: D = A + rotl(D + I(A,B,C) + X(15) + K, 10) */
910         orn     A,C,T1
911         sethi   %hi(0xfe2ce6e0),T2
912         xor     T1,B,T1
913         or      T2,%lo(0xfe2ce6e0),T2   !=
914         add     T1,RX,T1
915         !pre-LOADed     X(6),R6
916         add     T1,T2,T1
917         add     D,T1,D
918         sll     D,10,T2                 !=
919         srl     D,32-10,D
920         or      D,T2,D
921         add     D,A,D
922
/* step 10: C = D + rotl(C + I(D,A,B) + X(6) + K, 15) */
923         orn     D,B,T1                  !=
924         sethi   %hi(0xa3014314),T2
925         xor     T1,A,T1
926         or      T2,%lo(0xa3014314),T2
927         add     T1,R6,T1                !=
928         !pre-LOADed     X(13),R13
929         add     T1,T2,T1
930         add     C,T1,C
931         sll     C,15,T2
932         srl     C,32-15,C               !=
933         or      C,T2,C
934         add     C,D,C
935
/* step 11: B = C + rotl(B + I(C,D,A) + X(13) + K, 21); also starts the
 * state update by re-reading the old A into Aval */
936         orn     C,A,T1
937         sethi   %hi(0x4e0811a1),T2      !=
938         xor     T1,D,T1
939         or      T2,%lo(0x4e0811a1),T2
940         !pre-LOADed     X(4),R4
941          ld      [Aptr],Aval
942         add     T1,R13,T1               !=
943         add     T1,T2,T1
944         add     B,T1,B
945         sll     B,21,T2
946         srl     B,32-21,B               !=
947         or      B,T2,B
948         add     B,C,B
949
/* step 12: A = B + rotl(A + I(B,C,D) + X(4) + K, 6); re-reads the old D
 * into Dval */
950         orn     B,D,T1
951         sethi   %hi(0xf7537e82),T2      !=
952         xor     T1,C,T1
953         or      T2,%lo(0xf7537e82),T2
954         !pre-LOADed     X(11),R11
955          ld      [Dptr],Dval
956         add     T1,R4,T1                !=
957         add     T1,T2,T1
958         add     A,T1,A
959         sll     A,6,T2
960         srl     A,32-6,A                !=
961         or      A,T2,A
962         add     A,B,A
963
/* step 13: D = A + rotl(D + I(A,B,C) + X(11) + K, 10); re-reads the old
 * C into Cval */
964         orn     A,C,T1
965         sethi   %hi(0xbd3af235),T2      !=
966         xor     T1,B,T1
967         or      T2,%lo(0xbd3af235),T2
968         !pre-LOADed     X(2),R2
969          ld      [Cptr],Cval
970         add     T1,R11,T1               !=
971         add     T1,T2,T1
972         add     D,T1,D
973         sll     D,10,T2
974         srl     D,32-10,D               !=
975         or      D,T2,D
976         add     D,A,D
977
/* step 14: C = D + rotl(C + I(D,A,B) + X(2) + K, 15).  A and D are
 * final by now: old + new is formed in Aval/Dval and stored back, and
 * the old B is re-read into Bval. */
978         orn     D,B,T1
979         sethi   %hi(0x2ad7d2bb),T2      !=
980         xor     T1,A,T1
981         or      T2,%lo(0x2ad7d2bb),T2
982         !pre-LOADed     X(9),R9
983          ld      [Bptr],Bval
984         add     T1,R2,T1                !=
985          add     Aval,A,Aval
986         add     T1,T2,T1
987          st      Aval,[Aptr]
988         add     C,T1,C                  !=
989         sll     C,15,T2
990          add     Dval,D,Dval
991         srl     C,32-15,C
992         or      C,T2,C                  !=
993          st      Dval,[Dptr]
994         add     C,D,C
995
/* step 15: B = C + rotl(B + I(C,D,A) + X(9) + K, 21).  The old A and D
 * are read by orn/xor first; only afterwards are A and D overwritten
 * with the updated state.  The final add uses the round's C, while the
 * updated C is formed in Cval and stored. */
996         orn     C,A,T1
997         sethi   %hi(0xeb86d391),T2      !=
998         xor     T1,D,T1
999         or      T2,%lo(0xeb86d391),T2
1000         add     T1,R9,T1
1001         !pre-LOADed     X(0),R0
1002          mov     Aval,A                 !=
1003         add     T1,T2,T1
1004          mov     Dval,D
1005         add     B,T1,B
1006         sll     B,21,T2                 !=
1007          add     Cval,C,Cval
1008         srl     B,32-21,B
1009          st      Cval,[Cptr]
1010         or      B,T2,B                  !=
1011         add     B,C,B
1012
/*
 * Loop control and function exit.
 *
 * Finishes the state update started in round 3 (C takes the value
 * already stored from Cval; B gets the old B added and is stored),
 * advances the input pointer by one 64-byte block, and decrements the
 * counter in %i2.  deccc sets the condition codes that the branch
 * tests; none of the instructions in between change them.
 *
 * The branch is taken while the decremented counter is > 0.  It is an
 * annulled branch, so the delay-slot LOAD of the next block's X(0) is
 * executed only when the loop is actually repeated.
 *
 * On exit the saved %asi is written back (little-endian build only) and
 * the routine returns with 0 in the caller's %o0.
 *
 * NOTE(review): the two nops look like scheduling padding -- confirm
 * before removing them.
 */
1013         deccc   %i2
1014         mov     Cval,C
1015         add     B,Bval,B                !=
1016         inc     64,%i1
1017         nop
1018         st      B,[Bptr]
1019         nop                             !=
1020
1021 #ifdef  ULTRASPARC
1022         bg,a,pt %icc,.Lmd5_block_loop
1023 #else
1024         bg,a    .Lmd5_block_loop
1025 #endif
1026         LOAD    X(0),R0
1027
1028 #ifdef ASI_PRIMARY_LITTLE
/* Undo the %asi change made in the prologue. */
1029         mov     %o7,%asi
1030 #endif
1031         ret
1032         restore %g0,0,%o0
1033
1034 .type   md5_block,#function
1035 .size   md5_block,(.-md5_block)