1 .ident "md5-sparcv9.S, Version 1.0"
2 .ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
6 * ====================================================================
7 * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
9 * Rights for redistribution and usage in source and binary forms are
10 * granted as long as above copyright notices are retained. Warranty
11 * of any kind is (of course:-) disclaimed.
12 * ====================================================================
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is an
18 * assembler implementation of the MD5 block hash function. I've hand-coded
19 * this for the sole purpose of using the UltraSPARC-specific "load in
20 * little-endian byte order" instruction. This gives up to a 15%
21 * performance improvement in cases where the input message is aligned on
22 * a 32-bit boundary. The module was tested under both 32 *and* 64 bit
23 * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
25 * To compile with SC4.x/SC5.x:
27 * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
32 * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
35 * or, if the above fails (it does if you have gas):
37 * gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5_block.sparc.S | \
38 * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
69 #define Aval R5 /* those not used at the end of the last round */
/*
 * Pick the operand form X(i) used to address 32-bit word i (4-byte stride)
 * relative to the pointer in %i1, and the symbol name that md5_block
 * expands to.
 */
74 #if defined(MD5_BLOCK_DATA_ORDER)
75 # if defined(ULTRASPARC)
/* Word i is addressed through the address space identifier held in %asi. */
77 # define X(i) [%i1+i*4]%asi
78 # define md5_block md5_block_asm_data_order_aligned
/* ASI value that the function body moves into %asi. */
79 # define ASI_PRIMARY_LITTLE 0x88
/* NOTE(review): presumably the branch taken when ULTRASPARC is undefined. */
81 # error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
/* NOTE(review): presumably the MD5_BLOCK_DATA_ORDER-undefined variant:
   plain memory operand, no %asi involved. */
85 # define X(i) [%i1+i*4]
86 # define md5_block md5_block_asm_host_order
/* Emit what follows into the allocatable, executable ".text" section. */
89 .section ".text",#alloc,#execinstr
90 #if defined(__SUNPRO_C) && defined(__sparcv9)
91 /* They've said -xarch=v9 at command line, so declare %g2 and %g3 to
   the assembler as scratch registers. */
92 .register %g2,#scratch
93 .register %g3,#scratch
106 #ifdef ASI_PRIMARY_LITTLE
/* Copy the current %asi into %o7 before it is overwritten below.
   NOTE(review): presumably restored from %o7 on exit - confirm. */
107 mov %asi,%o7 ! How dare I? Well, I just do:-)
112 #ifdef ASI_PRIMARY_LITTLE
/* Load the ASI_PRIMARY_LITTLE value into %asi, the register named by the
   [..]%asi form of X(i). */
113 mov ASI_PRIMARY_LITTLE,%asi
/*
 * Round 1 constants: each sethi/or pair assembles one 32-bit immediate in
 * T2 (sethi supplies the %hi part, or merges in the %lo part). The sixteen
 * values are T[1..16] of the MD5 constant table in RFC 1321.
 */
132 sethi %hi(0xd76aa478),T2
134 or T2,%lo(0xd76aa478),T2 !=
146 sethi %hi(0xe8c7b756),T2
148 or T2,%lo(0xe8c7b756),T2
160 sethi %hi(0x242070db),T2 !=
162 or T2,%lo(0x242070db),T2
174 sethi %hi(0xc1bdceee),T2
176 or T2,%lo(0xc1bdceee),T2
188 sethi %hi(0xf57c0faf),T2
190 or T2,%lo(0xf57c0faf),T2 !=
202 sethi %hi(0x4787c62a),T2
204 or T2,%lo(0x4787c62a),T2
216 sethi %hi(0xa8304613),T2 !=
218 or T2,%lo(0xa8304613),T2
230 sethi %hi(0xfd469501),T2
232 or T2,%lo(0xfd469501),T2
244 sethi %hi(0x698098d8),T2
246 or T2,%lo(0x698098d8),T2 !=
258 sethi %hi(0x8b44f7af),T2
260 or T2,%lo(0x8b44f7af),T2
272 sethi %hi(0xffff5bb1),T2 !=
274 or T2,%lo(0xffff5bb1),T2
286 sethi %hi(0x895cd7be),T2
288 or T2,%lo(0x895cd7be),T2
300 sethi %hi(0x6b901122),T2
302 or T2,%lo(0x6b901122),T2 !=
314 sethi %hi(0xfd987193),T2
316 or T2,%lo(0xfd987193),T2
328 sethi %hi(0xa679438e),T2 !=
330 or T2,%lo(0xa679438e),T2
342 sethi %hi(0x49b40821),T2
344 or T2,%lo(0x49b40821),T2
/*
 * Round 2 constants: each sethi/or pair assembles one 32-bit immediate in
 * T2 (sethi supplies the %hi part, or merges in the %lo part). The sixteen
 * values are T[17..32] of the MD5 constant table in RFC 1321.
 */
358 sethi %hi(0xf61e2562),T2
360 or T2,%lo(0xf61e2562),T2
372 sethi %hi(0xc040b340),T2
374 or T2,%lo(0xc040b340),T2
377 !pre-LOADed X(11),R11
386 sethi %hi(0x265e5a51),T2
388 or T2,%lo(0x265e5a51),T2
400 sethi %hi(0xe9b6c7aa),T2
402 or T2,%lo(0xe9b6c7aa),T2
414 sethi %hi(0xd62f105d),T2
416 or T2,%lo(0xd62f105d),T2
419 !pre-LOADed X(10),R10
428 sethi %hi(0x02441453),T2
430 or T2,%lo(0x02441453),T2
442 sethi %hi(0xd8a1e681),T2
444 or T2,%lo(0xd8a1e681),T2 !=
456 sethi %hi(0xe7d3fbc8),T2
458 or T2,%lo(0xe7d3fbc8),T2 !=
470 sethi %hi(0x21e1cde6),T2
472 or T2,%lo(0x21e1cde6),T2 !=
484 sethi %hi(0xc33707d6),T2
486 or T2,%lo(0xc33707d6),T2
498 sethi %hi(0xf4d50d87),T2
500 or T2,%lo(0xf4d50d87),T2
512 sethi %hi(0x455a14ed),T2
514 or T2,%lo(0x455a14ed),T2
517 !pre-LOADed X(13),R13
526 sethi %hi(0xa9e3e905),T2
528 or T2,%lo(0xa9e3e905),T2
540 sethi %hi(0xfcefa3f8),T2
542 or T2,%lo(0xfcefa3f8),T2
554 sethi %hi(0x676f02d9),T2
556 or T2,%lo(0x676f02d9),T2
559 !pre-LOADed X(12),R12
568 sethi %hi(0x8d2a4c8a),T2
570 or T2,%lo(0x8d2a4c8a),T2
/*
 * Round 3 constants: each sethi/or pair assembles one 32-bit immediate in
 * T2 (sethi supplies the %hi part, or merges in the %lo part). The sixteen
 * values are T[33..48] of the MD5 constant table in RFC 1321.
 */
584 sethi %hi(0xfffa3942),T2
586 or T2,%lo(0xfffa3942),T2
597 sethi %hi(0x8771f681),T2
599 or T2,%lo(0x8771f681),T2 !=
601 !pre-LOADed X(11),R11
610 sethi %hi(0x6d9d6122),T2
612 or T2,%lo(0x6d9d6122),T2
623 sethi %hi(0xfde5380c),T2
625 or T2,%lo(0xfde5380c),T2
636 sethi %hi(0xa4beea44),T2 !=
638 or T2,%lo(0xa4beea44),T2
649 sethi %hi(0x4bdecfa9),T2
651 or T2,%lo(0x4bdecfa9),T2
662 sethi %hi(0xf6bb4b60),T2
664 or T2,%lo(0xf6bb4b60),T2 !=
666 !pre-LOADed X(10),R10
675 sethi %hi(0xbebfbc70),T2
677 or T2,%lo(0xbebfbc70),T2
679 !pre-LOADed X(13),R13
688 sethi %hi(0x289b7ec6),T2 !=
690 or T2,%lo(0x289b7ec6),T2
701 sethi %hi(0xeaa127fa),T2
703 or T2,%lo(0xeaa127fa),T2
714 sethi %hi(0xd4ef3085),T2
716 or T2,%lo(0xd4ef3085),T2 !=
727 sethi %hi(0x04881d05),T2
729 or T2,%lo(0x04881d05),T2
740 sethi %hi(0xd9d4d039),T2 !=
742 or T2,%lo(0xd9d4d039),T2
744 !pre-LOADed X(12),R12
753 sethi %hi(0xe6db99e5),T2
755 or T2,%lo(0xe6db99e5),T2
766 sethi %hi(0x1fa27cf8),T2
768 or T2,%lo(0x1fa27cf8),T2
779 sethi %hi(0xc4ac5665),T2
781 or T2,%lo(0xc4ac5665),T2 !=
/*
 * Round 4 constants: each sethi/or pair assembles one 32-bit immediate in
 * T2 (sethi supplies the %hi part, or merges in the %lo part). The sixteen
 * values are T[49..64] of the MD5 constant table in RFC 1321.
 */
794 sethi %hi(0xf4292244),T2
796 or T2,%lo(0xf4292244),T2
807 sethi %hi(0x432aff97),T2 !=
809 or T2,%lo(0x432aff97),T2
820 sethi %hi(0xab9423a7),T2 !=
822 or T2,%lo(0xab9423a7),T2
833 sethi %hi(0xfc93a039),T2
835 or T2,%lo(0xfc93a039),T2
837 !pre-LOADed X(12),R12
846 sethi %hi(0x655b59c3),T2
848 or T2,%lo(0x655b59c3),T2 !=
859 sethi %hi(0x8f0ccc92),T2
861 or T2,%lo(0x8f0ccc92),T2
863 !pre-LOADed X(10),R10
872 sethi %hi(0xffeff47d),T2 !=
874 or T2,%lo(0xffeff47d),T2
885 sethi %hi(0x85845dd1),T2
887 or T2,%lo(0x85845dd1),T2
898 sethi %hi(0x6fa87e4f),T2
900 or T2,%lo(0x6fa87e4f),T2 !=
911 sethi %hi(0xfe2ce6e0),T2
913 or T2,%lo(0xfe2ce6e0),T2 !=
924 sethi %hi(0xa3014314),T2
926 or T2,%lo(0xa3014314),T2
928 !pre-LOADed X(13),R13
937 sethi %hi(0x4e0811a1),T2 !=
939 or T2,%lo(0x4e0811a1),T2
951 sethi %hi(0xf7537e82),T2 !=
953 or T2,%lo(0xf7537e82),T2
954 !pre-LOADed X(11),R11
965 sethi %hi(0xbd3af235),T2 !=
967 or T2,%lo(0xbd3af235),T2
979 sethi %hi(0x2ad7d2bb),T2 !=
981 or T2,%lo(0x2ad7d2bb),T2
997 sethi %hi(0xeb86d391),T2 !=
999 or T2,%lo(0xeb86d391),T2
/*
 * Loop back to .Lmd5_block_loop while the condition codes say "greater"
 * (bg), with the annul bit set (,a). Two spellings of the same branch:
 * the first names %icc and carries a predict-taken hint (,pt), the second
 * is the plain form.
 * NOTE(review): presumably only one is assembled, chosen by a preprocessor
 * conditional - confirm.
 */
1022 bg,a,pt %icc,.Lmd5_block_loop
1024 bg,a .Lmd5_block_loop
1028 #ifdef ASI_PRIMARY_LITTLE
/* Symbol metadata: mark md5_block as a function and set its size to the
   distance from its label to this point. */
1034 .type md5_block,#function
1035 .size md5_block,(.-md5_block)