This update gets endianness-neutrality right and adds the second required entry point, md5_block_asm_data_order.
[openssl.git] / crypto / md5 / asm / md5-ia64.S
index 900263224fd5e57fdef446ae407a221203b546b6..73273fa82814e9e9d77577828be32347f16cb265 100644 (file)
@@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define        pPad2           p12
 #define        pPad3           p13
 #define        pSkip           p8
 #define        pPad2           p12
 #define        pPad3           p13
 #define        pSkip           p8
+//     These two below shall remain constant throughout the whole routine
+#define        pDataOrder      p14
+#define        pHostOrder      p15
 
 #define        A_              out24
 #define        B_              out25
 
 #define        A_              out24
 #define        B_              out25
@@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define _NOUTPUT       0
 #define        _NROTATE        24      /* this must be <= _NINPUTS */
 
 #define _NOUTPUT       0
 #define        _NROTATE        24      /* this must be <= _NINPUTS */
 
+#if defined(_HPUX_SOURCE) && !defined(_LP64)
+#define        ADDP    addp4
+#else
+#define        ADDP    add
+#endif
 
 //     Macros for getting the left and right portions of little-endian words
 
 
 //     Macros for getting the left and right portions of little-endian words
 
@@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define        LCSave          r21
 #define        PFSSave         r20
 #define        PRSave          r22
 #define        LCSave          r21
 #define        PFSSave         r20
 #define        PRSave          r22
-#define        pAgain          p14
-#define        pOff            p14
-
-       .rodata
-       // Values are specified as bytes to ensure they are
-       // in little-endian byte-order.
-       .align 4
-md5_round_constants:
-       data1 0x78, 0xa4, 0x6a, 0xd7    //     0
-       data1 0x56, 0xb7, 0xc7, 0xe8    //     1
-       data1 0xdb, 0x70, 0x20, 0x24    //     2
-       data1 0xee, 0xce, 0xbd, 0xc1    //     3
-       data1 0xaf, 0x0f, 0x7c, 0xf5    //     4
-       data1 0x2a, 0xc6, 0x87, 0x47    //     5
-       data1 0x13, 0x46, 0x30, 0xa8    //     6
-       data1 0x01, 0x95, 0x46, 0xfd    //     7
-       data1 0xd8, 0x98, 0x80, 0x69    //     8
-       data1 0xaf, 0xf7, 0x44, 0x8b    //     9
-       data1 0xb1, 0x5b, 0xff, 0xff    //    10
-       data1 0xbe, 0xd7, 0x5c, 0x89    //    11
-       data1 0x22, 0x11, 0x90, 0x6b    //    12
-       data1 0x93, 0x71, 0x98, 0xfd    //    13
-       data1 0x8e, 0x43, 0x79, 0xa6    //    14
-       data1 0x21, 0x08, 0xb4, 0x49    //    15
-       data1 0x62, 0x25, 0x1e, 0xf6    //    16
-       data1 0x40, 0xb3, 0x40, 0xc0    //    17
-       data1 0x51, 0x5a, 0x5e, 0x26    //    18
-       data1 0xaa, 0xc7, 0xb6, 0xe9    //    19
-       data1 0x5d, 0x10, 0x2f, 0xd6    //    20
-       data1 0x53, 0x14, 0x44, 0x02    //    21
-       data1 0x81, 0xe6, 0xa1, 0xd8    //    22
-       data1 0xc8, 0xfb, 0xd3, 0xe7    //    23
-       data1 0xe6, 0xcd, 0xe1, 0x21    //    24
-       data1 0xd6, 0x07, 0x37, 0xc3    //    25
-       data1 0x87, 0x0d, 0xd5, 0xf4    //    26
-       data1 0xed, 0x14, 0x5a, 0x45    //    27
-       data1 0x05, 0xe9, 0xe3, 0xa9    //    28
-       data1 0xf8, 0xa3, 0xef, 0xfc    //    29
-       data1 0xd9, 0x02, 0x6f, 0x67    //    30
-       data1 0x8a, 0x4c, 0x2a, 0x8d    //    31
-       data1 0x42, 0x39, 0xfa, 0xff    //    32
-       data1 0x81, 0xf6, 0x71, 0x87    //    33
-       data1 0x22, 0x61, 0x9d, 0x6d    //    34
-       data1 0x0c, 0x38, 0xe5, 0xfd    //    35
-       data1 0x44, 0xea, 0xbe, 0xa4    //    36
-       data1 0xa9, 0xcf, 0xde, 0x4b    //    37
-       data1 0x60, 0x4b, 0xbb, 0xf6    //    38
-       data1 0x70, 0xbc, 0xbf, 0xbe    //    39
-       data1 0xc6, 0x7e, 0x9b, 0x28    //    40
-       data1 0xfa, 0x27, 0xa1, 0xea    //    41
-       data1 0x85, 0x30, 0xef, 0xd4    //    42
-       data1 0x05, 0x1d, 0x88, 0x04    //    43
-       data1 0x39, 0xd0, 0xd4, 0xd9    //    44
-       data1 0xe5, 0x99, 0xdb, 0xe6    //    45
-       data1 0xf8, 0x7c, 0xa2, 0x1f    //    46
-       data1 0x65, 0x56, 0xac, 0xc4    //    47
-       data1 0x44, 0x22, 0x29, 0xf4    //    48
-       data1 0x97, 0xff, 0x2a, 0x43    //    49
-       data1 0xa7, 0x23, 0x94, 0xab    //    50
-       data1 0x39, 0xa0, 0x93, 0xfc    //    51
-       data1 0xc3, 0x59, 0x5b, 0x65    //    52
-       data1 0x92, 0xcc, 0x0c, 0x8f    //    53
-       data1 0x7d, 0xf4, 0xef, 0xff    //    54
-       data1 0xd1, 0x5d, 0x84, 0x85    //    55
-       data1 0x4f, 0x7e, 0xa8, 0x6f    //    56
-       data1 0xe0, 0xe6, 0x2c, 0xfe    //    57
-       data1 0x14, 0x43, 0x01, 0xa3    //    58
-       data1 0xa1, 0x11, 0x08, 0x4e    //    59
-       data1 0x82, 0x7e, 0x53, 0xf7    //    60
-       data1 0x35, 0xf2, 0x3a, 0xbd    //    61
-       data1 0xbb, 0xd2, 0xd7, 0x2a    //    62
-       data1 0x91, 0xd3, 0x86, 0xeb    //    63
+#define        pAgain          p63
+#define        pOff            p63
 
        .text
 
 
        .text
 
@@ -320,52 +258,47 @@ md5_round_constants:
 
    */
 
 
    */
 
+       .type   md5_block_asm_data_order, @function     // entry point that selects data-order mode via pDataOrder
+       .global md5_block_asm_data_order
+       .align  32
+       .proc   md5_block_asm_data_order
+md5_block_asm_data_order:
+{      .mib
+       cmp.eq  pDataOrder,pHostOrder = r0,r0   // r0 == r0 always holds: pDataOrder = 1, pHostOrder = 0
+       br.sptk.many    .md5_block      // continue in the common body shared with md5_block_asm_host_order
+};;
+       .endp   md5_block_asm_data_order
+
        .type   md5_block_asm_host_order, @function
        .global md5_block_asm_host_order
 
        .type   md5_block_asm_host_order, @function
        .global md5_block_asm_host_order
 
-       .align  32
        .proc   md5_block_asm_host_order
 md5_block_asm_host_order:
        .prologue
        .proc   md5_block_asm_host_order
 md5_block_asm_host_order:
        .prologue
-#ifndef __LP64__
+{      .mib
+       cmp.eq  pHostOrder,pDataOrder = r0,r0
+};;
+.md5_block:
 {      .mmi
 {      .mmi
-       .save ar.pfs, PFSSave
+       .save   ar.pfs, PFSSave
        alloc   PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
        alloc   PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
-       addp4   DPtrIn = 0, DPtrIn
-       addp4   CtxPtr0 = 0, CtxPtr0
+       ADDP    CtxPtr1 = 8, CtxPtr0
+       mov     CTable = ip
 }
 }
-;;
 {      .mmi
 {      .mmi
-       nop     0x0
-       and     InAlign = 0x3, DPtrIn
-       .save ar.lc, LCSave
+       ADDP    DPtrIn = 0, DPtrIn
+       ADDP    CtxPtr0 = 0, CtxPtr0
+       .save   ar.lc, LCSave
        mov     LCSave = ar.lc
 }
        mov     LCSave = ar.lc
 }
-#else
+;;
+.pred.rel      "mutex",pDataOrder,pHostOrder
 {      .mmi
 {      .mmi
-       .save ar.pfs, PFSSave
-       alloc   PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
+(pDataOrder)   add     CTable = .md5_tbl_data_order#-.md5_block#, CTable
+(pHostOrder)   add     CTable = .md5_tbl_host_order#-.md5_block#, CTable       
        and     InAlign = 0x3, DPtrIn
        and     InAlign = 0x3, DPtrIn
-       .save ar.lc, LCSave
-       mov     LCSave = ar.lc
 }
 }
-#endif
 
 
-{      .mmi
-       addl    CTable = @ltoffx(md5_round_constants), gp
-       ;;
-       ld8.mov CTable = [CTable], md5_round_constants // native byte-order
-       add     CtxPtr1 = 8, CtxPtr0
-}
-#ifdef B_ENDIAN
-{
-       .mmi
-       rum     psr.be          // switch to little-endian mode
-       nop.m   0x0
-       nop.i   0x0
-}
-#endif
-;;
 {      .mmi
        ld4     AccumA = [CtxPtr0], 4
        ld4     AccumC = [CtxPtr1], 4
 {      .mmi
        ld4     AccumA = [CtxPtr0], 4
        ld4     AccumC = [CtxPtr1], 4
@@ -379,15 +312,12 @@ md5_block_asm_host_order:
        ld4     AccumD = [CtxPtr1]
        dep     DPtr_ = 0, DPtrIn, 0, 2
 } ;;
        ld4     AccumD = [CtxPtr1]
        dep     DPtr_ = 0, DPtrIn, 0, 2
 } ;;
-
-{      .mmi
+#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
+(pDataOrder)   rum     psr.be;;        // switch to little-endian
+#endif
+{      .mmb
        ld4     CTable0 = [CTable], 4
        cmp.ne  pOff, p0 = 0, InAlign
        ld4     CTable0 = [CTable], 4
        cmp.ne  pOff, p0 = 0, InAlign
-} ;;
-
-{      .mib
-       nop.m 0x0
-       nop.i 0x0
 (pOff) br.cond.spnt.many .md5_unaligned
 } ;;
 
 (pOff) br.cond.spnt.many .md5_unaligned
 } ;;
 
@@ -431,9 +361,9 @@ md5_block_asm_host_order:
 } ;;
 
 .md5_exit:
 } ;;
 
 .md5_exit:
-//     Note that we switch back to the entry endianess AFTER storing so
-//     that the memory image of the hash is preserved.
-
+#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
+(pDataOrder)   sum     psr.be;;        // switch back to big-endian mode
+#endif
 {      .mmi
        st4     [CtxPtr0] = AccumB, -4
        st4     [CtxPtr1] = AccumD, -4
 {      .mmi
        st4     [CtxPtr0] = AccumB, -4
        st4     [CtxPtr1] = AccumD, -4
@@ -445,9 +375,6 @@ md5_block_asm_host_order:
        mov     ar.lc = LCSave
 } ;;
 {      .mib
        mov     ar.lc = LCSave
 } ;;
 {      .mib
-#ifdef B_ENDIAN
-       sum     psr.be          // switch back to big-endian mode
-#endif
        mov     ar.pfs = PFSSave
        br.ret.sptk.few rp
 } ;;
        mov     ar.pfs = PFSSave
        br.ret.sptk.few rp
 } ;;
@@ -1001,9 +928,99 @@ md5_digest_block##offset:                                         \
        nop 0x0 ;                                                       \
        nop 0x0 ;                                                       \
        br.cond.sptk.many md5_digest_GHI ;                              \
        nop 0x0 ;                                                       \
        nop 0x0 ;                                                       \
        br.cond.sptk.many md5_digest_GHI ;                              \
-} ;                                                                    \
+} ;;                                                                   \
        .endp md5digestBlock ## offset
 
 MD5FBLOCK(1)
 MD5FBLOCK(2)
 MD5FBLOCK(3)
        .endp md5digestBlock ## offset
 
 MD5FBLOCK(1)
 MD5FBLOCK(2)
 MD5FBLOCK(3)
+
+       .align 64
+       .type md5_constants, @object
+md5_constants:
+.md5_tbl_data_order:                   // To ensure little-endian data
+                                       // order, code as bytes.
+       data1 0x78, 0xa4, 0x6a, 0xd7    //     0
+       data1 0x56, 0xb7, 0xc7, 0xe8    //     1
+       data1 0xdb, 0x70, 0x20, 0x24    //     2
+       data1 0xee, 0xce, 0xbd, 0xc1    //     3
+       data1 0xaf, 0x0f, 0x7c, 0xf5    //     4
+       data1 0x2a, 0xc6, 0x87, 0x47    //     5
+       data1 0x13, 0x46, 0x30, 0xa8    //     6
+       data1 0x01, 0x95, 0x46, 0xfd    //     7
+       data1 0xd8, 0x98, 0x80, 0x69    //     8
+       data1 0xaf, 0xf7, 0x44, 0x8b    //     9
+       data1 0xb1, 0x5b, 0xff, 0xff    //    10
+       data1 0xbe, 0xd7, 0x5c, 0x89    //    11
+       data1 0x22, 0x11, 0x90, 0x6b    //    12
+       data1 0x93, 0x71, 0x98, 0xfd    //    13
+       data1 0x8e, 0x43, 0x79, 0xa6    //    14
+       data1 0x21, 0x08, 0xb4, 0x49    //    15
+       data1 0x62, 0x25, 0x1e, 0xf6    //    16
+       data1 0x40, 0xb3, 0x40, 0xc0    //    17
+       data1 0x51, 0x5a, 0x5e, 0x26    //    18
+       data1 0xaa, 0xc7, 0xb6, 0xe9    //    19
+       data1 0x5d, 0x10, 0x2f, 0xd6    //    20
+       data1 0x53, 0x14, 0x44, 0x02    //    21
+       data1 0x81, 0xe6, 0xa1, 0xd8    //    22
+       data1 0xc8, 0xfb, 0xd3, 0xe7    //    23
+       data1 0xe6, 0xcd, 0xe1, 0x21    //    24
+       data1 0xd6, 0x07, 0x37, 0xc3    //    25
+       data1 0x87, 0x0d, 0xd5, 0xf4    //    26
+       data1 0xed, 0x14, 0x5a, 0x45    //    27
+       data1 0x05, 0xe9, 0xe3, 0xa9    //    28
+       data1 0xf8, 0xa3, 0xef, 0xfc    //    29
+       data1 0xd9, 0x02, 0x6f, 0x67    //    30
+       data1 0x8a, 0x4c, 0x2a, 0x8d    //    31
+       data1 0x42, 0x39, 0xfa, 0xff    //    32
+       data1 0x81, 0xf6, 0x71, 0x87    //    33
+       data1 0x22, 0x61, 0x9d, 0x6d    //    34
+       data1 0x0c, 0x38, 0xe5, 0xfd    //    35
+       data1 0x44, 0xea, 0xbe, 0xa4    //    36
+       data1 0xa9, 0xcf, 0xde, 0x4b    //    37
+       data1 0x60, 0x4b, 0xbb, 0xf6    //    38
+       data1 0x70, 0xbc, 0xbf, 0xbe    //    39
+       data1 0xc6, 0x7e, 0x9b, 0x28    //    40
+       data1 0xfa, 0x27, 0xa1, 0xea    //    41
+       data1 0x85, 0x30, 0xef, 0xd4    //    42
+       data1 0x05, 0x1d, 0x88, 0x04    //    43
+       data1 0x39, 0xd0, 0xd4, 0xd9    //    44
+       data1 0xe5, 0x99, 0xdb, 0xe6    //    45
+       data1 0xf8, 0x7c, 0xa2, 0x1f    //    46
+       data1 0x65, 0x56, 0xac, 0xc4    //    47
+       data1 0x44, 0x22, 0x29, 0xf4    //    48
+       data1 0x97, 0xff, 0x2a, 0x43    //    49
+       data1 0xa7, 0x23, 0x94, 0xab    //    50
+       data1 0x39, 0xa0, 0x93, 0xfc    //    51
+       data1 0xc3, 0x59, 0x5b, 0x65    //    52
+       data1 0x92, 0xcc, 0x0c, 0x8f    //    53
+       data1 0x7d, 0xf4, 0xef, 0xff    //    54
+       data1 0xd1, 0x5d, 0x84, 0x85    //    55
+       data1 0x4f, 0x7e, 0xa8, 0x6f    //    56
+       data1 0xe0, 0xe6, 0x2c, 0xfe    //    57
+       data1 0x14, 0x43, 0x01, 0xa3    //    58
+       data1 0xa1, 0x11, 0x08, 0x4e    //    59
+       data1 0x82, 0x7e, 0x53, 0xf7    //    60
+       data1 0x35, 0xf2, 0x3a, 0xbd    //    61
+       data1 0xbb, 0xd2, 0xd7, 0x2a    //    62
+       data1 0x91, 0xd3, 0x86, 0xeb    //    63
+
+.md5_tbl_host_order:                   // OS data order, might as well
+                                       // be little-endian.
+       data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee    // 0
+       data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501    // 4
+       data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be    // 8
+       data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821    // 12
+       data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa    // 16
+       data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8    // 20
+       data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed    // 24
+       data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a    // 28
+       data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c    // 32
+       data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70    // 36
+       data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05    // 40
+       data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665    // 44
+       data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039    // 48
+       data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1    // 52
+       data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1    // 56
+       data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391    // 60
+.size  md5_constants#,64*4*2