IA-64 assembler pack: fix typos and make it work on HP-UX.
author Andy Polyakov <appro@openssl.org>
Sat, 7 May 2011 20:36:05 +0000 (20:36 +0000)
committer Andy Polyakov <appro@openssl.org>
Sat, 7 May 2011 20:36:05 +0000 (20:36 +0000)
crypto/bn/asm/ia64-mont.pl
crypto/ia64cpuid.S
crypto/modes/asm/ghash-ia64.pl

index 1f7c0a1..e258658 100644 (file)
@@ -72,9 +72,9 @@ $code=<<___;
 // int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
 //                 const BN_ULONG *bp,const BN_ULONG *np,
 //                 const BN_ULONG *n0p,int num);                       
+.align 64
 .global        bn_mul_mont#
 .proc  bn_mul_mont#
-.align 64;;
 bn_mul_mont:
        .prologue
        .body
@@ -99,9 +99,9 @@ n0=f6;
 m0=f7;
 bi=f8;
 
+.align 64
 .local bn_mul_mont_general#
 .proc  bn_mul_mont_general#
-.align 64;;
 bn_mul_mont_general:
        .prologue
 { .mmi;        .save   ar.pfs,prevfs
@@ -353,7 +353,7 @@ bn_mul_mont_general:
        mov             ar.lc=prevlc    }
 { .mib;        .restore        sp
        mov             sp=prevsp
-       mov             pr=prevpr,-2
+       mov             pr=prevpr,0x1ffff
        br.ret.sptk.many        b0      };;
 .endp  bn_mul_mont_general#
 \f
@@ -364,10 +364,10 @@ t0=r15;
 ai0=f8;  ai1=f9;  ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15;
 ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23;
 
+.align 64
+.skip  48              // aligns loop body
 .local bn_mul_mont_8#
 .proc  bn_mul_mont_8#
-.align 64
-.skip  48;;            // aligns loop body
 bn_mul_mont_8:
        .prologue
 { .mmi;        .save           ar.pfs,prevfs
@@ -828,7 +828,7 @@ bn_mul_mont_8:
        nop.i           0               }
 { .mmi;        ldf.fill        f18=[r18],64
        ldf.fill        f19=[r19],64
-       mov             pr=prevpr,-2    };;
+       mov             pr=prevpr,0x1ffff       };;
 { .mmi;        ldf.fill        f20=[r16]
        ldf.fill        f21=[r17]
        mov             ar.lc=prevlc    }
index dd27e16..bf5abc3 100644 (file)
@@ -168,7 +168,7 @@ OPENSSL_cleanse:
 
 .global        OPENSSL_instrument_bus#
 .proc  OPENSSL_instrument_bus#
-OPENSSL_instrument_cache:
+OPENSSL_instrument_bus:
 { .mmi;        mov             r2=r33
 #if defined(_HPUX_SOURCE) && !defined(_LP64)
        addp4           r32=0,r32
@@ -206,7 +206,7 @@ OPENSSL_instrument_cache:
 
 .global        OPENSSL_instrument_bus2#
 .proc  OPENSSL_instrument_bus2#
-OPENSSL_instrument_cache2:
+OPENSSL_instrument_bus2:
 { .mmi;        mov             r2=r33                  // put aside cnt
 #if defined(_HPUX_SOURCE) && !defined(_LP64)
        addp4           r32=0,r32
index cd75841..0354c95 100755 (executable)
@@ -98,10 +98,10 @@ inp=r26;    end=r27;
 Hhi=r28;       Hlo=r29;
 Zhi=r30;       Zlo=r31;
 
+.align 128
+.skip  16                                      // aligns loop body
 .global        gcm_gmult_4bit#
 .proc  gcm_gmult_4bit#
-.align 128
-.skip  16;;                                    // aligns loop body
 gcm_gmult_4bit:
        .prologue
 { .mmi;        .save   ar.pfs,prevfs
@@ -141,7 +141,7 @@ $code.=<<___;
 { .mmi;        add     Hlo=9,Xi;;                      // ;; is here to prevent
        add     Hhi=1,Xi                };;     // pipeline flush on Itanium
 { .mib;        st8     [Hlo]=Zlo
-       mov     pr=prevpr,-2            };;
+       mov     pr=prevpr,0x1ffff       };;
 { .mib;        st8     [Hhi]=Zhi
        mov     ar.lc=prevlc
        br.ret.sptk.many        b0      };;
@@ -175,10 +175,10 @@ ___
 $code.=<<___;
 prevsp=r3;
 
+.align 32
+.skip  16                                      // aligns loop body
 .global        gcm_ghash_4bit#
 .proc  gcm_ghash_4bit#
-.align 32
-.skip  16;;                                    // aligns loop body
 gcm_ghash_4bit:
        .prologue
 { .mmi;        .save   ar.pfs,prevfs
@@ -410,7 +410,7 @@ $code.=<<___;       # (p19)
 .endp  gcm_ghash_4bit#
 ___
 $code.=<<___;
-.align 128;;
+.align 128
 .type  rem_4bit#,\@object
 rem_4bit:
         data8  0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48