Pedantic polish to aes-ia64 and sha512-ia64.
author Andy Polyakov <appro@openssl.org>
Wed, 20 Jul 2005 15:15:22 +0000 (15:15 +0000)
committer Andy Polyakov <appro@openssl.org>
Wed, 20 Jul 2005 15:15:22 +0000 (15:15 +0000)
crypto/aes/asm/aes-ia64.S
crypto/sha/asm/sha512-ia64.pl

index 3377838..c966682 100644 (file)
@@ -24,7 +24,9 @@
 
 rk0=r8;     rk1=r9;
 
-prsave=r10;
+pfssave=r2;     // receives ar.pfs from alloc in AES_encrypt/AES_decrypt
+lcsave=r10;     // receives ar.lc on entry, written back before return
+prsave=r3;      // receives pr on entry; r3 first carries ip in the prologue
 maskff=r11;
 twenty4=r14;
 sixteen=r15;
@@ -67,6 +69,9 @@ te0=r40;    te1=r41;    te2=r42;    te3=r43;
 // Clobber:    r16-r31,rk0-rk1,r32-r43
 .align 32
 _ia64_AES_encrypt:
+       .prologue
+       .altrp  b6
+       .body
 { .mmi;        alloc   r16=ar.pfs,12,0,0,8
        LDKEY   t0=[rk0],2*KSZ
        mov     pr.rot=1<<16    }
@@ -179,20 +184,21 @@ _ia64_AES_encrypt:
 .skip  16
 AES_encrypt:
        .prologue
-       .save   ar.pfs,r2
-{ .mmi;        alloc   r2=ar.pfs,3,0,12,0
-       addl    out8=@ltoff(AES_Te#),gp
-       .save   ar.lc,r3
-       mov     r3=ar.lc                }
-{ .mmi;        and     out0=3,in0
-       ADDP    in0=0,in0
-       ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
+       .save   ar.pfs,pfssave
+{ .mmi;        alloc   pfssave=ar.pfs,3,0,12,0
+       and     out0=3,in0
+       mov     r3=ip                   }
+{ .mmi;        ADDP    in0=0,in0
+       ADDP    out11=KSZ*60,in2                // &AES_KEY->rounds
+       .save   ar.lc,lcsave
+       mov     lcsave=ar.lc            };;
 
-       .body
-{ .mmi;        ld8     out8=[out8]                     // Te0
-       ld4     out11=[out11]                   // AES_KEY->rounds
+{ .mmi;        ld4     out11=[out11]                   // AES_KEY->rounds
+       add     out8=(AES_Te#-AES_encrypt#),r3  // Te0
+       .save   pr,prsave
        mov     prsave=pr               }
 
+       .body
 #if defined(_HPUX_SOURCE)      // HPUX is big-endian, cut 15+15 cycles...
 { .mib; cmp.ne p6,p0=out0,r0
        add     out0=4,in0
@@ -215,8 +221,8 @@ AES_encrypt:
        ADDP    in1=0,in1
 (p6)   br.spnt .Le_o_unaligned         };;
 
-{ .mii;        mov     ar.pfs=r2
-       mov     ar.lc=r3                }
+{ .mii;        mov     ar.pfs=pfssave          // put back the ar.pfs/ar.lc values
+       mov     ar.lc=lcsave            }       // captured in the prologue
 { .mmi;        st4     [in1]=r16,8             // s0
        st4     [in0]=r20,8             // s1
        mov     pr=prsave,0x1ffff       };;
@@ -299,10 +305,10 @@ AES_encrypt:
        mov     pr=prsave,0x1ffff       }//;;
 { .mmi;        st1     [out1]=r26,4
        st1     [out0]=r27,4
-       mov     ar.pfs=r2               };;
+       mov     ar.pfs=pfssave          };;
 { .mmi;        st1     [out3]=r28
        st1     [out2]=r29
-       mov     ar.lc=r3                }//;;
+       mov     ar.lc=lcsave            }//;;
 { .mmb;        st1     [out1]=r30
        st1     [out0]=r31
        br.ret.sptk.many        b0      };;
@@ -359,6 +365,9 @@ while(<>) {
 // Clobber:    r16-r31,rk0-rk1,r32-r43
 .align 32
 _ia64_AES_decrypt:
+       .prologue
+       .altrp  b6
+       .body
 { .mmi;        alloc   r16=ar.pfs,12,0,0,8
        LDKEY   t0=[rk0],2*KSZ
        mov     pr.rot=1<<16    }
@@ -471,20 +480,21 @@ _ia64_AES_decrypt:
 .skip  16
 AES_decrypt:
        .prologue
-       .save   ar.pfs,r2
-{ .mmi;        alloc   r2=ar.pfs,3,0,12,0
-       addl    out8=@ltoff(AES_Td#),gp
-       .save   ar.lc,r3
-       mov     r3=ar.lc                }
-{ .mmi;        and     out0=3,in0
-       ADDP    in0=0,in0
-       ADDP    out11=KSZ*60,in2        };;     // &AES_KEY->rounds
+       .save   ar.pfs,pfssave
+{ .mmi;        alloc   pfssave=ar.pfs,3,0,12,0
+       and     out0=3,in0
+       mov     r3=ip                   }
+{ .mmi;        ADDP    in0=0,in0
+       ADDP    out11=KSZ*60,in2                // &AES_KEY->rounds
+       .save   ar.lc,lcsave
+       mov     lcsave=ar.lc            };;
 
-       .body
-{ .mmi;        ld8     out8=[out8]                     // Te0
-       ld4     out11=[out11]                   // AES_KEY->rounds
+{ .mmi;        ld4     out11=[out11]                   // AES_KEY->rounds
+       add     out8=(AES_Td#-AES_decrypt#),r3  // Td0
+       .save   pr,prsave
        mov     prsave=pr               }
 
+       .body
 #if defined(_HPUX_SOURCE)      // HPUX is big-endian, cut 15+15 cycles...
 { .mib; cmp.ne p6,p0=out0,r0
        add     out0=4,in0
@@ -507,8 +517,8 @@ AES_decrypt:
        ADDP    in1=0,in1
 (p6)   br.spnt .Ld_o_unaligned         };;
 
-{ .mii;        mov     ar.pfs=r2
-       mov     ar.lc=r3                }
+{ .mii;        mov     ar.pfs=pfssave
+       mov     ar.lc=lcsave            }
 { .mmi;        st4     [in1]=r16,8             // s0
        st4     [in0]=r20,8             // s1
        mov     pr=prsave,0x1ffff       };;
@@ -591,10 +601,10 @@ AES_decrypt:
        mov     pr=prsave,0x1ffff       }//;;
 { .mmi;        st1     [out1]=r26,4
        st1     [out0]=r27,4
-       mov     ar.pfs=r2               };;
+       mov     ar.pfs=pfssave          };;
 { .mmi;        st1     [out3]=r28
        st1     [out2]=r29
-       mov     ar.lc=r3                }//;;
+       mov     ar.lc=lcsave            }//;;
 { .mmb;        st1     [out1]=r30
        st1     [out0]=r31
        br.ret.sptk.many        b0      };;
index 9de9174..628e33e 100755 (executable)
@@ -110,6 +110,8 @@ $code=<<___;
 .explicit
 .text
 
+pfssave=r2;     // receives ar.pfs from alloc at function entry
+lcsave=r3;      // receives ar.lc at function entry
 prsave=r14;
 K=r15;
 A=r16; B=r17;  C=r18;  D=r19;
@@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51;       // small constants
 .align 32
 $func:
        .prologue
-       .save   ar.pfs,r2
-{ .mmi;        alloc   r2=ar.pfs,3,17,0,16
+       .save   ar.pfs,pfssave
+{ .mmi;        alloc   pfssave=ar.pfs,3,17,0,16
        $ADDP   ctx=0,r32               // 1st arg
-       .save   ar.lc,r3
-       mov     r3=ar.lc        }
+       .save   ar.lc,lcsave
+       mov     lcsave=ar.lc    }
 { .mmi;        $ADDP   input=0,r33             // 2nd arg
-       addl    Ktbl=\@ltoff($TABLE#),gp
+       mov     num=r34                 // 3rd arg
        .save   pr,prsave
        mov     prsave=pr       };;
 
        .body
-{ .mii;        ld8     Ktbl=[Ktbl]
-       mov     num=r34         };;     // 3rd arg
-
 { .mib;        add     r8=0*$SZ,ctx
        add     r9=1*$SZ,ctx
        brp.loop.imp    .L_first16,.L_first16_ctop
@@ -151,20 +150,23 @@ $func:
        brp.loop.imp    .L_rest,.L_rest_ctop
                                };;
 // load A-H
+.Lpic_point:
 { .mmi;        $LDW    A=[r8],4*$SZ
        $LDW    B=[r9],4*$SZ
-       mov     sgm0=$sigma0[2] }
+       mov     Ktbl=ip         }
 { .mmi;        $LDW    C=[r10],4*$SZ
        $LDW    D=[r11],4*$SZ
-       mov     sgm1=$sigma1[2] };;
+       mov     sgm0=$sigma0[2] };;
 { .mmi;        $LDW    E=[r8]
-       $LDW    F=[r9]          }
+       $LDW    F=[r9]
+       add     Ktbl=($TABLE#-.Lpic_point),Ktbl         }
 { .mmi;        $LDW    G=[r10]
        $LDW    H=[r11]
        cmp.ne  p15,p14=0,r35   };;     // used in sha256_block
 
 .L_outer:
-{ .mii;        mov     ar.lc=15
+{ .mii;        mov     sgm1=$sigma1[2]
+       mov     ar.lc=15
        mov     ar.ec=1         };;
 .align 32
 .L_first16:
@@ -329,7 +331,7 @@ $code.=<<___;
 (p6)   add     Ktbl=-$SZ*$rounds,Ktbl  }
 { .mmi;        $LDW    r38=[r10],-4*$SZ
        $LDW    r39=[r11],-4*$SZ
-(p7)   mov     ar.lc=r3                };;
+(p7)   mov     ar.lc=lcsave            };;
 { .mmi;        add     A=A,r32
        add     B=B,r33
        add     C=C,r34                 }