aes/asm/vpaes-ppc.pl: comply with ABI.
author Andy Polyakov <appro@openssl.org>
Wed, 4 Dec 2013 20:46:40 +0000 (21:46 +0100)
committer Andy Polyakov <appro@openssl.org>
Wed, 4 Dec 2013 20:46:40 +0000 (21:46 +0100)
crypto/aes/asm/vpaes-ppc.pl

index 7ee3f2a..122dfff 100644 (file)
@@ -44,7 +44,7 @@ if ($flavour =~ /64/) {
 } else { die "nonsense $flavour"; }
 
 $sp="r1";
-$FRAME=8*$SIZE_T;
+$FRAME=6*$SIZE_T+13*16;        # 12*16 for v20-v31 offload, plus 16 spare bytes whose last word ($FRAME-4) is the vrsave slot
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
@@ -296,10 +296,36 @@ Lenc_entry:
 .globl .vpaes_encrypt
 .align 5
 .vpaes_encrypt:
+       $STU    $sp,-$FRAME($sp)        # allocate $FRAME bytes; released by the addi before blr
+       li      r10,`15+6*$SIZE_T`      # r10: offset of the v20,v22,...,v30 save slots
+       li      r11,`31+6*$SIZE_T`      # r11: offset of the v21,v23,...,v31 save slots
        mflr    r6
        mfspr   r7, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,32              # r10/r11 interleave, so each one skips the other's slot
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r7,`$FRAME-4`($sp)      # save vrsave (a store: r7 must stay intact for mtspr on exit)
        li      r0, -1
-       $PUSH   r6,$LRSAVE($sp)
+       $PUSH   r6,`$FRAME+$LRSAVE`($sp)        # same slot as before: $LRSAVE above the pre-$STU stack pointer
        mtspr   256, r0                 # preserve all AltiVec registers
 
        bl      _vpaes_encrypt_preheat
@@ -333,11 +359,36 @@ Lenc_entry:
        vsel    v1, $outhead, v1, $outmask
        stvx    v1, 0, $out
 
+       li      r10,`15+6*$SIZE_T`      # same slot offsets as in the prologue
+       li      r11,`31+6*$SIZE_T`
        mtlr    r6
        mtspr   256, r7                 # restore vrsave
+       lvx     v20,r10,$sp
+       addi    r10,r10,32              # stride must mirror the save sequence
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME          # release the frame
        blr
        .long   0
-       .byte   0,12,0x14,1,0,0,3,0
+       .byte   0,12,0x04,1,0x80,0,3,0
        .long   0
 .size  .vpaes_encrypt,.-.vpaes_encrypt
 
@@ -479,10 +530,36 @@ Ldec_entry:
 .globl .vpaes_decrypt
 .align 5
 .vpaes_decrypt:
+       $STU    $sp,-$FRAME($sp)        # allocate $FRAME bytes; released by the addi before blr
+       li      r10,`15+6*$SIZE_T`      # r10: offset of the v20,v22,...,v30 save slots
+       li      r11,`31+6*$SIZE_T`      # r11: offset of the v21,v23,...,v31 save slots
        mflr    r6
        mfspr   r7, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,32              # r10/r11 interleave, so each one skips the other's slot
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r7,`$FRAME-4`($sp)      # save vrsave (a store: r7 must stay intact for mtspr on exit)
        li      r0, -1
-       $PUSH   r6,$LRSAVE($sp)
+       $PUSH   r6,`$FRAME+$LRSAVE`($sp)        # same slot as before: $LRSAVE above the pre-$STU stack pointer
        mtspr   256, r0                 # preserve all AltiVec registers
 
        bl      _vpaes_decrypt_preheat
@@ -516,23 +593,74 @@ Ldec_entry:
        vsel    v1, $outhead, v1, $outmask
        stvx    v1, 0, $out
 
+       li      r10,`15+6*$SIZE_T`      # same slot offsets as in the prologue
+       li      r11,`31+6*$SIZE_T`
        mtlr    r6
        mtspr   256, r7                 # restore vrsave
+       lvx     v20,r10,$sp
+       addi    r10,r10,32              # stride must mirror the save sequence
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME          # release the frame
        blr
        .long   0
-       .byte   0,12,0x14,1,0,0,3,0
+       .byte   0,12,0x04,1,0x80,0,3,0
        .long   0
 .size  .vpaes_decrypt,.-.vpaes_decrypt
 
 .globl .vpaes_cbc_encrypt
 .align 5
 .vpaes_cbc_encrypt:
-       $STU    $sp,-$FRAME($sp)
+       $STU    $sp,-`($FRAME+2*$SIZE_T)`($sp)
        mflr    r0
-       $PUSH   r30,$FRAME-$SIZE_T*2($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mfspr   r12, 256
+       stvx    v20,r10,$sp
+       addi    r10,r10,16
+       stvx    v21,r11,$sp
+       addi    r11,r11,16
+       stvx    v22,r10,$sp
+       addi    r10,r10,16
+       stvx    v23,r11,$sp
+       addi    r11,r11,16
+       stvx    v24,r10,$sp
+       addi    r10,r10,16
+       stvx    v25,r11,$sp
+       addi    r11,r11,16
+       stvx    v26,r10,$sp
+       addi    r10,r10,16
+       stvx    v27,r11,$sp
+       addi    r11,r11,16
+       stvx    v28,r10,$sp
+       addi    r10,r10,16
+       stvx    v29,r11,$sp
+       addi    r11,r11,16
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       lwz     r12,`$FRAME-4`($sp)     # save vrsave
+       $PUSH   r30,`$FRAME+$SIZE_T*0`($sp)
+       $PUSH   r31,`$FRAME+$SIZE_T*1`($sp)
        li      r9, 16
-       $PUSH   r31,$FRAME-$SIZE_T*1($sp)
-       $PUSH   r0, $FRAME+$LRSAVE($sp)
+       $PUSH   r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
 
        sub.    r30, r5, r9             # copy length-16
        mr      r5, r6                  # copy pointer to key
@@ -540,7 +668,7 @@ Ldec_entry:
        blt     Lcbc_abort
        cmpwi   r8, 0                   # test direction
        li      r6, -1
-       mfspr   r7, 256
+       mr      r7, r12                 # copy vrsave
        mtspr   256, r6                 # preserve all AltiVec registers
 
        lvx     v24, 0, r31             # load [potentially unaligned] iv
@@ -629,12 +757,36 @@ Lcbc_done:
        stvx    v1, r6, r31
 
        mtspr   256, r7                 # restore vrsave
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       lvx     v20,r10,$sp
+       addi    r10,r10,16
+       lvx     v21,r11,$sp
+       addi    r11,r11,16
+       lvx     v22,r10,$sp
+       addi    r10,r10,16
+       lvx     v23,r11,$sp
+       addi    r11,r11,16
+       lvx     v24,r10,$sp
+       addi    r10,r10,16
+       lvx     v25,r11,$sp
+       addi    r11,r11,16
+       lvx     v26,r10,$sp
+       addi    r10,r10,16
+       lvx     v27,r11,$sp
+       addi    r11,r11,16
+       lvx     v28,r10,$sp
+       addi    r10,r10,16
+       lvx     v29,r11,$sp
+       addi    r11,r11,16
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
 Lcbc_abort:
-       $POP    r0, $FRAME+$LRSAVE($sp)
-       $POP    r30,$FRAME-$SIZE_T*2($sp)
-       $POP    r31,$FRAME-$SIZE_T*1($sp)
+       $POP    r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
+       $POP    r30,`$FRAME+$SIZE_T*0`($sp)
+       $POP    r31,`$FRAME+$SIZE_T*1`($sp)
        mtlr    r0
-       addi    $sp,$sp,$FRAME
+       addi    $sp,$sp,`$FRAME+$SIZE_T*2`
        blr
        .long   0
        .byte   0,12,0x04,1,0x80,2,6,0
@@ -1158,10 +1310,36 @@ Lschedule_mangle_dec:
 .globl .vpaes_set_encrypt_key
 .align 5
 .vpaes_set_encrypt_key:
+       $STU    $sp,-$FRAME($sp)        # allocate $FRAME bytes; released by the addi before blr
+       li      r10,`15+6*$SIZE_T`      # r10: offset of the v20,v22,...,v30 save slots
+       li      r11,`31+6*$SIZE_T`      # r11: offset of the v21,v23,...,v31 save slots
        mflr    r0
        mfspr   r6, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,32              # r10/r11 interleave, so each one skips the other's slot
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r6,`$FRAME-4`($sp)      # save vrsave (a store: r6 must stay intact for mtspr on exit)
        li      r7, -1
-       $PUSH   r0, $LRSAVE($sp)
+       $PUSH   r0, `$FRAME+$LRSAVE`($sp)       # same slot as before: $LRSAVE above the pre-$STU stack pointer
        mtspr   256, r7                 # preserve all AltiVec registers
 
        srwi    r9, $bits, 5            # shr   \$5,%eax
@@ -1172,23 +1350,74 @@ Lschedule_mangle_dec:
        li      r8, 0x30                # mov   \$0x30,%r8d
        bl      _vpaes_schedule_core
 
-       $POP    r0, $LRSAVE($sp)
+       $POP    r0, `$FRAME+$LRSAVE`($sp)
+       li      r10,`15+6*$SIZE_T`      # same slot offsets as in the prologue
+       li      r11,`31+6*$SIZE_T`
        mtspr   256, r6                 # restore vrsave
        mtlr    r0
        xor     r3, r3, r3
+       lvx     v20,r10,$sp
+       addi    r10,r10,32              # stride must mirror the save sequence
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME          # release the frame
        blr
        .long   0
-       .byte   0,12,0x14,1,0,3,0
+       .byte   0,12,0x04,1,0x80,0,3,0
        .long   0
 .size  .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
 
 .globl .vpaes_set_decrypt_key
 .align 4
 .vpaes_set_decrypt_key:
+       $STU    $sp,-$FRAME($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
        mflr    r0
        mfspr   r6, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,16
+       stvx    v21,r11,$sp
+       addi    r11,r11,16
+       stvx    v22,r10,$sp
+       addi    r10,r10,16
+       stvx    v23,r11,$sp
+       addi    r11,r11,16
+       stvx    v24,r10,$sp
+       addi    r10,r10,16
+       stvx    v25,r11,$sp
+       addi    r11,r11,16
+       stvx    v26,r10,$sp
+       addi    r10,r10,16
+       stvx    v27,r11,$sp
+       addi    r11,r11,16
+       stvx    v28,r10,$sp
+       addi    r10,r10,16
+       stvx    v29,r11,$sp
+       addi    r11,r11,16
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       lwz     r6,`$FRAME-4`($sp)      # save vrsave
        li      r7, -1
-       $PUSH   r0, $LRSAVE($sp)
+       $PUSH   r0, `$FRAME+$LRSAVE`($sp)
        mtspr   256, r7                 # preserve all AltiVec registers
 
        srwi    r9, $bits, 5            # shr   \$5,%eax
@@ -1204,18 +1433,45 @@ Lschedule_mangle_dec:
        xori    r8, r8, 32              # xor   \$32,%r8d       # nbits==192?0:32
        bl      _vpaes_schedule_core
 
-       $POP    r0,  $LRSAVE($sp)
+       $POP    r0,  `$FRAME+$LRSAVE`($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
        mtspr   256, r6                 # restore vrsave
        mtlr    r0
        xor     r3, r3, r3
+       lvx     v20,r10,$sp
+       addi    r10,r10,16
+       lvx     v21,r11,$sp
+       addi    r11,r11,16
+       lvx     v22,r10,$sp
+       addi    r10,r10,16
+       lvx     v23,r11,$sp
+       addi    r11,r11,16
+       lvx     v24,r10,$sp
+       addi    r10,r10,16
+       lvx     v25,r11,$sp
+       addi    r11,r11,16
+       lvx     v26,r10,$sp
+       addi    r10,r10,16
+       lvx     v27,r11,$sp
+       addi    r11,r11,16
+       lvx     v28,r10,$sp
+       addi    r10,r10,16
+       lvx     v29,r11,$sp
+       addi    r11,r11,16
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME
        blr
        .long   0
-       .byte   0,12,0x14,1,0,3,0
+       .byte   0,12,0x04,1,0x80,3,0
        .long   0
 .size  .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
 ___
 }
 
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;  # replace every backtick-quoted expression in $code with its eval() result
+
 print $code;
 
 close STDOUT;