-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
-# performance improvement on the platfrom in question (and marginal
+# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result, non-compact
$PUSH ="stw";
} else { die "nonsense $flavour"; }
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;	# $SIZE_T (true) when $flavour ends in "le", else 0; gates the byte-swapping load/store heredocs
+
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
bne Lenc_unaligned
Lenc_unaligned_ok:
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
lwz $s0,0($inp)
lwz $s1,4($inp)
lwz $s2,8($inp)
lwz $s3,12($inp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);	# little-endian flavour: load 4 words into $t0-$t3, then byte-reverse each into $s0-$s3
+ lwz $t0,0($inp)
+ lwz $t1,4($inp)
+ lwz $t2,8($inp)
+ lwz $t3,12($inp)
+ rotlwi $s0,$t0,8
+ rotlwi $s1,$t1,8
+ rotlwi $s2,$t2,8
+ rotlwi $s3,$t3,8
+ rlwimi $s0,$t0,24,0,7
+ rlwimi $s1,$t1,24,0,7
+ rlwimi $s2,$t2,24,0,7
+ rlwimi $s3,$t3,24,0,7
+ rlwimi $s0,$t0,24,16,23
+ rlwimi $s1,$t1,24,16,23
+ rlwimi $s2,$t2,24,16,23
+ rlwimi $s3,$t3,24,16,23
+___
+$code.=<<___;
bl LAES_Te
bl Lppc_AES_encrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);	# little-endian flavour: byte-reverse $s0-$s3 into $t0-$t3, then store those to $out
+ rotlwi $t0,$s0,8
+ rotlwi $t1,$s1,8
+ rotlwi $t2,$s2,8
+ rotlwi $t3,$s3,8
+ rlwimi $t0,$s0,24,0,7
+ rlwimi $t1,$s1,24,0,7
+ rlwimi $t2,$s2,24,0,7
+ rlwimi $t3,$s3,24,0,7
+ rlwimi $t0,$s0,24,16,23
+ rlwimi $t1,$s1,24,16,23
+ rlwimi $t2,$s2,24,16,23
+ rlwimi $t3,$s3,24,16,23
+ stw $t0,0($out)
+ stw $t1,4($out)
+ stw $t2,8($out)
+ stw $t3,12($out)
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
stw $s0,0($out)
stw $s1,4($out)
stw $s2,8($out)
stw $s3,12($out)
+___
+$code.=<<___;
b Lenc_done
Lenc_unaligned:
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
-.size .AES_encrypt,.-.AES_encrypt
.align 5
Lppc_AES_encrypt:
xor $s2,$t2,$acc14
xor $s3,$t3,$acc15
addi $key,$key,16
- bdnz- Lenc_loop
+ bdnz Lenc_loop
addi $Tbl2,$Tbl0,2048
nop
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
+.size .AES_encrypt,.-.AES_encrypt
.globl .AES_decrypt
.align 7
bne Ldec_unaligned
Ldec_unaligned_ok:
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
lwz $s0,0($inp)
lwz $s1,4($inp)
lwz $s2,8($inp)
lwz $s3,12($inp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);	# little-endian flavour: load 4 words into $t0-$t3, then byte-reverse each into $s0-$s3
+ lwz $t0,0($inp)
+ lwz $t1,4($inp)
+ lwz $t2,8($inp)
+ lwz $t3,12($inp)
+ rotlwi $s0,$t0,8
+ rotlwi $s1,$t1,8
+ rotlwi $s2,$t2,8
+ rotlwi $s3,$t3,8
+ rlwimi $s0,$t0,24,0,7
+ rlwimi $s1,$t1,24,0,7
+ rlwimi $s2,$t2,24,0,7
+ rlwimi $s3,$t3,24,0,7
+ rlwimi $s0,$t0,24,16,23
+ rlwimi $s1,$t1,24,16,23
+ rlwimi $s2,$t2,24,16,23
+ rlwimi $s3,$t3,24,16,23
+___
+$code.=<<___;
bl LAES_Td
bl Lppc_AES_decrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);	# little-endian flavour: byte-reverse $s0-$s3 into $t0-$t3, then store those to $out
+ rotlwi $t0,$s0,8
+ rotlwi $t1,$s1,8
+ rotlwi $t2,$s2,8
+ rotlwi $t3,$s3,8
+ rlwimi $t0,$s0,24,0,7
+ rlwimi $t1,$s1,24,0,7
+ rlwimi $t2,$s2,24,0,7
+ rlwimi $t3,$s3,24,0,7
+ rlwimi $t0,$s0,24,16,23
+ rlwimi $t1,$s1,24,16,23
+ rlwimi $t2,$s2,24,16,23
+ rlwimi $t3,$s3,24,16,23
+ stw $t0,0($out)
+ stw $t1,4($out)
+ stw $t2,8($out)
+ stw $t3,12($out)
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
stw $s0,0($out)
stw $s1,4($out)
stw $s2,8($out)
stw $s3,12($out)
+___
+$code.=<<___;
b Ldec_done
Ldec_unaligned:
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
-.size .AES_decrypt,.-.AES_decrypt
.align 5
Lppc_AES_decrypt:
xor $s2,$t2,$acc14
xor $s3,$t3,$acc15
addi $key,$key,16
- bdnz- Ldec_loop
+ bdnz Ldec_loop
addi $Tbl2,$Tbl0,2048
nop
xor $s1,$s1,$acc05
xor $s2,$s2,$acc06
xor $s3,$s3,$acc07
- xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
- xor $s1,$s1,$acc09
- xor $s2,$s2,$acc10
- xor $s3,$s3,$acc11
+ xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
+ xor $s1,$s1,$acc09
+ xor $s2,$s2,$acc10
+ xor $s3,$s3,$acc11
b Ldec_compact_loop
.align 4
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
+.size .AES_decrypt,.-.AES_decrypt
.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align 7