-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
-# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
-# <appro@openssl.org>. The module is licensed under 2-clause BSD
-# license. October 2012. All rights reserved.
+# Written by David S. Miller and Andy Polyakov.
+# The module is licensed under 2-clause BSD license. October 2012.
+# All rights reserved.
# ====================================================================
######################################################################
# instructions with those on critical path. Amazing!
#
# As with Intel AES-NI, question is if it's possible to improve
-# performance of parallelizeable modes by interleaving round
+# performance of parallelizable modes by interleaving round
# instructions. Provided round instruction latency and throughput
# optimal interleave factor is 2. But can we expect 2x performance
# improvement? Well, as round instructions can be issued one per
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; }
-else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; }
+# The last command-line argument names the file that receives the generated
+# assembly.  When it is absent or empty, STDOUT is left untouched.
+$output = pop;
+if (defined($output) && $output ne "") {
+	# Three-argument open: the file name is taken literally and can never be
+	# parsed as part of the mode.  A failed open is fatal rather than being
+	# silently ignored, so a bad output path cannot go unnoticed by the build.
+	open(STDOUT, ">", $output) or die "can't open $output: $!";
+}
$::evp=1; # if $evp is set to 0, script generates module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
-$code=<<___;
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef __arch64__
+.register %g2,#scratch
+.register %g3,#scratch
+#endif
+
.text
.globl aes_t4_encrypt
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
$code.=<<___;
-.align 32
-_aes128_loadkey:
- ldx [$key + 0], %g4
- ldx [$key + 8], %g5
-___
-for ($i=2; $i<22;$i++) { # load key schedule
- $code.=<<___;
- ldd [$key + `8*$i`], %f`12+2*$i`
-___
-}
-$code.=<<___;
- retl
- nop
-.type _aes128_loadkey,#function
-.size _aes128_loadkey,.-_aes128_loadkey
-_aes128_load_enckey=_aes128_loadkey
-_aes128_load_deckey=_aes128_loadkey
-
.align 32
_aes128_encrypt_1x:
___
.type _aes128_encrypt_2x,#function
.size _aes128_encrypt_2x,.-_aes128_encrypt_2x
+.align 32
+_aes128_loadkey:
+ ldx [$key + 0], %g4
+ ldx [$key + 8], %g5
+___
+for ($i=2; $i<22;$i++) { # load key schedule
+ $code.=<<___;
+ ldd [$key + `8*$i`], %f`12+2*$i`
+___
+}
+$code.=<<___;
+ retl
+ nop
+.type _aes128_loadkey,#function
+.size _aes128_loadkey,.-_aes128_loadkey
+_aes128_load_enckey=_aes128_loadkey
+_aes128_load_deckey=_aes128_loadkey
+
+___
+
+&alg_cbc_encrypt_implement("aes",128);
+if ($::evp) {
+ &alg_ctr32_implement("aes",128);
+ &alg_xts_implement("aes",128,"en");
+ &alg_xts_implement("aes",128,"de");
+}
+&alg_cbc_decrypt_implement("aes",128);
+
+$code.=<<___;
.align 32
_aes128_decrypt_1x:
___
aes_dround23_l %f54, %f10, %f6, %f6
.type _aes128_decrypt_2x,#function
.size _aes128_decrypt_2x,.-_aes128_decrypt_2x
-
-.align 32
-_aes192_loadkey:
-_aes256_loadkey:
- ldx [$key + 0], %g4
- ldx [$key + 8], %g5
-___
-for ($i=2; $i<26;$i++) { # load key schedule
- $code.=<<___;
- ldd [$key + `8*$i`], %f`12+2*$i`
___
-}
-$code.=<<___;
- retl
- nop
-.type _aes192_loadkey,#function
-.size _aes192_loadkey,.-_aes192_loadkey
-_aes192_load_enckey=_aes192_loadkey
-_aes192_load_deckey=_aes192_loadkey
-_aes256_load_enckey=_aes192_loadkey
-_aes256_load_deckey=_aes192_loadkey
+$code.=<<___;
.align 32
_aes192_encrypt_1x:
___
.type _aes192_encrypt_2x,#function
.size _aes192_encrypt_2x,.-_aes192_encrypt_2x
-.align 32
-_aes192_decrypt_1x:
-___
-for ($i=0; $i<5; $i++) {
- $code.=<<___;
- aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
- aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
- aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
- aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
-___
-}
-$code.=<<___;
- aes_dround01 %f56, %f0, %f2, %f4
- aes_dround23 %f58, %f0, %f2, %f2
- aes_dround01_l %f60, %f4, %f2, %f0
- retl
- aes_dround23_l %f62, %f4, %f2, %f2
-.type _aes192_decrypt_1x,#function
-.size _aes192_decrypt_1x,.-_aes192_decrypt_1x
-
-.align 32
-_aes192_decrypt_2x:
-___
-for ($i=0; $i<5; $i++) {
- $code.=<<___;
- aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
- aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
- aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
- aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
- aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
- aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
- aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
- aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
-___
-}
-$code.=<<___;
- aes_dround01 %f56, %f0, %f2, %f8
- aes_dround23 %f58, %f0, %f2, %f2
- aes_dround01 %f56, %f4, %f6, %f10
- aes_dround23 %f58, %f4, %f6, %f6
- aes_dround01_l %f60, %f8, %f2, %f0
- aes_dround23_l %f62, %f8, %f2, %f2
- aes_dround01_l %f60, %f10, %f6, %f4
- retl
- aes_dround23_l %f62, %f10, %f6, %f6
-.type _aes192_decrypt_2x,#function
-.size _aes192_decrypt_2x,.-_aes192_decrypt_2x
-
.align 32
_aes256_encrypt_1x:
aes_eround01 %f16, %f0, %f2, %f4
.type _aes256_encrypt_2x,#function
.size _aes256_encrypt_2x,.-_aes256_encrypt_2x
+.align 32
+_aes192_loadkey:
+ ldx [$key + 0], %g4
+ ldx [$key + 8], %g5
+___
+for ($i=2; $i<26;$i++) { # load key schedule
+ $code.=<<___;
+ ldd [$key + `8*$i`], %f`12+2*$i`
+___
+}
+$code.=<<___;
+ retl
+ nop
+.type _aes192_loadkey,#function
+.size _aes192_loadkey,.-_aes192_loadkey
+_aes256_loadkey=_aes192_loadkey
+_aes192_load_enckey=_aes192_loadkey
+_aes192_load_deckey=_aes192_loadkey
+_aes256_load_enckey=_aes192_loadkey
+_aes256_load_deckey=_aes192_loadkey
+___
+
+&alg_cbc_encrypt_implement("aes",256);
+&alg_cbc_encrypt_implement("aes",192);
+if ($::evp) {
+ &alg_ctr32_implement("aes",256);
+ &alg_xts_implement("aes",256,"en");
+ &alg_xts_implement("aes",256,"de");
+ &alg_ctr32_implement("aes",192);
+}
+&alg_cbc_decrypt_implement("aes",192);
+&alg_cbc_decrypt_implement("aes",256);
+
+$code.=<<___;
.align 32
_aes256_decrypt_1x:
aes_dround01 %f16, %f0, %f2, %f4
ldd [$key + 40], %f22
.type _aes256_decrypt_2x,#function
.size _aes256_decrypt_2x,.-_aes256_decrypt_2x
-___
-&alg_cbc_encrypt_implement("aes",128);
-&alg_cbc_encrypt_implement("aes",192);
-&alg_cbc_encrypt_implement("aes",256);
-
-&alg_cbc_decrypt_implement("aes",128);
-&alg_cbc_decrypt_implement("aes",192);
-&alg_cbc_decrypt_implement("aes",256);
+.align 32
+_aes192_decrypt_1x:
+___
+for ($i=0; $i<5; $i++) {
+ $code.=<<___;
+ aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
+ aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
+ aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
+ aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;
+ aes_dround01 %f56, %f0, %f2, %f4
+ aes_dround23 %f58, %f0, %f2, %f2
+ aes_dround01_l %f60, %f4, %f2, %f0
+ retl
+ aes_dround23_l %f62, %f4, %f2, %f2
+.type _aes192_decrypt_1x,#function
+.size _aes192_decrypt_1x,.-_aes192_decrypt_1x
-if ($::evp) {
- &alg_ctr32_implement("aes",128);
- &alg_ctr32_implement("aes",192);
- &alg_ctr32_implement("aes",256);
+.align 32
+_aes192_decrypt_2x:
+___
+for ($i=0; $i<5; $i++) {
+ $code.=<<___;
+ aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
+ aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
+ aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
+ aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
+ aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
+ aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
+ aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
+ aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
+___
}
+$code.=<<___;
+ aes_dround01 %f56, %f0, %f2, %f8
+ aes_dround23 %f58, %f0, %f2, %f2
+ aes_dround01 %f56, %f4, %f6, %f10
+ aes_dround23 %f58, %f4, %f6, %f6
+ aes_dround01_l %f60, %f8, %f2, %f0
+ aes_dround23_l %f62, %f8, %f2, %f2
+ aes_dround01_l %f60, %f10, %f6, %f4
+ retl
+ aes_dround23_l %f62, %f10, %f6, %f6
+.type _aes192_decrypt_2x,#function
+.size _aes192_decrypt_2x,.-_aes192_decrypt_2x
+___
}}}
if (!$::evp) {