-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
-# issue z10 makes it improssible to eliminate the interlock condition:
+# issue z10 makes it impossible to eliminate the interlock condition:
# critial path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$softonly=0; # allow hardware support
}
$code=<<___;
+#include "s390x_arch.h"
+
.text
.type AES_Te,\@object
or $s1,$t1
or $t2,$i2
or $t3,$i3
-
+
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
x $s2,24($key)
x $s3,28($key)
- br $ra
+ br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___
x $s2,24($key)
x $s3,28($key)
- br $ra
+ br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
___
.Lproceed:
___
$code.=<<___ if (!$softonly);
- # convert bits to km code, [128,192,256]->[18,19,20]
+ # convert bits to km(c) code, [128,192,256]->[18,19,20]
lhi %r5,-128
lhi %r0,18
ar %r5,$bits
ar %r5,%r0
larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lekey_internal
-
- lghi %r0,0 # query capability vector
- la %r1,16($sp)
- .long 0xb92f0042 # kmc %r4,%r2
-
- llihh %r1,0x8000
- srlg %r1,%r1,0(%r5)
- ng %r1,16($sp)
+ llihh %r0,0x8000
+ srlg %r0,%r0,0(%r5)
+ ng %r0,S390X_KM(%r1) # check availability of both km...
+ ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length
jz .Lekey_internal
lmg %r0,%r1,0($inp) # just copy 128 bits...
stg %r1,24($key)
1: st $bits,236($key) # save bits [for debugging purposes]
lgr $t0,%r5
- st %r5,240($key) # save km code
+ st %r5,240($key) # save km(c) code
lghi %r2,0
br %r14
___
lhi $t1,16
cr $t0,$t1
jl .Lgo
- oill $t0,0x80 # set "decrypt" bit
+ oill $t0,S390X_DECRYPT # set "decrypt" bit
st $t0,240($key)
br $ra
___
.align 16
.Lkmc_truncated:
ahi $key,-1 # it's the way it's encoded in mvc
- tmll %r0,0x80
+ tmll %r0,S390X_DECRYPT
jnz .Lkmc_truncated_dec
lghi %r1,0
stg %r1,16*$SIZE_T($sp)
.Lcbc_enc_done:
l${g} $ivp,6*$SIZE_T($sp)
st $s0,0($ivp)
- st $s1,4($ivp)
+ st $s1,4($ivp)
st $s2,8($ivp)
st $s3,12($ivp)
clr %r0,%r1
jl .Lctr32_software
- stm${g} %r6,$s3,6*$SIZE_T($sp)
+ st${g} $s2,10*$SIZE_T($sp)
+ st${g} $s3,11*$SIZE_T($sp)
+
+ clr $len,%r1 # does work even in 64-bit mode
+ jle .Lctr32_nokma # kma is slower for <= 16 blocks
+
+ larl %r1,OPENSSL_s390xcap_P
+ lr $s2,%r0
+ llihh $s3,0x8000
+ srlg $s3,$s3,0($s2)
+ ng $s3,S390X_KMA(%r1) # check kma capability vector
+ jz .Lctr32_nokma
+
+ l${g}hi %r1,-$stdframe-112
+ l${g}r $s3,$sp
+ la $sp,0(%r1,$sp) # prepare parameter block
+
+ lhi %r1,0x0600
+ sllg $len,$len,4
+ or %r0,%r1 # set HS and LAAD flags
+
+ st${g} $s3,0($sp) # backchain
+ la %r1,$stdframe($sp)
+
+ lmg $s2,$s3,0($key) # copy key
+ stg $s2,$stdframe+80($sp)
+ stg $s3,$stdframe+88($sp)
+ lmg $s2,$s3,16($key)
+ stg $s2,$stdframe+96($sp)
+ stg $s3,$stdframe+104($sp)
+
+ lmg $s2,$s3,0($ivp) # copy iv
+ stg $s2,$stdframe+64($sp)
+ ahi $s3,-1 # kma requires counter-1
+ stg $s3,$stdframe+72($sp)
+ st $s3,$stdframe+12($sp) # copy counter
+
+ lghi $s2,0 # no AAD
+ lghi $s3,0
+
+ .long 0xb929a042 # kma $out,$s2,$inp
+ brc 1,.-4 # pay attention to "partial completion"
+
+ stg %r0,$stdframe+80($sp) # wipe key
+ stg %r0,$stdframe+88($sp)
+ stg %r0,$stdframe+96($sp)
+ stg %r0,$stdframe+104($sp)
+ la $sp,$stdframe+112($sp)
+
+ lm${g} $s2,$s3,10*$SIZE_T($sp)
+ br $ra
+
+.align 16
+.Lctr32_nokma:
+ stm${g} %r6,$s1,6*$SIZE_T($sp)
slgr $out,$inp
la %r1,0($key) # %r1 is permanent copy of $key
.Lctr32_hw_switch:
___
-$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower
- larl $s0,OPENSSL_s390xcap_P
- lg $s0,8($s0)
- tmhh $s0,0x0004 # check for message_security-assist-4
- jz .Lctr32_km_loop
-
+$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
llgfr $s0,%r0
lgr $s1,%r1
- lghi %r0,0
- la %r1,16($sp)
- .long 0xb92d2042 # kmctr %r4,%r2,%r2
-
+ larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000 # check if kmctr supports the function code
srlg %r0,%r0,0($s0)
- ng %r0,16($sp)
+ ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector
lgr %r0,$s0
lgr %r1,$s1
jz .Lctr32_km_loop
br $ra
.align 16
___
-$code.=<<___;
+$code.=<<___ if (!$softonly);
.Lctr32_km_loop:
la $s2,16($sp)
lgr $s3,$fp
}
########################################################################
-# void AES_xts_encrypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,
+# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
+# size_t len, const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
#
{
llgfr $s0,%r0 # put aside the function code
lghi $s1,0x7f
nr $s1,%r0
- lghi %r0,0 # query capability vector
- la %r1,2*$SIZE_T($sp)
- .long 0xb92e0042 # km %r4,%r2
- llihh %r1,0x8000
- srlg %r1,%r1,32($s1) # check for 32+function code
- ng %r1,2*$SIZE_T($sp)
+ larl %r1,OPENSSL_s390xcap_P
+ llihh %r0,0x8000
+ srlg %r0,%r0,32($s1) # check for 32+function code
+ ng %r0,S390X_KM(%r1) # check km capability vector
lgr %r0,$s0 # restore the function code
la %r1,0($key1) # restore $key1
jz .Lxts_km_vanilla
lrvg $s0,$tweak+0($sp) # load the last tweak
lrvg $s1,$tweak+8($sp)
- stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key
+ stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
nill %r0,0xffdf # switch back to original function code
la %r1,0($key1) # restore pointer to $key1
llgc $len,2*$SIZE_T-1($sp)
nill $len,0x0f # $len%=16
br $ra
-
+
.align 16
.Lxts_km_vanilla:
___
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0
+ alcgr $s1,$s1
xgr $s0,$i1
- ogr $s1,$i2
.Lxts_km_start:
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0
+ alcgr $s1,$s1
xgr $s0,$i1
- ogr $s1,$i2
ltr $len,$len # clear zero flag
br $ra
clr %r0,%r1
jl .Lxts_enc_software
+ st${g} $ra,5*$SIZE_T($sp)
stm${g} %r6,$s3,6*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
sllg $len,$len,4 # $len&=~15
slgr $out,$inp
stg $i2,8($i3)
.Lxts_enc_km_done:
- l${g} $ra,14*$SIZE_T($sp)
- st${g} $sp,$tweak($sp) # wipe tweak
- st${g} $sp,$tweak($sp)
+ stg $sp,$tweak+0($sp) # wipe tweak
+ stg $sp,$tweak+8($sp)
+ l${g} $ra,5*$SIZE_T($sp)
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
slgr $out,$inp
- xgr $s0,$s0 # clear upper half
- xgr $s1,$s1
- lrv $s0,$stdframe+4($sp) # load secno
- lrv $s1,$stdframe+0($sp)
- xgr $s2,$s2
- xgr $s3,$s3
+ l${g} $s3,$stdframe($sp) # ivp
+ llgf $s0,0($s3) # load iv
+ llgf $s1,4($s3)
+ llgf $s2,8($s3)
+ llgf $s3,12($s3)
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1
+ alcgr $s3,$s3
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1
+ alcgr $s3,$s3
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
br $ra
.size AES_xts_encrypt,.-AES_xts_encrypt
___
-# void AES_xts_decrypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,u64 secno);
+# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
+# size_t len, const AES_KEY *key1, const AES_KEY *key2,
+# const unsigned char iv[16]);
#
$code.=<<___;
.globl AES_xts_decrypt
clr %r0,%r1
jl .Lxts_dec_software
+ st${g} $ra,5*$SIZE_T($sp)
stm${g} %r6,$s3,6*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
nill $len,0xfff0 # $len&=~15
slgr $out,$inp
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0
+ alcgr $s1,$s1
xgr $s0,$i1
- ogr $s1,$i2
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
stg $s2,0($i3)
stg $s3,8($i3)
.Lxts_dec_km_done:
- l${g} $ra,14*$SIZE_T($sp)
- st${g} $sp,$tweak($sp) # wipe tweak
- st${g} $sp,$tweak($sp)
+ stg $sp,$tweak+0($sp) # wipe tweak
+ stg $sp,$tweak+8($sp)
+ l${g} $ra,5*$SIZE_T($sp)
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
srlg $len,$len,4
slgr $out,$inp
- xgr $s0,$s0 # clear upper half
- xgr $s1,$s1
- lrv $s0,$stdframe+4($sp) # load secno
- lrv $s1,$stdframe+0($sp)
- xgr $s2,$s2
- xgr $s3,$s3
+ l${g} $s3,$stdframe($sp) # ivp
+ llgf $s0,0($s3) # load iv
+ llgf $s1,4($s3)
+ llgf $s2,8($s3)
+ llgf $s3,12($s3)
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1
+ alcgr $s3,$s3
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1
+ alcgr $s3,$s3
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $i2,$s1 # flip byte order
lrvgr $i3,$s3
stmg $i2,$i3,$tweak($sp) # save the 1st tweak
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1
+ alcgr $s3,$s3
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
}
$code.=<<___;
.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,16,8
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;