.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
+_s390x_AES_set_encrypt_key: # second label on the same entry; AES_set_decrypt_key reaches it via bras
lghi $t0,0
cl${g}r $inp,$t0
je .Lminus1
je 1f
lg %r1,24($inp)
stg %r1,24($key)
-1: st $bits,236($key) # save bits
+1: st $bits,236($key) # save bits [for debugging purposes]
+ lgr $t0,%r5 # leave the km code in $t0 too; AES_set_decrypt_key reads $t0 instead of reloading 240($key)
st %r5,240($key) # save km code
lghi %r2,0
br %r14
$code.=<<___;
.align 16
.Lekey_internal:
- stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs
+ stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
larl $tbl,AES_Te+2048
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
+ lghi $t0,10 # $t0 = 10 on this exit; AES_set_decrypt_key copies $t0 into $rounds
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp) # undo the entry stm, $key included
br $ra
.align 16
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
+ lghi $t0,12 # $t0 = 12 on this exit; AES_set_decrypt_key copies $t0 into $rounds
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp) # undo the entry stm, $key included
br $ra
.align 16
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
+ lghi $t0,14 # $t0 = 14 on this exit; AES_set_decrypt_key copies $t0 into $rounds
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp) # undo the entry stm, $key included
br $ra
.align 16
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
- st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
- st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers!
- bras $ra,AES_set_encrypt_key
- l${g} $key,4*$SIZE_T($sp)
+ #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
+ st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
+ bras $ra,_s390x_AES_set_encrypt_key # build the encrypt schedule first; status comes back in %r2
+ #l${g} $key,4*$SIZE_T($sp) # no reload: the callee is relied on to hand $key back intact
l${g} $ra,14*$SIZE_T($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
- l $t0,240($key)
+ #l $t0,240($key) # no reload: $t0 is used as left by _s390x_AES_set_encrypt_key
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,0x80 # set "decrypt" bit
st $t0,240($key)
br $ra
-
-.align 16
-.Ldkey_internal:
- st${g} $key,4*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
- bras $ra,.Lekey_internal
- l${g} $key,4*$SIZE_T($sp)
- l${g} $ra,14*$SIZE_T($sp)
___
$code.=<<___;
-
-.Lgo: llgf $rounds,240($key)
+.align 16
+.Lgo: lgr $rounds,$t0 # $rounds = $t0 (replaces: llgf $rounds,240($key))
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
lghi $s1,0x7f
nr $s1,%r0
lghi %r0,0 # query capability vector
- la %r1,2*$SIZE_T($sp)
+ la %r1,$tweak-16($sp) # %r1 = scratch just below the tweak slot (presumably where km leaves the capability vector)
.long 0xb92e0042 # km %r4,%r2
llihh %r1,0x8000
srlg %r1,%r1,32($s1) # check for 32+function code
- ng %r1,2*$SIZE_T($sp)
+ ng %r1,$tweak-16($sp) # AND the probe bit with the doubleword at that same scratch address
lgr %r0,$s0 # restore the function code
la %r1,0($key1) # restore $key1
jz .Lxts_km_vanilla
lrvg $s0,$tweak+0($sp) # load the last tweak
lrvg $s1,$tweak+8($sp)
- stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key
+ stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key (addressed off $sp, not %r1)
nill %r0,0xffdf # switch back to original function code
la %r1,0($key1) # restore pointer to $key1
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s1,$s1 # double the upper half and add that carry in
xgr $s0,$i1
- ogr $s1,$i2
.Lxts_km_start:
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s1,$s1 # double the upper half and add that carry in
xgr $s0,$i1
- ogr $s1,$i2
ltr $len,$len # clear zero flag
br $ra
clr %r0,%r1
jl .Lxts_enc_software
+ st${g} $ra,5*$SIZE_T($sp) # return address goes in slot 5, directly below the %r6.. save area
stm${g} %r6,$s3,6*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
sllg $len,$len,4 # $len&=~15
slgr $out,$inp
stg $i2,8($i3)
.Lxts_enc_km_done:
- l${g} $ra,14*$SIZE_T($sp)
- st${g} $sp,$tweak($sp) # wipe tweak
- st${g} $sp,$tweak($sp)
+ stg $sp,$tweak+0($sp) # wipe tweak
+ stg $sp,$tweak+8($sp) # ... bytes 8..15 of the tweak slot as well
+ l${g} $ra,5*$SIZE_T($sp) # reload the return address from slot 5
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
slgr $out,$inp
- xgr $s0,$s0 # clear upper half
- xgr $s1,$s1
- lrv $s0,$stdframe+4($sp) # load secno
- lrv $s1,$stdframe+0($sp)
- xgr $s2,$s2
- xgr $s3,$s3
+ l${g} $s3,$stdframe($sp) # ivp
+ llgf $s0,0($s3) # load iv
+ llgf $s1,4($s3) # iv word at offset 4
+ llgf $s2,8($s3) # iv word at offset 8
+ llgf $s3,12($s3) # loaded last: this overwrites the iv pointer held in $s3
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s3,$s3 # double the upper half and add that carry in
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s3,$s3 # double the upper half and add that carry in
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
.size AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,u64 secno);
+# const AES_KEY *key1, const AES_KEY *key2,
+# const unsigned char iv[16]);
#
$code.=<<___;
.globl AES_xts_decrypt
clr %r0,%r1
jl .Lxts_dec_software
+ st${g} $ra,5*$SIZE_T($sp) # return address goes in slot 5, directly below the %r6.. save area
stm${g} %r6,$s3,6*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
nill $len,0xfff0 # $len&=~15
slgr $out,$inp
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
- srlg $i2,$s0,63 # carry bit from lower half
- sllg $s0,$s0,1
- sllg $s1,$s1,1
+ algr $s0,$s0 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s1,$s1 # double the upper half and add that carry in
xgr $s0,$i1
- ogr $s1,$i2
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
stg $s2,0($i3)
stg $s3,8($i3)
.Lxts_dec_km_done:
- l${g} $ra,14*$SIZE_T($sp)
- st${g} $sp,$tweak($sp) # wipe tweak
- st${g} $sp,$tweak($sp)
+ stg $sp,$tweak+0($sp) # wipe tweak
+ stg $sp,$tweak+8($sp) # ... bytes 8..15 of the tweak slot as well
+ l${g} $ra,5*$SIZE_T($sp) # reload the return address from slot 5
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
srlg $len,$len,4
slgr $out,$inp
- xgr $s0,$s0 # clear upper half
- xgr $s1,$s1
- lrv $s0,$stdframe+4($sp) # load secno
- lrv $s1,$stdframe+0($sp)
- xgr $s2,$s2
- xgr $s3,$s3
+ l${g} $s3,$stdframe($sp) # ivp
+ llgf $s0,0($s3) # load iv
+ llgf $s1,4($s3) # iv word at offset 4
+ llgf $s2,8($s3) # iv word at offset 8
+ llgf $s3,12($s3) # loaded last: this overwrites the iv pointer held in $s3
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s3,$s3 # double the upper half and add that carry in
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s3,$s3 # double the upper half and add that carry in
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $i2,$s1 # flip byte order
lrvgr $i3,$s3
stmg $i2,$i3,$tweak($sp) # save the 1st tweak
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
- srlg %r0,$s1,63 # carry bit from lower half
- sllg $s1,$s1,1
- sllg $s3,$s3,1
+ algr $s1,$s1 # double the lower half; the bit leaving position 63 becomes the carry
+ alcgr $s3,$s3 # double the upper half and add that carry in
xgr $s1,%r1
- ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits