X-Git-Url: https://git.openssl.org/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha512-s390x.pl;h=92d7a7725a67313480078ccb39b066ff4a4b0068;hb=413b6a82594ab45192dda233a77efe5637d656d6;hp=e4f1812c68a269a4733069964ecc74ea722af2e1;hpb=8626230a0227b15c0e0542f5a65f802ee32772b6;p=openssl.git diff --git a/crypto/sha/asm/sha512-s390x.pl b/crypto/sha/asm/sha512-s390x.pl index e4f1812c68..92d7a7725a 100644 --- a/crypto/sha/asm/sha512-s390x.pl +++ b/crypto/sha/asm/sha512-s390x.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -26,6 +33,26 @@ # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster # than software. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 SHA256 was measured to +# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + $t0="%r0"; $t1="%r1"; $ctx="%r2"; $t2="%r2"; @@ -44,7 +71,7 @@ $tbl="%r13"; $T1="%r14"; $sp="%r15"; -$output=shift; +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; if ($output =~ /512/) { @@ -78,7 +105,8 @@ if ($output =~ /512/) { } $Func="sha${label}_block_data_order"; $Table="K${label}"; -$frame=160+16*$SZ; +$stdframe=16*$SIZE_T+4*8; +$frame=$stdframe+16*$SZ; sub BODY_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; @@ -93,9 +121,9 @@ $code.=<<___; xgr $t0,$t1 $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` xgr $t2,$g - $ST $T1,`160+$SZ*($i%16)`($sp) + $ST $T1,`$stdframe+$SZ*($i%16)`($sp) xgr $t0,$t1 # Sigma1(e) - la $T1,0($T1,$h) # T1+=h + algr $T1,$h # T1+=h ngr $t2,$e lgr $t1,$a algr $T1,$t0 # T1+=Sigma1(e) @@ -113,7 +141,7 @@ $code.=<<___; ngr $t2,$b algr $h,$T1 # h+=T1 ogr $t2,$t1 # Maj(a,b,c) - la $d,0($d,$T1) # d+=T1 + algr $d,$T1 # d+=T1 algr $h,$t2 # h+=Maj(a,b,c) ___ } @@ -122,19 +150,19 @@ sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - $LD $T1,`160+$SZ*(($i+1)%16)`($sp) ### $i - $LD $t1,`160+$SZ*(($i+14)%16)`($sp) + $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i + $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp) $ROT $t0,$T1,$sigma0[0] $SHR $T1,$sigma0[2] $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` xgr $T1,$t0 $ROT $t0,$t1,$sigma1[0] - xgr $T1,$t2 # sigma0(X[i+1]) + xgr $T1,$t2 # sigma0(X[i+1]) $SHR $t1,$sigma1[2] - $ADD $T1,`160+$SZ*($i%16)`($sp) # +=X[i] + $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i] xgr $t1,$t0 $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` - $ADD $T1,`160+$SZ*(($i+9)%16)`($sp) # +=X[i+9] + $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9] xgr $t1,$t0 # sigma1(X[i+14]) algr $T1,$t1 # +=sigma1(X[i+14]) ___ @@ -212,31 +240,33 @@ $code.=<<___; .globl $Func .type $Func,\@function $Func: + sllg $len,$len,`log(16*$SZ)/log(2)` ___ $code.=<<___ if ($kimdfunc); - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,16($sp) + larl %r1,OPENSSL_s390xcap_P + lg %r0,0(%r1) + tmhl %r0,0x4000 # check for message-security assist + jz .Lsoftware + lg %r0,16(%r1) # check kimd capabilities tmhh %r0,`0x8000>>$kimdfunc` jz .Lsoftware lghi %r0,$kimdfunc lgr %r1,$ctx lgr %r2,$inp - sllg %r3,$len,`log(16*$SZ)/log(2)` + lgr %r3,$len .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 +.align 16 .Lsoftware: ___ $code.=<<___; - sllg $len,$len,`log(16*$SZ)/log(2)` lghi %r1,-$frame - agr $len,$inp - stmg $ctx,%r15,16($sp) + la $len,0($len,$inp) + stm${g} $ctx,%r15,`2*$SIZE_T`($sp) lgr %r0,$sp la $sp,0(%r1,$sp) - stg %r0,0($sp) + st${g} %r0,0($sp) larl $tbl,$Table $LD $A,`0*$SZ`($ctx) @@ -260,7 +290,7 @@ $code.=<<___; clgr $len,$t0 jne .Lrounds_16_xx - lg $ctx,`$frame+16`($sp) + l${g} $ctx,`$frame+2*$SIZE_T`($sp) la $inp,`16*$SZ`($inp) $ADD $A,`0*$SZ`($ctx) $ADD $B,`1*$SZ`($ctx) @@ -278,13 +308,14 @@ $code.=<<___; $ST $F,`5*$SZ`($ctx) $ST $G,`6*$SZ`($ctx) $ST $H,`7*$SZ`($ctx) - clg $inp,`$frame+32`($sp) + cl${g} $inp,`$frame+4*$SIZE_T`($sp) jne .Lloop - lmg %r6,%r15,`$frame+48`($sp) + lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) br %r14 .size $Func,.-$Func .string "SHA${label} block transform for s390x, CRYPTOGAMS by " +.comm OPENSSL_s390xcap_P,80,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem;