# and the result should be close to 12. In the lack of instruction-
# level profiling data it's impossible to tell why...
+# November 2010.
+#
+# Adapt for -m31 build. If the kernel supports what's called the
+# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to use
+# 64-bit instructions and achieve "64-bit" performance even in a 31-bit
+# legacy application context. The feature is not specific to any
+# particular processor, as long as it's a "z-CPU". The latter implies
+# that the code remains z/Architecture specific. On z990 it was measured
+# to perform 2.8x better than 32-bit code generated by gcc 4.3.
+
+$flavour = shift;	# next argument: build flavour; selects word size below
+
+if ($flavour =~ /3[12]/) {	# any flavour string containing "31" or "32"
+ $SIZE_T=4;	# scales the save-area offset used as 6*$SIZE_T($sp)
+ $g="";	# empty suffix: stm${g}/lm${g} expand to stm/lm
+} else {
+ $SIZE_T=8;	# scales the save-area offset used as 6*$SIZE_T($sp)
+ $g="g";	# "g" suffix: stm${g}/lm${g} expand to stmg/lmg
+}
+
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
.Lsoft_gmult:
___
$code.=<<___;
- stmg %r6,%r14,48($sp)
+ stm${g} %r6,%r14,6*$SIZE_T($sp)
aghi $Xi,-1
lghi $len,1
.align 32
.Lsoft_ghash:
___
+$code.=<<___ if ($flavour =~ /3[12]/);	# 31/32-bit flavours only: zero-extend the 32-bit length to 64 bits
+ llgfr $len,$len
+___
$code.=<<___;
- stmg %r6,%r14,48($sp)
+ stm${g} %r6,%r14,6*$SIZE_T($sp)
aghi $Xi,-1
srlg $len,$len,4
xgr $Zhi,$tmp
stg $Zlo,8+1($Xi)
stg $Zhi,0+1($Xi)
- lmg %r6,%r14,48($sp)
+ lm${g} %r6,%r14,6*$SIZE_T($sp)
br %r14
.type gcm_ghash_4bit,\@function
.size gcm_ghash_4bit,(.-gcm_ghash_4bit)