-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... gcc 4.3 appeared to generate poor code, therefore
-# the effort. The module delivers 55%-90% improvement on haviest ECDSA
-# verify and ECDH benchmarks for 163- and 571-bit keys on z990, and
-# 25%-30% - on z196(*). This is for 64-bit build. In 32-bit "highgprs"
-# case improvement is even higher, for example on z990 it was measured
-# 80%-150%. ECDSA sign is modest 9%-12% faster. Keep in mind that
-# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
-# all CPU time is burnt in it...
+# the effort. And indeed, the module delivers 55%-90%(*) improvement
+# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
+# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
+# This is for 64-bit build. In 32-bit "highgprs" case improvement is
+# even higher, for example on z990 it was measured 80%-150%. ECDSA
+# sign is modest 9%-12% faster. Keep in mind that these coefficients
+# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
+# burnt in it...
#
-# (*) Though no improvement could be measured if compared to code
-# generated by gcc 4.1. Keep in mind that z196 is out-of-order
-# execution core and is better at executing poor code.
+# (*) gcc 4.1 was observed to deliver better results than gcc 4.3,
+# so that improvement coefficients can vary from one specific
+# setup to another.
-$flavour = shift;
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$output and open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;
if ($SIZE_T==8) {
my @r=map("%r$_",(6..9));
$code.=<<___;
- bras $ra,_mul_1x1 # a1·b1
+ bras $ra,_mul_1x1 # a1·b1
stmg $lo,$hi,16($rp)
lg $a,`$stdframe+128+4*$SIZE_T`($sp)
lg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # a0·b0
+ bras $ra,_mul_1x1 # a0·b0
stmg $lo,$hi,0($rp)
lg $a,`$stdframe+128+3*$SIZE_T`($sp)
lg $b,`$stdframe+128+5*$SIZE_T`($sp)
xg $a,`$stdframe+128+4*$SIZE_T`($sp)
xg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
+ bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
lmg @r[0],@r[3],0($rp)
xgr $lo,$hi
xgr $hi,@r[1]
xgr $lo,@r[0]
xgr $hi,@r[2]
- xgr $lo,@r[3]
+ xgr $lo,@r[3]
xgr $hi,@r[3]
xgr $lo,$hi
stg $hi,16($rp)
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
-close STDOUT;
+close STDOUT or die "error closing STDOUT: $!";