-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# Westmere 4.58/+100% 1.43
# Sandy Bridge 3.90/+100% 1.36
# Haswell 3.88/+70% 1.18 0.72
+# Skylake 3.10/+60% 1.14 0.62
# Silvermont 11.0/+40% 4.80
+# Goldmont 4.10/+200% 2.10
# VIA Nano 6.71/+90% 2.47
# Sledgehammer 3.51/+180% 4.27
# Bulldozer 4.53/+140% 1.31
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
-$output=pop;
-open STDOUT,">$output";
+$output=pop and open STDOUT,">$output";
-&asm_init($ARGV[0],"poly1305-x86.pl",$ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");
$sse2=$avx=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
$avx = ($1>=2.09) + ($1>=2.10);
}
- if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+ # Probe clang/LLVM via "$CC -v". The major version may have several digits
+ # (clang 10 and later); a single [3-9] digit would never match there and
+ # would leave $avx at 0, silently disabling the AVX code paths.
+ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
}
sub lazy_reduction {
my $extra = shift;
-my $paddx = defined($extra) ? paddq : paddd;
################################################################
# lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
# and P. Schwabe
+ #
+ # [(*) see discussion in poly1305-armv4 module]
&movdqa ($T0,$D3);
&pand ($D3,$MASK);
# on Atom
&psllq ($T0,2);
&paddq ($T1,$D2); # h1 -> h2
- &$paddx ($T0,$D0); # h4 -> h0
+ &paddq ($T0,$D0); # h4 -> h0 (*)
&pand ($D1,$MASK);
&movdqa ($D2,$T1);
&psrlq ($T1,26);
&movdqa ($T0,$T1); # -> base 2^26 ...
&pand ($T1,$MASK);
- &paddd ($D0,$T1); # ... and accumuate
+ &paddd ($D0,$T1); # ... and accumulate
&movdqa ($T1,$T0);
&psrlq ($T0,26);