Following the license change, modify the boilerplates in crypto/bn/
[openssl.git] / crypto / bn / asm / via-mont.pl
index ce3cd61eb39c1f80e6372c4ae49c2ce21c7418a9..1b5cdd8f3a29872376b55d293908860762e607be 100644 (file)
@@ -1,7 +1,14 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -69,7 +76,7 @@
 # dsa 1024 bits 0.001346s 0.001595s    742.7    627.0
 # dsa 2048 bits 0.004745s 0.005582s    210.7    179.1
 #
-# Conclusions: 
+# Conclusions:
 # - VIA SDK leaves a *lot* of room for improvement (which this
 #   implementation successfully fills:-);
 # - 'rep montmul' gives up to >3x performance improvement depending on
@@ -81,7 +88,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 
-&asm_init($ARGV[0],"via-mont.pl");
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
 
 # int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 $func="bn_mul_mont_padlock";
@@ -187,17 +197,12 @@ $sp=&DWP(28,"esp");
        &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
 
        &mov    ("ecx","ebp");
-       &xor    ("edx","edx");                  # i=0
        &lea    ("esi",&DWP(64,"esp"));         # tp
        # edi still points at the end of padded np copy...
-       &mov    ("eax",&DWP(-4-$pad,"edi"));    # np[num-1]
        &neg    ("ebp");
        &lea    ("ebp",&DWP(-$pad,"edi","ebp",4));      # so just "rewind"
        &mov    ("edi",$rp);                    # restore rp
-
-       &shr    ("eax",30);                     # boundary condition...
-       &jz     (&label("copy"));               # ... is met
-       &xor    ("edx","edx");                  # clear CF
+       &xor    ("edx","edx");                  # i=0 and clear CF
 
 &set_label("sub",8);
        &mov    ("eax",&DWP(0,"esi","edx",4));
@@ -208,18 +213,15 @@ $sp=&DWP(28,"esp");
 
        &mov    ("eax",&DWP(0,"esi","edx",4));  # upmost overflow bit
        &sbb    ("eax",0);
-       &and    ("esi","eax");
-       &not    ("eax");
-       &mov    ("ebp","edi");
-       &and    ("ebp","eax");
-       &or     ("esi","ebp");                  # tp=carry?tp:rp
 
        &mov    ("ecx","edx");                  # num
-       &xor    ("edx","edx");                  # i=0
+       &mov    ("edx",0);                      # i=0
 
 &set_label("copy",8);
-       &mov    ("eax",&DWP(0,"esi","edx",4));
-       &mov    (&DWP(64,"esp","edx",4),"ecx"); # zap tp
+       &mov    ("ebx",&DWP(0,"esi","edx",4));
+       &mov    ("eax",&DWP(0,"edi","edx",4));
+       &mov    (&DWP(0,"esi","edx",4),"ecx");  # zap tp
+       &cmovc  ("eax","ebx");
        &mov    (&DWP(0,"edi","edx",4),"eax");
        &lea    ("edx",&DWP(1,"edx"));          # i++
        &loop   (&label("copy"));
@@ -245,3 +247,5 @@ $sp=&DWP(28,"esp");
 &asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
+
+close STDOUT;