#! /usr/bin/env perl
-# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
! if a+b >= modulus, subtract modulus.
!
! But since comparison implies subtraction, we subtract
- ! modulus and then add it back if subraction borrowed.
+ ! modulus and then add it back if subtraction borrowed.
subcc @acc[0],-1,@acc[0]
subccc @acc[1],-1,@acc[1]
mov $ap,$ap_real
mov $bp,$bp_real
- ld [$bp],@acc[0] ! in2_x
- ld [$bp+4],@acc[1]
- ld [$bp+8],@acc[2]
- ld [$bp+12],@acc[3]
- ld [$bp+16],@acc[4]
- ld [$bp+20],@acc[5]
- ld [$bp+24],@acc[6]
- ld [$bp+28],@acc[7]
- ld [$bp+32],$t0 ! in2_y
- ld [$bp+32+4],$t1
- ld [$bp+32+8],$t2
- ld [$bp+32+12],$t3
- ld [$bp+32+16],$t4
- ld [$bp+32+20],$t5
- ld [$bp+32+24],$t6
- ld [$bp+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$bp+64],$t0 ! in2_z
+ ld [$bp+64+4],$t1
+ ld [$bp+64+8],$t2
+ ld [$bp+64+12],$t3
+ ld [$bp+64+16],$t4
+ ld [$bp+64+20],$t5
+ ld [$bp+64+24],$t6
+ ld [$bp+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in2infty
+ or $t4,$t0,$t0 ! !in2infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-12]
- ld [$ap],@acc[0] ! in1_x
- ld [$ap+4],@acc[1]
- ld [$ap+8],@acc[2]
- ld [$ap+12],@acc[3]
- ld [$ap+16],@acc[4]
- ld [$ap+20],@acc[5]
- ld [$ap+24],@acc[6]
- ld [$ap+28],@acc[7]
- ld [$ap+32],$t0 ! in1_y
- ld [$ap+32+4],$t1
- ld [$ap+32+8],$t2
- ld [$ap+32+12],$t3
- ld [$ap+32+16],$t4
- ld [$ap+32+20],$t5
- ld [$ap+32+24],$t6
- ld [$ap+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$ap+64],$t0 ! in1_z
+ ld [$ap+64+4],$t1
+ ld [$ap+64+8],$t2
+ ld [$ap+64+12],$t3
+ ld [$ap+64+16],$t4
+ ld [$ap+64+20],$t5
+ ld [$ap+64+24],$t6
+ ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in1infty
+ or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
mov $ap,$ap_real
mov $bp,$bp_real
- ld [$ap],@acc[0] ! in1_x
- ld [$ap+4],@acc[1]
- ld [$ap+8],@acc[2]
- ld [$ap+12],@acc[3]
- ld [$ap+16],@acc[4]
- ld [$ap+20],@acc[5]
- ld [$ap+24],@acc[6]
- ld [$ap+28],@acc[7]
- ld [$ap+32],$t0 ! in1_y
- ld [$ap+32+4],$t1
- ld [$ap+32+8],$t2
- ld [$ap+32+12],$t3
- ld [$ap+32+16],$t4
- ld [$ap+32+20],$t5
- ld [$ap+32+24],$t6
- ld [$ap+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$ap+64],$t0 ! in1_z
+ ld [$ap+64+4],$t1
+ ld [$ap+64+8],$t2
+ ld [$ap+64+12],$t3
+ ld [$ap+64+16],$t4
+ ld [$ap+64+20],$t5
+ ld [$ap+64+24],$t6
+ ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in1infty
+ or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
ld [$inp],%l0
add $inp,4,$inp
subcc $index,1,$index
- stb %l0,[$out+64*0-1]
+ stb %l0,[$out+64*0]
srl %l0,8,%l1
- stb %l1,[$out+64*1-1]
+ stb %l1,[$out+64*1]
srl %l0,16,%l2
- stb %l2,[$out+64*2-1]
+ stb %l2,[$out+64*2]
srl %l0,24,%l3
- stb %l3,[$out+64*3-1]
+ stb %l3,[$out+64*3]
bne .Loop_scatter_w7
add $out,64*4,$out
########################################################################
# Following subroutines are VIS3 counterparts of those above that
# implement ones found in ecp_nistz256.c. Key difference is that they
-# use 128-bit muliplication and addition with 64-bit carry, and in order
+# use 128-bit multiplication and addition with 64-bit carry, and in order
# to do that they perform conversion from uint32_t[8] to uint64_t[4] upon
# entry and vice versa on return.
#
ldx [$bp+8*($i+1)],$bi ! bp[$i+1]
___
$code.=<<___;
- addcc $acc1,$t0,$acc1 ! accumulate high parts of multiplication
+ addcc $acc1,$t0,$acc1 ! accumulate high parts of multiplication
sllx $acc0,32,$t0
addxccc $acc2,$t1,$acc2
srlx $acc0,32,$t1
srlx $acc0,32,$t1
addxccc $acc3,$t2,$acc2 ! +=acc[0]*0xFFFFFFFF00000001
sub $acc0,$t0,$t2 ! acc0*0xFFFFFFFF00000001, low part
- addxc %g0,$t3,$acc3 ! cant't overflow
+ addxc %g0,$t3,$acc3 ! can't overflow
___
}
$code.=<<___;
stx $acc2,[%sp+LOCALS64+$in2_y+16]
stx $acc3,[%sp+LOCALS64+$in2_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in2infty
- stx $a0,[%fp+STACK_BIAS-8]
-
ld [$bp+64],$acc0 ! in2_z
ld [$bp+64+4],$t0
ld [$bp+64+8],$acc1
stx $acc2,[%sp+LOCALS64+$in2_z+16]
stx $acc3,[%sp+LOCALS64+$in2_z+24]
+ or $acc1,$acc0,$acc0
+ or $acc3,$acc2,$acc2
+ or $acc2,$acc0,$acc0
+ movrnz $acc0,-1,$acc0 ! !in2infty
+ stx $acc0,[%fp+STACK_BIAS-8]
+
or $a0,$t0,$a0
ld [$ap+32],$acc0 ! in1_y
or $a1,$t1,$a1
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in1infty
- stx $a0,[%fp+STACK_BIAS-16]
-
ldx [%sp+LOCALS64+$in2_z],$a0 ! forward load
ldx [%sp+LOCALS64+$in2_z+8],$a1
ldx [%sp+LOCALS64+$in2_z+16],$a2
stx $acc2,[%sp+LOCALS64+$in1_z+16]
stx $acc3,[%sp+LOCALS64+$in1_z+24]
+ or $acc1,$acc0,$acc0
+ or $acc3,$acc2,$acc2
+ or $acc2,$acc0,$acc0
+ movrnz $acc0,-1,$acc0 ! !in1infty
+ stx $acc0,[%fp+STACK_BIAS-16]
+
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z2sqr, in2_z);
add %sp,LOCALS64+$Z2sqr,$rp
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in1infty
- stx $a0,[%fp+STACK_BIAS-16]
-
ld [$ap+64],$a0 ! in1_z
ld [$ap+64+4],$t0
ld [$ap+64+8],$a1
stx $a2,[%sp+LOCALS64+$in1_z+16]
stx $a3,[%sp+LOCALS64+$in1_z+24]
+ or $a1,$a0,$t0
+ or $a3,$a2,$t2
+ or $t2,$t0,$t0
+ movrnz $t0,-1,$t0 ! !in1infty
+ stx $t0,[%fp+STACK_BIAS-16]
+
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z1sqr, in1_z);
add %sp,LOCALS64+$Z1sqr,$rp