-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
-#ifdef __thumb2__
- it cs
-#endif
- movcs $ff,#-1 @ $ff = carry ? -1 : 0
+ adc $ff,$ff,#0
b .Lreduce_by_sub
.size __ecp_nistz256_mul_by_2,.-__ecp_nistz256_mul_by_2
adcs $a6,$a6,$t2
mov $ff,#0
adcs $a7,$a7,$t3
-#ifdef __thumb2__
- it cs
-#endif
- movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
+ adc $ff,$ff,#0
ldr lr,[sp],#4 @ pop lr
.Lreduce_by_sub:
- @ if a+b carries, subtract modulus.
+ @ if a+b >= modulus, subtract modulus.
@
+ @ But since comparison implies subtraction, we subtract
+ @ modulus and then add it back if subtraction borrowed.
+
+ subs $a0,$a0,#-1
+ sbcs $a1,$a1,#-1
+ sbcs $a2,$a2,#-1
+ sbcs $a3,$a3,#0
+ sbcs $a4,$a4,#0
+ sbcs $a5,$a5,#0
+ sbcs $a6,$a6,#1
+ sbcs $a7,$a7,#-1
+ sbc $ff,$ff,#0
+
@ Note that because mod has special form, i.e. consists of
@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
- @ using value of broadcasted carry as a whole or extracting
- @ single bit. Follow $ff register...
+ @ using value of borrow as a whole or extracting single bit.
+ @ Follow $ff register...
- subs $a0,$a0,$ff @ subtract synthesized modulus
- sbcs $a1,$a1,$ff
+ adds $a0,$a0,$ff @ add synthesized modulus
+ adcs $a1,$a1,$ff
str $a0,[$r_ptr,#0]
- sbcs $a2,$a2,$ff
+ adcs $a2,$a2,$ff
str $a1,[$r_ptr,#4]
- sbcs $a3,$a3,#0
+ adcs $a3,$a3,#0
str $a2,[$r_ptr,#8]
- sbcs $a4,$a4,#0
+ adcs $a4,$a4,#0
str $a3,[$r_ptr,#12]
- sbcs $a5,$a5,#0
+ adcs $a5,$a5,#0
str $a4,[$r_ptr,#16]
- sbcs $a6,$a6,$ff,lsr#31
+ adcs $a6,$a6,$ff,lsr#31
str $a5,[$r_ptr,#20]
- sbcs $a7,$a7,$ff
+ adcs $a7,$a7,$ff
str $a6,[$r_ptr,#24]
str $a7,[$r_ptr,#28]
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
-#ifdef __thumb2__
- it cs
-#endif
- movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
-
- subs $a0,$a0,$ff @ subtract synthesized modulus, see
- @ .Lreduce_by_sub for details, except
- @ that we don't write anything to
- @ memory, but keep intermediate
- @ results in registers...
- sbcs $a1,$a1,$ff
- sbcs $a2,$a2,$ff
+ adc $ff,$ff,#0
+
+ subs $a0,$a0,#-1 @ .Lreduce_by_sub but without stores
+ sbcs $a1,$a1,#-1
+ sbcs $a2,$a2,#-1
sbcs $a3,$a3,#0
sbcs $a4,$a4,#0
- ldr $b_ptr,[$a_ptr,#0]
sbcs $a5,$a5,#0
+ sbcs $a6,$a6,#1
+ sbcs $a7,$a7,#-1
+ sbc $ff,$ff,#0
+
+ adds $a0,$a0,$ff @ add synthesized modulus
+ adcs $a1,$a1,$ff
+ adcs $a2,$a2,$ff
+ adcs $a3,$a3,#0
+ adcs $a4,$a4,#0
+ ldr $b_ptr,[$a_ptr,#0]
+ adcs $a5,$a5,#0
ldr $t1,[$a_ptr,#4]
- sbcs $a6,$a6,$ff,lsr#31
+ adcs $a6,$a6,$ff,lsr#31
ldr $t2,[$a_ptr,#8]
- sbcs $a7,$a7,$ff
+ adc $a7,$a7,$ff
ldr $t0,[$a_ptr,#12]
adds $a0,$a0,$b_ptr @ 2*a[0:7]+=a[0:7]
adcs $a6,$a6,$t2
mov $ff,#0
adcs $a7,$a7,$t3
-#ifdef __thumb2__
- it cs
-#endif
- movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
+ adc $ff,$ff,#0
ldr lr,[sp],#4 @ pop lr
b .Lreduce_by_sub
mov $ff,$a0,lsl#31 @ place least significant bit to most
@ significant position, now arithmetic
@ right shift by 31 will produce -1 or
- @ 0, while logical rigth shift 1 or 0,
+ @ 0, while logical right shift 1 or 0,
@ this is how modulus is conditionally
@ synthesized in this case...
ldr $a3,[$a_ptr,#12]
.Loop_scatter_w7:
ldr $mask,[$inp],#4
subs $index,$index,#1
- strb $mask,[$out,#64*0-1]
+ strb $mask,[$out,#64*0]
mov $mask,$mask,lsr#8
- strb $mask,[$out,#64*1-1]
+ strb $mask,[$out,#64*1]
mov $mask,$mask,lsr#8
- strb $mask,[$out,#64*2-1]
+ strb $mask,[$out,#64*2]
mov $mask,$mask,lsr#8
- strb $mask,[$out,#64*3-1]
+ strb $mask,[$out,#64*3]
add $out,$out,#64*4
bne .Loop_scatter_w7
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
-#ifdef __thumb2__
- it cs
-#endif
- movcs $ff,#-1 @ $ff = carry ? -1 : 0
+ adc $ff,$ff,#0
+
+ @ if a+b >= modulus, subtract modulus.
+ @
+ @ But since comparison implies subtraction, we subtract
+ @ modulus and then add it back if subtraction borrowed.
+
+ subs $a0,$a0,#-1
+ sbcs $a1,$a1,#-1
+ sbcs $a2,$a2,#-1
+ sbcs $a3,$a3,#0
+ sbcs $a4,$a4,#0
+ sbcs $a5,$a5,#0
+ sbcs $a6,$a6,#1
+ sbcs $a7,$a7,#-1
+ sbc $ff,$ff,#0
+
+ @ Note that because mod has special form, i.e. consists of
+ @ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
+ @ using value of borrow as a whole or extracting single bit.
+ @ Follow $ff register...
- subs $a0,$a0,$ff @ subtract synthesized modulus
- sbcs $a1,$a1,$ff
+ adds $a0,$a0,$ff @ add synthesized modulus
+ adcs $a1,$a1,$ff
str $a0,[$r_ptr,#0]
- sbcs $a2,$a2,$ff
+ adcs $a2,$a2,$ff
str $a1,[$r_ptr,#4]
- sbcs $a3,$a3,#0
+ adcs $a3,$a3,#0
str $a2,[$r_ptr,#8]
- sbcs $a4,$a4,#0
+ adcs $a4,$a4,#0
str $a3,[$r_ptr,#12]
- sbcs $a5,$a5,#0
+ adcs $a5,$a5,#0
str $a4,[$r_ptr,#16]
- sbcs $a6,$a6,$ff,lsr#31
+ adcs $a6,$a6,$ff,lsr#31
str $a5,[$r_ptr,#20]
- sbcs $a7,$a7,$ff
+ adcs $a7,$a7,$ff
str $a6,[$r_ptr,#24]
str $a7,[$r_ptr,#28]
___
########################################################################
-# following subroutines are "literal" implemetation of those found in
+# following subroutines are "literal" implementation of those found in
# ecp_nistz256.c
#
########################################################################
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*18+16
- ldmia $b_ptr!,{r4-r11} @ copy in2
+ ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $b_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $b_ptr!,{r4-r11} @ copy in2_y
+ stmia r3!,{r4-r11}
+ ldmia $b_ptr,{r4-r11} @ copy in2_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $b_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
stmia r3,{r4-r11}
str r12,[sp,#32*18+8] @ !in2infty
- ldmia $a_ptr!,{r4-r11} @ copy in1
+ ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $a_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $a_ptr!,{r4-r11} @ copy in1_y
+ stmia r3!,{r4-r11}
+ ldmia $a_ptr,{r4-r11} @ copy in1_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
$code.=<<___;
.Ladd_done:
add sp,sp,#32*18+16+16 @ +16 means "skip even over saved r0-r3"
-#if __ARM_ARCH__>=5 || defined(__thumb__)
+#if __ARM_ARCH__>=5 || !defined(__thumb__)
ldmia sp!,{r4-r12,pc}
#else
ldmia sp!,{r4-r12,lr}
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*15
- ldmia $a_ptr!,{r4-r11} @ copy in1
+ ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $a_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $a_ptr!,{r4-r11} @ copy in1_y
+ stmia r3!,{r4-r11}
+ ldmia $a_ptr,{r4-r11} @ copy in1_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
stmia r3,{r4-r11}
str r12,[sp,#32*15+4] @ !in1infty
- ldmia $b_ptr!,{r4-r11} @ copy in2
+ ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $b_ptr!,{r4-r11}
+ ldmia $b_ptr!,{r4-r11} @ copy in2_y
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6