X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fvms.mar;h=2a752489f551b66534a64f8fd72b4c9b9f509f7a;hp=754ab5347a47839dd985b27385712d668fbdce0f;hb=6ab285bf4c9e0f48f3e8af1ad952cd5224c7d461;hpb=73343ac38abfbe34cf889e9da286d5c577ce7973

diff --git a/crypto/bn/asm/vms.mar b/crypto/bn/asm/vms.mar
index 754ab5347a..2a752489f5 100644
--- a/crypto/bn/asm/vms.mar
+++ b/crypto/bn/asm/vms.mar
@@ -1,4 +1,4 @@
-	.title	vax_bn_mul_add_word  unsigned multiply & add, 32*32+32+32=>64
+	.title	vax_bn_mul_add_words  unsigned multiply & add, 32*32+32+32=>64
 ;
 ; w.j.m. 15-jan-1999
 ;
@@ -59,7 +59,7 @@ w=16 ;(AP)	w	by value (input)
 	movl	r6,r0			; return c
 	ret
 
-	.title	vax_bn_mul_word  unsigned multiply & add, 32*32+32=>64
+	.title	vax_bn_mul_words  unsigned multiply & add, 32*32+32=>64
 ;
 ; w.j.m. 15-jan-1999
 ;
@@ -172,145 +172,148 @@ n=12 ;(AP)	n	by value (input)
 ; }
 ;
 ; Using EDIV would be very easy, if it didn't do signed calculations.
-; Therefore, som extra things have to happen around it.  The way to
-; handle that is to shift all operands right one step (basically dividing
-; them by 2) and handle the different cases depending on what the lowest
-; bit of each operand was.
+; Any time, any of the input numbers are signed, there are problems,
+; usually with integer overflow, at which point it returns useless
+; data (the quotient gets the value of l, and the remainder becomes 0).
 ;
-; To start with, let's define the following:
+; If it was just for the dividend, it would be very easy, just divide
+; it by 2 (unsigned), do the division, multiply the resulting quotient
+; and remainder by 2, add the bit that was dropped when dividing by 2
+; to the remainder, and do some adjustment so the remainder doesn't
+; end up larger than the divisor.  This method works as long as the
+; divisor is positive, so we'll keep that (with a small adjustment)
+; as the main method.
+; For some cases when the divisor is negative (from EDIV's point of
+; view, i.e. when the highest bit is set), dividing the dividend by
+; 2 isn't enough, it needs to be divided by 4.  Furthermore, the
+; divisor needs to be divided by 2 (unsigned) as well, to avoid more
+; problems with the sign.  In this case, a little extra fiddling with
+; the remainder is required.
 ;
-; a' = l & 1
-; a2 = <h,l> >> 1	# UNSIGNED shift!
-; b' = d & 1
-; b2 = d >> 1		# UNSIGNED shift!
+; So, the simplest way to handle this is always to divide the dividend
+; by 4, and to divide the divisor by 2 if it's highest bit is set.
+; After EDIV has been used, the quotient gets multiplied by 4 if the
+; original divisor was positive, otherwise 2.  The remainder, oddly
+; enough, is *always* multiplied by 4.
 ;
-; Now, use EDIV to calculate a quotient and a remainder:
+; The routine ends with comparing the resulting remainder with the
+; original divisor and if the remainder is larger, subtract the
+; original divisor from it, and increase the quotient by 1.  This is
+; done until the remainder is smaller than the divisor.
 ;
-; q'' = a2/b2
-; r'' = a2 - q''*b2
+; The complete algorithm looks like this:
 ;
-; If b' is 0, the quotient is already correct, we just need to adjust the
-; remainder:
+; d'    = d
+; l'    = l & 3
+; [h,l] = [h,l] >> 2
+; [q,r] = floor([h,l] / d)	# This is the EDIV operation
+; if (q < 0) q = -q		# I doubt this is necessary any more
 ;
-; if (b' == 0)
-;   {
-;     r = 2*r'' + a'
-;     q = q''
-;   }
-;
-; If b' is 1, we need to do other adjustements.  The first thought is the
-; following (note that r' will not always have the right value, but an
-; adjustement follows further down):
+; r'    = r >> 30
+; if (d' >= 0) q = q << 1
+; q     = q << 1
+; r     = (r << 2) + l'
 ;
-; if (b' == 1)
+; if (d' < 0)
 ;   {
-;     q' = q''
-;     r' = a - q'*b
-;
-; However, one can note the folowing relationship:
-;
-;                         r'' = a2 - q''*b2
-;                  =>   2*r'' = 2*a2 - 2*q''*b2
-;                             = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1,
-;                                 q' = q'' }
-;                             = a - a' - q'*(b - 1)
-;                             = a - q'*b - a' + q'
-;                             = r' - a' + q'
-;                  =>     r'  = 2*r'' - q' + a'
-;
-; This enables us to use r'' instead of discarding and calculating another
-; modulo:
-;
-; if (b' == 1)
-;   {
-;     q' = q''
-;     r' = (r'' << 1) - q' + a'
-;
-; Now, all we have to do is adjust r', because it might be < 0:
-;
-;     while (r' < 0)
+;     [r',r] = [r',r] - q
+;     while ([r',r] < 0)
 ;       {
-;         r' = r' + b
-;         q' = q' - 1
+;         [r',r] = [r',r] + d
+;         q = q - 1
 ;       }
 ;   }
 ;
-; return q'
+; while ([r',r] >= d)
+;   {
+;     [r',r] = [r',r] - d
+;     q = q + 1
+;   }
+;
+; return q
 
 h=4 ;(AP)	h	by value (input)
 l=8 ;(AP)	l	by value (input)
 d=12 ;(AP)	d	by value (input)
 
-;aprim=r5
-;a2=r6
-;a20=r6
-;a21=r7
-;bprim=r8
-;b2=r9
-;qprim=r10	; initially used as q''
-;rprim=r11	; initially used as r''
+;lprim=r5
+;rprim=r6
+;dprim=r7
 
 
 	.psect	code,nowrt
 
-.entry	bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
+.entry	bn_div_words,^m<r2,r3,r4,r5,r6,r7>
 	movl	l(ap),r2
 	movl	h(ap),r3
 	movl	d(ap),r4
 
-	movl	#0,r5
-	movl	#0,r8
-	movl	#0,r0
-
-	rotl	#-1,r2,r6	; a20 = l >> 1 (almost)
-	rotl	#-1,r3,r7	; a21 = h >> 1 (almost)
-	rotl	#-1,r4,r9	; b2 = d >> 1 (almost)
-
-	tstl	r6
-	bgeq	1$
-	xorl2	#^X80000000,r6	; fixup a20 so highest bit is 0
-	incl	r5		; a' = 1
-1$:
+	bicl3	#^XFFFFFFFC,r2,r5 ; l' = l & 3
+	bicl3	#^X00000003,r2,r2
+
+	bicl3	#^XFFFFFFFC,r3,r6
+	bicl3	#^X00000003,r3,r3
+        
+	addl	r6,r2
+	rotl	#-2,r2,r2	; l = l >> 2
+	rotl	#-2,r3,r3	; h = h >> 2
+                
+	movl	#0,r6
+	movl	r4,r7		; d' = d
+
+	tstl	r4
+	beql	666$		; Uh-oh, the divisor is 0...
+	bgtr	1$
+	rotl	#-1,r4,r4	; If d is negative, shift it right.
+	bicl2	#^X80000000,r4	; Since d is then a large number, the
+				; lowest bit is insignificant
+				; (contradict that, and I'll fix the problem!)
+1$:     
+	ediv	r4,r2,r2,r3	; Do the actual division
+
+	tstl	r2
+	bgeq	3$
+	mnegl	r2,r2		; if q < 0, negate it
+3$:     
 	tstl	r7
-	bgeq	2$
-	xorl2	#^X80000000,r6	; fixup a20 so highest bit is 1,
-				; since that's what was lowest in a21
-	xorl2	#^X80000000,r7	; fixup a21 so highest bit is 1
-2$:
-	tstl	r9
-	bgeq	666$		; Uh-oh, the divisor is 0...
-	bgtr	3$
-	xorl2	#^X80000000,r9	; fixup b2 so highest bit is 1
-	incl	r8
-3$:
-	tstl	r9
-	bneq	4$		; if b2 is 0, we know that bprim is 1
+	blss	4$
+	ashl	#1,r2,r2	; q = q << 1
+4$:     
+	ashl	#1,r2,r2	; q = q << 1
+	rotl	#2,r3,r3	; r = r << 2
+	bicl3	#^XFFFFFFFC,r3,r6 ; r' gets the high bits from r
+	bicl3	#^X00000003,r3,r3
+	addl	r5,r3		; r = r + l'
+
 	tstl	r7
-	bneq	666$		; if higher half isn't 0, we overflow
-	movl	r0,r6		; otherwise, we have our result
-	brb	42$
-4$:
-	ediv	r9,r6,r10,r11
-
-	tstl	r8
-	bneq	5$		; If b' != 0, go to the other part
-;	addl3	r11,r11,r1
-;	addl2	r5,r1
-	brb	42$
+	bgeq	5$
+	bitl	#1,r7
+	beql	5$		; if d < 0 && d & 1
+	subl	r2,r3		;   [r',r] = [r',r] - q
+	sbwc	#0,r6
+45$:
+	bgeq	5$		;   while r < 0
+	decl	r2		;     q = q - 1
+	addl	r7,r3		;     [r',r] = [r',r] + d
+	adwc	#0,r6
+	brb	45$
+
 5$:
-	ashl	#1,r11,r11
-	subl2	r10,r11
-	addl2	r5,r11
-	bgeq	7$
+	tstl	r6
+	bneq	6$
+	cmpl	r3,r7
+	blssu	42$		; while [r',r] >= d'
 6$:
-	decl	r10
-	addl2	r4,r11
-	blss	6$
-7$:
-;	movl	r11,r1
+	subl	r7,r3		;   [r',r] = [r',r] - d
+	sbwc	#0,r6
+	incl	r2		;   q = q + 1
+	brb	5$	
 42$:
-	movl	r10,r0
+;	movl	r3,r1
+	movl	r2,r0
+	ret
 666$:
+	movl	#^XFFFFFFFF,r0
 	ret
 
 	.title	vax_bn_add_words  unsigned add of two arrays