- .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64
+ .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64
;
; w.j.m. 15-jan-1999
;
movl r6,r0 ; return c
ret
\f
- .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64
+ .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
;
; w.j.m. 15-jan-1999
;
movl #1,r0 ; return SS$_NORMAL
ret
\f
- .title (generated)
-
- .psect code,nowrt
-
-.entry BN_DIV_WORDS,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
- subl2 #4,sp
-
- clrl r9
- movl #2,r8
-
- tstl 12(ap)
- bneq noname.2
- mnegl #1,r10
- brw noname.3
- tstl r0
- nop
-noname.2:
-
- pushl 12(ap)
- calls #1,BN_NUM_BITS_WORD
- movl r0,r7
-
- cmpl r7,#32
- beql noname.4
- ashl r7,#1,r2
- cmpl 4(ap),r2
- blequ noname.4
-
- pushl r7
- calls #1,BN_DIV_WORDS_ABORT
-noname.4:
-
- subl3 r7,#32,r7
-
- movl 12(ap),r2
- cmpl 4(ap),r2
- blssu noname.5
- subl2 r2,4(ap)
-noname.5:
-
- tstl r7
- beql noname.6
-
- ashl r7,r2,12(ap)
-
- ashl r7,4(ap),r4
- subl3 r7,#32,r3
- subl3 r3,#32,r2
- extzv r3,r2,8(ap),r2
- bisl3 r4,r2,4(ap)
-
- ashl r7,8(ap),8(ap)
-noname.6:
-
- bicl3 #65535,12(ap),r2
- extzv #16,#16,r2,r5
-
- bicl3 #-65536,12(ap),r6
-
-noname.7:
-
- moval 4(ap),r2
- movzwl 2(r2),r0
- cmpl r0,r5
- bneq noname.8
-
- movzwl #65535,r4
- brb noname.9
-noname.8:
-
- clrl r1
- movl (r2),r0
- movl r5,r2
- bgeq vcg.1
- cmpl r2,r0
- bgtru vcg.2
- incl r1
- brb vcg.2
- nop
-vcg.1:
- ediv r2,r0,r1,r0
-vcg.2:
- movl r1,r4
-noname.9:
-
-noname.10:
-
- mull3 r5,r4,r0
- subl3 r0,4(ap),r3
-
- bicl3 #65535,r3,r0
- bneq noname.13
- mull3 r6,r4,r2
- ashl #16,r3,r1
- bicl3 #65535,8(ap),r0
- extzv #16,#16,r0,r0
- addl2 r0,r1
- cmpl r2,r1
- bgtru noname.12
-noname.11:
-
- brb noname.13
- nop
-noname.12:
-
- decl r4
- brb noname.10
-noname.13:
-
- mull3 r5,r4,r1
-
- mull3 r6,r4,r0
-
- extzv #16,#16,r0,r3
-
- ashl #16,r0,r2
- bicl3 #65535,r2,r0
-
- addl2 r3,r1
-
- moval 8(ap),r3
- cmpl (r3),r0
- bgequ noname.15
- incl r1
-noname.15:
-
- subl2 r0,(r3)
-
- cmpl 4(ap),r1
- bgequ noname.16
-
- addl2 12(ap),4(ap)
-
- decl r4
-noname.16:
-
- subl2 r1,4(ap)
-
- decl r8
- beql noname.18
-noname.17:
-
- ashl #16,r4,r9
-
- ashl #16,4(ap),r2
- movzwl 2(r3),r0
- bisl2 r0,r2
- bicl3 #0,r2,4(ap)
-
- bicl3 #-65536,(r3),r0
- ashl #16,r0,(r3)
- brw noname.7
- nop
-noname.18:
+ .title vax_bn_div_words unsigned divide
+;
+; Richard Levitte 20-Nov-2000
+;
+; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
+; {
+; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
+; }
+;
+; Using EDIV would be very easy, if it didn't do signed calculations.
+; Any time, any of the input numbers are signed, there are problems,
+; usually with integer overflow, at which point it returns useless
+; data (the quotient gets the value of l, and the remainder becomes 0).
+;
+; If it was just for the dividend, it would be very easy, just divide
+; it by 2 (unsigned), do the division, multiply the resulting quotient
+; and remainder by 2, add the bit that was dropped when dividing by 2
+; to the remainder, and do some adjustment so the remainder doesn't
+; end up larger than the divisor. This method works as long as the
+; divisor is positive, so we'll keep that (with a small adjustment)
+; as the main method.
+; For some cases when the divisor is negative (from EDIV's point of
+; view, i.e. when the highest bit is set), dividing the dividend by
+; 2 isn't enough, it needs to be divided by 4. Furthermore, the
+; divisor needs to be divided by 2 (unsigned) as well, to avoid more
+; problems with the sign. In this case, a little extra fiddling with
+; the remainder is required.
+;
+; So, the simplest way to handle this is always to divide the dividend
+; by 4, and to divide the divisor by 2 if it's highest bit is set.
+; After EDIV has been used, the quotient gets multiplied by 4 if the
+; original divisor was positive, otherwise 2. The remainder, oddly
+; enough, is *always* multiplied by 4.
+;
+; The routine ends with comparing the resulting remainder with the
+; original divisor and if the remainder is larger, subtract the
+; original divisor from it, and increase the quotient by 1. This is
+; done until the remainder is smaller than the divisor.
+;
+; The complete algorithm looks like this:
+;
+; d' = d
+; l' = l & 3
+; [h,l] = [h,l] >> 2
+; [q,r] = floor([h,l] / d) # This is the EDIV operation
+; if (q < 0) q = -q # I doubt this is necessary any more
+;
+; r' = r >> 30
+; if (d' >= 0) q = q << 1
+; q = q << 1
+; r = (r << 2) + l'
+;
+; if (d' < 0)
+; {
+; [r',r] = [r',r] - q
+; while ([r',r] < 0)
+; {
+; [r',r] = [r',r] + d
+; q = q - 1
+; }
+; }
+;
+; while ([r',r] >= d)
+; {
+; [r',r] = [r',r] - d
+; q = q + 1
+; }
+;
+; return q
- bisl2 r4,r9
+h=4 ;(AP) h by value (input)
+l=8 ;(AP) l by value (input)
+d=12 ;(AP) d by value (input)
- movl r9,r10
+;lprim=r5
+;rprim=r6
+;dprim=r7
-noname.3:
- movl r10,r0
- ret
- tstl r0
-\f
.psect code,nowrt
-.entry BN_ADD_WORDS,^m<r2,r3,r4,r5,r6,r7>
-
- tstl 16(ap)
- bgtr noname.21
- clrl r7
- brw noname.22
-noname.21:
-
- clrl r4
-
- tstl r0
-noname.23:
-
- movl 8(ap),r6
- addl3 r4,(r6),r2
-
- bicl2 #0,r2
-
- clrl r0
- cmpl r2,r4
- bgequ vcg.3
- incl r0
-vcg.3:
- movl r0,r4
-
- movl 12(ap),r5
- addl3 (r5),r2,r1
- bicl2 #0,r1
-
- clrl r0
- cmpl r1,r2
- bgequ vcg.4
- incl r0
-vcg.4:
- addl2 r0,r4
-
- movl 4(ap),r3
- movl r1,(r3)
-
- decl 16(ap)
- bgtr gen.1
- brw noname.25
-gen.1:
-noname.24:
-
- addl3 r4,4(r6),r2
-
- bicl2 #0,r2
-
- clrl r0
- cmpl r2,r4
- bgequ vcg.5
- incl r0
-vcg.5:
- movl r0,r4
-
- addl3 4(r5),r2,r1
- bicl2 #0,r1
-
- clrl r0
- cmpl r1,r2
- bgequ vcg.6
- incl r0
-vcg.6:
- addl2 r0,r4
-
- movl r1,4(r3)
-
- decl 16(ap)
- bleq noname.25
-noname.26:
-
- addl3 r4,8(r6),r2
-
- bicl2 #0,r2
-
- clrl r0
- cmpl r2,r4
- bgequ vcg.7
- incl r0
-vcg.7:
- movl r0,r4
-
- addl3 8(r5),r2,r1
- bicl2 #0,r1
-
- clrl r0
- cmpl r1,r2
- bgequ vcg.8
- incl r0
-vcg.8:
- addl2 r0,r4
-
- movl r1,8(r3)
-
- decl 16(ap)
- bleq noname.25
-noname.27:
-
- addl3 r4,12(r6),r2
+.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7>
+ movl l(ap),r2
+ movl h(ap),r3
+ movl d(ap),r4
+
+ bicl3 #^XFFFFFFFC,r2,r5 ; l' = l & 3
+ bicl3 #^X00000003,r2,r2
+
+ bicl3 #^XFFFFFFFC,r3,r6
+ bicl3 #^X00000003,r3,r3
+
+ addl r6,r2
+ rotl #-2,r2,r2 ; l = l >> 2
+ rotl #-2,r3,r3 ; h = h >> 2
+
+ movl #0,r6
+ movl r4,r7 ; d' = d
- bicl2 #0,r2
+ tstl r4
+ beql 666$ ; Uh-oh, the divisor is 0...
+ bgtr 1$
+ rotl #-1,r4,r4 ; If d is negative, shift it right.
+ bicl2 #^X80000000,r4 ; Since d is then a large number, the
+ ; lowest bit is insignificant
+ ; (contradict that, and I'll fix the problem!)
+1$:
+ ediv r4,r2,r2,r3 ; Do the actual division
+
+ tstl r2
+ bgeq 3$
+ mnegl r2,r2 ; if q < 0, negate it
+3$:
+ tstl r7
+ blss 4$
+ ashl #1,r2,r2 ; q = q << 1
+4$:
+ ashl #1,r2,r2 ; q = q << 1
+ rotl #2,r3,r3 ; r = r << 2
+ bicl3 #^XFFFFFFFC,r3,r6 ; r' gets the high bits from r
+ bicl3 #^X00000003,r3,r3
+ addl r5,r3 ; r = r + l'
- clrl r0
- cmpl r2,r4
- bgequ vcg.9
- incl r0
-vcg.9:
- movl r0,r4
-
- addl3 12(r5),r2,r1
- bicl2 #0,r1
-
- clrl r0
- cmpl r1,r2
- bgequ vcg.10
- incl r0
-vcg.10:
- addl2 r0,r4
+ tstl r7
+ bgeq 5$
+ bitl #1,r7
+ beql 5$ ; if d < 0 && d & 1
+ subl r2,r3 ; [r',r] = [r',r] - q
+ sbwc #0,r6
+45$:
+ bgeq 5$ ; while r < 0
+ decl r2 ; q = q - 1
+ addl r7,r3 ; [r',r] = [r',r] + d
+ adwc #0,r6
+ brb 45$
+
+5$:
+ tstl r6
+ bneq 6$
+ cmpl r3,r7
+ blssu 42$ ; while [r',r] >= d'
+6$:
+ subl r7,r3 ; [r',r] = [r',r] - d
+ sbwc #0,r6
+ incl r2 ; q = q + 1
+ brb 5$
+42$:
+; movl r3,r1
+ movl r2,r0
+ ret
+666$:
+ movl #^XFFFFFFFF,r0
+ ret
+\f
+ .title vax_bn_add_words unsigned add of two arrays
+;
+; Richard Levitte 20-Nov-2000
+;
+; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+; ULONG c = 0;
+; int i;
+; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
+; return(c);
+; }
- movl r1,12(r3)
+r=4 ;(AP) r by reference (output)
+a=8 ;(AP) a by reference (input)
+b=12 ;(AP) b by reference (input)
+n=16 ;(AP) n by value (input)
- decl 16(ap)
- bleq noname.25
-noname.28:
- addl3 #16,r6,8(ap)
+ .psect code,nowrt
- addl3 #16,r5,12(ap)
+.entry bn_add_words,^m<r2,r3,r4,r5,r6>
- addl3 #16,r3,4(ap)
- brw noname.23
- tstl r0
-noname.25:
+ moval @r(ap),r2
+ moval @a(ap),r3
+ moval @b(ap),r4
+ movl n(ap),r5 ; assumed >0 by C code
+ clrl r0 ; c
- movl r4,r7
+ tstl r5 ; carry = 0
+ bleq 666$
-noname.22:
- movl r7,r0
- ret
- nop
+0$:
+ movl (r3)+,r6 ; carry untouched
+ adwc (r4)+,r6 ; carry used and touched
+ movl r6,(r2)+ ; carry untouched
+ sobgtr r5,0$ ; carry untouched
+ adwc #0,r0
+666$:
+ ret
\f
+ .title vax_bn_sub_words unsigned add of two arrays
+;
+; Richard Levitte 20-Nov-2000
+;
+; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+; ULONG c = 0;
+; int i;
+; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
+; return(c);
+; }
-;r=4 ;(AP)
-;a=8 ;(AP)
-;b=12 ;(AP)
-;n=16 ;(AP) n by value (input)
+r=4 ;(AP) r by reference (output)
+a=8 ;(AP) a by reference (input)
+b=12 ;(AP) b by reference (input)
+n=16 ;(AP) n by value (input)
- .psect code,nowrt
-.entry BN_SUB_WORDS,^m<r2,r3,r4,r5,r6,r7>
+ .psect code,nowrt
- clrl r6
+.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
- tstl 16(ap)
- bgtr noname.31
- clrl r7
- brw noname.32
- tstl r0
-noname.31:
+ moval @r(ap),r2
+ moval @a(ap),r3
+ moval @b(ap),r4
+ movl n(ap),r5 ; assumed >0 by C code
+ clrl r0 ; c
-noname.33:
+ tstl r5 ; carry = 0
+ bleq 666$
- movl 8(ap),r5
- movl (r5),r1
- movl 12(ap),r4
- movl (r4),r2
-
- movl 4(ap),r3
- subl3 r2,r1,r0
- subl2 r6,r0
- bicl3 #0,r0,(r3)
-
- cmpl r1,r2
- beql noname.34
- clrl r0
- cmpl r1,r2
- bgequ vcg.11
- incl r0
-vcg.11:
- movl r0,r6
-noname.34:
-
- decl 16(ap)
- bgtr gen.2
- brw noname.36
-gen.2:
-noname.35:
-
- movl 4(r5),r2
- movl 4(r4),r1
-
- subl3 r1,r2,r0
- subl2 r6,r0
- bicl3 #0,r0,4(r3)
-
- cmpl r2,r1
- beql noname.37
- clrl r0
- cmpl r2,r1
- bgequ vcg.12
- incl r0
-vcg.12:
- movl r0,r6
-noname.37:
-
- decl 16(ap)
- bleq noname.36
-noname.38:
-
- movl 8(r5),r1
- movl 8(r4),r2
-
- subl3 r2,r1,r0
- subl2 r6,r0
- bicl3 #0,r0,8(r3)
-
- cmpl r1,r2
- beql noname.39
- clrl r0
- cmpl r1,r2
- bgequ vcg.13
- incl r0
-vcg.13:
- movl r0,r6
-noname.39:
-
- decl 16(ap)
- bleq noname.36
-noname.40:
-
- movl 12(r5),r1
- movl 12(r4),r2
-
- subl3 r2,r1,r0
- subl2 r6,r0
- bicl3 #0,r0,12(r3)
-
- cmpl r1,r2
- beql noname.41
- clrl r0
- cmpl r1,r2
- bgequ vcg.14
- incl r0
-vcg.14:
- movl r0,r6
-noname.41:
-
- decl 16(ap)
- bleq noname.36
-noname.42:
-
- addl3 #16,r5,8(ap)
-
- addl3 #16,r4,12(ap)
-
- addl3 #16,r3,4(ap)
- brw noname.33
- tstl r0
-noname.36:
-
- movl r6,r7
-
-noname.32:
- movl r7,r0
- ret
- nop
+0$:
+ movl (r3)+,r6 ; carry untouched
+ sbwc (r4)+,r6 ; carry used and touched
+ movl r6,(r2)+ ; carry untouched
+ sobgtr r5,0$ ; carry untouched
+ adwc #0,r0
+666$:
+ ret
\f
;r=4 ;(AP)
ret
; For now, the code below doesn't work, so I end this prematurely.
-.end
-
- .title vax_bn_div64 division 64/32=>32
-;
-; r.l. 16-jan-1998
-;
-; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d)
-; return <h,l>/d;
-;
-
- .psect code,nowrt
-
-h=4 ;(AP) by value (input)
-l=8 ;(AP) by value (input)
-d=12 ;(AP) by value (input)
-
-.entry bn_div64,^m<r2,r3,r4,r5,r6,r7,r8,r9>
-
- movl l(ap),r2 ; l
- movl h(ap),r3 ; h
- movl d(ap),r4 ; d
- clrl r5 ; q
- clrl r6 ; r
-
- ; Treat "negative" specially
- tstl r3
- blss 30$
-
- tstl r4
- beql 90$
-
- ediv r4,r2,r5,r6
- bvs 666$
-
- movl r5,r0
- ret
-
-30$:
- ; The theory here is to do some harmless shifting and a little
- ; bit of rounding (brackets are to designate when decimals are
- ; cut off):
- ;
- ; result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ]
-
- movl #0,r7
- movl r3,r8 ; copy h
- ashq #-1,r7,r7 ; [<h,0>/2] => <r8,r7>
- bicl2 #^X80000000,r8 ; Remove "sign"
-
- movl r4,r9 ; copy d
- ashl #-1,r9,r9 ; [d/2] => r9
- bicl2 #^X80000000,r9 ; Remove "sign"
-
- addl2 r9,r7
- adwc #0,r8 ; [<h,0>/2] + [d/2] => <r8,r7>
-
- ediv r4,r7,r5,r6 ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6>
- bvs 666$
-
- movl #0,r6
- ashq #1,r5,r5 ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5
-
- movl #0,r3
- ediv r4,r2,r8,r9 ; [ l / d ] => <r8,r9>
-
- addl2 r8,r5 ;
- bcs 666$
-
- movl r5,r0
- ret
-
-90$:
- movl #-1,r0
- ret
-
-666$:
-
-
.end