X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fvms.mar;h=2a752489f551b66534a64f8fd72b4c9b9f509f7a;hp=754ab5347a47839dd985b27385712d668fbdce0f;hb=6ab285bf4c9e0f48f3e8af1ad952cd5224c7d461;hpb=73343ac38abfbe34cf889e9da286d5c577ce7973 diff --git a/crypto/bn/asm/vms.mar b/crypto/bn/asm/vms.mar index 754ab5347a..2a752489f5 100644 --- a/crypto/bn/asm/vms.mar +++ b/crypto/bn/asm/vms.mar @@ -1,4 +1,4 @@ - .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64 + .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64 ; ; w.j.m. 15-jan-1999 ; @@ -59,7 +59,7 @@ w=16 ;(AP) w by value (input) movl r6,r0 ; return c ret - .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64 + .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64 ; ; w.j.m. 15-jan-1999 ; @@ -172,145 +172,148 @@ n=12 ;(AP) n by value (input) ; } ; ; Using EDIV would be very easy, if it didn't do signed calculations. -; Therefore, som extra things have to happen around it. The way to -; handle that is to shift all operands right one step (basically dividing -; them by 2) and handle the different cases depending on what the lowest -; bit of each operand was. +; Any time, any of the input numbers are signed, there are problems, +; usually with integer overflow, at which point it returns useless +; data (the quotient gets the value of l, and the remainder becomes 0). ; -; To start with, let's define the following: +; If it was just for the dividend, it would be very easy, just divide +; it by 2 (unsigned), do the division, multiply the resulting quotient +; and remainder by 2, add the bit that was dropped when dividing by 2 +; to the remainder, and do some adjustment so the remainder doesn't +; end up larger than the divisor. This method works as long as the +; divisor is positive, so we'll keep that (with a small adjustment) +; as the main method. +; For some cases when the divisor is negative (from EDIV's point of +; view, i.e. when the highest bit is set), dividing the dividend by +; 2 isn't enough, it needs to be divided by 4. Furthermore, the +; divisor needs to be divided by 2 (unsigned) as well, to avoid more +; problems with the sign. In this case, a little extra fiddling with +; the remainder is required. ; -; a' = l & 1 -; a2 = >> 1 # UNSIGNED shift! -; b' = d & 1 -; b2 = d >> 1 # UNSIGNED shift! +; So, the simplest way to handle this is always to divide the dividend +; by 4, and to divide the divisor by 2 if it's highest bit is set. +; After EDIV has been used, the quotient gets multiplied by 4 if the +; original divisor was positive, otherwise 2. The remainder, oddly +; enough, is *always* multiplied by 4. ; -; Now, use EDIV to calculate a quotient and a remainder: +; The routine ends with comparing the resulting remainder with the +; original divisor and if the remainder is larger, subtract the +; original divisor from it, and increase the quotient by 1. This is +; done until the remainder is smaller than the divisor. ; -; q'' = a2/b2 -; r'' = a2 - q''*b2 +; The complete algorithm looks like this: ; -; If b' is 0, the quotient is already correct, we just need to adjust the -; remainder: +; d' = d +; l' = l & 3 +; [h,l] = [h,l] >> 2 +; [q,r] = floor([h,l] / d) # This is the EDIV operation +; if (q < 0) q = -q # I doubt this is necessary any more ; -; if (b' == 0) -; { -; r = 2*r'' + a' -; q = q'' -; } -; -; If b' is 1, we need to do other adjustements. The first thought is the -; following (note that r' will not always have the right value, but an -; adjustement follows further down): +; r' = r >> 30 +; if (d' >= 0) q = q << 1 +; q = q << 1 +; r = (r << 2) + l' ; -; if (b' == 1) +; if (d' < 0) ; { -; q' = q'' -; r' = a - q'*b -; -; However, one can note the folowing relationship: -; -; r'' = a2 - q''*b2 -; => 2*r'' = 2*a2 - 2*q''*b2 -; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1, -; q' = q'' } -; = a - a' - q'*(b - 1) -; = a - q'*b - a' + q' -; = r' - a' + q' -; => r' = 2*r'' - q' + a' -; -; This enables us to use r'' instead of discarding and calculating another -; modulo: -; -; if (b' == 1) -; { -; q' = q'' -; r' = (r'' << 1) - q' + a' -; -; Now, all we have to do is adjust r', because it might be < 0: -; -; while (r' < 0) +; [r',r] = [r',r] - q +; while ([r',r] < 0) ; { -; r' = r' + b -; q' = q' - 1 +; [r',r] = [r',r] + d +; q = q - 1 ; } ; } ; -; return q' +; while ([r',r] >= d) +; { +; [r',r] = [r',r] - d +; q = q + 1 +; } +; +; return q h=4 ;(AP) h by value (input) l=8 ;(AP) l by value (input) d=12 ;(AP) d by value (input) -;aprim=r5 -;a2=r6 -;a20=r6 -;a21=r7 -;bprim=r8 -;b2=r9 -;qprim=r10 ; initially used as q'' -;rprim=r11 ; initially used as r'' +;lprim=r5 +;rprim=r6 +;dprim=r7 .psect code,nowrt -.entry bn_div_words,^m +.entry bn_div_words,^m movl l(ap),r2 movl h(ap),r3 movl d(ap),r4 - movl #0,r5 - movl #0,r8 - movl #0,r0 - - rotl #-1,r2,r6 ; a20 = l >> 1 (almost) - rotl #-1,r3,r7 ; a21 = h >> 1 (almost) - rotl #-1,r4,r9 ; b2 = d >> 1 (almost) - - tstl r6 - bgeq 1$ - xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0 - incl r5 ; a' = 1 -1$: + bicl3 #^XFFFFFFFC,r2,r5 ; l' = l & 3 + bicl3 #^X00000003,r2,r2 + + bicl3 #^XFFFFFFFC,r3,r6 + bicl3 #^X00000003,r3,r3 + + addl r6,r2 + rotl #-2,r2,r2 ; l = l >> 2 + rotl #-2,r3,r3 ; h = h >> 2 + + movl #0,r6 + movl r4,r7 ; d' = d + + tstl r4 + beql 666$ ; Uh-oh, the divisor is 0... + bgtr 1$ + rotl #-1,r4,r4 ; If d is negative, shift it right. + bicl2 #^X80000000,r4 ; Since d is then a large number, the + ; lowest bit is insignificant + ; (contradict that, and I'll fix the problem!) +1$: + ediv r4,r2,r2,r3 ; Do the actual division + + tstl r2 + bgeq 3$ + mnegl r2,r2 ; if q < 0, negate it +3$: tstl r7 - bgeq 2$ - xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1, - ; since that's what was lowest in a21 - xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 1 -2$: - tstl r9 - bgeq 666$ ; Uh-oh, the divisor is 0... - bgtr 3$ - xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 1 - incl r8 -3$: - tstl r9 - bneq 4$ ; if b2 is 0, we know that bprim is 1 + blss 4$ + ashl #1,r2,r2 ; q = q << 1 +4$: + ashl #1,r2,r2 ; q = q << 1 + rotl #2,r3,r3 ; r = r << 2 + bicl3 #^XFFFFFFFC,r3,r6 ; r' gets the high bits from r + bicl3 #^X00000003,r3,r3 + addl r5,r3 ; r = r + l' + tstl r7 - bneq 666$ ; if higher half isn't 0, we overflow - movl r0,r6 ; otherwise, we have our result - brb 42$ -4$: - ediv r9,r6,r10,r11 - - tstl r8 - bneq 5$ ; If b' != 0, go to the other part -; addl3 r11,r11,r1 -; addl2 r5,r1 - brb 42$ + bgeq 5$ + bitl #1,r7 + beql 5$ ; if d < 0 && d & 1 + subl r2,r3 ; [r',r] = [r',r] - q + sbwc #0,r6 +45$: + bgeq 5$ ; while r < 0 + decl r2 ; q = q - 1 + addl r7,r3 ; [r',r] = [r',r] + d + adwc #0,r6 + brb 45$ + 5$: - ashl #1,r11,r11 - subl2 r10,r11 - addl2 r5,r11 - bgeq 7$ + tstl r6 + bneq 6$ + cmpl r3,r7 + blssu 42$ ; while [r',r] >= d' 6$: - decl r10 - addl2 r4,r11 - blss 6$ -7$: -; movl r11,r1 + subl r7,r3 ; [r',r] = [r',r] - d + sbwc #0,r6 + incl r2 ; q = q + 1 + brb 5$ 42$: - movl r10,r0 +; movl r3,r1 + movl r2,r0 + ret 666$: + movl #^XFFFFFFFF,r0 ret .title vax_bn_add_words unsigned add of two arrays