X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fvms.mar;h=465f2774b6267e0b6dd4e983bfc35926860e7d31;hp=ac9d57d7b0dbdeb37794c810d9f15fab0d020318;hb=81b5eeed6acda357d449dbbba83310881852874d;hpb=7d7d2cbcb02206f3393681f2bce198e11e2e185b;ds=inline diff --git a/crypto/bn/asm/vms.mar b/crypto/bn/asm/vms.mar index ac9d57d7b0..465f2774b6 100644 --- a/crypto/bn/asm/vms.mar +++ b/crypto/bn/asm/vms.mar @@ -162,442 +162,237 @@ n=12 ;(AP) n by value (input) movl #1,r0 ; return SS$_NORMAL ret - .title (generated) - - .psect code,nowrt - -.entry BN_DIV_WORDS,^m - subl2 #4,sp - - clrl r9 - movl #2,r8 - - tstl 12(ap) - bneq noname.2 - mnegl #1,r10 - brw noname.3 - tstl r0 - nop -noname.2: - - pushl 12(ap) - calls #1,BN_NUM_BITS_WORD - movl r0,r7 - - cmpl r7,#32 - beql noname.4 - ashl r7,#1,r2 - cmpl 4(ap),r2 - blequ noname.4 - - pushl r7 - calls #1,BN_DIV_WORDS_ABORT -noname.4: - - subl3 r7,#32,r7 - - movl 12(ap),r2 - cmpl 4(ap),r2 - blssu noname.5 - subl2 r2,4(ap) -noname.5: - - tstl r7 - beql noname.6 - - ashl r7,r2,12(ap) - - ashl r7,4(ap),r4 - subl3 r7,#32,r3 - subl3 r3,#32,r2 - extzv r3,r2,8(ap),r2 - bisl3 r4,r2,4(ap) - - ashl r7,8(ap),8(ap) -noname.6: - - bicl3 #65535,12(ap),r2 - extzv #16,#16,r2,r5 - - bicl3 #-65536,12(ap),r6 - -noname.7: - - moval 4(ap),r2 - movzwl 2(r2),r0 - cmpl r0,r5 - bneq noname.8 - - movzwl #65535,r4 - brb noname.9 -noname.8: - - clrl r1 - movl (r2),r0 - movl r5,r2 - bgeq vcg.1 - cmpl r2,r0 - bgtru vcg.2 - incl r1 - brb vcg.2 - nop -vcg.1: - ediv r2,r0,r1,r0 -vcg.2: - movl r1,r4 -noname.9: - -noname.10: - - mull3 r5,r4,r0 - subl3 r0,4(ap),r3 - - bicl3 #65535,r3,r0 - bneq noname.13 - mull3 r6,r4,r2 - ashl #16,r3,r1 - bicl3 #65535,8(ap),r0 - extzv #16,#16,r0,r0 - addl2 r0,r1 - cmpl r2,r1 - bgtru noname.12 -noname.11: - - brb noname.13 - nop -noname.12: - - decl r4 - brb noname.10 -noname.13: - - mull3 r5,r4,r1 - - mull3 r6,r4,r0 - - extzv #16,#16,r0,r3 - - ashl #16,r0,r2 - bicl3 #65535,r2,r0 - - addl2 r3,r1 - - 
moval 8(ap),r3 - cmpl (r3),r0 - bgequ noname.15 - incl r1 -noname.15: - - subl2 r0,(r3) - - cmpl 4(ap),r1 - bgequ noname.16 - - addl2 12(ap),4(ap) - - decl r4 -noname.16: - - subl2 r1,4(ap) - - decl r8 - beql noname.18 -noname.17: - - ashl #16,r4,r9 + .title vax_bn_div_words unsigned divide +; +; Richard Levitte 20-Nov-2000 +; +; ULONG bn_div_words(ULONG h, ULONG l, ULONG d) +; { +; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d); +; } +; +; Using EDIV would be very easy, if it didn't do signed calculations. +; Therefore, some extra things have to happen around it. The way to +; handle that is to shift all operands right one step (basically dividing +; them by 2) and handle the different cases depending on what the lowest +; bit of each operand was. +; +; To start with, let's define the following: +; +; a' = l & 1 +; a2 = <h,l> >> 1 # UNSIGNED shift! +; b' = d & 1 +; b2 = d >> 1 # UNSIGNED shift! +; +; Now, use EDIV to calculate a quotient and a remainder: +; +; q'' = a2/b2 +; r'' = a2 - q''*b2 +; +; If b' is 0, the quotient is already correct, we just need to adjust the +; remainder: +; +; if (b' == 0) +; { +; r = 2*r'' + a' +; q = q'' +; } +; +; If b' is 1, we need to do other adjustments. 
The first thought is the +; following (note that r' will not always have the right value, but an +; adjustment follows further down): +; +; if (b' == 1) +; { +; q' = q'' +; r' = a - q'*b +; +; However, one can note the following relationship: +; +; r'' = a2 - q''*b2 +; => 2*r'' = 2*a2 - 2*q''*b2 +; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1, +; q' = q'' } +; = a - a' - q'*(b - 1) +; = a - q'*b - a' + q' +; = r' - a' + q' +; => r' = 2*r'' - q' + a' +; +; This enables us to use r'' instead of discarding and calculating another +; modulo: +; +; if (b' == 1) +; { +; q' = q'' +; r' = (r'' << 1) - q' + a' +; +; Now, all we have to do is adjust r', because it might be < 0: +; +; while (r' < 0) +; { +; r' = r' + b +; q' = q' - 1 +; } +; } +; +; return q' - ashl #16,4(ap),r2 - movzwl 2(r3),r0 - bisl2 r0,r2 - bicl3 #0,r2,4(ap) +h=4 ;(AP) h by value (input) +l=8 ;(AP) l by value (input) +d=12 ;(AP) d by value (input) - bicl3 #-65536,(r3),r0 - ashl #16,r0,(r3) - brw noname.7 - nop -noname.18: +;aprim=r5 +;a2=r6 +;a20=r6 +;a21=r7 +;bprim=r8 +;b2=r9 +;qprim=r10 ; initially used as q'' +;rprim=r11 ; initially used as r'' - bisl2 r4,r9 - movl r9,r10 + .psect code,nowrt -noname.3: +.entry bn_div_words,^m + movl l(ap),r2 + movl h(ap),r3 + movl d(ap),r4 + + movl #0,r5 + movl #0,r8 + movl #0,r0 +; movl #0,r1 + + rotl #-1,r2,r6 ; a20 = l >> 1 (almost) + rotl #-1,r3,r7 ; a21 = h >> 1 (almost) + rotl #-1,r4,r9 ; b2 = d >> 1 (almost) + + tstl r6 + bgeq 1$ + xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0 + incl r5 ; a' = 1 +1$: + tstl r7 + bgeq 2$ + xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1, + ; since that's what was lowest in a21 + xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 0 +2$: + tstl r9 + beql 666$ ; Uh-oh, the divisor is 0... 
+ bgtr 3$ + xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0 + incl r8 ; b' = 1 +3$: + tstl r9 + bneq 4$ ; if b2 is 0, we know that b' is 1 + tstl r3 + bneq 666$ ; if higher half isn't 0, we overflow + movl r2,r10 ; otherwise, we have our result + brb 42$ ; This is a success, really. +4$: + ediv r9,r6,r10,r11 + + tstl r8 + bneq 5$ ; If b' != 0, go to the other part +; addl3 r11,r11,r1 +; addl2 r5,r1 + brb 42$ +5$: + ashl #1,r11,r11 + subl2 r10,r11 + addl2 r5,r11 + bgeq 7$ +6$: + decl r10 + addl2 r4,r11 + blss 6$ +7$: +; movl r11,r1 +42$: movl r10,r0 - ret - tstl r0 - +666$: + ret - .psect code,nowrt - -.entry BN_ADD_WORDS,^m - - tstl 16(ap) - bgtr noname.21 - clrl r7 - brw noname.22 -noname.21: - - clrl r4 - - tstl r0 -noname.23: - - movl 8(ap),r6 - addl3 r4,(r6),r2 - - bicl2 #0,r2 - - clrl r0 - cmpl r2,r4 - bgequ vcg.3 - incl r0 -vcg.3: - movl r0,r4 - - movl 12(ap),r5 - addl3 (r5),r2,r1 - bicl2 #0,r1 - - clrl r0 - cmpl r1,r2 - bgequ vcg.4 - incl r0 -vcg.4: - addl2 r0,r4 - - movl 4(ap),r3 - movl r1,(r3) - - decl 16(ap) - bgtr gen.1 - brw noname.25 -gen.1: -noname.24: - - addl3 r4,4(r6),r2 - - bicl2 #0,r2 - - clrl r0 - cmpl r2,r4 - bgequ vcg.5 - incl r0 -vcg.5: - movl r0,r4 - - addl3 4(r5),r2,r1 - bicl2 #0,r1 - - clrl r0 - cmpl r1,r2 - bgequ vcg.6 - incl r0 -vcg.6: - addl2 r0,r4 - - movl r1,4(r3) - - decl 16(ap) - bleq noname.25 -noname.26: - - addl3 r4,8(r6),r2 - - bicl2 #0,r2 - - clrl r0 - cmpl r2,r4 - bgequ vcg.7 - incl r0 -vcg.7: - movl r0,r4 - - addl3 8(r5),r2,r1 - bicl2 #0,r1 - - clrl r0 - cmpl r1,r2 - bgequ vcg.8 - incl r0 -vcg.8: - addl2 r0,r4 - - movl r1,8(r3) - - decl 16(ap) - bleq noname.25 -noname.27: - - addl3 r4,12(r6),r2 - - bicl2 #0,r2 - - clrl r0 - cmpl r2,r4 - bgequ vcg.9 - incl r0 -vcg.9: - movl r0,r4 - - addl3 12(r5),r2,r1 - bicl2 #0,r1 - - clrl r0 - cmpl r1,r2 - bgequ vcg.10 - incl r0 -vcg.10: - addl2 r0,r4 + .title vax_bn_add_words unsigned add of two arrays +; +; Richard Levitte 20-Nov-2000 +; +; ULONG bn_add_words(ULONG r[], ULONG a[], 
ULONG b[], int n) { +; ULONG c = 0; +; int i; +; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c; +; return(c); +; } - movl r1,12(r3) +r=4 ;(AP) r by reference (output) +a=8 ;(AP) a by reference (input) +b=12 ;(AP) b by reference (input) +n=16 ;(AP) n by value (input) - decl 16(ap) - bleq noname.25 -noname.28: - addl3 #16,r6,8(ap) + .psect code,nowrt - addl3 #16,r5,12(ap) +.entry bn_add_words,^m - addl3 #16,r3,4(ap) - brw noname.23 - tstl r0 -noname.25: + moval @r(ap),r2 + moval @a(ap),r3 + moval @b(ap),r4 + movl n(ap),r5 ; assumed >0 by C code + clrl r0 ; c - movl r4,r7 + tstl r5 ; carry = 0 + bleq 666$ -noname.22: - movl r7,r0 - ret - nop +0$: + movl (r3)+,r6 ; carry untouched + adwc (r4)+,r6 ; carry used and touched + movl r6,(r2)+ ; carry untouched + sobgtr r5,0$ ; carry untouched + adwc #0,r0 +666$: + ret + .title vax_bn_sub_words unsigned subtract of two arrays +; +; Richard Levitte 20-Nov-2000 +; +; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) { +; ULONG c = 0; +; int i; +; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c; +; return(c); +; } -;r=4 ;(AP) -;a=8 ;(AP) -;b=12 ;(AP) -;n=16 ;(AP) n by value (input) +r=4 ;(AP) r by reference (output) +a=8 ;(AP) a by reference (input) +b=12 ;(AP) b by reference (input) +n=16 ;(AP) n by value (input) - .psect code,nowrt -.entry BN_SUB_WORDS,^m + .psect code,nowrt - clrl r6 +.entry bn_sub_words,^m - tstl 16(ap) - bgtr noname.31 - clrl r7 - brw noname.32 - tstl r0 -noname.31: + moval @r(ap),r2 + moval @a(ap),r3 + moval @b(ap),r4 + movl n(ap),r5 ; assumed >0 by C code + clrl r0 ; c -noname.33: + tstl r5 ; carry = 0 + bleq 666$ - movl 8(ap),r5 - movl (r5),r1 - movl 12(ap),r4 - movl (r4),r2 - - movl 4(ap),r3 - subl3 r2,r1,r0 - subl2 r6,r0 - bicl3 #0,r0,(r3) - - cmpl r1,r2 - beql noname.34 - clrl r0 - cmpl r1,r2 - bgequ vcg.11 - incl r0 -vcg.11: - movl r0,r6 -noname.34: - - decl 16(ap) - bgtr gen.2 - brw noname.36 -gen.2: -noname.35: - - movl 4(r5),r2 - movl 4(r4),r1 - - subl3 r1,r2,r0 - subl2 r6,r0 - bicl3 #0,r0,4(r3) - - 
cmpl r2,r1 - beql noname.37 - clrl r0 - cmpl r2,r1 - bgequ vcg.12 - incl r0 -vcg.12: - movl r0,r6 -noname.37: - - decl 16(ap) - bleq noname.36 -noname.38: - - movl 8(r5),r1 - movl 8(r4),r2 - - subl3 r2,r1,r0 - subl2 r6,r0 - bicl3 #0,r0,8(r3) - - cmpl r1,r2 - beql noname.39 - clrl r0 - cmpl r1,r2 - bgequ vcg.13 - incl r0 -vcg.13: - movl r0,r6 -noname.39: - - decl 16(ap) - bleq noname.36 -noname.40: - - movl 12(r5),r1 - movl 12(r4),r2 - - subl3 r2,r1,r0 - subl2 r6,r0 - bicl3 #0,r0,12(r3) - - cmpl r1,r2 - beql noname.41 - clrl r0 - cmpl r1,r2 - bgequ vcg.14 - incl r0 -vcg.14: - movl r0,r6 -noname.41: - - decl 16(ap) - bleq noname.36 -noname.42: - - addl3 #16,r5,8(ap) - - addl3 #16,r4,12(ap) - - addl3 #16,r3,4(ap) - brw noname.33 - tstl r0 -noname.36: - - movl r6,r7 - -noname.32: - movl r7,r0 - ret - nop +0$: + movl (r3)+,r6 ; carry untouched + sbwc (r4)+,r6 ; carry used and touched + movl r6,(r2)+ ; carry untouched + sobgtr r5,0$ ; carry untouched + adwc #0,r0 +666$: + ret ;r=4 ;(AP) @@ -6614,82 +6409,4 @@ noname.610: ret ; For now, the code below doesn't work, so I end this prematurely. -.end - - .title vax_bn_div64 division 64/32=>32 -; -; r.l. 
16-jan-1998 -; -; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d) -; return <h,l>/d; -; - - .psect code,nowrt - -h=4 ;(AP) by value (input) -l=8 ;(AP) by value (input) -d=12 ;(AP) by value (input) - -.entry bn_div64,^m - - movl l(ap),r2 ; l - movl h(ap),r3 ; h - movl d(ap),r4 ; d - clrl r5 ; q - clrl r6 ; r - - ; Treat "negative" specially - tstl r3 - blss 30$ - - tstl r4 - beql 90$ - - ediv r4,r2,r5,r6 - bvs 666$ - - movl r5,r0 - ret - -30$: - ; The theory here is to do some harmless shifting and a little - ; bit of rounding (brackets are to designate when decimals are - ; cut off): - ; - ; result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ] - - movl #0,r7 - movl r3,r8 ; copy h - ashq #-1,r7,r7 ; [<h,0>/2] => <r8,r7> - bicl2 #^X80000000,r8 ; Remove "sign" - - movl r4,r9 ; copy d - ashl #-1,r9,r9 ; [d/2] => r9 - bicl2 #^X80000000,r9 ; Remove "sign" - - addl2 r9,r7 - adwc #0,r8 ; [<h,0>/2] + [d/2] => <r8,r7> - - ediv r4,r7,r5,r6 ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6> - bvs 666$ - - movl #0,r6 - ashq #1,r5,r5 ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5 - - movl #0,r3 - ediv r4,r2,r8,r9 ; [ l / d ] => <r8,r9> - - addl2 r8,r5 ; - bcs 666$ - - movl r5,r0 - ret - -90$: - movl #-1,r0 - ret - -666$: - - .end