Import of old SSLeay release: SSLeay 0.9.1b (unreleased)
[openssl.git] / crypto / bn / asm / alpha.s.works
diff --git a/crypto/bn/asm/alpha.s.works b/crypto/bn/asm/alpha.s.works
new file mode 100644 (file)
index 0000000..ee6c587
--- /dev/null
@@ -0,0 +1,533 @@
+
+ # DEC Alpha assember
+ # The bn_div64 is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div64.
+ # I've gone back and re-done most of routines.
+ # The key thing to remeber for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed.  I've done modification to help
+ # improve this.  Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+       .file   1 "bn_asm.c"
+       .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+       .text
+       .align 3
+       .globl bn_mul_add_words
+       .ent bn_mul_add_words
+bn_mul_add_words:
+bn_mul_add_words..ng:
+       .frame $30,0,$26,0
+       .prologue 0
+       .align 5
+       subq    $18,4,$18
+       bis     $31,$31,$0
+       blt     $18,$43         # if we are -1, -2, -3 or -4 goto tail code
+       ldq     $20,0($17)      # 1 1
+       ldq     $1,0($16)       # 1 1
+       .align 3
+$42:
+       mulq    $20,$19,$5      # 1 2 1 ######
+       ldq     $21,8($17)      # 2 1
+       ldq     $2,8($16)       # 2 1
+       umulh   $20,$19,$20     # 1 2   ######
+       ldq     $27,16($17)     # 3 1
+       ldq     $3,16($16)      # 3 1
+       mulq    $21,$19,$6      # 2 2 1 ######
+        ldq    $28,24($17)     # 4 1
+       addq    $1,$5,$1        # 1 2 2
+        ldq    $4,24($16)      # 4 1
+       umulh   $21,$19,$21     # 2 2   ######
+        cmpult $1,$5,$22       # 1 2 3 1
+       addq    $20,$22,$20     # 1 3 1
+        addq   $1,$0,$1        # 1 2 3 1
+       mulq    $27,$19,$7      # 3 2 1 ######
+        cmpult $1,$0,$0        # 1 2 3 2
+       addq    $2,$6,$2        # 2 2 2
+        addq   $20,$0,$0       # 1 3 2 
+       cmpult  $2,$6,$23       # 2 2 3 1
+        addq   $21,$23,$21     # 2 3 1
+       umulh   $27,$19,$27     # 3 2   ######
+        addq   $2,$0,$2        # 2 2 3 1
+       cmpult  $2,$0,$0        # 2 2 3 2
+        subq   $18,4,$18
+       mulq    $28,$19,$8      # 4 2 1 ######
+        addq   $21,$0,$0       # 2 3 2 
+       addq    $3,$7,$3        # 3 2 2
+        addq   $16,32,$16
+       cmpult  $3,$7,$24       # 3 2 3 1
+        stq    $1,-32($16)     # 1 2 4
+       umulh   $28,$19,$28     # 4 2   ######
+        addq   $27,$24,$27     # 3 3 1
+       addq    $3,$0,$3        # 3 2 3 1
+        stq    $2,-24($16)     # 2 2 4
+       cmpult  $3,$0,$0        # 3 2 3 2
+        stq    $3,-16($16)     # 3 2 4
+       addq    $4,$8,$4        # 4 2 2
+        addq   $27,$0,$0       # 3 3 2 
+       cmpult  $4,$8,$25       # 4 2 3 1
+        addq   $17,32,$17
+       addq    $28,$25,$28     # 4 3 1
+        addq   $4,$0,$4        # 4 2 3 1
+       cmpult  $4,$0,$0        # 4 2 3 2
+        stq    $4,-8($16)      # 4 2 4
+       addq    $28,$0,$0       # 4 3 2 
+        blt    $18,$43
+
+       ldq     $20,0($17)      # 1 1
+       ldq     $1,0($16)       # 1 1
+
+       br      $42
+
+       .align 4
+$45:
+       ldq     $20,0($17)      # 4 1
+       ldq     $1,0($16)       # 4 1
+       mulq    $20,$19,$5      # 4 2 1
+       subq    $18,1,$18
+       addq    $16,8,$16
+       addq    $17,8,$17
+       umulh   $20,$19,$20     # 4 2
+       addq    $1,$5,$1        # 4 2 2
+       cmpult  $1,$5,$22       # 4 2 3 1
+       addq    $20,$22,$20     # 4 3 1
+       addq    $1,$0,$1        # 4 2 3 1
+       cmpult  $1,$0,$0        # 4 2 3 2
+       addq    $20,$0,$0       # 4 3 2 
+       stq     $1,-8($16)      # 4 2 4
+       bgt     $18,$45
+       ret     $31,($26),1     # else exit
+
+       .align 4
+$43:
+       addq    $18,4,$18
+       bgt     $18,$45         # goto tail code
+       ret     $31,($26),1     # else exit
+
+       .end bn_mul_add_words
+       .align 3
+       .globl bn_mul_words
+       .ent bn_mul_words
+bn_mul_words:
+bn_mul_words..ng:
+       .frame $30,0,$26,0
+       .prologue 0
+       .align 5
+       subq    $18,4,$18
+       bis     $31,$31,$0
+       blt     $18,$143        # if we are -1, -2, -3 or -4 goto tail code
+       ldq     $20,0($17)      # 1 1
+       .align 3
+$142:
+
+       mulq    $20,$19,$5      # 1 2 1 #####
+        ldq    $21,8($17)      # 2 1
+        ldq    $27,16($17)     # 3 1
+       umulh   $20,$19,$20     # 1 2   #####
+        ldq    $28,24($17)     # 4 1
+       mulq    $21,$19,$6      # 2 2 1 #####
+        addq   $5,$0,$5        # 1 2 3 1
+       subq    $18,4,$18
+        cmpult $5,$0,$0        # 1 2 3 2
+       umulh   $21,$19,$21     # 2 2   #####
+        addq   $20,$0,$0       # 1 3 2 
+       addq    $17,32,$17
+        addq   $6,$0,$6        # 2 2 3 1
+       mulq    $27,$19,$7      # 3 2 1 #####
+        cmpult $6,$0,$0        # 2 2 3 2
+       addq    $21,$0,$0       # 2 3 2 
+        addq   $16,32,$16
+       umulh   $27,$19,$27     # 3 2   #####
+        stq    $5,-32($16)     # 1 2 4
+       mulq    $28,$19,$8      # 4 2 1 #####
+        addq   $7,$0,$7        # 3 2 3 1
+       stq     $6,-24($16)     # 2 2 4
+        cmpult $7,$0,$0        # 3 2 3 2
+       umulh   $28,$19,$28     # 4 2   #####
+        addq   $27,$0,$0       # 3 3 2 
+       stq     $7,-16($16)     # 3 2 4
+        addq   $8,$0,$8        # 4 2 3 1
+       cmpult  $8,$0,$0        # 4 2 3 2
+
+       addq    $28,$0,$0       # 4 3 2 
+
+       stq     $8,-8($16)      # 4 2 4
+
+       blt     $18,$143
+
+       ldq     $20,0($17)      # 1 1
+
+       br      $142
+
+       .align 4
+$145:
+       ldq     $20,0($17)      # 4 1
+       mulq    $20,$19,$5      # 4 2 1
+       subq    $18,1,$18
+       umulh   $20,$19,$20     # 4 2
+       addq    $5,$0,$5        # 4 2 3 1
+        addq   $16,8,$16
+       cmpult  $5,$0,$0        # 4 2 3 2
+        addq   $17,8,$17
+       addq    $20,$0,$0       # 4 3 2 
+       stq     $5,-8($16)      # 4 2 4
+
+       bgt     $18,$145
+       ret     $31,($26),1     # else exit
+
+       .align 4
+$143:
+       addq    $18,4,$18
+       bgt     $18,$145        # goto tail code
+       ret     $31,($26),1     # else exit
+
+       .end bn_mul_words
+       .align 3
+       .globl bn_sqr_words
+       .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+       .frame $30,0,$26,0
+       .prologue 0
+
+       subq    $18,4,$18
+       blt     $18,$543        # if we are -1, -2, -3 or -4 goto tail code
+       ldq     $20,0($17)      # 1 1
+       .align 3
+$542:
+       mulq    $20,$20,$5              ######
+        ldq    $21,8($17)      # 1 1
+       subq    $18,4
+       umulh   $20,$20,$1              ######
+       ldq     $27,16($17)     # 1 1
+       mulq    $21,$21,$6              ######
+       ldq     $28,24($17)     # 1 1
+       stq     $5,0($16)       # r[0]
+       umulh   $21,$21,$2              ######
+       stq     $1,8($16)       # r[1]
+       mulq    $27,$27,$7              ######
+       stq     $6,16($16)      # r[0]
+       umulh   $27,$27,$3              ######
+       stq     $2,24($16)      # r[1]
+       mulq    $28,$28,$8              ######
+       stq     $7,32($16)      # r[0]
+       umulh   $28,$28,$4              ######
+       stq     $3,40($16)      # r[1]
+
+       addq    $16,64,$16
+       addq    $17,32,$17
+       stq     $8,-16($16)     # r[0]
+       stq     $4,-8($16)      # r[1]
+
+       blt     $18,$543
+       ldq     $20,0($17)      # 1 1
+       br      $542
+
+$442:
+       ldq     $20,0($17)   # a[0]
+       mulq    $20,$20,$5  # a[0]*w low part       r2
+       addq    $16,16,$16
+       addq    $17,8,$17
+       subq    $18,1,$18
+        umulh  $20,$20,$1  # a[0]*w high part       r3
+       stq     $5,-16($16)   # r[0]
+        stq    $1,-8($16)   # r[1]
+
+       bgt     $18,$442
+       ret     $31,($26),1     # else exit
+
+       .align 4
+$543:
+       addq    $18,4,$18
+       bgt     $18,$442        # goto tail code
+       ret     $31,($26),1     # else exit
+       .end bn_sqr_words
+
+       .align 3
+       .globl bn_add_words
+       .ent bn_add_words
+bn_add_words:
+bn_add_words..ng:
+       .frame $30,0,$26,0
+       .prologue 0
+
+       subq    $19,4,$19
+       bis     $31,$31,$0      # carry = 0
+       blt     $19,$900
+       ldq     $5,0($17)       # a[0]
+       ldq     $1,0($18)       # b[1]
+       .align 3
+$901:
+       addq    $1,$5,$1        # r=a+b;
+        ldq    $6,8($17)       # a[1]
+       cmpult  $1,$5,$22       # did we overflow?
+        ldq    $2,8($18)       # b[1]
+       addq    $1,$0,$1        # c+= overflow
+        ldq    $7,16($17)      # a[2]
+       cmpult  $1,$0,$0        # overflow?
+        ldq    $3,16($18)      # b[2]
+       addq    $0,$22,$0
+        ldq    $8,24($17)      # a[3]
+       addq    $2,$6,$2        # r=a+b;
+        ldq    $4,24($18)      # b[3]
+       cmpult  $2,$6,$23       # did we overflow?
+        addq   $3,$7,$3        # r=a+b;
+       addq    $2,$0,$2        # c+= overflow
+        cmpult $3,$7,$24       # did we overflow?
+       cmpult  $2,$0,$0        # overflow?
+        addq   $4,$8,$4        # r=a+b;
+       addq    $0,$23,$0
+        cmpult $4,$8,$25       # did we overflow?
+       addq    $3,$0,$3        # c+= overflow
+        stq    $1,0($16)       # r[0]=c
+       cmpult  $3,$0,$0        # overflow?
+        stq    $2,8($16)       # r[1]=c
+       addq    $0,$24,$0
+        stq    $3,16($16)      # r[2]=c
+       addq    $4,$0,$4        # c+= overflow
+        subq   $19,4,$19       # loop--
+       cmpult  $4,$0,$0        # overflow?
+        addq   $17,32,$17      # a++
+       addq    $0,$25,$0
+        stq    $4,24($16)      # r[3]=c
+       addq    $18,32,$18      # b++
+        addq   $16,32,$16      # r++
+
+       blt     $19,$900
+        ldq    $5,0($17)       # a[0]
+       ldq     $1,0($18)       # b[1]
+        br     $901
+       .align 4
+$945:
+       ldq     $5,0($17)       # a[0]
+        ldq    $1,0($18)       # b[1]
+       addq    $1,$5,$1        # r=a+b;
+        subq   $19,1,$19       # loop--
+       addq    $1,$0,$1        # c+= overflow
+        addq   $17,8,$17       # a++
+       cmpult  $1,$5,$22       # did we overflow?
+        cmpult $1,$0,$0        # overflow?
+       addq    $18,8,$18       # b++
+        stq    $1,0($16)       # r[0]=c
+       addq    $0,$22,$0
+        addq   $16,8,$16       # r++
+
+       bgt     $19,$945
+       ret     $31,($26),1     # else exit
+
+$900:
+       addq    $19,4,$19
+       bgt     $19,$945        # goto tail code
+       ret     $31,($26),1     # else exit
+       .end bn_add_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+       .align 3
+       .globl bn_div64
+       .ent bn_div64
+bn_div64:
+       ldgp $29,0($27)
+bn_div64..ng:
+       lda $30,-48($30)
+       .frame $30,48,$26,0
+       stq $26,0($30)
+       stq $9,8($30)
+       stq $10,16($30)
+       stq $11,24($30)
+       stq $12,32($30)
+       stq $13,40($30)
+       .mask 0x4003e00,-48
+       .prologue 1
+       bis $16,$16,$9
+       bis $17,$17,$10
+       bis $18,$18,$11
+       bis $31,$31,$13
+       bis $31,2,$12
+       bne $11,$119
+       lda $0,-1
+       br $31,$136
+       .align 4
+$119:
+       bis $11,$11,$16
+       jsr $26,BN_num_bits_word
+       ldgp $29,0($26)
+       subq $0,64,$1
+       beq $1,$120
+       bis $31,1,$1
+       sll $1,$0,$1
+       cmpule $9,$1,$1
+       bne $1,$120
+ #     lda $16,_IO_stderr_
+ #     lda $17,$C32
+ #     bis $0,$0,$18
+ #     jsr $26,fprintf
+ #     ldgp $29,0($26)
+       jsr $26,abort
+       ldgp $29,0($26)
+       .align 4
+$120:
+       bis $31,64,$3
+       cmpult $9,$11,$2
+       subq $3,$0,$1
+       addl $1,$31,$0
+       subq $9,$11,$1
+       cmoveq $2,$1,$9
+       beq $0,$122
+       zapnot $0,15,$2
+       subq $3,$0,$1
+       sll $11,$2,$11
+       sll $9,$2,$3
+       srl $10,$1,$1
+       sll $10,$2,$10
+       bis $3,$1,$9
+$122:
+       srl $11,32,$5
+       zapnot $11,15,$6
+       lda $7,-1
+       .align 5
+$123:
+       srl $9,32,$1
+       subq $1,$5,$1
+       bne $1,$126
+       zapnot $7,15,$27
+       br $31,$127
+       .align 4
+$126:
+       bis $9,$9,$24
+       bis $5,$5,$25
+       divqu $24,$25,$27
+$127:
+       srl $10,32,$4
+       .align 5
+$128:
+       mulq $27,$5,$1
+       subq $9,$1,$3
+       zapnot $3,240,$1
+       bne $1,$129
+       mulq $6,$27,$2
+       sll $3,32,$1
+       addq $1,$4,$1
+       cmpule $2,$1,$2
+       bne $2,$129
+       subq $27,1,$27
+       br $31,$128
+       .align 4
+$129:
+       mulq $27,$6,$1
+       mulq $27,$5,$4
+       srl $1,32,$3
+       sll $1,32,$1
+       addq $4,$3,$4
+       cmpult $10,$1,$2
+       subq $10,$1,$10
+       addq $2,$4,$2
+       cmpult $9,$2,$1
+       bis $2,$2,$4
+       beq $1,$134
+       addq $9,$11,$9
+       subq $27,1,$27
+$134:
+       subl $12,1,$12
+       subq $9,$4,$9
+       beq $12,$124
+       sll $27,32,$13
+       sll $9,32,$2
+       srl $10,32,$1
+       sll $10,32,$10
+       bis $2,$1,$9
+       br $31,$123
+       .align 4
+$124:
+       bis $13,$27,$0
+$136:
+       ldq $26,0($30)
+       ldq $9,8($30)
+       ldq $10,16($30)
+       ldq $11,24($30)
+       ldq $12,32($30)
+       ldq $13,40($30)
+       addq $30,48,$30
+       ret $31,($26),1
+       .end bn_div64
+
+       .set noat
+       .text
+       .align 3
+       .globl bn_sub_words
+       .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+       .frame $30,0,$26,0
+       .prologue 0
+
+       subq    $19,    4,      $19
+       bis     $31,    $31,    $0
+       blt     $19,    $100
+       ldq     $1,     0($17)
+       ldq     $2,     0($18)
+$101:
+       ldq     $3,     8($17)
+       cmpult  $1,     $2,     $4
+       ldq     $5,     8($18)
+       subq    $1,     $2,     $1
+       ldq     $6,     16($17)
+       cmpult  $1,     $0,     $2
+       ldq     $7,     16($18)
+       subq    $1,     $0,     $23
+       ldq     $8,     24($17)
+       addq    $2,     $4,     $0
+       cmpult  $3,     $5,     $24
+       subq    $3,     $5,     $3
+       ldq     $22,    24($18)
+       cmpult  $3,     $0,     $5
+       subq    $3,     $0,     $25
+       addq    $5,     $24,    $0
+       cmpult  $6,     $7,     $27
+       subq    $6,     $7,     $6
+       stq     $23,    0($16)
+       cmpult  $6,     $0,     $7
+       subq    $6,     $0,     $28
+       addq    $7,     $27,    $0
+       cmpult  $8,     $22,    $21
+       subq    $8,     $22,    $8
+       stq     $25,    8($16)
+       cmpult  $8,     $0,     $22
+       subq    $8,     $0,     $20
+       addq    $22,    $21,    $0
+       stq     $28,    16($16)
+       subq    $19,    4,      $19
+       stq     $20,    24($16)
+       addq    $17,    32,     $17
+       addq    $18,    32,     $18
+       addq    $16,    32,     $16
+       blt     $19,    $100
+       ldq     $1,     0($17)
+       ldq     $2,     0($18)
+       br      $101
+$102:
+       ldq     $1,     0($17)
+       ldq     $2,     0($18)
+       cmpult  $1,     $2,     $27
+       subq    $1,     $2,     $1
+       cmpult  $1,     $0,     $2
+       subq    $1,     $0,     $1
+       stq     $1,     0($16)
+       addq    $2,     $27,    $0
+       addq    $17,    8,      $17
+       addq    $18,    8,      $18
+       addq    $16,    8,      $16
+       subq    $19,    1,      $19
+       bgt     $19,    $102
+       ret     $31,($26),1
+$100:
+       addq    $19,    4,      $19
+       bgt     $19,    $102
+$103:
+       ret     $31,($26),1
+       .end bn_sub_words