projects
/
openssl.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
7d1f55e
)
crypto/bn/asm/s390x.S: drop redundant instructions.
author
Andy Polyakov
<appro@openssl.org>
Fri, 10 Sep 2010 14:53:36 +0000
(14:53 +0000)
committer
Andy Polyakov
<appro@openssl.org>
Fri, 10 Sep 2010 14:53:36 +0000
(14:53 +0000)
crypto/bn/asm/s390x.S
patch
|
blob
|
history
diff --git
a/crypto/bn/asm/s390x.S
b/crypto/bn/asm/s390x.S
index 8f45f5d513cec93cb3213f26fba5a34fd4b6db7f..27ef61bd70d8d240b04fe6b0a20a8975b1346bf9 100755
(executable)
--- a/
crypto/bn/asm/s390x.S
+++ b/
crypto/bn/asm/s390x.S
@@
-1,4
+1,4
@@
-.ident "s390x.S, version 1.
0
"
+.ident "s390x.S, version 1.
1
"
// ====================================================================
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
// project.
// ====================================================================
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
// project.
@@
-24,67
+24,67
@@
bn_mul_add_words:
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
+ lghi %r10,3
lghi %r8,0 // carry = 0
lghi %r8,0 // carry = 0
- srag %r10,%r4,2 // cnt=len/4
- jz .Loop1_madd
+ nr %r10,%r4 // len%4
+ sra %r4,2 // cnt=len/4
+ jz .Loop1_madd // carry is incidentally cleared if branch taken
+ algr zero,zero // clear carry
.Loop4_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
.Loop4_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- al
gr
%r7,%r8 // +=carry
+ al
cgr
%r7,%r8 // +=carry
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
- alcgr %r6,zero
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
- al
gr
%r9,%r6
+ al
cgr
%r9,%r6
alcgr %r8,zero
alg %r9,8(%r2,%r1)
alcgr %r8,zero
alg %r9,8(%r2,%r1)
- alcgr %r8,zero
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- al
gr
%r7,%r8
+ al
cgr
%r7,%r8
alcgr %r6,zero
alg %r7,16(%r2,%r1)
alcgr %r6,zero
alg %r7,16(%r2,%r1)
- alcgr %r6,zero
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
- al
gr
%r9,%r6
+ al
cgr
%r9,%r6
alcgr %r8,zero
alg %r9,24(%r2,%r1)
alcgr %r8,zero
alg %r9,24(%r2,%r1)
- alcgr %r8,zero
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
- brct %r
10
,.Loop4_madd
+ brct %r
4
,.Loop4_madd
- lghi %r10,3
- nr %r4,%r10 // cnt=len%4
- jz .Lend_madd
+ la %r10,1(%r10) // see if len%4 is zero ...
+ brct %r10,.Loop1_madd // without touching condition code:-)
+
+.Lend_madd:
+ alcgr %r8,zero // collect carry bit
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
.Loop1_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
.Loop1_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- al
gr
%r7,%r8 // +=carry
+ al
cgr
%r7,%r8 // +=carry
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
- alcgr %r6,zero
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
- brct %r
4
,.Loop1_madd
+ brct %r
10
,.Loop1_madd
-.Lend_madd:
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
- br %r14
+ j .Lend_madd
.size bn_mul_add_words,.-bn_mul_add_words
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
.size bn_mul_add_words,.-bn_mul_add_words
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
@@
-99,57
+99,57
@@
bn_mul_words:
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
+ lghi %r10,3
lghi %r8,0 // carry = 0
lghi %r8,0 // carry = 0
- srag %r10,%r4,2 // cnt=len/4
- jz .Loop1_mul
+ nr %r10,%r4 // len%4
+ sra %r4,2 // cnt=len/4
+ jz .Loop1_mul // carry is incidentally cleared if branch taken
+ algr zero,zero // clear carry
.Loop4_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
.Loop4_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
- alcgr %r6,zero
+ alcgr %r7,%r8 // +=carry
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
- alcgr %r8,zero
+ alcgr %r9,%r6
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- algr %r7,%r8
- alcgr %r6,zero
+ alcgr %r7,%r8
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
- alcgr %r8,zero
+ alcgr %r9,%r6
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
- brct %r
10
,.Loop4_mul
+ brct %r
4
,.Loop4_mul
- lghi %r10,3
- nr %r4,%r10 // cnt=len%4
- jz .Lend_mul
+ la %r10,1(%r10) // see if len%4 is zero ...
+ brct %r10,.Loop1_madd // without touching condition code:-)
+
+.Lend_mul:
+ alcgr %r8,zero // collect carry bit
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
.Loop1_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
.Loop1_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
- alcgr %r6,zero
+ alcgr %r7,%r8 // +=carry
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
- brct %r
4
,.Loop1_mul
+ brct %r
10
,.Loop1_mul
-.Lend_mul:
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
- br %r14
+ j .Lend_mul
.size bn_mul_words,.-bn_mul_words
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)
.size bn_mul_words,.-bn_mul_words
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)