# SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, a PA-RISC 1.1 processor. The implementation then detects
# whether it is running on a PA-RISC 2.0 processor and switches to a 64-bit
-# code path delivering adequate peformance even in "blended" 32-bit
+# code path delivering adequate performance even in "blended" 32-bit
# build. Though 64-bit code is not any faster than code generated by
# vendor compiler on PA-8600...
#
shd $elo,$ehi,$Sigma1[0],$t1
addc $Xhi,$hhi,$hhi ; h += X[i]
shd $ehi,$elo,$Sigma1[1],$t2
- ldw,ma 8($Tbl),$Xhi
+ ldwm 8($Tbl),$Xhi
shd $elo,$ehi,$Sigma1[1],$t3
ldw -4($Tbl),$Xlo ; load K[i]
xor $t2,$t0,$t0
my ($mod,$args) = @_;
my $orig = "ldd$mod\t$args";
- if ($args =~ /([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
- { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|($1<<1);
- $opcode|=0x8 if ($mod eq ",ma");
+ if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
+ { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
+ $opcode|=(1<<3) if ($mod =~ /^,m/);
+ $opcode|=(1<<2) if ($mod =~ /^,mb/);
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
my ($mod,$args) = @_;
my $orig = "std$mod\t$args";
- if ($args =~ /%r([0-9]+),([0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
- { sprintf "\t.WORD\t0x%08x\t; %s",
- (0x1c<<26)|($3<<21)|($1<<16)|($2<<1),$orig;
+ if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
+ { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
+ sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
};
s/cmpb,\*/comb,/ if ($SIZE_T==4);
+ s/\bbv\b/bve/ if ($SIZE_T==8);
+
print $_,"\n";
}