my $j=$i+1;
$code.=<<___ if ($i<79);
add $f,$K,$e
my $j=$i+1;
$code.=<<___ if ($i<79);
add $f,$K,$e
rotlwi $e,$a,5
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
add $f,$f,@X[$i%16]
rotlwi $e,$a,5
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
add $f,$f,@X[$i%16]
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
rotlwi @X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
add $f,$K,$e
rotlwi @X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
add $f,$K,$e
rotlwi $e,$a,5
lwz r16,0($ctx)
add $f,$f,@X[$i%16]
rotlwi $e,$a,5
lwz r16,0($ctx)
add $f,$f,@X[$i%16]
rotlwi $b,$b,30
lwz r18,8($ctx)
rotlwi $b,$b,30
lwz r18,8($ctx)
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This module is
# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
# result is degree of computational resources' utilization. POWER8 is
# "massively multi-threaded chip" and difference between single- and