# forms are granted according to the OpenSSL license.
# ====================================================================
#
# forms are granted according to the OpenSSL license.
# ====================================================================
#
#
# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
#
# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
- movzb `&lo($qs0)`,$acc0
- movzb `&lo($qs1)`,$acc1
- movzb `&lo($qs2)`,$acc2
+ movzb `&lo("$s0")`,$acc0
+ movzb `&lo("$s1")`,$acc1
+ movzb `&lo("$s2")`,$acc2
- movzb `&hi($qs1)`,$acc0
- movzb `&hi($qs2)`,$acc1
- movzb `&lo($qs3)`,$acc2
+ movzb `&hi("$s1")`,$acc0
+ movzb `&hi("$s2")`,$acc1
+ movzb `&lo("$s3")`,$acc2
- movzb `&lo($qs2)`,$acc0
- movzb `&lo($qs3)`,$acc1
- movzb `&lo($qs0)`,$acc2
+ movzb `&lo("$s2")`,$acc0
+ movzb `&lo("$s3")`,$acc1
+ movzb `&lo("$s0")`,$acc2
- movzb `&hi($qs3)`,$acc0
- movzb `&hi($qs0)`,$acc1
- movzb `&lo($qs1)`,$acc2
+ movzb `&hi("$s3")`,$acc0
+ movzb `&hi("$s0")`,$acc1
+ movzb `&lo("$s1")`,$acc2
- movzb `&hi($qs1)`,$acc1
- movzb `&hi($qs2)`,$acc2
+ movzb `&hi("$s1")`,$acc1
+ movzb `&hi("$s2")`,$acc2
- movzb `&lo($qs0)`,$acc0
- movzb `&lo($qs1)`,$acc1
- movzb `&lo($qs2)`,$acc2
+ movzb `&lo("$s0")`,$acc0
+ movzb `&lo("$s1")`,$acc1
+ movzb `&lo("$s2")`,$acc2
- movzb `&lo($qs3)`,$acc0
- movzb `&hi($qs1)`,$acc1
- movzb `&hi($qs2)`,$acc2
+ movzb `&lo("$s3")`,$acc0
+ movzb `&hi("$s1")`,$acc1
+ movzb `&hi("$s2")`,$acc2
mov 2($sbox,$acc0,8),$t3
mov 0($sbox,$acc1,8),$acc1 #$t0
mov 0($sbox,$acc2,8),$acc2 #$t1
mov 2($sbox,$acc0,8),$t3
mov 0($sbox,$acc1,8),$acc1 #$t0
mov 0($sbox,$acc2,8),$acc2 #$t1
- movzb `&hi($qs3)`,$acc0
- movzb `&hi($qs0)`,$acc1
+ movzb `&hi("$s3")`,$acc0
+ movzb `&hi("$s0")`,$acc1
shr \$16,$s3
mov 0($sbox,$acc0,8),$acc0 #$t2
mov 0($sbox,$acc1,8),$acc1 #$t3
shr \$16,$s3
mov 0($sbox,$acc0,8),$acc0 #$t2
mov 0($sbox,$acc1,8),$acc1 #$t3
- movzb `&lo($qs2)`,$acc0
- movzb `&lo($qs3)`,$acc1
- movzb `&lo($qs0)`,$acc2
+ movzb `&lo("$s2")`,$acc0
+ movzb `&lo("$s3")`,$acc1
+ movzb `&lo("$s0")`,$acc2
mov 0($sbox,$acc0,8),$acc0 #$t0
mov 0($sbox,$acc1,8),$acc1 #$t1
mov 0($sbox,$acc2,8),$acc2 #$t2
mov 0($sbox,$acc0,8),$acc0 #$t0
mov 0($sbox,$acc1,8),$acc1 #$t1
mov 0($sbox,$acc2,8),$acc2 #$t2
- movzb `&lo($qs1)`,$acc0
- movzb `&hi($qs3)`,$acc1
- movzb `&hi($qs0)`,$acc2
+ movzb `&lo("$s1")`,$acc0
+ movzb `&hi("$s3")`,$acc1
+ movzb `&hi("$s0")`,$acc2
mov 0($sbox,$acc0,8),$acc0 #$t3
mov 2($sbox,$acc1,8),$acc1 #$t0
mov 2($sbox,$acc2,8),$acc2 #$t1
mov 0($sbox,$acc0,8),$acc0 #$t3
mov 2($sbox,$acc1,8),$acc1 #$t0
mov 2($sbox,$acc2,8),$acc2 #$t1
- movzb `&hi($qs1)`,$acc0
- movzb `&hi($qs2)`,$acc1
+ movzb `&hi("$s1")`,$acc0
+ movzb `&hi("$s2")`,$acc1
mov 16+12($key),$s3
mov 2($sbox,$acc0,8),$acc0 #$t2
mov 2($sbox,$acc1,8),$acc1 #$t3
mov 16+12($key),$s3
mov 2($sbox,$acc0,8),$acc0 #$t2
mov 2($sbox,$acc1,8),$acc1 #$t3
- movzb `&lo($qs0)`,$acc0
- movzb `&lo($qs1)`,$acc1
- movzb `&lo($qs2)`,$acc2
+ movzb `&lo("$s0")`,$acc0
+ movzb `&lo("$s1")`,$acc1
+ movzb `&lo("$s2")`,$acc2
- movzb `&hi($qs3)`,$acc0
- movzb `&hi($qs0)`,$acc1
- movzb `&lo($qs3)`,$acc2
+ movzb `&hi("$s3")`,$acc0
+ movzb `&hi("$s0")`,$acc1
+ movzb `&lo("$s3")`,$acc2
- movzb `&lo($qs2)`,$acc0
- movzb `&lo($qs3)`,$acc1
- movzb `&lo($qs0)`,$acc2
+ movzb `&lo("$s2")`,$acc0
+ movzb `&lo("$s3")`,$acc1
+ movzb `&lo("$s0")`,$acc2
- movzb `&hi($qs1)`,$acc0
- movzb `&hi($qs2)`,$acc1
- movzb `&lo($qs1)`,$acc2
+ movzb `&hi("$s1")`,$acc0
+ movzb `&hi("$s2")`,$acc1
+ movzb `&lo("$s1")`,$acc2
- movzb `&lo($qs0)`,$acc0
- movzb `&lo($qs1)`,$acc1
- movzb `&lo($qs2)`,$acc2
+ movzb `&lo("$s0")`,$acc0
+ movzb `&lo("$s1")`,$acc1
+ movzb `&lo("$s2")`,$acc2
mov 2048($sbox,$acc0,4),$t0
mov 2048($sbox,$acc1,4),$t1
mov 2048($sbox,$acc2,4),$t2
mov 2048($sbox,$acc0,4),$t0
mov 2048($sbox,$acc1,4),$t1
mov 2048($sbox,$acc2,4),$t2
- movzb `&lo($qs3)`,$acc0
- movzb `&hi($qs3)`,$acc1
- movzb `&hi($qs0)`,$acc2
+ movzb `&lo("$s3")`,$acc0
+ movzb `&hi("$s3")`,$acc1
+ movzb `&hi("$s0")`,$acc2
mov 2048($sbox,$acc0,4),$t3
mov 2048($sbox,$acc1,4),$acc1 #$t0
mov 2048($sbox,$acc2,4),$acc2 #$t1
mov 2048($sbox,$acc0,4),$t3
mov 2048($sbox,$acc1,4),$acc1 #$t0
mov 2048($sbox,$acc2,4),$acc2 #$t1
- movzb `&hi($qs1)`,$acc0
- movzb `&hi($qs2)`,$acc1
+ movzb `&hi("$s1")`,$acc0
+ movzb `&hi("$s2")`,$acc1
shr \$16,$s0
mov 2048($sbox,$acc0,4),$acc0 #$t2
mov 2048($sbox,$acc1,4),$acc1 #$t3
shr \$16,$s0
mov 2048($sbox,$acc0,4),$acc0 #$t2
mov 2048($sbox,$acc1,4),$acc1 #$t3
- movzb `&lo($qs2)`,$acc0
- movzb `&lo($qs3)`,$acc1
- movzb `&lo($qs0)`,$acc2
+ movzb `&lo("$s2")`,$acc0
+ movzb `&lo("$s3")`,$acc1
+ movzb `&lo("$s0")`,$acc2
mov 2048($sbox,$acc0,4),$acc0 #$t0
mov 2048($sbox,$acc1,4),$acc1 #$t1
mov 2048($sbox,$acc2,4),$acc2 #$t2
mov 2048($sbox,$acc0,4),$acc0 #$t0
mov 2048($sbox,$acc1,4),$acc1 #$t1
mov 2048($sbox,$acc2,4),$acc2 #$t2
- movzb `&lo($qs1)`,$acc0
- movzb `&hi($qs1)`,$acc1
- movzb `&hi($qs2)`,$acc2
+ movzb `&lo("$s1")`,$acc0
+ movzb `&hi("$s1")`,$acc1
+ movzb `&hi("$s2")`,$acc2
mov 2048($sbox,$acc0,4),$acc0 #$t3
mov 2048($sbox,$acc1,4),$acc1 #$t0
mov 2048($sbox,$acc2,4),$acc2 #$t1
mov 2048($sbox,$acc0,4),$acc0 #$t3
mov 2048($sbox,$acc1,4),$acc1 #$t0
mov 2048($sbox,$acc2,4),$acc2 #$t1
- movzb `&hi($qs3)`,$acc0
- movzb `&hi($qs0)`,$acc1
+ movzb `&hi("$s3")`,$acc0
+ movzb `&hi("$s0")`,$acc1
mov 16+12($key),$s3
mov 2048($sbox,$acc0,4),$acc0 #$t2
mov 2048($sbox,$acc1,4),$acc1 #$t3
mov 16+12($key),$s3
mov 2048($sbox,$acc0,4),$acc0 #$t2
mov 2048($sbox,$acc1,4),$acc1 #$t3