projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Combat [bogus] relocations in some assembler modules.
[openssl.git]
/
crypto
/
aes
/
asm
/
aes-586.pl
diff --git
a/crypto/aes/asm/aes-586.pl
b/crypto/aes/asm/aes-586.pl
index 7b75685025410170315bf13fadcbf4444f6d08eb..5eaced5082f92524346f975b3a942901a51f897f 100755
(executable)
--- a/
crypto/aes/asm/aes-586.pl
+++ b/
crypto/aes/asm/aes-586.pl
@@
-2,8
+2,9
@@
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Version 4.3.
# ====================================================================
#
# Version 4.3.
@@
-105,6
+106,7
@@
# P4 56[60] 84[100] 23
# AMD K8 48[44] 70[79] 18
# PIII 41[50] 61[91] 24
# P4 56[60] 84[100] 23
# AMD K8 48[44] 70[79] 18
# PIII 41[50] 61[91] 24
+# Core 2 32[38] 45[70] 18.5
# Pentium 120 160 77
#
# Version 4.1 switches to compact S-box even in key schedule setup.
# Pentium 120 160 77
#
# Version 4.1 switches to compact S-box even in key schedule setup.
@@
-117,8
+119,9
@@
#
# Version 4.3 implements switch between compact and non-compact block
# functions in AES_cbc_encrypt depending on how much data was asked
#
# Version 4.3 implements switch between compact and non-compact block
# functions in AES_cbc_encrypt depending on how much data was asked
-# to process in one stroke.
+# to be processed in one stroke.
#
#
+######################################################################
# Timing attacks are classified in two classes: synchronous when
# attacker consciously initiates cryptographic operation and collects
# timing data of various character afterwards, and asynchronous when
# Timing attacks are classified in two classes: synchronous when
# attacker consciously initiates cryptographic operation and collects
# timing data of various character afterwards, and asynchronous when
@@
-141,7
+144,7
@@
# timing. But note that *if* plain-text was concealed in such way that
# input to block function is distributed *uniformly*, then attack
# wouldn't apply. Now note that some encryption modes, most notably
# timing. But note that *if* plain-text was concealed in such way that
# input to block function is distributed *uniformly*, then attack
# wouldn't apply. Now note that some encryption modes, most notably
-# CBC, do masks the plain-text in this exact way [secure cipher output
+# CBC, do mask the plain-text in this exact way [secure cipher output
# is distributed uniformly]. Yes, one still might find input that
# would reveal the information about given key, but if amount of
# candidate inputs to be tried is larger than amount of possible key
# is distributed uniformly]. Yes, one still might find input that
# would reveal the information about given key, but if amount of
# candidate inputs to be tried is larger than amount of possible key
@@
-183,7
+186,8
@@
# Current implementation accesses *all* cache-lines within ~50 cycles
# window, which is actually *less* than RDTSC latency on Intel P4!
# Current implementation accesses *all* cache-lines within ~50 cycles
# window, which is actually *less* than RDTSC latency on Intel P4!
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
require "x86asm.pl";
&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
@@
-473,11
+477,10
@@
sub enctransform()
&mov ($acc,$s[$i]);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&mov ($acc,$s[$i]);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($r2,$s[$i]);
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($r2,0x7f7f7f7f
);
+ &
lea ($r2,&DWP(0,$s[$i],$s[$i])
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &
lea ($r2,&DWP(0,$r2,$r2)
);
+ &
and ($r2,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&mov ($tmp,$s[$i]);
&xor ($acc,$r2); # r2
&and ($acc,0x1b1b1b1b);
&mov ($tmp,$s[$i]);
&xor ($acc,$r2); # r2
@@
-1272,54
+1275,51
@@
sub dectransform()
&mov ($acc,$s[$i]);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&mov ($acc,$s[$i]);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp2,$s[$i]);
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($tp2,0x7f7f7f7f
);
+ &
lea ($tp2,&DWP(0,$s[$i],$s[$i])
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp2,$tp2
);
+ &a
nd ($tp2,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&xor ($acc,$tp2);
&mov ($tp2,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&and ($acc,0x1b1b1b1b);
&xor ($acc,$tp2);
&mov ($tp2,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp4,$tp2);
- &xor ($tp2,$s[$i]); # tp2^tp1
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($tp4,0x7f7f7f7f
);
+ &
lea ($tp4,&DWP(0,$tp2,$tp2)
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp4,$tp4
);
+ &a
nd ($tp4,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&and ($acc,0x1b1b1b1b);
+ &xor ($tp2,$s[$i]); # tp2^tp1
&xor ($acc,$tp4);
&mov ($tp4,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&xor ($acc,$tp4);
&mov ($tp4,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp8,$tp4);
- &xor ($tp4,$s[$i]); # tp4^tp1
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($tp8,0x7f7f7f7f
);
+ &
lea ($tp8,&DWP(0,$tp4,$tp4)
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp8,$tp8
);
+ &a
nd ($tp8,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&and ($acc,0x1b1b1b1b);
+ &xor ($tp4,$s[$i]); # tp4^tp1
&rotl ($s[$i],8); # = ROTATE(tp1,8)
&xor ($tp8,$acc);
&xor ($s[$i],$tp2);
&xor ($tp2,$tp8);
&rotl ($s[$i],8); # = ROTATE(tp1,8)
&xor ($tp8,$acc);
&xor ($s[$i],$tp2);
&xor ($tp2,$tp8);
- &xor ($s[$i],$tp4);
&rotl ($tp2,24);
&rotl ($tp2,24);
+ &xor ($s[$i],$tp4);
&xor ($tp4,$tp8);
&xor ($tp4,$tp8);
- &xor ($s[$i],$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1)
&rotl ($tp4,16);
&rotl ($tp4,16);
- &xor ($s[$i],$tp
2); # ^= ROTATE(tp8^tp2^tp1,24
)
+ &xor ($s[$i],$tp
8); # ^= tp8^(tp4^tp1)^(tp2^tp1
)
&rotl ($tp8,8);
&rotl ($tp8,8);
+ &xor ($s[$i],$tp2); # ^= ROTATE(tp8^tp2^tp1,24)
&xor ($s[$i],$tp4); # ^= ROTATE(tp8^tp4^tp1,16)
&xor ($s[$i],$tp4); # ^= ROTATE(tp8^tp4^tp1,16)
+ &mov ($s[0],$__s0) if($i==2); #prefetch $s0
+ &mov ($s[1],$__s1) if($i==3); #prefetch $s1
+ &mov ($s[2],$__s2) if($i==1);
&xor ($s[$i],$tp8); # ^= ROTATE(tp8,8)
&xor ($s[$i],$tp8); # ^= ROTATE(tp8,8)
- &mov ($s[0],$__s0) if($i==2); #prefetch $s0
- &mov ($s[1],$__s1) if($i==3); #prefetch $s1
- &mov ($s[2],$__s2) if($i==1);
&mov ($s[3],$__s3) if($i==1);
&mov (&DWP(4+4*$i,"esp"),$s[$i]) if($i>=2);
}
&mov ($s[3],$__s3) if($i==1);
&mov (&DWP(4+4*$i,"esp"),$s[$i]) if($i>=2);
}
@@
-2459,7
+2459,7
@@
my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
&pushf (); # kludge, never executed
&set_label("slow_enc_tail",16);
&pushf (); # kludge, never executed
&set_label("slow_enc_tail",16);
- &emms ();
+ &emms ()
if (!$x86only)
;
&mov ($key eq "edi"? $key:"",$s3); # load out to edi
&mov ($s1,16);
&sub ($s1,$s2);
&mov ($key eq "edi"? $key:"",$s3); # load out to edi
&mov ($s1,16);
&sub ($s1,$s2);
@@
-2657,12
+2657,10
@@
sub enckey()
&xor ("eax",&DWP(1024-128,$tbl,"ecx",4)); # rcon
}
&xor ("eax",&DWP(1024-128,$tbl,"ecx",4)); # rcon
}
-# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-# AES_KEY *key)
&public_label("AES_Te");
&public_label("AES_Te");
-&function_begin("AES_set_encrypt_key");
- &mov ("esi",&wparam(
0
)); # user supplied key
- &mov ("edi",&wparam(
2
)); # private key schedule
+&function_begin("
_x86_
AES_set_encrypt_key");
+ &mov ("esi",&wparam(
1
)); # user supplied key
+ &mov ("edi",&wparam(
3
)); # private key schedule
&test ("esi",-1);
&jz (&label("badpointer"));
&test ("esi",-1);
&jz (&label("badpointer"));
@@
-2685,7
+2683,7
@@
sub enckey()
&mov ("ecx",&DWP(192-128,$tbl));
&mov ("edx",&DWP(224-128,$tbl));
&mov ("ecx",&DWP(192-128,$tbl));
&mov ("edx",&DWP(224-128,$tbl));
- &mov ("ecx",&wparam(
1
)); # number of bits in key
+ &mov ("ecx",&wparam(
2
)); # number of bits in key
&cmp ("ecx",128);
&je (&label("10rounds"));
&cmp ("ecx",192);
&cmp ("ecx",128);
&je (&label("10rounds"));
&cmp ("ecx",192);
@@
-2862,7
+2860,14
@@
sub enckey()
&set_label("badpointer");
&mov ("eax",-1);
&set_label("exit");
&set_label("badpointer");
&mov ("eax",-1);
&set_label("exit");
-&function_end("AES_set_encrypt_key");
+&function_end("_x86_AES_set_encrypt_key");
+
+# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+# AES_KEY *key)
+&function_begin_B("AES_set_encrypt_key");
+ &call ("_x86_AES_set_encrypt_key");
+ &ret ();
+&function_end_B("AES_set_encrypt_key");
sub deckey()
{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
sub deckey()
{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
@@
-2871,35
+2876,32
@@
sub deckey()
&mov ($acc,$tp1);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&mov ($acc,$tp1);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp2,$tp1);
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($tp2,0x7f7f7f7f
);
+ &
lea ($tp2,&DWP(0,$tp1,$tp1)
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp2,$tp2
);
+ &a
nd ($tp2,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&xor ($acc,$tp2);
&mov ($tp2,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&and ($acc,0x1b1b1b1b);
&xor ($acc,$tp2);
&mov ($tp2,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp4,$tp2);
- &xor ($tp2,$tp1); # tp2^tp1
&shr ($tmp,7);
&shr ($tmp,7);
- &
and ($tp4,0x7f7f7f7f
);
+ &
lea ($tp4,&DWP(0,$tp2,$tp2)
);
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp4,$tp4
);
+ &a
nd ($tp4,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&and ($acc,0x1b1b1b1b);
+ &xor ($tp2,$tp1); # tp2^tp1
&xor ($acc,$tp4);
&mov ($tp4,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
&xor ($acc,$tp4);
&mov ($tp4,$acc);
&and ($acc,0x80808080);
&mov ($tmp,$acc);
- &mov ($tp8,$tp4);
- &xor ($tp4,$tp1); # tp4^tp1
&shr ($tmp,7);
&shr ($tmp,7);
- &and ($tp8,0x7f7f7f7f);
+ &lea ($tp8,&DWP(0,$tp4,$tp4));
+ &xor ($tp4,$tp1); # tp4^tp1
&sub ($acc,$tmp);
&sub ($acc,$tmp);
- &a
dd ($tp8,$tp8
);
+ &a
nd ($tp8,0xfefefefe
);
&and ($acc,0x1b1b1b1b);
&rotl ($tp1,8); # = ROTATE(tp1,8)
&xor ($tp8,$acc);
&and ($acc,0x1b1b1b1b);
&rotl ($tp1,8); # = ROTATE(tp1,8)
&xor ($tp8,$acc);
@@
-2924,18
+2926,8
@@
sub deckey()
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
-&public_label("AES_Td");
-&public_label("AES_Te");
&function_begin_B("AES_set_decrypt_key");
&function_begin_B("AES_set_decrypt_key");
- &mov ("eax",&wparam(0));
- &mov ("ecx",&wparam(1));
- &mov ("edx",&wparam(2));
- &sub ("esp",12);
- &mov (&DWP(0,"esp"),"eax");
- &mov (&DWP(4,"esp"),"ecx");
- &mov (&DWP(8,"esp"),"edx");
- &call ("AES_set_encrypt_key");
- &add ("esp",12);
+ &call ("_x86_AES_set_encrypt_key");
&cmp ("eax",0);
&je (&label("proceed"));
&ret ();
&cmp ("eax",0);
&je (&label("proceed"));
&ret ();
@@
-2991,5
+2983,6
@@
sub deckey()
&xor ("eax","eax"); # return success
&function_end("AES_set_decrypt_key");
&xor ("eax","eax"); # return success
&function_end("AES_set_decrypt_key");
+&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
&asm_finish();