X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Faes%2Fasm%2Faes-586.pl;h=e279420936662db62f07c49c3194051b00455cb9;hp=c1206238819a34a226256fbd24b0cbc8cdac0f71;hb=4e28f13209134d0466ed0b06751e700be1c82d5e;hpb=e19e54904156b3c79d0325b4743500eacf80d864 diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl index c120623881..e279420936 100755 --- a/crypto/aes/asm/aes-586.pl +++ b/crypto/aes/asm/aes-586.pl @@ -6,7 +6,7 @@ # forms are granted according to the OpenSSL license. # ==================================================================== # -# Version 3.4. +# Version 3.5. # # You might fail to appreciate this module performance from the first # try. If compared to "vanilla" linux-ia32-icc target, i.e. considered @@ -66,6 +66,11 @@ # stack. This unfortunately has rather strong impact on small block CBC # performance, ~2x deterioration on 16-byte block if compared to 3.3. # +# Version 3.5 checks whether there is L1 cache aliasing between the user-supplied +# key schedule and the S-boxes and abstains from copying the former if +# there is none. This allows the end user to deliberately retain small-block +# performance by aligning the key schedule in a specific manner. +# # Current ECB performance numbers for 128-bit key in CPU cycles per # processed byte [measure commonly used by AES benchmarkers] are: # @@ -805,6 +810,7 @@ my $_ivp=&DWP(36,"esp"); #copy of wparam(4) my $_tmp=&DWP(40,"esp"); #volatile variable my $ivec=&DWP(44,"esp"); #ivec[16] my $aes_key=&DWP(60,"esp"); #copy of aes_key +my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds &public_label("AES_Te"); &public_label("AES_Td"); @@ -865,18 +871,27 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &mov ($_key,$s3); # save copy of key &mov ($_ivp,$acc); # save copy of ivp + &mov ($mark,0); # copy of aes_key->rounds = 0; if ($compromise) { &cmp ($s2,$compromise); &jb (&label("skip_ecopy")); } - # copy key schedule to stack - &mov ("ecx",244/4); + # do we copy key schedule to stack?
+ &mov ($s1 eq "ebx" ? $s1 : "",$s3); + &mov ($s2 eq "ecx" ? $s2 : "",244/4); + &sub ($s1,"ebp"); &mov ("esi",$s3); + &and ($s1,0xfff); &lea ("edi",$aes_key); - &mov ($_key,"edi"); + &cmp ($s1,2048); + &jb (&label("do_ecopy")); + &cmp ($s1,4096-244); + &jb (&label("skip_ecopy")); &align (4); - &data_word(0xF689A5F3); # rep movsd - &set_label("skip_ecopy") if ($compromise); + &set_label("do_ecopy"); + &mov ($_key,"edi"); + &data_word(0xA5F3F689); # rep movsd + &set_label("skip_ecopy"); &mov ($acc,$s0); &mov ($key,16); @@ -942,18 +957,16 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &mov (&DWP(8,$acc),$s2); &mov (&DWP(12,$acc),$s3); + &cmp ($mark,0); # was the key schedule copied? &mov ("edi",$_key); &mov ("esp",$_esp); - if ($compromise) { - &cmp (&wparam(2),$compromise); - &jb (&label("skip_ezero")); - } + &je (&label("skip_ezero")); # zero copy of key schedule &mov ("ecx",240/4); &xor ("eax","eax"); &align (4); - &data_word(0xF689ABF3); # rep stosd - &set_label("skip_ezero") if ($compromise); + &data_word(0xABF3F689); # rep stosd + &set_label("skip_ezero") &popf (); &set_label("enc_out"); &function_end_A(); @@ -968,7 +981,7 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &cmp ($key,$acc); # compare with inp &je (&label("enc_in_place")); &align (4); - &data_word(0xF689A4F3); # rep movsb # copy input + &data_word(0xA4F3F689); # rep movsb # copy input &jmp (&label("enc_skip_in_place")); &set_label("enc_in_place"); &lea ($key,&DWP(0,$key,$s2)); @@ -976,7 +989,7 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &mov ($s2,$s1); &xor ($s0,$s0); &align (4); - &data_word(0xF689AAF3); # rep stosb # zero tail + &data_word(0xAAF3F689); # rep stosb # zero tail &pop ($key); # pop ivp &mov ($acc,$_out); # output as input @@ -1030,18 +1043,27 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &mov ($_key,$s3); # save copy of key &mov ($_ivp,$acc); # save copy of ivp + &mov ($mark,0); # copy of aes_key->rounds = 0; if ($compromise) { &cmp ($s2,$compromise); &jb 
(&label("skip_dcopy")); } - # copy key schedule to stack - &mov ("ecx",244/4); + # do we copy key schedule to stack? + &mov ($s1 eq "ebx" ? $s1 : "",$s3); + &mov ($s2 eq "ecx" ? $s2 : "",244/4); + &sub ($s1,"ebp"); &mov ("esi",$s3); + &and ($s1,0xfff); &lea ("edi",$aes_key); - &mov ($_key,"edi"); + &cmp ($s1,3072); + &jb (&label("do_dcopy")); + &cmp ($s1,4096-244); + &jb (&label("skip_dcopy")); &align (4); - &data_word(0xF689A5F3); # rep movsd - &set_label("skip_dcopy") if ($compromise); + &set_label("do_dcopy"); + &mov ($_key,"edi"); + &data_word(0xA5F3F689); # rep movsd + &set_label("skip_dcopy"); &mov ($acc,$s0); &mov ($key,24); @@ -1121,7 +1143,7 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc)); &mov ($acc eq "esi" ? $acc : "",$key); &mov ($key eq "edi" ? $key : "",$_out); # load out - &data_word(0xF689A4F3); # rep movsb # copy output + &data_word(0xA4F3F689); # rep movsb # copy output &mov ($key,$_inp); # use inp as temp ivp &jmp (&label("dec_end")); @@ -1188,22 +1210,20 @@ my $aes_key=&DWP(60,"esp"); #copy of aes_key &lea ($key,&DWP(0,$key,$s2)); &lea ($acc,&DWP(16,$acc,$s2)); &neg ($s2 eq "ecx" ? $s2 : ""); - &data_word(0xF689A4F3); # rep movsb # restore tail + &data_word(0xA4F3F689); # rep movsb # restore tail &align (4); &set_label("dec_out"); + &cmp ($mark,0); # was the key schedule copied? &mov ("edi",$_key); &mov ("esp",$_esp); - if ($compromise) { - &cmp (&wparam(2),$compromise); - &jb (&label("skip_dzero")); - } + &je (&label("skip_dzero")); # zero copy of key schedule &mov ("ecx",240/4); &xor ("eax","eax"); &align (4); - &data_word(0xF689ABF3); # rep stosd - &set_label("skip_dzero") if ($compromise); + &data_word(0xABF3F689); # rep stosd + &set_label("skip_dzero") &popf (); &function_end("AES_cbc_encrypt"); }