+&function_end_B("_x86_AES_decrypt");
+
+# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
+&public_label("AES_Td"); # table label used by the PIC address computation below
+&function_begin("AES_decrypt"); # wrapper: read 16 bytes at inp, call _x86_AES_decrypt, store 16 bytes at out
+ &mov ($acc,&wparam(0)); # load inp
+ &mov ($key,&wparam(2)); # load key
+# position-independent lookup of AES_Td: call/pop obtains the address of pic_point
+ &call (&label("pic_point")); # make it PIC!
+ &set_label("pic_point");
+ &blindpop("ebp"); # presumably pops the return address (pic_point) into ebp
+ &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp")); # ebp += AES_Td-pic_point, i.e. ebp = address of AES_Td
+
+ &mov ($s0,&DWP(0,$acc)); # load input data
+ &mov ($s1,&DWP(4,$acc)); # block is held as four 32-bit words in $s0..$s3
+ &mov ($s2,&DWP(8,$acc));
+ &mov ($s3,&DWP(12,$acc));
+
+ &call ("_x86_AES_decrypt"); # result is read back from $s0..$s3; $key and ebp were set up above, presumably as its inputs
+
+ &mov ($acc,&wparam(1)); # load out
+ &mov (&DWP(0,$acc),$s0); # write output data
+ &mov (&DWP(4,$acc),$s1);
+ &mov (&DWP(8,$acc),$s2);
+ &mov (&DWP(12,$acc),$s3);
+&function_end("AES_decrypt");
+
+# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivp,const int enc);
+&public_label("AES_Te"); # table label rebased into ebp on the encrypt path
+&public_label("AES_Td"); # table label rebased into ebp on the DECRYPT path
+&function_begin("AES_cbc_encrypt"); # CBC driver over 16-byte blocks; enc==0 branches to DECRYPT
+ &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len; operand is empty unless $s2 is ecx (rep movsb below counts in ecx)
+ &cmp ($s2,0);
+ &je (&label("enc_out")); # zero length: nothing to do
+
+ &call (&label("pic_point")); # make it PIC!
+ &set_label("pic_point");
+ &blindpop("ebp"); # presumably pops the return address (pic_point) into ebp
+
+ &cmp (&wparam(5),0); # enc argument
+ &je (&label("DECRYPT")); # enc==0 selects decryption
+
+ &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp")); # ebp = address of AES_Te
+
+ &mov ($acc,&wparam(0)); # load inp
+ &mov ($key,&wparam(4)); # load ivp
+
+ &test ($s2,~15); # any bit above the low four set, i.e. len >= 16?
+ &jz (&label("enc_tail")); # short input...
+
+ &mov ($s0,&DWP(0,$key)); # load iv
+ &mov ($s1,&DWP(4,$key)); # upper two iv words are loaded at the top of enc_loop
+# enc_loop: $key points at the chaining value (ivp at first, then the previous output block)
+ &align (4);
+ &set_label("enc_loop");
+ &mov ($s2,&DWP(8,$key)); # $s0/$s1 are carried in registers, $s2/$s3 are reloaded from memory
+ &mov ($s3,&DWP(12,$key));
+
+ &xor ($s0,&DWP(0,$acc)); # xor input data
+ &xor ($s1,&DWP(4,$acc));
+ &xor ($s2,&DWP(8,$acc));
+ &xor ($s3,&DWP(12,$acc));
+
+ &mov ($key,&wparam(3)); # load key
+ &call ("_x86_AES_encrypt"); # encrypted block is read back from $s0..$s3
+
+ &mov ($acc,&wparam(0)); # load inp
+ &mov ($key,&wparam(1)); # load out
+
+ &mov (&DWP(0,$key),$s0); # save output data
+ &mov (&DWP(4,$key),$s1);
+ &mov (&DWP(8,$key),$s2);
+ &mov (&DWP(12,$key),$s3);
+
+ &mov ($s2,&wparam(2)); # load len
+
+ &lea ($acc,&DWP(16,$acc)); # inp += 16
+ &mov (&wparam(0),$acc); # save inp
+
+ &lea ($s3,&DWP(16,$key)); # out+16 goes through $s3 so $key keeps pointing at the block just written
+ &mov (&wparam(1),$s3); # save out
+
+ &sub ($s2,16);
+ &test ($s2,~15); # at least one more full block left?
+ &mov (&wparam(2),$s2); # save len
+ &jnz (&label("enc_loop")); # consumes the flags of the test above (mov leaves flags alone)
+ &test ($s2,15); # 1..15 bytes left?
+ &jnz (&label("enc_tail"));
+ &mov ($acc,&wparam(4)); # load ivp
+ &mov ($s2,&DWP(8,$key)); # restore last dwords
+ &mov ($s3,&DWP(12,$key)); # ($s2/$s3 were reused for len/out above; $key is the last output block)
+ &mov (&DWP(0,$acc),$s0); # save iv
+ &mov (&DWP(4,$acc),$s1);
+ &mov (&DWP(8,$acc),$s2);
+ &mov (&DWP(12,$acc),$s3);
+ &set_label("enc_out");
+ &function_end_A(); # presumably emits the return sequence here; the function body continues below
+# enc_tail: fewer than 16 bytes remain; build a zero-padded block in out and encrypt it in place
+ &align (4);
+ &set_label("enc_tail");
+ &push ($key eq "edi" ? $key : ""); # push ivp
+ &pushf (); # preserve flags (the byte sequence below starts with cld)
+ &mov ($key,&wparam(1)); # load out
+ &xor ($s0,$s0);
+ &mov (&DWP(0,$key),$s0); # zero output
+ &mov (&DWP(4,$key),$s0);
+ &mov (&DWP(8,$key),$s0);
+ &mov (&DWP(12,$key),$s0);
+ &data_word(0x90A4F3FC); # cld; rep movsb; nop # copy input; relies on $acc/$key/$s2 being esi/edi/ecx (src/dst/count)
+ &popf ();
+ &pop ($key); # pop ivp
+
+ &mov ($acc,&wparam(1)); # output as input
+ &mov ($s0,&DWP(0,$key)); # reload the lower chaining words, since $s0 was zeroed above
+ &mov ($s1,&DWP(4,$key));
+ &mov (&wparam(2),16); # len=16
+ &jmp (&label("enc_loop")); # one more spin...
+
+#----------------------------- DECRYPT -----------------------------#
+&align (4);
+&set_label("DECRYPT");
+ &stack_push(5); # allocate temp + ivp; swtmp(0..3) is a 16-byte scratch block, swtmp(4) holds an iv pointer
+
+ &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp")); # ebp = address of AES_Td
+
+ &mov ($acc,&wparam(0)); # load inp
+ &cmp ($acc,&wparam(1)); # inp == out?
+ &je (&label("dec_in_place")); # in-place processing...
+
+ &mov ($key,&wparam(4)); # load ivp
+ &mov (&swtmp(4),$key); # first chaining value is the caller's iv
+# dec_loop: out-of-place path; the chaining value is read through the pointer kept in swtmp(4)
+ &align (4);
+ &set_label("dec_loop");
+ &mov ($s0,&DWP(0,$acc)); # read input
+ &mov ($s1,&DWP(4,$acc));
+ &mov ($s2,&DWP(8,$acc));
+ &mov ($s3,&DWP(12,$acc));
+
+ &mov ($key,&wparam(3)); # load key
+ &call ("_x86_AES_decrypt"); # decrypted block is read back from $s0..$s3
+
+ &mov ($key,&swtmp(4)); # load ivp
+ &mov ($acc,&wparam(2)); # load len
+ &xor ($s0,&DWP(0,$key)); # xor iv
+ &xor ($s1,&DWP(4,$key));
+ &xor ($s2,&DWP(8,$key));
+ &xor ($s3,&DWP(12,$key));
+
+ &sub ($acc,16);
+ &jc (&label("dec_partial")); # borrow: fewer than 16 bytes were left
+ &mov (&wparam(2),$acc); # save len
+ &mov ($acc,&wparam(0)); # load inp
+ &mov ($key,&wparam(1)); # load out
+
+ &mov (&DWP(0,$key),$s0); # write output
+ &mov (&DWP(4,$key),$s1);
+ &mov (&DWP(8,$key),$s2);
+ &mov (&DWP(12,$key),$s3);
+
+ &mov (&swtmp(4),$acc); # save ivp; the input block just consumed is the next chaining value
+ &lea ($acc,&DWP(16,$acc));
+ &mov (&wparam(0),$acc); # save inp
+
+ &lea ($key,&DWP(16,$key));
+ &mov (&wparam(1),$key); # save out
+
+ &jnz (&label("dec_loop")); # still the flags of the sub above: only mov/lea were issued since
+ &mov ($key,&swtmp(4)); # load temp ivp
+ &set_label("dec_end");
+ &mov ($acc,&wparam(4)); # load user ivp
+ &mov ($s0,&DWP(0,$key)); # load iv
+ &mov ($s1,&DWP(4,$key));
+ &mov ($s2,&DWP(8,$key));
+ &mov ($s3,&DWP(12,$key));
+ &mov (&DWP(0,$acc),$s0); # copy back to user
+ &mov (&DWP(4,$acc),$s1);
+ &mov (&DWP(8,$acc),$s2);
+ &mov (&DWP(12,$acc),$s3);
+ &jmp (&label("dec_out"));
+# dec_partial: entered with $acc = len-16 (negative); only the first len bytes of the block go to out
+ &align (4);
+ &set_label("dec_partial");
+ &lea ($key,&swtmp(0)); # address of the scratch block
+ &mov (&DWP(0,$key),$s0); # dump output to stack
+ &mov (&DWP(4,$key),$s1);
+ &mov (&DWP(8,$key),$s2);
+ &mov (&DWP(12,$key),$s3);
+ &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc)); # count = (len-16)+16 = remaining len
+ &mov ($acc eq "esi" ? $acc : "",$key); # source = scratch block
+ &mov ($key eq "edi" ? $key : "",&wparam(1)); # destination = out
+ &pushf (); # preserve flags (the byte sequence below starts with cld)
+ &data_word(0x90A4F3FC); # cld; rep movsb; nop # copy output
+ &popf ();
+ &mov ($key,&wparam(0)); # load temp ivp; inp was not advanced on this path, so it is the current input block
+ &jmp (&label("dec_end"));
+# dec_in_place: inp == out, so each input block is copied to the scratch area before it is overwritten
+ &align (4);
+ &set_label("dec_in_place");
+ &set_label("dec_in_place_loop");
+ &lea ($key,&swtmp(0)); # address of the scratch block
+ &mov ($s0,&DWP(0,$acc)); # read input
+ &mov ($s1,&DWP(4,$acc));
+ &mov ($s2,&DWP(8,$acc));
+ &mov ($s3,&DWP(12,$acc));
+
+ &mov (&DWP(0,$key),$s0); # copy to temp
+ &mov (&DWP(4,$key),$s1);
+ &mov (&DWP(8,$key),$s2);
+ &mov (&DWP(12,$key),$s3);
+
+ &mov ($key,&wparam(3)); # load key
+ &call ("_x86_AES_decrypt"); # decrypted block is read back from $s0..$s3
+
+ &mov ($key,&wparam(4)); # load ivp
+ &mov ($acc,&wparam(1)); # load out
+ &xor ($s0,&DWP(0,$key)); # xor iv
+ &xor ($s1,&DWP(4,$key));
+ &xor ($s2,&DWP(8,$key));
+ &xor ($s3,&DWP(12,$key));
+
+ &mov (&DWP(0,$acc),$s0); # write output
+ &mov (&DWP(4,$acc),$s1); # all 16 bytes are stored even when len < 16; dec_in_place_partial undoes the excess
+ &mov (&DWP(8,$acc),$s2);
+ &mov (&DWP(12,$acc),$s3);
+
+ &lea ($acc,&DWP(16,$acc));
+ &mov (&wparam(1),$acc); # save out
+
+ &lea ($acc,&swtmp(0));
+ &mov ($s0,&DWP(0,$acc)); # read temp
+ &mov ($s1,&DWP(4,$acc));
+ &mov ($s2,&DWP(8,$acc));
+ &mov ($s3,&DWP(12,$acc));
+
+ &mov (&DWP(0,$key),$s0); # copy iv; the caller's iv buffer is updated after every block on this path
+ &mov (&DWP(4,$key),$s1);
+ &mov (&DWP(8,$key),$s2);
+ &mov (&DWP(12,$key),$s3);
+
+ &mov ($acc,&wparam(0)); # load inp
+
+ &lea ($acc,&DWP(16,$acc));
+ &mov (&wparam(0),$acc); # save inp
+
+ &mov ($s2,&wparam(2)); # load len
+ &sub ($s2,16);
+ &jc (&label("dec_in_place_partial")); # borrow: fewer than 16 bytes were left
+ &mov (&wparam(2),$s2); # save len
+ &jnz (&label("dec_in_place_loop")); # flags of the sub above (mov leaves flags alone)
+ &jmp (&label("dec_out"));
+# dec_in_place_partial: entered with $s2 = len-16 (negative) and the saved out already advanced by 16
+ &align (4);
+ &set_label("dec_in_place_partial");
+ # one can argue if this is actually required...
+ &mov ($key eq "edi" ? $key : "",&wparam(1)); # destination base = out+16
+ &lea ($acc eq "esi" ? $acc : "",&swtmp(0)); # source base = saved input block
+ &lea ($key,&DWP(0,$key,$s2)); # destination = first byte past the len valid output bytes
+ &lea ($acc,&DWP(16,$acc,$s2)); # source = same offset (len) inside the saved block
+ &neg ($s2 eq "ecx" ? $s2 : ""); # count = 16-len
+ &pushf (); # preserve flags (the byte sequence below starts with cld)
+ &data_word(0x90A4F3FC); # cld; rep movsb; nop # restore tail
+ &popf ();
+ &set_label("dec_out");
+ &stack_pop(5); # release the five dwords allocated at DECRYPT
+&function_end("AES_cbc_encrypt");
+
+#------------------------------------------------------------------#