2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
40 * 6. Redistributions of any form whatsoever must retain the following
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
69 #include <openssl/opensslconf.h>
70 #include <openssl/crypto.h>
71 #include <openssl/dso.h>
72 #include <openssl/engine.h>
73 #include <openssl/evp.h>
74 #ifndef OPENSSL_NO_AES
75 #include <openssl/aes.h>
77 #include <openssl/rand.h>
78 #include <openssl/err.h>
81 #ifndef OPENSSL_NO_HW_PADLOCK
/* NOTE(review): this region is truncated by extraction — original line
 * numbers are baked into each line and the matching #endif directives
 * are missing. Restore from upstream engines/e_padlock.c before building. */
83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
86 # define DYNAMIC_ENGINE
88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89 # ifdef ENGINE_DYNAMIC_SUPPORT
90 # define DYNAMIC_ENGINE
93 # error "Only OpenSSL >= 0.9.7 is supported"
96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
97 Not only that it doesn't exist elsewhere, but it
98 even can't be compiled on other platforms!
100 In addition, because of the heavy use of inline assembler,
101 compiler choice is limited to GCC and Microsoft C. */
102 #undef COMPILE_HW_PADLOCK
103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 (defined(_MSC_VER) && defined(_M_IX86))
106 # define COMPILE_HW_PADLOCK
/* NOTE(review): truncated fragment — the body of ENGINE_load_padlock
 * (ENGINE_add/ENGINE_free/ERR_clear_error and closing brace) is missing
 * from this extraction; compare against upstream e_padlock.c. */
110 #ifdef OPENSSL_NO_DYNAMIC_ENGINE
111 static ENGINE *ENGINE_padlock (void);
113 void ENGINE_load_padlock (void)
115 /* On non-x86 CPUs it just returns. */
116 #ifdef COMPILE_HW_PADLOCK
117 ENGINE *toadd = ENGINE_padlock ();
/* NOTE(review): forward declarations and feature flags for the engine.
 * Several lines (MSC include of <malloc.h>, #endif pairs, prototypes for
 * padlock_finish/padlock_ctrl) are missing from this extraction. */
127 #ifdef COMPILE_HW_PADLOCK
128 /* We do these includes here to avoid header problems on platforms that
129 do not have the VIA padlock anyway... */
134 # define alloca _alloca
136 #elif defined(__GNUC__)
138 # define alloca(s) __builtin_alloca(s)
142 /* Function for ENGINE detection and control */
143 static int padlock_available(void);
144 static int padlock_init(ENGINE *e);
147 static RAND_METHOD padlock_rand;
150 #ifndef OPENSSL_NO_AES
151 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
155 static const char *padlock_id = "padlock";
156 static char padlock_name[100];
158 /* Available features */
159 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
160 static int padlock_use_rng = 0; /* Random Number Generator */
161 #ifndef OPENSSL_NO_AES
162 static int padlock_aes_align_required = 1;
/* NOTE(review): padlock_bind_helper and ENGINE_padlock are both truncated
 * here — return type lines, braces, the padlock_available() call and the
 * error-return path are missing. Do not edit logic without restoring the
 * full upstream bodies first. */
165 /* ===== Engine "management" functions ===== */
167 /* Prepare the ENGINE structure for registration */
169 padlock_bind_helper(ENGINE *e)
171 /* Check available features */
174 #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
178 /* Generate a nice engine name with available features */
179 BIO_snprintf(padlock_name, sizeof(padlock_name),
180 "VIA PadLock (%s, %s)",
181 padlock_use_rng ? "RNG" : "no-RNG",
182 padlock_use_ace ? "ACE" : "no-ACE");
184 /* Register everything or return with an error */
185 if (!ENGINE_set_id(e, padlock_id) ||
186 !ENGINE_set_name(e, padlock_name) ||
188 !ENGINE_set_init_function(e, padlock_init) ||
189 #ifndef OPENSSL_NO_AES
190 (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
192 (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
196 /* Everything looks good */
200 #ifdef OPENSSL_NO_DYNAMIC_ENGINE
206 ENGINE *eng = ENGINE_new();
212 if (!padlock_bind_helper(eng)) {
222 /* Check availability of the engine */
224 padlock_init(ENGINE *e)
226 return (padlock_use_rng || padlock_use_ace);
/* NOTE(review): dynamic-engine binding glue; padlock_bind_fn's return
 * statements and closing brace are missing from this extraction. */
229 /* This stuff is needed if this ENGINE is being compiled into a self-contained
232 #ifdef DYNAMIC_ENGINE
234 padlock_bind_fn(ENGINE *e, const char *id)
236 if (id && (strcmp(id, padlock_id) != 0)) {
240 if (!padlock_bind_helper(e)) {
247 IMPLEMENT_DYNAMIC_CHECK_FN()
248 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
249 #endif /* DYNAMIC_ENGINE */
/* NOTE(review): struct padlock_cipher_data is truncated — some bit-fields
 * (rounds, ksize, algo, intermediate results) and the closing brace are
 * missing. The inline assembler addresses fields by fixed offsets
 * (iv at +0, cword at +16, ks at +32), so the layout must not change. */
251 /* ===== Here comes the "real" engine ===== */
253 #ifndef OPENSSL_NO_AES
254 /* Some AES-related constants */
255 #define AES_BLOCK_SIZE 16
256 #define AES_KEY_SIZE_128 16
257 #define AES_KEY_SIZE_192 24
258 #define AES_KEY_SIZE_256 32
260 /* Here we store the status information relevant to the
263 * Inline assembler in PADLOCK_XCRYPT_ASM()
264 * depends on the order of items in this structure.
265 * Don't blindly modify, reorder, etc!
267 struct padlock_cipher_data
269 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
270 union { unsigned int pad[4];
273 int dgst:1; /* n/a in C3 */
274 int align:1; /* n/a in C3 */
275 int ciphr:1; /* n/a in C3 */
276 unsigned int keygen:1;
278 unsigned int encdec:1;
281 } cword; /* Control word */
282 AES_KEY ks; /* Encryption key */
286 * Essentially this variable belongs in thread local storage.
287 * Having this variable global on the other hand can only cause
288 * few bogus key reloads [if any at all on single-CPU system],
289 * so we accept the penalty...
291 static volatile struct padlock_cipher_data *padlock_saved_context;
/* NOTE(review): padlock_insn_cpuid_available is truncated — the return
 * type, braces, the local 'result' variable and the pushf/popf half of
 * the asm template are missing. The visible half toggles EFLAGS bit 21;
 * result==0 then means CPUID exists. */
295 * =======================================================
296 * Inline assembler section(s).
297 * =======================================================
298 * Order of arguments is chosen to facilitate Windows port
299 * using __fastcall calling convention. If you wish to add
300 * more routines, keep in mind that first __fastcall
301 * argument is passed in %ecx and second - in %edx.
302 * =======================================================
304 #if defined(__GNUC__) && __GNUC__>=2
306 * As for excessive "push %ebx"/"pop %ebx" found all over.
307 * When generating position-independent code GCC won't let
308 * us use "b" in assembler templates nor even respect "ebx"
309 * in "clobber description." Therefore the trouble...
312 /* Helper function - check if a CPUID instruction
313 is available on this CPU */
315 padlock_insn_cpuid_available(void)
319 /* We're checking if the bit #21 of EFLAGS
320 can be toggled. If yes = CPUID is available. */
324 "xorl $0x200000, %%eax\n"
325 "movl %%eax, %%ecx\n"
326 "andl $0x200000, %%ecx\n"
331 "andl $0x200000, %%eax\n"
332 "xorl %%eax, %%ecx\n"
334 : "=r" (result) : : "eax", "ecx");
336 return (result == 0);
/* NOTE(review): padlock_available is truncated — return type, braces,
 * early 'return 0' statements and the eax assignments before each cpuid
 * (0x00000000, 0xC0000000, 0xC0000001) are missing. The CPUID leaf
 * 0xC0000001 EDX bits checked below: bits 6-7 = ACE present+enabled,
 * bits 2-3 = RNG present+enabled. */
339 /* Load supported features of the CPU to see if
340 the PadLock is available. */
342 padlock_available(void)
344 char vendor_string[16];
345 unsigned int eax, edx;
347 /* First check if the CPUID instruction is available at all... */
348 if (! padlock_insn_cpuid_available())
351 /* Are we running on the Centaur (VIA) CPU? */
353 vendor_string[12] = 0;
357 "movl %%ebx,(%%edi)\n"
358 "movl %%edx,4(%%edi)\n"
359 "movl %%ecx,8(%%edi)\n"
361 : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
362 if (strcmp(vendor_string, "CentaurHauls") != 0)
365 /* Check for Centaur Extended Feature Flags presence */
367 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
368 : "+a"(eax) : : "ecx", "edx");
369 if (eax < 0xC0000001)
372 /* Read the Centaur Extended Feature Flags */
374 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
375 : "+a"(eax), "=d"(edx) : : "ecx")
377 /* Fill up some flags */
378 padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
379 padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
381 return padlock_use_ace + padlock_use_rng;
384 #ifndef OPENSSL_NO_AES
385 /* Our own htonl()/ntohl() */
387 padlock_bswapl(AES_KEY *ks)
389 size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
390 unsigned int *key = ks->rd_key;
393 asm volatile ("bswapl %0" : "+r"(*key));
399 /* Force key reload from memory to the CPU microcode.
400 Loading EFLAGS from the stack clears EFLAGS[30]
401 which does the trick. */
403 padlock_reload_key(void)
405 asm volatile ("pushfl; popfl");
/* NOTE(review): padlock_verify_context and the PADLOCK_XCRYPT_ASM macro
 * body are truncated — the cmpxchg/compare asm lines, the 'iv' local and
 * the rep_xcrypt opcode insertion line are missing. The leal offsets
 * (+16 = cword, +32 = ks) mirror struct padlock_cipher_data's layout. */
408 #ifndef OPENSSL_NO_AES
410 * This is heuristic key context tracing. At first one
411 * believes that one should use atomic swap instructions,
412 * but it's not actually necessary. Point is that if
413 * padlock_saved_context was changed by another thread
414 * after we've read it and before we compare it with cdata,
415 * our key *shall* be reloaded upon thread context switch
416 * and we are therefore set in either case...
419 padlock_verify_context(struct padlock_cipher_data *cdata)
431 :"+m"(padlock_saved_context)
432 : "r"(padlock_saved_context), "r"(cdata) : "cc");
435 /* Template for padlock_xcrypt_* modes */
437 * The offsets used with 'leal' instructions
438 * describe items of the 'padlock_cipher_data'
441 #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
442 static inline void *name(size_t cnt, \
443 struct padlock_cipher_data *cdata, \
444 void *out, const void *inp) \
446 asm volatile ( "pushl %%ebx\n" \
447 " leal 16(%0),%%edx\n" \
448 " leal 32(%0),%%ebx\n" \
451 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
452 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
453 : "edx", "cc", "memory"); \
457 /* Generate all functions with appropriate opcodes */
458 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
459 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
460 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
461 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
464 /* The RNG call itself */
465 static inline unsigned int
466 padlock_xstore(void *addr, unsigned int edx_in)
468 unsigned int eax_out;
470 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
471 : "=a"(eax_out),"=m"(*(unsigned *)addr)
472 : "D"(addr), "d" (edx_in)
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/*
 * Byte-wise copy of n bytes from src to dst; returns dst.
 * NOTE(review): reconstructed from upstream e_padlock.c (locals, loop
 * and return were dropped by extraction). The do/while means n must be
 * non-zero — callers in this file only invoke it with chunk > 0.
 */
static inline unsigned char *
padlock_memcpy(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	do { *d++ = *s++; } while (--n);

	return dst;
}
/* NOTE(review): the Microsoft C (inline _asm) twin of the GCC section
 * above. Heavily truncated — most _asm bodies (xcrypt loop, cpuid
 * sequence in padlock_available, bswap loop) are missing; only isolated
 * instructions survive. Restore from upstream before building with MSVC. */
501 #elif defined(_MSC_VER)
503 * Unlike GCC these are real functions. In order to minimize impact
504 * on performance we adhere to __fastcall calling convention in
505 * order to get two first arguments passed through %ecx and %edx.
506 * Which kind of suits very well, as instructions in question use
507 * both %ecx and %edx as input:-)
509 #define REP_XCRYPT(code) \
511 _asm _emit 0x0f _asm _emit 0xa7 \
515 * The offsets used with 'lea' instructions
516 * describe items of the 'padlock_cipher_data'
519 #define PADLOCK_XCRYPT_ASM(name,code) \
520 static void * __fastcall \
521 name (size_t cnt, void *cdata, \
522 void *outp, const void *inp) \
524 _asm lea edx,[eax+16] \
525 _asm lea ebx,[eax+32] \
531 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
532 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
533 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
534 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
536 static int __fastcall
537 padlock_xstore(void *outp,unsigned int code)
539 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
542 static void __fastcall
543 padlock_reload_key(void)
544 { _asm pushfd _asm popfd }
546 static void __fastcall
547 padlock_verify_context(void *cdata)
552 cmp ecx,padlock_saved_context
557 mov padlock_saved_context,ecx
562 padlock_available(void)
597 mov padlock_use_ace,1
603 mov padlock_use_rng,1
610 static void __fastcall
611 padlock_bswapl(void *key)
626 /* MS actually specifies status of Direction Flag and compiler even
627 * manages to compile following as 'rep movsd' all by itself...
629 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
/* NOTE(review): NID compatibility aliases (cfb/ofb got the 128-bit
 * feedback suffix in newer OpenSSL), the supported-NID list (its element
 * lines are missing here), and the 16-byte alignment helpers used because
 * xcrypt requires cdata aligned to a 16-byte boundary. */
632 /* ===== AES encryption/decryption ===== */
633 #ifndef OPENSSL_NO_AES
635 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
636 #define NID_aes_128_cfb NID_aes_128_cfb128
639 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
640 #define NID_aes_128_ofb NID_aes_128_ofb128
643 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
644 #define NID_aes_192_cfb NID_aes_192_cfb128
647 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
648 #define NID_aes_192_ofb NID_aes_192_ofb128
651 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
652 #define NID_aes_256_cfb NID_aes_256_cfb128
655 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
656 #define NID_aes_256_ofb NID_aes_256_ofb128
659 /* List of supported ciphers. */
660 static int padlock_cipher_nids[] = {
676 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
677 sizeof(padlock_cipher_nids[0]));
679 /* Function prototypes ... */
680 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
681 const unsigned char *iv, int enc);
682 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
683 const unsigned char *in, size_t nbytes);
685 #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
686 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
687 #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
688 NEAREST_ALIGNED(ctx->cipher_data))
690 #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
691 #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
692 #define EVP_CIPHER_block_size_OFB 1
693 #define EVP_CIPHER_block_size_CFB 1
/* NOTE(review): macro that stamps out one static EVP_CIPHER per
 * keysize/mode pair. Some macro body lines (iv length, NULL cleanup/ctrl
 * slots, closing brace) are missing from this extraction. The +16 on
 * ctx_size leaves room for the 16-byte realignment done by
 * ALIGNED_CIPHER_DATA(). */
695 /* Declaring so many ciphers by hand would be a pain.
696 Instead introduce a bit of preprocessor magic :-) */
697 #define DECLARE_AES_EVP(ksize,lmode,umode) \
698 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
699 NID_aes_##ksize##_##lmode, \
700 EVP_CIPHER_block_size_##umode, \
701 AES_KEY_SIZE_##ksize, \
703 0 | EVP_CIPH_##umode##_MODE, \
704 padlock_aes_init_key, \
705 padlock_aes_cipher, \
707 sizeof(struct padlock_cipher_data) + 16, \
708 EVP_CIPHER_set_asn1_iv, \
709 EVP_CIPHER_get_asn1_iv, \
714 DECLARE_AES_EVP(128,ecb,ECB);
715 DECLARE_AES_EVP(128,cbc,CBC);
716 DECLARE_AES_EVP(128,cfb,CFB);
717 DECLARE_AES_EVP(128,ofb,OFB);
719 DECLARE_AES_EVP(192,ecb,ECB);
720 DECLARE_AES_EVP(192,cbc,CBC);
721 DECLARE_AES_EVP(192,cfb,CFB);
722 DECLARE_AES_EVP(192,ofb,OFB);
724 DECLARE_AES_EVP(256,ecb,ECB);
725 DECLARE_AES_EVP(256,cbc,CBC);
726 DECLARE_AES_EVP(256,cfb,CFB);
727 DECLARE_AES_EVP(256,ofb,OFB);
/* NOTE(review): ENGINE ciphers callback, truncated — return type line,
 * braces, the 'if (!cipher)' guard, 'switch (nid)', break statements and
 * the 'return 1'/'return 0' endings are missing. Shape follows the
 * standard ENGINE_CIPHERS_PTR contract: nid==0 lists supported NIDs,
 * otherwise returns the matching EVP_CIPHER. */
730 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
732 /* No specific cipher => return a list of supported nids ... */
734 *nids = padlock_cipher_nids;
735 return padlock_cipher_nids_num;
738 /* ... or the requested "cipher" otherwise */
740 case NID_aes_128_ecb:
741 *cipher = &padlock_aes_128_ecb;
743 case NID_aes_128_cbc:
744 *cipher = &padlock_aes_128_cbc;
746 case NID_aes_128_cfb:
747 *cipher = &padlock_aes_128_cfb;
749 case NID_aes_128_ofb:
750 *cipher = &padlock_aes_128_ofb;
753 case NID_aes_192_ecb:
754 *cipher = &padlock_aes_192_ecb;
756 case NID_aes_192_cbc:
757 *cipher = &padlock_aes_192_cbc;
759 case NID_aes_192_cfb:
760 *cipher = &padlock_aes_192_cfb;
762 case NID_aes_192_ofb:
763 *cipher = &padlock_aes_192_ofb;
766 case NID_aes_256_ecb:
767 *cipher = &padlock_aes_256_ecb;
769 case NID_aes_256_cbc:
770 *cipher = &padlock_aes_256_cbc;
772 case NID_aes_256_cfb:
773 *cipher = &padlock_aes_256_cfb;
775 case NID_aes_256_ofb:
776 *cipher = &padlock_aes_256_ofb;
780 /* Sorry, we don't support this NID */
/* NOTE(review): key schedule setup, truncated — return type, braces, the
 * switch over key_len (128 vs 192/256), default error branch and the
 * final 'return 1' are missing. Restore from upstream before editing. */
788 /* Prepare the encryption key for PadLock usage */
790 padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
791 const unsigned char *iv, int enc)
793 struct padlock_cipher_data *cdata;
794 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
796 if (key==NULL) return 0; /* ERROR */
798 cdata = ALIGNED_CIPHER_DATA(ctx);
799 memset(cdata, 0, sizeof(struct padlock_cipher_data));
801 /* Prepare Control word. */
802 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
803 cdata->cword.b.encdec = 0;
805 cdata->cword.b.encdec = (ctx->encrypt == 0);
806 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
807 cdata->cword.b.ksize = (key_len - 128) / 64;
811 /* PadLock can generate an extended key for
812 AES128 in hardware */
813 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
814 cdata->cword.b.keygen = 0;
819 /* Generate an extended AES key in software.
820 Needed for AES192/AES256 */
821 /* Well, the above applies to Stepping 8 CPUs
822 and is listed as hardware errata. They most
823 likely will fix it at some point and then
824 a check for stepping would be due here. */
825 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
826 EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
828 AES_set_encrypt_key(key, key_len, &cdata->ks);
830 AES_set_decrypt_key(key, key_len, &cdata->ks);
832 /* OpenSSL C functions use byte-swapped extended key. */
833 padlock_bswapl(&cdata->ks);
835 cdata->cword.b.keygen = 1;
844 * This is done to cover for cases when user reuses the
845 * context for new key. The catch is that if we don't do
846 * this, padlock_aes_cipher might proceed with old key...
848 padlock_reload_key ();
/* NOTE(review): aligned-buffer fast path, truncated — return type line,
 * braces, the 'void *iv' declaration, break statements, the default
 * 'return 0' branch and the final 'return 1' are missing. */
854 * Simplified version of padlock_aes_cipher() used when
855 * 1) both input and output buffers are at aligned addresses.
857 * 2) running on a newer CPU that doesn't require aligned buffers.
860 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
861 const unsigned char *in_arg, size_t nbytes)
863 struct padlock_cipher_data *cdata;
866 cdata = ALIGNED_CIPHER_DATA(ctx);
867 padlock_verify_context(cdata);
869 switch (EVP_CIPHER_CTX_mode(ctx)) {
870 case EVP_CIPH_ECB_MODE:
871 padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
874 case EVP_CIPH_CBC_MODE:
875 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
876 iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
877 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
880 case EVP_CIPH_CFB_MODE:
881 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
882 iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
883 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
886 case EVP_CIPH_OFB_MODE:
887 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
888 padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
889 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
896 memset(cdata->iv, 0, AES_BLOCK_SIZE);
/* Working-buffer granularity for the realigning cipher path below;
 * overridable at build time but must stay a power of two >= 16. */
901 #ifndef PADLOCK_CHUNK
902 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
904 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
905 # error "insane PADLOCK_CHUNK..."
/* NOTE(review): the main realigning cipher routine. Heavily truncated —
 * the function's return type, braces, locals 'inp'/'out'/'iv', loop
 * do-markers, several if/else arms and every 'return 1' are missing, so
 * the control flow below is only a skeleton. The logic is too intricate
 * (goto shortcuts, alloca'd bounce buffer, per-mode tails) to rewrite
 * safely from this fragment; restore upstream before touching it. */
908 /* Re-align the arguments to 16-Bytes boundaries and run the
909 encryption function itself. This function is not AES-specific. */
911 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
912 const unsigned char *in_arg, size_t nbytes)
914 struct padlock_cipher_data *cdata;
918 int inp_misaligned, out_misaligned, realign_in_loop;
919 size_t chunk, allocated=0;
921 /* ctx->num is maintained in byte-oriented modes,
922 such as CFB and OFB... */
923 if ((chunk = ctx->num)) { /* borrow chunk variable */
924 unsigned char *ivp=ctx->iv;
926 switch (EVP_CIPHER_CTX_mode(ctx)) {
927 case EVP_CIPH_CFB_MODE:
928 if (chunk >= AES_BLOCK_SIZE)
929 return 0; /* bogus value */
932 while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
933 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
936 else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
937 unsigned char c = *(in_arg++);
938 *(out_arg++) = c ^ ivp[chunk];
939 ivp[chunk++] = c, nbytes--;
942 ctx->num = chunk%AES_BLOCK_SIZE;
944 case EVP_CIPH_OFB_MODE:
945 if (chunk >= AES_BLOCK_SIZE)
946 return 0; /* bogus value */
948 while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
949 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
953 ctx->num = chunk%AES_BLOCK_SIZE;
961 if (nbytes % AES_BLOCK_SIZE)
962 return 0; /* are we expected to do tail processing? */
964 /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
965 modes and arbitrary value in byte-oriented modes, such as
969 /* VIA promises CPUs that won't require alignment in the future.
970 For now padlock_aes_align_required is initialized to 1 and
971 the condition is never met... */
972 /* C7 core is capable to manage unaligned input in non-ECB[!]
973 mode, but performance penalties appear to be approximately
974 same as for software alignment below or ~3x. They promise to
975 improve it in the future, but for now we can just as well
976 pretend that it can only handle aligned input... */
977 if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
978 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
980 inp_misaligned = (((size_t)in_arg) & 0x0F);
981 out_misaligned = (((size_t)out_arg) & 0x0F);
983 /* Note that even if output is aligned and input not,
984 * I still prefer to loop instead of copy the whole
985 * input and then encrypt in one stroke. This is done
986 * in order to improve L1 cache utilization... */
987 realign_in_loop = out_misaligned|inp_misaligned;
989 if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
990 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
992 /* this takes one "if" out of the loops */
994 chunk %= PADLOCK_CHUNK;
995 if (chunk==0) chunk = PADLOCK_CHUNK;
997 if (out_misaligned) {
998 /* optimize for small input */
999 allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
1000 out = alloca(0x10 + allocated);
1001 out = NEAREST_ALIGNED(out);
1006 cdata = ALIGNED_CIPHER_DATA(ctx);
1007 padlock_verify_context(cdata);
1009 switch (EVP_CIPHER_CTX_mode(ctx)) {
1010 case EVP_CIPH_ECB_MODE:
1013 inp = padlock_memcpy(out, in_arg, chunk);
1018 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1021 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1023 out = out_arg+=chunk;
1026 chunk = PADLOCK_CHUNK;
1030 case EVP_CIPH_CBC_MODE:
1031 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1034 if (iv != cdata->iv)
1035 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1036 chunk = PADLOCK_CHUNK;
1037 cbc_shortcut: /* optimize for small input */
1039 inp = padlock_memcpy(out, in_arg, chunk);
1044 iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1047 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1049 out = out_arg+=chunk;
1051 } while (nbytes -= chunk);
1052 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1055 case EVP_CIPH_CFB_MODE:
1056 memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1057 chunk &= ~(AES_BLOCK_SIZE-1);
1058 if (chunk) goto cfb_shortcut;
1059 else goto cfb_skiploop;
1061 if (iv != cdata->iv)
1062 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1063 chunk = PADLOCK_CHUNK;
1064 cfb_shortcut: /* optimize for small input */
1066 inp = padlock_memcpy(out, in_arg, chunk);
1071 iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1074 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1076 out = out_arg+=chunk;
1079 } while (nbytes >= AES_BLOCK_SIZE);
1083 unsigned char *ivp = cdata->iv;
1086 memcpy(ivp, iv, AES_BLOCK_SIZE);
1090 if (cdata->cword.b.encdec) {
1091 cdata->cword.b.encdec=0;
1092 padlock_reload_key();
1093 padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1094 cdata->cword.b.encdec=1;
1095 padlock_reload_key();
1097 unsigned char c = *(in_arg++);
1098 *(out_arg++) = c ^ *ivp;
1099 *(ivp++) = c, nbytes--;
1102 else { padlock_reload_key();
1103 padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1104 padlock_reload_key();
1106 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1112 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1115 case EVP_CIPH_OFB_MODE:
1116 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1117 chunk &= ~(AES_BLOCK_SIZE-1);
1120 inp = padlock_memcpy(out, in_arg, chunk);
1125 padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1128 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1130 out = out_arg+=chunk;
1133 chunk = PADLOCK_CHUNK;
1134 } while (nbytes >= AES_BLOCK_SIZE);
1137 unsigned char *ivp = cdata->iv;
1140 padlock_reload_key(); /* empirically found */
1141 padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1142 padlock_reload_key(); /* empirically found */
1144 *(out_arg++) = *(in_arg++) ^ *ivp;
1149 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1156 /* Clean the realign buffer if it was used */
1157 if (out_misaligned) {
1158 volatile unsigned long *p=(void *)out;
1159 size_t n = allocated/sizeof(*p);
1163 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1170 /* ===== Random Number Generator ===== */
1172 * This code is not engaged. The reason is that it does not comply
1173 * with recommendations for VIA RNG usage for secure applications
1174 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1175 * provide meaningful error control...
1177 /* Wrapper that provides an interface between the API and
1178 the raw PadLock RNG */
1180 padlock_rand_bytes(unsigned char *output, int count)
1182 unsigned int eax, buf;
1184 while (count >= 8) {
1185 eax = padlock_xstore(output, 0);
1186 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1187 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1188 if (eax&(0x1F<<10)) return 0;
1189 if ((eax&0x1F)==0) continue; /* no data, retry... */
1190 if ((eax&0x1F)!=8) return 0; /* fatal failure... */
1195 eax = padlock_xstore(&buf, 3);
1196 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1197 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1198 if (eax&(0x1F<<10)) return 0;
1199 if ((eax&0x1F)==0) continue; /* no data, retry... */
1200 if ((eax&0x1F)!=1) return 0; /* fatal failure... */
1201 *output++ = (unsigned char)buf;
1204 *(volatile unsigned int *)&buf=0;
/*
 * Dummy but necessary function: RAND_METHOD requires a status callback;
 * always reports "seeded" since the hardware RNG needs no seeding.
 *
 * NOTE(review): reconstructed from upstream e_padlock.c; the extraction
 * had dropped the return type, braces and return statement.
 */
static int
padlock_rand_status(void)
{
	return 1;
}
/* NOTE(review): the RAND_METHOD table is truncated — the seed, cleanup
 * and add slots (NULL in upstream) are missing, as is the stub
 * bind_engine body's IMPLEMENT_DYNAMIC_BIND_FN line for the
 * !COMPILE_HW_PADLOCK branch. */
1216 /* Prepare structure for registration */
1217 static RAND_METHOD padlock_rand = {
1219 padlock_rand_bytes, /* bytes */
1222 padlock_rand_bytes, /* pseudorand */
1223 padlock_rand_status, /* rand status */
1226 #else /* !COMPILE_HW_PADLOCK */
1227 #ifndef OPENSSL_NO_DYNAMIC_ENGINE
1229 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1231 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
1232 IMPLEMENT_DYNAMIC_CHECK_FN()
1234 #endif /* COMPILE_HW_PADLOCK */
1236 #endif /* !OPENSSL_NO_HW_PADLOCK */
1237 #endif /* !OPENSSL_NO_HW */