2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
40 * 6. Redistributions of any form whatsoever must retain the following
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
68 #include <openssl/opensslconf.h>
69 #include <openssl/crypto.h>
70 #include <openssl/dso.h>
71 #include <openssl/engine.h>
72 #include <openssl/evp.h>
73 #ifndef OPENSSL_NO_AES
74 # include <openssl/aes.h>
76 #include <openssl/rand.h>
77 #include <openssl/err.h>
80 # ifndef OPENSSL_NO_HW_PADLOCK
82 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
83 # if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
85 # define DYNAMIC_ENGINE
87 # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88 # ifdef ENGINE_DYNAMIC_SUPPORT
89 # define DYNAMIC_ENGINE
92 # error "Only OpenSSL >= 0.9.7 is supported"
96 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
97 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
99 * In addition, because of the heavy use of inline assembler, compiler choice
100 * is limited to GCC and Microsoft C.
102 # undef COMPILE_HW_PADLOCK
103 # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 (defined(_MSC_VER) && defined(_M_IX86))
106 # define COMPILE_HW_PADLOCK
110 # ifdef OPENSSL_NO_DYNAMIC_ENGINE
111 # ifdef COMPILE_HW_PADLOCK
112 static ENGINE *ENGINE_padlock(void);
/*
 * Public entry point: build the statically-linked PadLock ENGINE and add
 * it to OpenSSL's internal engine list.  On CPUs/compilers where
 * COMPILE_HW_PADLOCK is not defined this compiles to a no-op.
 * (Remainder of the body lies on lines not visible in this chunk.)
 */
115 void ENGINE_load_padlock(void)
117 /* On non-x86 CPUs it just returns. */
118 # ifdef COMPILE_HW_PADLOCK
119 ENGINE *toadd = ENGINE_padlock();
130 # ifdef COMPILE_HW_PADLOCK
132 * We do these includes here to avoid header problems on platforms that do
133 * not have the VIA padlock anyway...
139 # define alloca _alloca
141 # elif defined(__GNUC__)
143 # define alloca(s) __builtin_alloca(s)
147 /* Function for ENGINE detection and control */
148 static int padlock_available(void);
149 static int padlock_init(ENGINE *e);
152 static RAND_METHOD padlock_rand;
155 # ifndef OPENSSL_NO_AES
156 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
157 const int **nids, int nid);
161 static const char *padlock_id = "padlock";
162 static char padlock_name[100];
164 /* Available features */
165 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
166 static int padlock_use_rng = 0; /* Random Number Generator */
167 # ifndef OPENSSL_NO_AES
168 static int padlock_aes_align_required = 1;
171 /* ===== Engine "management" functions ===== */
173 /* Prepare the ENGINE structure for registration */
/*
 * Fill in the ENGINE structure: probe CPU features, compose the
 * human-readable engine name, and register the id/name/init/cipher/RAND
 * callbacks.  Returns non-zero on success, 0 if any ENGINE_set_* call
 * fails (error path lines are outside this view).
 */
174 static int padlock_bind_helper(ENGINE *e)
176 /* Check available features */
179 # if 1 /* disable RNG for now, see commentary in
180 * vicinity of RNG code */
184 /* Generate a nice engine name with available features */
185 BIO_snprintf(padlock_name, sizeof(padlock_name),
186 "VIA PadLock (%s, %s)",
187 padlock_use_rng ? "RNG" : "no-RNG",
188 padlock_use_ace ? "ACE" : "no-ACE");
190 /* Register everything or return with an error */
191 if (!ENGINE_set_id(e, padlock_id) ||
192 !ENGINE_set_name(e, padlock_name) ||
193 !ENGINE_set_init_function(e, padlock_init) ||
194 # ifndef OPENSSL_NO_AES
/* Ciphers/RAND are only registered when the corresponding CPU feature
 * flag was detected by padlock_available(). */
195 (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
197 (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
201 /* Everything looks good */
205 # ifdef OPENSSL_NO_DYNAMIC_ENGINE
/*
 * Allocate a fresh ENGINE and run padlock_bind_helper() on it; used only
 * for the statically-linked (non-dynamic) build.  Frees the ENGINE and
 * returns NULL if binding fails (cleanup lines outside this view).
 */
208 static ENGINE *ENGINE_padlock(void)
210 ENGINE *eng = ENGINE_new();
216 if (!padlock_bind_helper(eng)) {
226 /* Check availability of the engine */
/*
 * ENGINE init callback: reports success iff at least one PadLock unit
 * (ACE crypto or RNG) was detected at bind time.
 */
227 static int padlock_init(ENGINE *e)
229 return (padlock_use_rng || padlock_use_ace);
233 * This stuff is needed if this ENGINE is being compiled into a
234 * self-contained shared-library.
236 # ifdef DYNAMIC_ENGINE
/*
 * Dynamic-engine bind callback: reject any id other than "padlock",
 * then delegate to padlock_bind_helper().  Returns 0 on mismatch or
 * bind failure (return statements outside this view).
 */
237 static int padlock_bind_fn(ENGINE *e, const char *id)
239 if (id && (strcmp(id, padlock_id) != 0)) {
243 if (!padlock_bind_helper(e)) {
250 IMPLEMENT_DYNAMIC_CHECK_FN()
251 IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
252 # endif /* DYNAMIC_ENGINE */
253 /* ===== Here comes the "real" engine ===== */
254 # ifndef OPENSSL_NO_AES
255 /* Some AES-related constants */
256 # define AES_BLOCK_SIZE 16
257 # define AES_KEY_SIZE_128 16
258 # define AES_KEY_SIZE_192 24
259 # define AES_KEY_SIZE_256 32
261 * Here we store the status information relevant to the current context.
264 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
265 * the order of items in this structure. Don't blindly modify, reorder,
/*
 * Per-context state handed to the xcrypt instructions.  The inline
 * assembler addresses fields of this struct by fixed byte offsets
 * (see PADLOCK_XCRYPT_ASM), so layout/order must not change.
 * The bit-fields below form the hardware "control word".
 */
268 struct padlock_cipher_data {
269 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
274 int dgst:1; /* n/a in C3 */
275 int align:1; /* n/a in C3 */
276 int ciphr:1; /* n/a in C3 */
277 unsigned int keygen:1;
279 unsigned int encdec:1;
282 } cword; /* Control word */
283 AES_KEY ks; /* Encryption key */
287 * Essentially this variable belongs in thread local storage.
288 * Having this variable global on the other hand can only cause
289 * few bogus key reloads [if any at all on single-CPU system],
290 * so we accept the penalty...
292 static volatile struct padlock_cipher_data *padlock_saved_context;
296 * =======================================================
297 * Inline assembler section(s).
298 * =======================================================
299 * Order of arguments is chosen to facilitate Windows port
300 * using __fastcall calling convention. If you wish to add
301 * more routines, keep in mind that first __fastcall
302 * argument is passed in %ecx and second - in %edx.
303 * =======================================================
305 # if defined(__GNUC__) && __GNUC__>=2
307 * As for excessive "push %ebx"/"pop %ebx" found all over.
308 * When generating position-independent code GCC won't let
309 * us use "b" in assembler templates nor even respect "ebx"
310 * in "clobber description." Therefore the trouble...
314 * Helper function - check if a CPUID instruction is available on this CPU
/*
 * Returns non-zero when the CPUID instruction exists: tries to flip
 * EFLAGS bit 21 (ID) and checks whether the change sticks.  Some asm
 * lines (flag save/restore) fall outside this view.
 */
316 static int padlock_insn_cpuid_available(void)
321 * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
322 * CPUID is available.
324 asm volatile ("pushf\n"
326 "xorl $0x200000, %%eax\n"
327 "movl %%eax, %%ecx\n"
328 "andl $0x200000, %%ecx\n"
333 "andl $0x200000, %%eax\n"
334 "xorl %%eax, %%ecx\n"
335 "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");
/* result == 0 means the toggled bit survived the round-trip. */
337 return (result == 0);
341 * Load supported features of the CPU to see if the PadLock is available.
/*
 * Probe for PadLock: verify CPUID exists, that the vendor string is
 * "CentaurHauls", that the Centaur extended CPUID leaf 0xC0000001 is
 * present, then read its EDX feature flags.  Sets the global
 * padlock_use_ace / padlock_use_rng flags as a side effect and returns
 * non-zero when either unit is both present and enabled.
 * %ebx is saved/restored around cpuid because PIC code reserves it.
 */
343 static int padlock_available(void)
345 char vendor_string[16];
346 unsigned int eax, edx;
348 /* First check if the CPUID instruction is available at all... */
349 if (!padlock_insn_cpuid_available())
352 /* Are we running on the Centaur (VIA) CPU? */
354 vendor_string[12] = 0;
355 asm volatile ("pushl %%ebx\n"
357 "movl %%ebx,(%%edi)\n"
358 "movl %%edx,4(%%edi)\n"
359 "movl %%ecx,8(%%edi)\n"
360 "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
361 if (strcmp(vendor_string, "CentaurHauls") != 0)
364 /* Check for Centaur Extended Feature Flags presence */
366 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
367 if (eax < 0xC0000001)
370 /* Read the Centaur Extended Feature Flags */
372 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
375 /* Fill up some flags */
/* Each unit needs two bits set: "exists" and "enabled" (0x3 pattern). */
376 padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
377 padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
379 return padlock_use_ace + padlock_use_rng;
382 # ifndef OPENSSL_NO_AES
384 /* Our own htonl()/ntohl() */
/*
 * Byte-swap every 32-bit word of the expanded AES key in place (our own
 * htonl()/ntohl()): PadLock expects the opposite endianness from
 * OpenSSL's software key schedule.  The loop around the bswapl lies
 * outside this view.
 */
385 static inline void padlock_bswapl(AES_KEY *ks)
387 size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
388 unsigned int *key = ks->rd_key;
391 asm volatile ("bswapl %0":"+r" (*key));
399 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
400 * stack clears EFLAGS[30] which does the trick.
/*
 * Force the CPU to re-read the key material from memory on the next
 * xcrypt: popping EFLAGS from the stack clears EFLAGS[30], which the
 * hardware uses as its "key already loaded" hint.
 */
402 static inline void padlock_reload_key(void)
404 asm volatile ("pushfl; popfl");
407 # ifndef OPENSSL_NO_AES
409 * This is heuristic key context tracing. At first one
410 * believes that one should use atomic swap instructions,
411 * but it's not actually necessary. Point is that if
412 * padlock_saved_context was changed by another thread
413 * after we've read it and before we compare it with cdata,
414 * our key *shall* be reloaded upon thread context switch
415 * and we are therefore set in either case...
/*
 * Compare cdata against the last-used context (padlock_saved_context);
 * trigger a key reload if they differ, then record cdata as current.
 * Deliberately non-atomic — see the heuristic-tracing commentary above.
 * Several asm lines (the compare/branch) fall outside this view.
 */
417 static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
419 asm volatile ("pushfl\n"
427 " movl %2,%0":"+m" (padlock_saved_context)
428 :"r"(padlock_saved_context), "r"(cdata):"cc");
431 /* Template for padlock_xcrypt_* modes */
433 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
434 * of the 'padlock_cipher_data' structure.
/*
 * Emit one inline wrapper per xcrypt mode.  Generated signature:
 *   void *name(size_t cnt, struct padlock_cipher_data *cdata,
 *              void *out, const void *inp);
 * The leal offsets 16/32 address cdata->cword and cdata->ks — hence the
 * layout warning on struct padlock_cipher_data.  rep_xcrypt is the raw
 * opcode bytes of the instruction (no assembler mnemonic exists).
 * No comments are inserted inside the macro: every line is a
 * backslash-continued part of one definition.
 */
436 # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
437 static inline void *name(size_t cnt, \
438 struct padlock_cipher_data *cdata, \
439 void *out, const void *inp) \
441 asm volatile ( "pushl %%ebx\n" \
442 " leal 16(%0),%%edx\n" \
443 " leal 32(%0),%%ebx\n" \
446 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
447 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
448 : "edx", "cc", "memory"); \
452 /* Generate all functions with appropriate opcodes */
454 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
456 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
458 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
460 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
462 /* The RNG call itself */
/*
 * Execute the xstore RNG instruction (raw opcode 0f a7 c0): stores
 * random bytes at addr, with edx_in selecting the quality factor.
 * Returns the status word the instruction leaves in %eax; the caller
 * (padlock_rand_bytes) decodes it.
 */
463 static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
465 unsigned int eax_out;
467 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
468 :"=a" (eax_out), "=m"(*(unsigned *)addr)
469 :"D"(addr), "d"(edx_in)
476 * Why not inline 'rep movsd'? I failed to find information on what value in
477 * Direction Flag one can expect and consequently have to apply
478 * "better-safe-than-sorry" approach and assume "undefined." I could
479 * explicitly clear it and restore the original value upon return from
480 * padlock_aes_cipher, but it's presumably too much trouble for too little
481 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
482 * affected by the Direction Flag and pointers advance toward larger
483 * addresses unconditionally.
485 static inline unsigned char *padlock_memcpy(void *dst, const void *src,
499 # elif defined(_MSC_VER)
501 * Unlike GCC these are real functions. In order to minimize impact
502 * on performance we adhere to __fastcall calling convention in
503 * order to get two first arguments passed through %ecx and %edx.
504 * Which kind of suits very well, as instructions in question use
505 * both %ecx and %edx as input:-)
507 # define REP_XCRYPT(code) \
509 _asm _emit 0x0f _asm _emit 0xa7 \
513 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
514 * of the 'padlock_cipher_data' structure.
516 # define PADLOCK_XCRYPT_ASM(name,code) \
517 static void * __fastcall \
518 name (size_t cnt, void *cdata, \
519 void *outp, const void *inp) \
521 _asm lea edx,[eax+16] \
522 _asm lea ebx,[eax+32] \
528 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, 0xc8)
529 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, 0xd0)
530 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, 0xe0)
531 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, 0xe8)
/*
 * MSC counterparts of the GCC inline helpers above.  Real __fastcall
 * functions (first two args in ecx/edx).  NOTE(review): the _asm bodies
 * below were flattened onto single lines by extraction; statement
 * boundaries must be restored against the upstream file before any
 * edit here.
 */
533 static int __fastcall padlock_xstore(void *outp, unsigned int code)
536 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0}
/* Same EFLAGS[30]-clearing trick as the GCC padlock_reload_key. */
537 static void __fastcall padlock_reload_key(void)
539 _asm pushfd _asm popfd}
/* MSC version of the key-context tracking; ecx carries cdata. */
540 static void __fastcall padlock_verify_context(void *cdata)
543 pushfd bt DWORD PTR[esp], 30 jnc skip cmp ecx,
544 padlock_saved_context je skip popfd sub esp,
545 4 skip:add esp, 4 mov padlock_saved_context,
546 ecx}} static int padlock_available(void)
549 pushfd pop eax mov ecx, eax xor eax,
550 1 << 21 push eax popfd pushfd pop eax xor eax, ecx bt eax,
551 21 jnc noluck mov eax, 0 cpuid xor eax, eax cmp ebx,
552 'tneC' jne noluck cmp edx, 'Hrua' jne noluck cmp ecx,
553 'slua' jne noluck mov eax, 0xC0000000 cpuid mov edx,
554 eax xor eax, eax cmp edx, 0xC0000001 jb noluck mov eax,
555 0xC0000001 cpuid xor eax, eax bt edx, 6 jnc skip_a bt edx,
556 7 jnc skip_a mov padlock_use_ace, 1 inc eax skip_a:bt edx,
557 2 jnc skip_r bt edx, 3 jnc skip_r mov padlock_use_rng,
558 1 inc eax skip_r:noluck:}} static void __fastcall
559 padlock_bswapl(void *key)
562 pushfd cld mov esi, ecx mov edi, ecx mov ecx, 60 up:lodsd
563 bswap eax stosd loop up popfd}}
565 * MS actually specifies status of Direction Flag and compiler even manages
566 * to compile following as 'rep movsd' all by itself...
568 # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
570 /* ===== AES encryption/decryption ===== */
571 # ifndef OPENSSL_NO_AES
572 # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
573 # define NID_aes_128_cfb NID_aes_128_cfb128
575 # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
576 # define NID_aes_128_ofb NID_aes_128_ofb128
578 # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
579 # define NID_aes_192_cfb NID_aes_192_cfb128
581 # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
582 # define NID_aes_192_ofb NID_aes_192_ofb128
584 # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
585 # define NID_aes_256_cfb NID_aes_256_cfb128
587 # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
588 # define NID_aes_256_ofb NID_aes_256_ofb128
591 * List of supported ciphers.
592 */ static int padlock_cipher_nids[] = {
609 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
610 sizeof(padlock_cipher_nids[0]));
612 /* Function prototypes ... */
613 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
614 const unsigned char *iv, int enc);
615 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
616 const unsigned char *in, size_t nbytes);
618 # define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
619 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
620 # define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
621 NEAREST_ALIGNED(ctx->cipher_data))
623 # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
624 # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
625 # define EVP_CIPHER_block_size_OFB 1
626 # define EVP_CIPHER_block_size_CFB 1
629 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
630 * of preprocessor magic :-)
/*
 * Build one static EVP_CIPHER table per (key size, mode) combination,
 * all routed through padlock_aes_init_key/padlock_aes_cipher.  The
 * ctx_size adds 16 bytes of slack so ALIGNED_CIPHER_DATA() can round
 * cipher_data up to a 16-byte boundary.  (Macro lines are
 * backslash-continued — no comments can be interleaved.)
 */
632 # define DECLARE_AES_EVP(ksize,lmode,umode) \
633 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
634 NID_aes_##ksize##_##lmode, \
635 EVP_CIPHER_block_size_##umode, \
636 AES_KEY_SIZE_##ksize, \
638 0 | EVP_CIPH_##umode##_MODE, \
639 padlock_aes_init_key, \
640 padlock_aes_cipher, \
642 sizeof(struct padlock_cipher_data) + 16, \
643 EVP_CIPHER_set_asn1_iv, \
644 EVP_CIPHER_get_asn1_iv, \
649 DECLARE_AES_EVP(128, ecb, ECB);
650 DECLARE_AES_EVP(128, cbc, CBC);
651 DECLARE_AES_EVP(128, cfb, CFB);
652 DECLARE_AES_EVP(128, ofb, OFB);
654 DECLARE_AES_EVP(192, ecb, ECB);
655 DECLARE_AES_EVP(192, cbc, CBC);
656 DECLARE_AES_EVP(192, cfb, CFB);
657 DECLARE_AES_EVP(192, ofb, OFB);
659 DECLARE_AES_EVP(256, ecb, ECB);
660 DECLARE_AES_EVP(256, cbc, CBC);
661 DECLARE_AES_EVP(256, cfb, CFB);
662 DECLARE_AES_EVP(256, ofb, OFB);
/*
 * ENGINE ciphers callback.  With nid == 0 it hands back the supported
 * nid list and its length; otherwise it resolves one nid to the
 * matching static EVP_CIPHER declared by DECLARE_AES_EVP above.
 * (break statements and the default arm's return are outside this view.)
 */
665 padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
668 /* No specific cipher => return a list of supported nids ... */
670 *nids = padlock_cipher_nids;
671 return padlock_cipher_nids_num;
674 /* ... or the requested "cipher" otherwise */
676 case NID_aes_128_ecb:
677 *cipher = &padlock_aes_128_ecb;
679 case NID_aes_128_cbc:
680 *cipher = &padlock_aes_128_cbc;
682 case NID_aes_128_cfb:
683 *cipher = &padlock_aes_128_cfb;
685 case NID_aes_128_ofb:
686 *cipher = &padlock_aes_128_ofb;
689 case NID_aes_192_ecb:
690 *cipher = &padlock_aes_192_ecb;
692 case NID_aes_192_cbc:
693 *cipher = &padlock_aes_192_cbc;
695 case NID_aes_192_cfb:
696 *cipher = &padlock_aes_192_cfb;
698 case NID_aes_192_ofb:
699 *cipher = &padlock_aes_192_ofb;
702 case NID_aes_256_ecb:
703 *cipher = &padlock_aes_256_ecb;
705 case NID_aes_256_cbc:
706 *cipher = &padlock_aes_256_cbc;
708 case NID_aes_256_cfb:
709 *cipher = &padlock_aes_256_cfb;
711 case NID_aes_256_ofb:
712 *cipher = &padlock_aes_256_ofb;
716 /* Sorry, we don't support this NID */
724 /* Prepare the encryption key for PadLock usage */
/*
 * EVP init_key callback: zero and fill the 16-byte-aligned
 * padlock_cipher_data for this context — control word (direction,
 * rounds, key size) plus the key schedule.  128-bit keys are expanded
 * by the hardware (keygen = 0); 192/256-bit keys are expanded in
 * software and byte-swapped for the hardware (keygen = 1).
 */
726 padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
727 const unsigned char *iv, int enc)
729 struct padlock_cipher_data *cdata;
730 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
733 return 0; /* ERROR */
735 cdata = ALIGNED_CIPHER_DATA(ctx);
736 memset(cdata, 0, sizeof(struct padlock_cipher_data));
738 /* Prepare Control word. */
/* OFB is implemented via ECB-encrypting the IV, so it always runs the
 * hardware in encrypt direction regardless of ctx->encrypt. */
739 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
740 cdata->cword.b.encdec = 0;
742 cdata->cword.b.encdec = (ctx->encrypt == 0);
743 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
744 cdata->cword.b.ksize = (key_len - 128) / 64;
749 * PadLock can generate an extended key for AES128 in hardware
751 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
752 cdata->cword.b.keygen = 0;
758 * Generate an extended AES key in software. Needed for AES192/AES256
761 * Well, the above applies to Stepping 8 CPUs and is listed as
762 * hardware errata. They most likely will fix it at some point and
763 * then a check for stepping would be due here.
/* CFB/OFB only ever encrypt the keystream, so they need the encrypt
 * schedule even when decrypting. */
765 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
766 EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
767 AES_set_encrypt_key(key, key_len, &cdata->ks);
769 AES_set_decrypt_key(key, key_len, &cdata->ks);
772 * OpenSSL C functions use byte-swapped extended key.
774 padlock_bswapl(&cdata->ks);
776 cdata->cword.b.keygen = 1;
785 * This is done to cover for cases when user reuses the
786 * context for new key. The catch is that if we don't do
787 * this, padlock_aes_cipher might proceed with old key...
789 padlock_reload_key();
795 * Simplified version of padlock_aes_cipher() used when
796 * 1) both input and output buffers are at aligned addresses.
798 * 2) running on a newer CPU that doesn't require aligned buffers.
/*
 * Fast path used when both buffers are already 16-byte aligned (or the
 * CPU does not require alignment): dispatch the whole request to the
 * appropriate xcrypt wrapper in one shot, shuttling the IV between
 * ctx->iv and cdata->iv for the chaining modes.  Wipes cdata->iv on
 * the way out so no keystream-related state lingers.
 */
801 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
802 const unsigned char *in_arg, size_t nbytes)
804 struct padlock_cipher_data *cdata;
807 cdata = ALIGNED_CIPHER_DATA(ctx);
808 padlock_verify_context(cdata);
810 switch (EVP_CIPHER_CTX_mode(ctx)) {
811 case EVP_CIPH_ECB_MODE:
812 padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
815 case EVP_CIPH_CBC_MODE:
816 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
817 iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
/* xcrypt returns a pointer to the final IV; propagate it to the ctx. */
819 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
822 case EVP_CIPH_CFB_MODE:
823 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
824 iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
826 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
829 case EVP_CIPH_OFB_MODE:
830 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
831 padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
832 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
839 memset(cdata->iv, 0, AES_BLOCK_SIZE);
844 # ifndef PADLOCK_CHUNK
845 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
847 # if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
848 # error "insane PADLOCK_CHUNK..."
852 * Re-align the arguments to 16-Bytes boundaries and run the encryption
853 * function itself. This function is not AES-specific.
/*
 * Main EVP do_cipher callback.  Three stages:
 *  1. Drain any partial-block keystream left in ctx->iv for the
 *     byte-oriented modes (ctx->num tracks the offset).
 *  2. If both buffers are 16-byte aligned (or alignment is not
 *     required), delegate everything to the omnivorous fast path.
 *  3. Otherwise process in PADLOCK_CHUNK-sized pieces, bouncing data
 *     through an alloca'd aligned bounce buffer, and scrub that buffer
 *     plus cdata->iv before returning.
 */
856 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
857 const unsigned char *in_arg, size_t nbytes)
859 struct padlock_cipher_data *cdata;
863 int inp_misaligned, out_misaligned, realign_in_loop;
864 size_t chunk, allocated = 0;
867 * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
/* Stage 1: consume leftover keystream bytes before any block work. */
869 if ((chunk = ctx->num)) { /* borrow chunk variable */
870 unsigned char *ivp = ctx->iv;
872 switch (EVP_CIPHER_CTX_mode(ctx)) {
873 case EVP_CIPH_CFB_MODE:
874 if (chunk >= AES_BLOCK_SIZE)
875 return 0; /* bogus value */
/* CFB encrypt: ciphertext feeds back into the IV... */
878 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
879 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
/* ...CFB decrypt: the incoming ciphertext byte feeds back. */
882 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
883 unsigned char c = *(in_arg++);
884 *(out_arg++) = c ^ ivp[chunk];
885 ivp[chunk++] = c, nbytes--;
888 ctx->num = chunk % AES_BLOCK_SIZE;
890 case EVP_CIPH_OFB_MODE:
891 if (chunk >= AES_BLOCK_SIZE)
892 return 0; /* bogus value */
894 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
895 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
899 ctx->num = chunk % AES_BLOCK_SIZE;
/* Block modes must receive whole blocks only. */
907 if (nbytes % AES_BLOCK_SIZE)
908 return 0; /* are we expected to do tail processing? */
911 * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
912 * arbitrary value in byte-oriented modes, such as CFB and OFB...
917 * VIA promises CPUs that won't require alignment in the future. For now
918 * padlock_aes_align_required is initialized to 1 and the condition is
922 * C7 core is capable of managing unaligned input in non-ECB[!] mode, but
923 * performance penalties appear to be approximately same as for software
924 * alignment below or ~3x. They promise to improve it in the future, but
925 * for now we can just as well pretend that it can only handle aligned
928 if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
929 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
931 inp_misaligned = (((size_t)in_arg) & 0x0F);
932 out_misaligned = (((size_t)out_arg) & 0x0F);
935 * Note that even if output is aligned and input not, I still prefer to
936 * loop instead of copy the whole input and then encrypt in one stroke.
937 * This is done in order to improve L1 cache utilization...
939 realign_in_loop = out_misaligned | inp_misaligned;
941 if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
942 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
/* Stage 3: chunked processing through the aligned bounce buffer. */
944 /* this takes one "if" out of the loops */
946 chunk %= PADLOCK_CHUNK;
948 chunk = PADLOCK_CHUNK;
950 if (out_misaligned) {
951 /* optimize for small input */
952 allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
953 out = alloca(0x10 + allocated);
954 out = NEAREST_ALIGNED(out);
958 cdata = ALIGNED_CIPHER_DATA(ctx);
959 padlock_verify_context(cdata);
961 switch (EVP_CIPHER_CTX_mode(ctx)) {
962 case EVP_CIPH_ECB_MODE:
965 inp = padlock_memcpy(out, in_arg, chunk);
970 padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
973 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
975 out = out_arg += chunk;
978 chunk = PADLOCK_CHUNK;
982 case EVP_CIPH_CBC_MODE:
983 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
987 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
988 chunk = PADLOCK_CHUNK;
989 cbc_shortcut: /* optimize for small input */
991 inp = padlock_memcpy(out, in_arg, chunk);
996 iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
999 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1001 out = out_arg += chunk;
1003 } while (nbytes -= chunk);
1004 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1007 case EVP_CIPH_CFB_MODE:
1008 memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1009 chunk &= ~(AES_BLOCK_SIZE - 1);
1015 if (iv != cdata->iv)
1016 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1017 chunk = PADLOCK_CHUNK;
1018 cfb_shortcut: /* optimize for small input */
1020 inp = padlock_memcpy(out, in_arg, chunk);
1025 iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1028 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1030 out = out_arg += chunk;
1033 } while (nbytes >= AES_BLOCK_SIZE);
/* CFB tail: generate one keystream block via ECB-encrypting the IV,
 * then XOR byte-by-byte, updating ctx->num offsets. */
1037 unsigned char *ivp = cdata->iv;
1040 memcpy(ivp, iv, AES_BLOCK_SIZE);
1044 if (cdata->cword.b.encdec) {
1045 cdata->cword.b.encdec = 0;
1046 padlock_reload_key();
1047 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1048 cdata->cword.b.encdec = 1;
1049 padlock_reload_key();
1051 unsigned char c = *(in_arg++);
1052 *(out_arg++) = c ^ *ivp;
1053 *(ivp++) = c, nbytes--;
1056 padlock_reload_key();
1057 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1058 padlock_reload_key();
1060 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1066 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1069 case EVP_CIPH_OFB_MODE:
1070 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1071 chunk &= ~(AES_BLOCK_SIZE - 1);
1075 inp = padlock_memcpy(out, in_arg, chunk);
1080 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1083 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1085 out = out_arg += chunk;
1088 chunk = PADLOCK_CHUNK;
1089 } while (nbytes >= AES_BLOCK_SIZE);
/* OFB tail: same single-block keystream trick as CFB above. */
1092 unsigned char *ivp = cdata->iv;
1095 padlock_reload_key(); /* empirically found */
1096 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1097 padlock_reload_key(); /* empirically found */
1099 *(out_arg++) = *(in_arg++) ^ *ivp;
1104 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
/* Scrub the bounce buffer so plaintext doesn't linger on the stack. */
1111 /* Clean the realign buffer if it was used */
1112 if (out_misaligned) {
1113 volatile unsigned long *p = (void *)out;
1114 size_t n = allocated / sizeof(*p);
1119 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1124 # endif /* OPENSSL_NO_AES */
1126 /* ===== Random Number Generator ===== */
1128 * This code is not engaged. The reason is that it does not comply
1129 * with recommendations for VIA RNG usage for secure applications
1130 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1131 * provide meaningful error control...
1134 * Wrapper that provides an interface between the API and the raw PadLock
/*
 * RAND_METHOD bytes callback over the xstore instruction.  Fetches
 * 8 bytes at a time directly into the caller's buffer, then single
 * bytes (quality factor 3) for the tail.  The xstore status word in
 * eax is checked every call: bit 6 = RNG enabled, bits 10..14 flag
 * DC-bias/raw-bits/string-filter test failures, low 5 bits = byte
 * count actually stored.  Returns 0 on any hard failure.
 */
1137 static int padlock_rand_bytes(unsigned char *output, int count)
1139 unsigned int eax, buf;
1141 while (count >= 8) {
1142 eax = padlock_xstore(output, 0);
1143 if (!(eax & (1 << 6)))
1144 return 0; /* RNG disabled */
1145 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1146 if (eax & (0x1F << 10))
1148 if ((eax & 0x1F) == 0)
1149 continue; /* no data, retry... */
1150 if ((eax & 0x1F) != 8)
1151 return 0; /* fatal failure... */
/* Tail: one byte per xstore into a local scratch word. */
1156 eax = padlock_xstore(&buf, 3);
1157 if (!(eax & (1 << 6)))
1158 return 0; /* RNG disabled */
1159 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1160 if (eax & (0x1F << 10))
1162 if ((eax & 0x1F) == 0)
1163 continue; /* no data, retry... */
1164 if ((eax & 0x1F) != 1)
1165 return 0; /* fatal failure... */
1166 *output++ = (unsigned char)buf;
/* volatile store defeats dead-store elimination when wiping buf. */
1169 *(volatile unsigned int *)&buf = 0;
1174 /* Dummy but necessary function */
1175 static int padlock_rand_status(void)
1180 /* Prepare structure for registration */
/*
 * RAND_METHOD table for the hardware RNG.  Both the bytes and
 * pseudorand slots point at padlock_rand_bytes; the remaining slots
 * (seed/add/cleanup) are on lines outside this view.
 */
1181 static RAND_METHOD padlock_rand = {
1183 padlock_rand_bytes, /* bytes */
1186 padlock_rand_bytes, /* pseudorand */
1187 padlock_rand_status, /* rand status */
1190 # else /* !COMPILE_HW_PADLOCK */
1191 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
/*
 * Stub for dynamic-engine builds on platforms where PadLock cannot be
 * compiled (COMPILE_HW_PADLOCK undefined): export bind_engine so the
 * shared object still loads; its body (outside this view) does nothing
 * useful.
 */
1193 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1195 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
1200 IMPLEMENT_DYNAMIC_CHECK_FN()
1202 # endif /* COMPILE_HW_PADLOCK */
1203 # endif /* !OPENSSL_NO_HW_PADLOCK */
1204 #endif /* !OPENSSL_NO_HW */