2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
40 * 6. Redistributions of any form whatsoever must retain the following
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
68 #include <openssl/opensslconf.h>
69 #include <openssl/crypto.h>
70 #include <openssl/dso.h>
71 #include <openssl/engine.h>
72 #include <openssl/evp.h>
73 #ifndef OPENSSL_NO_AES
74 # include <openssl/aes.h>
76 #include <openssl/rand.h>
77 #include <openssl/err.h>
80 # ifndef OPENSSL_NO_HW_PADLOCK
82 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
83 # if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
85 # define DYNAMIC_ENGINE
87 # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88 # ifdef ENGINE_DYNAMIC_SUPPORT
89 # define DYNAMIC_ENGINE
92 # error "Only OpenSSL >= 0.9.7 is supported"
96 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
97 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
99 * In addition, because of the heavy use of inline assembler, compiler choice
100 * is limited to GCC and Microsoft C.
102 # undef COMPILE_HW_PADLOCK
103 # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 (defined(_MSC_VER) && defined(_M_IX86))
106 # define COMPILE_HW_PADLOCK
107 static ENGINE *ENGINE_padlock(void);
/*
 * Public loader: construct the PadLock ENGINE and register it with the
 * OpenSSL engine list.  NOTE(review): this extract is missing interior
 * lines (braces, the ENGINE_add()/ENGINE_free() sequence) — code kept
 * byte-identical, comments only.
 */
111 void ENGINE_load_padlock(void)
113 /* On non-x86 CPUs it just returns. */
114 # ifdef COMPILE_HW_PADLOCK
115 ENGINE *toadd = ENGINE_padlock();
124 # ifdef COMPILE_HW_PADLOCK
126 * We do these includes here to avoid header problems on platforms that do
127 * not have the VIA padlock anyway...
131 # define alloca _alloca
132 # elif defined(NETWARE_CLIB) && defined(__GNUC__)
133 void *alloca(size_t);
134 # define alloca(s) __builtin_alloca(s)
/*
 * Forward declarations and engine-wide feature state.  The padlock_use_*
 * flags are filled in by padlock_available() and read when registering
 * engine capabilities.  (Extract: some declaration lines are missing.)
 */
139 /* Function for ENGINE detection and control */
140 static int padlock_available(void);
141 static int padlock_init(ENGINE *e);
144 static RAND_METHOD padlock_rand;
147 # ifndef OPENSSL_NO_AES
148 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
149 const int **nids, int nid);
/* Engine identity strings; padlock_name is composed at bind time. */
153 static const char *padlock_id = "padlock";
154 static char padlock_name[100];
156 /* Available features */
157 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
158 static int padlock_use_rng = 0; /* Random Number Generator */
159 # ifndef OPENSSL_NO_AES
/* Assume aligned buffers are required until proven otherwise. */
160 static int padlock_aes_align_required = 1;
163 /* ===== Engine "management" functions ===== */
165 /* Prepare the ENGINE structure for registration */
/*
 * padlock_bind_helper - compose the engine name from detected features and
 * hook id/name/init/ciphers/RAND handlers into ENGINE 'e'.  RNG
 * registration is compiled out via the "# if 1" below.  NOTE(review):
 * extract is missing interior lines (feature-detection call, error return,
 * closing braces) — code kept byte-identical.
 */
166 static int padlock_bind_helper(ENGINE *e)
168 /* Check available features */
171 # if 1 /* disable RNG for now, see commentary in
172 * vicinity of RNG code */
176 /* Generate a nice engine name with available features */
177 BIO_snprintf(padlock_name, sizeof(padlock_name),
178 "VIA PadLock (%s, %s)",
179 padlock_use_rng ? "RNG" : "no-RNG",
180 padlock_use_ace ? "ACE" : "no-ACE");
182 /* Register everything or return with an error */
183 if (!ENGINE_set_id(e, padlock_id) ||
184 !ENGINE_set_name(e, padlock_name) ||
185 !ENGINE_set_init_function(e, padlock_init) ||
186 # ifndef OPENSSL_NO_AES
187 (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
189 (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
193 /* Everything looks good */
/*
 * ENGINE_padlock - allocate a fresh ENGINE and bind the PadLock handlers
 * onto it via padlock_bind_helper().  NOTE(review): extract is missing the
 * NULL check on ENGINE_new(), the ENGINE_free() on failure, and the return
 * — code kept byte-identical.
 */
198 static ENGINE *ENGINE_padlock(void)
200 ENGINE *eng = ENGINE_new();
206 if (!padlock_bind_helper(eng)) {
214 /* Check availability of the engine */
/* Returns nonzero iff at least one PadLock feature (RNG or ACE) was
 * detected; the ENGINE argument is unused. */
215 static int padlock_init(ENGINE *e)
217 return (padlock_use_rng || padlock_use_ace);
221 * This stuff is needed if this ENGINE is being compiled into a
222 * self-contained shared-library.
224 # ifdef DYNAMIC_ENGINE
/*
 * Dynamic-load entry point: reject a mismatched engine id, then bind the
 * PadLock handlers.  NOTE(review): extract is missing the error returns
 * and closing braces — code kept byte-identical.
 */
225 static int padlock_bind_fn(ENGINE *e, const char *id)
227 if (id && (strcmp(id, padlock_id) != 0)) {
231 if (!padlock_bind_helper(e)) {
238 IMPLEMENT_DYNAMIC_CHECK_FN()
239 IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
240 # endif /* DYNAMIC_ENGINE */
241 /* ===== Here comes the "real" engine ===== */
242 # ifndef OPENSSL_NO_AES
243 /* Some AES-related constants */
244 # define AES_BLOCK_SIZE 16
245 # define AES_KEY_SIZE_128 16
246 # define AES_KEY_SIZE_192 24
247 # define AES_KEY_SIZE_256 32
249 * Here we store the status information relevant to the current context.
252 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
253 * the order of items in this structure. Don't blindly modify, reorder,
/*
 * NOTE(review): extract is missing several members of this struct/bitfield
 * (the cword union wrapper, remaining control-word bits, closing braces) —
 * code kept byte-identical, comments only.
 */
256 struct padlock_cipher_data {
257 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
262 int dgst:1; /* n/a in C3 */
263 int align:1; /* n/a in C3 */
264 int ciphr:1; /* n/a in C3 */
265 unsigned int keygen:1;
267 unsigned int encdec:1;
270 } cword; /* Control word */
271 AES_KEY ks; /* Encryption key */
275 * Essentially this variable belongs in thread local storage.
276 * Having this variable global on the other hand can only cause
277 * few bogus key reloads [if any at all on single-CPU system],
278 * so we accept the penalty...
/* Last cipher context loaded into the CPU; used by padlock_verify_context(). */
280 static volatile struct padlock_cipher_data *padlock_saved_context;
284 * =======================================================
285 * Inline assembler section(s).
286 * =======================================================
287 * Order of arguments is chosen to facilitate Windows port
288 * using __fastcall calling convention. If you wish to add
289 * more routines, keep in mind that first __fastcall
290 * argument is passed in %ecx and second - in %edx.
291 * =======================================================
293 # if defined(__GNUC__) && __GNUC__>=2
295 * As for excessive "push %ebx"/"pop %ebx" found all over.
296 * When generating position-independent code GCC won't let
297 * us use "b" in assembler templates nor even respect "ebx"
298 * in "clobber description." Therefore the trouble...
302 * Helper function - check if a CPUID instruction is available on this CPU
/*
 * Toggles EFLAGS bit 21 (ID) and checks whether the change sticks; if the
 * bit is toggleable, CPUID is supported.  NOTE(review): extract is missing
 * some asm lines and the 'result' declaration — code kept byte-identical.
 */
304 static int padlock_insn_cpuid_available(void)
309 * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
310 * CPUID is available.
312 asm volatile ("pushf\n"
314 "xorl $0x200000, %%eax\n"
315 "movl %%eax, %%ecx\n"
316 "andl $0x200000, %%ecx\n"
321 "andl $0x200000, %%eax\n"
322 "xorl %%eax, %%ecx\n"
323 "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");
325 return (result == 0);
329 * Load supported features of the CPU to see if the PadLock is available.
/*
 * Detects a VIA ("CentaurHauls") CPU, then reads the Centaur Extended
 * Feature Flags (CPUID leaf 0xC0000001) to set padlock_use_ace and
 * padlock_use_rng.  Each feature needs both its "exists" and "enabled"
 * bits set (the two-bit masks below).  Returns nonzero if anything usable
 * was found.  NOTE(review): extract is missing the early 'return 0' lines
 * and some CPUID setup — code kept byte-identical.
 */
331 static int padlock_available(void)
333 char vendor_string[16];
334 unsigned int eax, edx;
336 /* First check if the CPUID instruction is available at all... */
337 if (!padlock_insn_cpuid_available())
340 /* Are we running on the Centaur (VIA) CPU? */
342 vendor_string[12] = 0;
/* %ebx is saved/restored by hand because PIC reserves it (see note above). */
343 asm volatile ("pushl %%ebx\n"
345 "movl %%ebx,(%%edi)\n"
346 "movl %%edx,4(%%edi)\n"
347 "movl %%ecx,8(%%edi)\n"
348 "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
349 if (strcmp(vendor_string, "CentaurHauls") != 0)
352 /* Check for Centaur Extended Feature Flags presence */
354 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
355 if (eax < 0xC0000001)
358 /* Read the Centaur Extended Feature Flags */
360 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
363 /* Fill up some flags */
364 padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
365 padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
367 return padlock_use_ace + padlock_use_rng;
370 # ifndef OPENSSL_NO_AES
371 /* Our own htonl()/ntohl() */
/*
 * Byte-swap every 32-bit word of the expanded AES key in place (PadLock
 * and OpenSSL's C key schedule disagree on endianness).  NOTE(review):
 * extract is missing the loop construct around the bswapl — code kept
 * byte-identical.
 */
372 static inline void padlock_bswapl(AES_KEY *ks)
374 size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
375 unsigned int *key = ks->rd_key;
378 asm volatile ("bswapl %0":"+r" (*key));
385 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
386 * stack clears EFLAGS[30] which does the trick.
388 static inline void padlock_reload_key(void)
390 asm volatile ("pushfl; popfl");
393 # ifndef OPENSSL_NO_AES
395 * This is heuristic key context tracing. At first one
396 * believes that one should use atomic swap instructions,
397 * but it's not actually necessary. Point is that if
398 * padlock_saved_context was changed by another thread
399 * after we've read it and before we compare it with cdata,
400 * our key *shall* be reloaded upon thread context switch
401 * and we are therefore set in either case...
/*
 * If 'cdata' differs from the last context loaded into the CPU, force a
 * key reload; then record cdata as current.  NOTE(review): extract is
 * missing the compare/branch asm lines — code kept byte-identical.
 */
403 static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
405 asm volatile ("pushfl\n"
413 " movl %2,%0":"+m" (padlock_saved_context)
414 :"r"(padlock_saved_context), "r"(cdata):"cc");
417 /* Template for padlock_xcrypt_* modes */
419 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
420 * of the 'padlock_cipher_data' structure.
/*
 * Emits one inline wrapper per XCRYPT mode: loads the control word (+16)
 * and expanded key (+32) addresses from cdata, then executes the
 * hand-encoded 'rep xcrypt*' opcode.  NOTE(review): extract is missing the
 * xcrypt opcode line, the ebx pop, and the return — code kept
 * byte-identical.
 */
422 # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
423 static inline void *name(size_t cnt, \
424 struct padlock_cipher_data *cdata, \
425 void *out, const void *inp) \
427 asm volatile ( "pushl %%ebx\n" \
428 " leal 16(%0),%%edx\n" \
429 " leal 32(%0),%%ebx\n" \
432 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
433 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
434 : "edx", "cc", "memory"); \
438 /* Generate all functions with appropriate opcodes */
/* 0xf3 0x0f 0xa7 /r = rep xcrypt-ecb/cbc/cfb/ofb, selected by last byte. */
440 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
442 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
444 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
446 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
448 /* The RNG call itself */
/*
 * Executes the PadLock XSTORE instruction: writes random bytes to *addr,
 * with edx_in selecting the quality factor; the returned EAX carries the
 * RNG status bits checked by padlock_rand_bytes().  NOTE(review): extract
 * is missing the output-constraint tail and the return — code kept
 * byte-identical.
 */
449 static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
451 unsigned int eax_out;
453 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
454 :"=a" (eax_out), "=m"(*(unsigned *)addr)
455 :"D"(addr), "d"(edx_in)
462 * Why not inline 'rep movsd'? I failed to find information on what value in
463 * Direction Flag one can expect and consequently have to apply
464 * "better-safe-than-sorry" approach and assume "undefined." I could
465 * explicitly clear it and restore the original value upon return from
466 * padlock_aes_cipher, but it's presumably too much trouble for too little
467 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
468 * affected by the Direction Flag and pointers advance toward larger
469 * addresses unconditionally.
/* NOTE(review): extract is missing this function's body (the copy loop and
 * return) — code kept byte-identical. */
471 static inline unsigned char *padlock_memcpy(void *dst, const void *src,
485 # elif defined(_MSC_VER)
487 * Unlike GCC these are real functions. In order to minimize impact
488 * on performance we adhere to __fastcall calling convention in
489 * order to get two first arguments passed through %ecx and %edx.
490 * Which kind of suits very well, as instructions in question use
491 * both %ecx and %edx as input:-)
/*
 * MSVC counterparts of the GCC inline-asm helpers above, using _asm blocks.
 * NOTE(review): this section of the extract is heavily truncated — most
 * function bodies, braces and _asm lines are missing; code kept
 * byte-identical, comments only.
 */
493 # define REP_XCRYPT(code) \
495 _asm _emit 0x0f _asm _emit 0xa7 \
499 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
500 * of the 'padlock_cipher_data' structure.
502 # define PADLOCK_XCRYPT_ASM(name,code) \
503 static void * __fastcall \
504 name (size_t cnt, void *cdata, \
505 void *outp, const void *inp) \
507 _asm lea edx,[eax+16] \
508 _asm lea ebx,[eax+32] \
514 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
515 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
516 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
517 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
519 static int __fastcall padlock_xstore(void *outp, unsigned int code)
522 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
525 static void __fastcall padlock_reload_key(void)
531 static void __fastcall padlock_verify_context(void *cdata)
537 cmp ecx,padlock_saved_context
542 mov padlock_saved_context,ecx
547 padlock_available(void)
583 mov padlock_use_ace,1
589 mov padlock_use_rng,1
596 static void __fastcall padlock_bswapl(void *key)
613 * MS actually specifies status of Direction Flag and compiler even manages
614 * to compile following as 'rep movsd' all by itself...
/* The &~3U truncates the count to a multiple of 4 to match 'rep movsd'. */
616 # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
618 /* ===== AES encryption/decryption ===== */
619 # ifndef OPENSSL_NO_AES
/* Map the 128-bit-feedback NID names (cfb128/ofb128) to the short aliases
 * this file uses, when only the long forms exist in the headers. */
620 # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
621 # define NID_aes_128_cfb NID_aes_128_cfb128
623 # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
624 # define NID_aes_128_ofb NID_aes_128_ofb128
626 # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
627 # define NID_aes_192_cfb NID_aes_192_cfb128
629 # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
630 # define NID_aes_192_ofb NID_aes_192_ofb128
632 # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
633 # define NID_aes_256_cfb NID_aes_256_cfb128
635 # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
636 # define NID_aes_256_ofb NID_aes_256_ofb128
639 * List of supported ciphers.
/* NOTE(review): extract is missing the array's initializer entries. */
640 */ static int padlock_cipher_nids[] = {
657 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
658 sizeof(padlock_cipher_nids[0]));
660 /* Function prototypes ... */
661 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
662 const unsigned char *iv, int enc);
663 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
664 const unsigned char *in, size_t nbytes);
/* Round 'ptr' up to the next 16-byte boundary (PadLock requires aligned
 * control/key data); ALIGNED_CIPHER_DATA applies this to the EVP context's
 * cipher_data, which is over-allocated by 16 bytes for that purpose. */
666 # define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
667 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
668 # define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
669 NEAREST_ALIGNED(ctx->cipher_data))
/* EVP block sizes per mode: ECB/CBC are true block modes; CFB/OFB are
 * byte-oriented stream modes, hence block size 1. */
671 # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
672 # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
673 # define EVP_CIPHER_block_size_OFB 1
674 # define EVP_CIPHER_block_size_CFB 1
677 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
678 * of preprocessor magic :-)
/*
 * Expands to a static EVP_CIPHER named padlock_aes_<ksize>_<lmode>, wiring
 * NID, block size, key size, mode flag and the init/do_cipher handlers.
 * ctx_size is the aligned padlock_cipher_data plus 16 bytes of alignment
 * slack (see NEAREST_ALIGNED).  NOTE(review): extract is missing some
 * member lines of the initializer — code kept byte-identical.
 */
680 # define DECLARE_AES_EVP(ksize,lmode,umode) \
681 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
682 NID_aes_##ksize##_##lmode, \
683 EVP_CIPHER_block_size_##umode, \
684 AES_KEY_SIZE_##ksize, \
686 0 | EVP_CIPH_##umode##_MODE, \
687 padlock_aes_init_key, \
688 padlock_aes_cipher, \
690 sizeof(struct padlock_cipher_data) + 16, \
691 EVP_CIPHER_set_asn1_iv, \
692 EVP_CIPHER_get_asn1_iv, \
697 DECLARE_AES_EVP(128, ecb, ECB);
698 DECLARE_AES_EVP(128, cbc, CBC);
699 DECLARE_AES_EVP(128, cfb, CFB);
700 DECLARE_AES_EVP(128, ofb, OFB);
702 DECLARE_AES_EVP(192, ecb, ECB);
703 DECLARE_AES_EVP(192, cbc, CBC);
704 DECLARE_AES_EVP(192, cfb, CFB);
705 DECLARE_AES_EVP(192, ofb, OFB);
707 DECLARE_AES_EVP(256, ecb, ECB);
708 DECLARE_AES_EVP(256, cbc, CBC);
709 DECLARE_AES_EVP(256, cfb, CFB);
710 DECLARE_AES_EVP(256, ofb, OFB);
/*
 * ENGINE ciphers callback: with cipher == NULL return the list of
 * supported NIDs; otherwise select the matching EVP_CIPHER for 'nid'.
 * NOTE(review): extract is missing the function's opening line, the
 * break/return statements between cases, and the final return — code
 * kept byte-identical.
 */
713 padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
716 /* No specific cipher => return a list of supported nids ... */
718 *nids = padlock_cipher_nids;
719 return padlock_cipher_nids_num;
722 /* ... or the requested "cipher" otherwise */
724 case NID_aes_128_ecb:
725 *cipher = &padlock_aes_128_ecb;
727 case NID_aes_128_cbc:
728 *cipher = &padlock_aes_128_cbc;
730 case NID_aes_128_cfb:
731 *cipher = &padlock_aes_128_cfb;
733 case NID_aes_128_ofb:
734 *cipher = &padlock_aes_128_ofb;
737 case NID_aes_192_ecb:
738 *cipher = &padlock_aes_192_ecb;
740 case NID_aes_192_cbc:
741 *cipher = &padlock_aes_192_cbc;
743 case NID_aes_192_cfb:
744 *cipher = &padlock_aes_192_cfb;
746 case NID_aes_192_ofb:
747 *cipher = &padlock_aes_192_ofb;
750 case NID_aes_256_ecb:
751 *cipher = &padlock_aes_256_ecb;
753 case NID_aes_256_cbc:
754 *cipher = &padlock_aes_256_cbc;
756 case NID_aes_256_cfb:
757 *cipher = &padlock_aes_256_cfb;
759 case NID_aes_256_ofb:
760 *cipher = &padlock_aes_256_ofb;
764 /* Sorry, we don't support this NID */
772 /* Prepare the encryption key for PadLock usage */
/*
 * Builds the aligned padlock_cipher_data for this EVP context: fills the
 * PadLock control word (direction, rounds, key size) and either lets the
 * hardware expand a 128-bit key or expands 192/256-bit keys in software
 * (byte-swapped to PadLock's ordering).  NOTE(review): extract is missing
 * the key-length switch, braces and final return — code kept
 * byte-identical.
 */
774 padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
775 const unsigned char *iv, int enc)
777 struct padlock_cipher_data *cdata;
778 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
781 return 0; /* ERROR */
783 cdata = ALIGNED_CIPHER_DATA(ctx);
784 memset(cdata, 0, sizeof(struct padlock_cipher_data));
786 /* Prepare Control word. */
787 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
788 cdata->cword.b.encdec = 0;
790 cdata->cword.b.encdec = (ctx->encrypt == 0);
791 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
792 cdata->cword.b.ksize = (key_len - 128) / 64;
797 * PadLock can generate an extended key for AES128 in hardware
799 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
800 cdata->cword.b.keygen = 0;
806 * Generate an extended AES key in software. Needed for AES192/AES256
809 * Well, the above applies to Stepping 8 CPUs and is listed as
810 * hardware errata. They most likely will fix it at some point and
811 * then a check for stepping would be due here.
/* CFB/OFB always use the forward (encrypt) key schedule. */
813 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
814 EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
815 AES_set_encrypt_key(key, key_len, &cdata->ks);
817 AES_set_decrypt_key(key, key_len, &cdata->ks);
820 * OpenSSL C functions use byte-swapped extended key.
822 padlock_bswapl(&cdata->ks);
824 cdata->cword.b.keygen = 1;
833 * This is done to cover for cases when user reuses the
834 * context for new key. The catch is that if we don't do
835 * this, padlock_aes_cipher might proceed with old key...
837 padlock_reload_key();
843 * Simplified version of padlock_aes_cipher() used when
844 * 1) both input and output buffers are at aligned addresses.
846 * 2) running on a newer CPU that doesn't require aligned buffers.
/*
 * Loads the IV into cdata, runs a single xcrypt call for the whole buffer,
 * and stores the resulting IV back into the context; clears cdata->iv
 * afterwards.  NOTE(review): extract is missing the 'iv' declaration,
 * break statements and the return — code kept byte-identical.
 */
849 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
850 const unsigned char *in_arg, size_t nbytes)
852 struct padlock_cipher_data *cdata;
855 cdata = ALIGNED_CIPHER_DATA(ctx);
856 padlock_verify_context(cdata);
858 switch (EVP_CIPHER_CTX_mode(ctx)) {
859 case EVP_CIPH_ECB_MODE:
860 padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
863 case EVP_CIPH_CBC_MODE:
864 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
865 iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
867 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
870 case EVP_CIPH_CFB_MODE:
871 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
872 iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
874 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
877 case EVP_CIPH_OFB_MODE:
878 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
879 padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
880 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
/* Don't leave IV material lying around in the context scratch area. */
887 memset(cdata->iv, 0, AES_BLOCK_SIZE);
892 # ifndef PADLOCK_CHUNK
893 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
895 # if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
896 # error "insane PADLOCK_CHUNK..."
900 * Re-align the arguments to 16-Bytes boundaries and run the encryption
901 * function itself. This function is not AES-specific.
/*
 * Main do_cipher handler: finishes any partial CFB/OFB block kept in
 * ctx->num, takes the fast path (omnivorous) when buffers are aligned,
 * otherwise processes the data in PADLOCK_CHUNK-sized pieces through an
 * alloca'd aligned bounce buffer, per mode.  NOTE(review): this extract is
 * heavily truncated — many braces, do/while headers, goto targets and
 * intermediate statements are missing; code kept byte-identical, comments
 * only.
 */
904 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
905 const unsigned char *in_arg, size_t nbytes)
907 struct padlock_cipher_data *cdata;
911 int inp_misaligned, out_misaligned, realign_in_loop;
912 size_t chunk, allocated = 0;
915 * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
917 if ((chunk = ctx->num)) { /* borrow chunk variable */
918 unsigned char *ivp = ctx->iv;
920 switch (EVP_CIPHER_CTX_mode(ctx)) {
921 case EVP_CIPH_CFB_MODE:
922 if (chunk >= AES_BLOCK_SIZE)
923 return 0; /* bogus value */
926 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
927 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
930 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
931 unsigned char c = *(in_arg++);
932 *(out_arg++) = c ^ ivp[chunk];
933 ivp[chunk++] = c, nbytes--;
936 ctx->num = chunk % AES_BLOCK_SIZE;
938 case EVP_CIPH_OFB_MODE:
939 if (chunk >= AES_BLOCK_SIZE)
940 return 0; /* bogus value */
942 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
943 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
947 ctx->num = chunk % AES_BLOCK_SIZE;
955 if (nbytes % AES_BLOCK_SIZE)
956 return 0; /* are we expected to do tail processing? */
959 * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
960 * arbitrary value in byte-oriented modes, such as CFB and OFB...
965 * VIA promises CPUs that won't require alignment in the future. For now
966 * padlock_aes_align_required is initialized to 1 and the condition is
970 * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
971 * performance penalties appear to be approximately same as for software
972 * alignment below or ~3x. They promise to improve it in the future, but
973 * for now we can just as well pretend that it can only handle aligned
976 if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
977 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
979 inp_misaligned = (((size_t)in_arg) & 0x0F);
980 out_misaligned = (((size_t)out_arg) & 0x0F);
983 * Note that even if output is aligned and input not, I still prefer to
984 * loop instead of copy the whole input and then encrypt in one stroke.
985 * This is done in order to improve L1 cache utilization...
987 realign_in_loop = out_misaligned | inp_misaligned;
989 if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
990 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
992 /* this takes one "if" out of the loops */
994 chunk %= PADLOCK_CHUNK;
996 chunk = PADLOCK_CHUNK;
998 if (out_misaligned) {
999 /* optimize for small input */
1000 allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
1001 out = alloca(0x10 + allocated);
1002 out = NEAREST_ALIGNED(out);
1006 cdata = ALIGNED_CIPHER_DATA(ctx);
1007 padlock_verify_context(cdata);
1009 switch (EVP_CIPHER_CTX_mode(ctx)) {
1010 case EVP_CIPH_ECB_MODE:
1013 inp = padlock_memcpy(out, in_arg, chunk);
1018 padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1021 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1023 out = out_arg += chunk;
1026 chunk = PADLOCK_CHUNK;
1030 case EVP_CIPH_CBC_MODE:
1031 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1034 if (iv != cdata->iv)
1035 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1036 chunk = PADLOCK_CHUNK;
1037 cbc_shortcut: /* optimize for small input */
1039 inp = padlock_memcpy(out, in_arg, chunk);
1044 iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1047 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1049 out = out_arg += chunk;
1051 } while (nbytes -= chunk);
1052 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1055 case EVP_CIPH_CFB_MODE:
1056 memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1057 chunk &= ~(AES_BLOCK_SIZE - 1);
1063 if (iv != cdata->iv)
1064 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1065 chunk = PADLOCK_CHUNK;
1066 cfb_shortcut: /* optimize for small input */
1068 inp = padlock_memcpy(out, in_arg, chunk);
1073 iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1076 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1078 out = out_arg += chunk;
1081 } while (nbytes >= AES_BLOCK_SIZE);
/* CFB tail: encrypt the IV once with ECB, then XOR byte-by-byte. */
1085 unsigned char *ivp = cdata->iv;
1088 memcpy(ivp, iv, AES_BLOCK_SIZE);
1092 if (cdata->cword.b.encdec) {
1093 cdata->cword.b.encdec = 0;
1094 padlock_reload_key();
1095 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1096 cdata->cword.b.encdec = 1;
1097 padlock_reload_key();
1099 unsigned char c = *(in_arg++);
1100 *(out_arg++) = c ^ *ivp;
1101 *(ivp++) = c, nbytes--;
1104 padlock_reload_key();
1105 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1106 padlock_reload_key();
1108 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1114 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1117 case EVP_CIPH_OFB_MODE:
1118 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1119 chunk &= ~(AES_BLOCK_SIZE - 1);
1123 inp = padlock_memcpy(out, in_arg, chunk);
1128 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1131 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1133 out = out_arg += chunk;
1136 chunk = PADLOCK_CHUNK;
1137 } while (nbytes >= AES_BLOCK_SIZE);
/* OFB tail: generate one keystream block via ECB on the IV, XOR bytes. */
1140 unsigned char *ivp = cdata->iv;
1143 padlock_reload_key(); /* empirically found */
1144 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1145 padlock_reload_key(); /* empirically found */
1147 *(out_arg++) = *(in_arg++) ^ *ivp;
1152 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1159 /* Clean the realign buffer if it was used */
1160 if (out_misaligned) {
1161 volatile unsigned long *p = (void *)out;
1162 size_t n = allocated / sizeof(*p);
1167 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1172 # endif /* OPENSSL_NO_AES */
1174 /* ===== Random Number Generator ===== */
1176 * This code is not engaged. The reason is that it does not comply
1177 * with recommendations for VIA RNG usage for secure applications
1178 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1179 * provide meaningful error control...
1182 * Wrapper that provides an interface between the API and the raw PadLock
/*
 * Fills 'output' with hardware random bytes: 8 bytes per XSTORE while
 * count >= 8, then single bytes via a local scratch word.  EAX status is
 * checked each call: bit 6 = RNG enabled, bits 10-14 = failure flags,
 * low 5 bits = byte count delivered.  NOTE(review): extract is missing
 * pointer/count advancement lines, braces and the final return — code
 * kept byte-identical.
 */
1185 static int padlock_rand_bytes(unsigned char *output, int count)
1187 unsigned int eax, buf;
1189 while (count >= 8) {
1190 eax = padlock_xstore(output, 0);
1191 if (!(eax & (1 << 6)))
1192 return 0; /* RNG disabled */
1193 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1194 if (eax & (0x1F << 10))
1196 if ((eax & 0x1F) == 0)
1197 continue; /* no data, retry... */
1198 if ((eax & 0x1F) != 8)
1199 return 0; /* fatal failure... */
1204 eax = padlock_xstore(&buf, 3);
1205 if (!(eax & (1 << 6)))
1206 return 0; /* RNG disabled */
1207 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1208 if (eax & (0x1F << 10))
1210 if ((eax & 0x1F) == 0)
1211 continue; /* no data, retry... */
1212 if ((eax & 0x1F) != 1)
1213 return 0; /* fatal failure... */
1214 *output++ = (unsigned char)buf;
/* Scrub the scratch word; volatile store so it can't be optimized away. */
1217 *(volatile unsigned int *)&buf = 0;
1222 /* Dummy but necessary function */
/* Always reports the RNG as ready; the RAND_METHOD table requires a
 * status callback.  (Extract: body/return line missing.) */
1223 static int padlock_rand_status(void)
1228 /* Prepare structure for registration */
/* RAND_METHOD table handed to ENGINE_set_RAND(); seed/add/cleanup slots
 * are not visible in this extract. */
1229 static RAND_METHOD padlock_rand = {
1231 padlock_rand_bytes, /* bytes */
1234 padlock_rand_bytes, /* pseudorand */
1235 padlock_rand_status, /* rand status */
1238 # endif /* COMPILE_HW_PADLOCK */
1240 # endif /* !OPENSSL_NO_HW_PADLOCK */
1241 #endif /* !OPENSSL_NO_HW */