/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);
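
/*
 * CBC encryption is inherently serial: each plaintext block must be
 * XORed with the previous ciphertext block before it is encrypted, so
 * only one block can be in flight at a time and no eight-block fast
 * path is possible here.
 */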
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
        __m128i tot, ivreg;
        size_t i;

        len /= AES_BLOCK_LEN;
        ivreg = _mm_loadu_si128((const __m128i *)iv);
        for (i = 0; i < len; i++) {
                tot = aesni_enc(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
                ivreg = tot;    /* the ciphertext becomes the next IV */
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}
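
/*
 * CBC decryption, by contrast, parallelizes: every ciphertext block can
 * be decrypted independently and then XORed with the preceding
 * ciphertext block, so eight blocks are pipelined through aesni_dec8()
 * per iteration and a single-block loop handles the tail.  Decryption
 * is done in place in buf.
 */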
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
        __m128i blocks[8];
        __m128i *bufs;
        __m128i ivreg, nextiv;
        size_t i, j, cnt;

        ivreg = _mm_loadu_si128((const __m128i *)iv);
        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                bufs = (__m128i *)buf;
                aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
                    bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
                    bufs[7], blocks);
                for (j = 0; j < 8; j++) {
                        nextiv = bufs[j];
                        bufs[j] = blocks[j] ^ ivreg;
                        ivreg = nextiv;
                }
                buf += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                bufs = (__m128i *)buf;
                nextiv = bufs[0];
                bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
                ivreg = nextiv;
                buf += AES_BLOCK_LEN;
        }
}
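
/*
 * ECB has no chaining at all, so both directions use the eight-block
 * fast path followed by a single-block tail loop.
 */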
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
        __m128i tot;
        const __m128i *blocks;
        size_t i, cnt;

        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                blocks = (const __m128i *)from;
                aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
                    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
                    blocks[7], (__m128i *)to);
                from += AES_BLOCK_LEN * 8;
                to += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                tot = aesni_enc(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from));
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}
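
/* ECB decryption mirrors encryption with aesni_dec8()/aesni_dec(). */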
void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
        __m128i tot;
        const __m128i *blocks;
        size_t i, cnt;

        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                blocks = (const __m128i *)from;
                aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
                    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
                    blocks[7], (__m128i *)to);
                from += AES_BLOCK_LEN * 8;
                to += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                tot = aesni_dec(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from));
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}
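
/*
 * XTS-AES (IEEE P1619): block j is whitened with a per-block tweak
 * T_j = E_k2(sector number) * alpha^j in GF(2^128), giving
 * C_j = E_k1(P_j xor T_j) xor T_j.  The helpers below compute the
 * tweak sequence and apply it around the raw block cipher.
 */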
#define AES_XTS_BLOCKSIZE       16
#define AES_XTS_IVSIZE          8
#define AES_XTS_ALPHA           0x87    /* GF(2^128) generator polynomial */
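
/*
 * Multiply the tweak by alpha (the polynomial x) in GF(2^128).  SSE2
 * has no 128-bit bit shift, so the shift-by-one is done as four 32-bit
 * shifts; _mm_shuffle_epi32()/_mm_srai_epi32() build a mask that
 * carries each lane's top bit into the next lane and folds the bit
 * shifted out of the top back in as the 0x87 reduction polynomial.
 */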
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
        const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
        __m128i xtweak, ret;

        /* set up xor mask */
        xtweak = _mm_shuffle_epi32(inp, 0x93);
        xtweak = _mm_srai_epi32(xtweak, 31);
        xtweak &= alphamask;

        /* next term */
        ret = _mm_slli_epi32(inp, 1);
        ret ^= xtweak;

        return (ret);
}
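
/*
 * One XTS block: whiten with the tweak, run the block cipher, whiten
 * again, then advance the tweak for the next block.
 */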
static void
aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
        __m128i block;

        block = *from ^ *tweak;
        if (do_encrypt)
                block = aesni_enc(rounds - 1, key_schedule, block);
        else
                block = aesni_dec(rounds - 1, key_schedule, block);
        *to = block ^ *tweak;
        *tweak = xts_crank_lfsr(*tweak);
}
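
/*
 * Eight-block XTS step.  The per-block tweaks are computed up front
 * into a local array so the eight AES invocations can be issued back
 * to back through aesni_enc8()/aesni_dec8().
 */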
static void
aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
        __m128i tmptweak;
        __m128i a, b, c, d, e, f, g, h;
        __m128i tweaks[8];
        __m128i tmp[8];

        tmptweak = *tweak;

        /*
         * unroll the loop.  This lets gcc put values directly in
         * registers and saves memory accesses.
         */
#define PREPINP(v, pos)                                         \
        do {                                                    \
                tweaks[(pos)] = tmptweak;                       \
                (v) = from[(pos)] ^ tmptweak;                   \
                tmptweak = xts_crank_lfsr(tmptweak);            \
        } while (0)
        PREPINP(a, 0);
        PREPINP(b, 1);
        PREPINP(c, 2);
        PREPINP(d, 3);
        PREPINP(e, 4);
        PREPINP(f, 5);
        PREPINP(g, 6);
        PREPINP(h, 7);
        *tweak = tmptweak;

        if (do_encrypt)
                aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
                    tmp);
        else
                aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
                    tmp);

        to[0] = tmp[0] ^ tweaks[0];
        to[1] = tmp[1] ^ tweaks[1];
        to[2] = tmp[2] ^ tweaks[2];
        to[3] = tmp[3] ^ tweaks[3];
        to[4] = tmp[4] ^ tweaks[4];
        to[5] = tmp[5] ^ tweaks[5];
        to[6] = tmp[6] ^ tweaks[6];
        to[7] = tmp[7] ^ tweaks[7];
}
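
/*
 * Walk a whole buffer in XTS mode: derive the initial tweak by
 * encrypting the IV under the tweak key, then process eight-block
 * groups with the unrolled helper and finish the remainder one block
 * at a time.
 */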
static void
aesni_crypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
        __m128i tweakreg;
        uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
        size_t i, cnt;

        /*
         * Prepare tweak as E_k2(IV).  IV is specified as LE representation
         * of a 64-bit block number which we allow to be passed in directly.
         */
#if BYTE_ORDER == LITTLE_ENDIAN
        bcopy(iv, tweak, AES_XTS_IVSIZE);
        /* Last 64 bits of IV are always zero. */
        bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
        tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
        tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

        cnt = len / AES_XTS_BLOCKSIZE / 8;
        for (i = 0; i < cnt; i++) {
                aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
                    (const __m128i *)from, (__m128i *)to, do_encrypt);
                from += AES_XTS_BLOCKSIZE * 8;
                to += AES_XTS_BLOCKSIZE * 8;
        }
        i *= 8;
        cnt = len / AES_XTS_BLOCKSIZE;
        for (; i < cnt; i++) {
                aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
                    (const __m128i *)from, (__m128i *)to, do_encrypt);
                from += AES_XTS_BLOCKSIZE;
                to += AES_XTS_BLOCKSIZE;
        }
}
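
/* Direction-selecting wrappers around the common XTS routine. */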
void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

        aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
            iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

        aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
            iv, 0);
}
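
/*
 * Translate the session key length (in bits, as supplied by the
 * opencrypto framework) into an AES round count.  XTS key lengths are
 * double the AES key size because the key material holds both the data
 * key (k1) and the tweak key (k2), stored back to back.
 */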
static int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

        switch (ses->algo) {
        case CRYPTO_AES_CBC:
                switch (keylen) {
                case 128:
                        ses->rounds = AES128_ROUNDS;
                        break;
                case 192:
                        ses->rounds = AES192_ROUNDS;
                        break;
                case 256:
                        ses->rounds = AES256_ROUNDS;
                        break;
                default:
                        return (EINVAL);
                }
                break;
        case CRYPTO_AES_XTS:
                switch (keylen) {
                case 256:
                        ses->rounds = AES128_ROUNDS;
                        break;
                case 512:
                        ses->rounds = AES256_ROUNDS;
                        break;
                default:
                        return (EINVAL);
                }
                break;
        default:
                return (EINVAL);
        }

        aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
        aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
        if (ses->algo == CRYPTO_AES_CBC)
                arc4rand(ses->iv, sizeof(ses->iv), 0);
        else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
                    ses->rounds);
        }

        return (0);
}
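
/*
 * Computing the key schedules touches SSE state, so unless we are
 * already on an FPU-enabled kernel thread the call must be bracketed
 * with fpu_kern_enter()/fpu_kern_leave().
 */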
int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
        struct thread *td;
        int error, saved_ctx;

        td = curthread;
        if (!is_fpu_kern_thread(0)) {
                error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
                saved_ctx = 1;
        } else {
                error = 0;
                saved_ctx = 0;
        }
        if (error == 0) {
                error = aesni_cipher_setup_common(ses, encini->cri_key,
                    encini->cri_klen);
                if (saved_ctx)
                        fpu_kern_leave(td, ses->fpu_ctx);
        }
        return (error);
}
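
/*
 * Process one crypto(9) request: gather the data into a contiguous
 * buffer if it is not already linear, enter FPU context, honor explicit
 * keys and IVs, dispatch to the CBC or XTS routines, and copy the
 * result (and, for encryption, the next IV) back out.
 */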
int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
    struct cryptop *crp)
{
        struct thread *td;
        uint8_t *buf;
        int error, allocated, saved_ctx;

        buf = aesni_cipher_alloc(enccrd, crp, &allocated);
        if (buf == NULL)
                return (ENOMEM);

        td = curthread;
        if (!is_fpu_kern_thread(0)) {
                error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
                if (error != 0)
                        goto out;
                saved_ctx = 1;
        } else {
                saved_ctx = 0;
                error = 0;
        }

        if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
                error = aesni_cipher_setup_common(ses, enccrd->crd_key,
                    enccrd->crd_klen);
                if (error != 0) {
                        /* Do not leak the FPU context on failure. */
                        if (saved_ctx)
                                fpu_kern_leave(td, ses->fpu_ctx);
                        goto out;
                }
        }

        if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
                if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
                        bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
                if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
                        crypto_copyback(crp->crp_flags, crp->crp_buf,
                            enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
                if (ses->algo == CRYPTO_AES_CBC) {
                        aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
                            enccrd->crd_len, buf, buf, ses->iv);
                } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                        aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
                            ses->xts_schedule, enccrd->crd_len, buf, buf,
                            ses->iv);
                }
        } else {
                if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
                        bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
                else
                        crypto_copydata(crp->crp_flags, crp->crp_buf,
                            enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
                if (ses->algo == CRYPTO_AES_CBC) {
                        aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
                            enccrd->crd_len, buf, ses->iv);
                } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                        aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
                            ses->xts_schedule, enccrd->crd_len, buf, buf,
                            ses->iv);
                }
        }
        if (saved_ctx)
                fpu_kern_leave(td, ses->fpu_ctx);
        if (allocated)
                crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
                    enccrd->crd_len, buf);
        if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
                crypto_copydata(crp->crp_flags, crp->crp_buf,
                    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
                    AES_BLOCK_LEN, ses->iv);
out:
        if (allocated) {
                bzero(buf, enccrd->crd_len);
                free(buf, M_AESNI);
        }
        return (error);
}