/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);

struct blocks8 {
	__m128i	blk[8];
} __packed;
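
/*
 * CBC encryption is inherently serial: each ciphertext block is
 * C[i] = E_k(P[i] ^ C[i-1]) with C[-1] = IV, so a block cannot start
 * until the previous one is finished and only one block is in flight
 * at a time.
 */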
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}
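
/*
 * CBC decryption, unlike encryption, parallelizes: P[i] = D_k(C[i]) ^
 * C[i-1] depends only on ciphertext, so eight blocks are pushed through
 * the AES units at once and the chaining XOR is applied afterwards.
 * Decryption is done in place; each ciphertext block is saved in
 * nextiv before it is overwritten.
 */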
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i blocks[8];
	struct blocks8 *blks;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (struct blocks8 *)buf;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = blks->blk[j];
			blks->blk[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		nextiv = _mm_loadu_si128((void *)buf);
		_mm_storeu_si128((void *)buf,
		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}
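
/*
 * ECB has no chaining at all, so both directions process eight
 * independent blocks per iteration, with a single-block loop mopping
 * up the remainder.
 */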
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}
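
/* ECB decryption mirrors the encrypt path, using the decrypt schedule. */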
void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
	__m128i tot;
	__m128i tout[8];
	const struct blocks8 *blks;
	struct blocks8 *top;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
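
/*
 * Advance the XTS tweak one block: multiply by x (alpha) in GF(2^128),
 * reducing modulo x^128 + x^7 + x^2 + x + 1 (0x87).  SSE has no 128-bit
 * shift, so the cross-lane carries are rebuilt by hand: the 0x93
 * shuffle rotates the dwords up one lane (lane 0 receives the old top
 * lane), the arithmetic shift by 31 smears each source lane's MSB into
 * a full-lane mask, and alphamask reduces that to 0x87 for the carry
 * out of bit 127 and to 1 for the intra-word carries before it is
 * XORed into the left-shifted tweak.
 */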
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return ret;
}
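
/*
 * One XTS block: C = E_k1(P ^ T) ^ T (or D_k1 on decrypt), after which
 * the tweak T is advanced for the following block.
 */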
static void
aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i block;

	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	_mm_storeu_si128((__m128i *)to, block ^ *tweak);

	*tweak = xts_crank_lfsr(*tweak);
}
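
/*
 * Eight XTS blocks at a time.  Each block gets its own tweak value;
 * the tweaks are saved in tweaks[] during the input whitening pass so
 * the same values can be XORed back in after the parallel AES pass.
 */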
static void
aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];
	__m128i *top;
	const __m128i *fromp;

	tmptweak = *tweak;

	/*
	 * unroll the loop.  This lets gcc put values directly in the
	 * register and saves memory accesses.
	 */
	fromp = (const __m128i *)from;
#define	PREPINP(v, pos) 					\
		do {						\
			tweaks[(pos)] = tmptweak;		\
			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
			    tmptweak;				\
			tmptweak = xts_crank_lfsr(tmptweak);	\
		} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	top = (__m128i *)to;
	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
}
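
/*
 * Note that these loops only consume whole 16-byte blocks; XTS
 * ciphertext stealing for a partial final block is not implemented
 * here, so len is expected to be a multiple of AES_XTS_BLOCKSIZE.
 */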
static void
aesni_crypt_xts(int rounds, const __m128i *data_schedule,
    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}
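
/*
 * keylen is in bits.  For XTS the supplied key is the concatenation of
 * two equal-size AES keys (data key and tweak key), which is why 256
 * selects AES-128 and 512 selects AES-256; key + keylen / 16 points at
 * the second half, in bytes.
 */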
static int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

	switch (ses->algo) {
	case CRYPTO_AES_CBC:
		switch (keylen) {
		case 128:
			ses->rounds = AES128_ROUNDS;
			break;
		case 192:
			ses->rounds = AES192_ROUNDS;
			break;
		case 256:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	case CRYPTO_AES_XTS:
		switch (keylen) {
		case 256:
			ses->rounds = AES128_ROUNDS;
			break;
		case 512:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	default:
		return (EINVAL);
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
	if (ses->algo == CRYPTO_AES_CBC)
		arc4rand(ses->iv, sizeof(ses->iv), 0);
	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
		    ses->rounds);
	}

	return (0);
}
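
/*
 * The AES-NI instructions clobber XMM state, so unless we are already
 * on a dedicated FPU kernel thread the FPU context must be saved with
 * fpu_kern_enter() and restored with fpu_kern_leave() around any use
 * of the key schedules.
 */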
int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
	struct thread *td;
	int error, saved_ctx;

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		saved_ctx = 1;
	} else {
		error = 0;
		saved_ctx = 0;
	}
	if (error == 0) {
		error = aesni_cipher_setup_common(ses, encini->cri_key,
		    encini->cri_klen);
		if (saved_ctx)
			fpu_kern_leave(td, ses->fpu_ctx);
	}
	return (error);
}
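
/*
 * Per-request entry point.  The IV comes either from crd_iv
 * (CRD_F_IV_EXPLICIT) or from the request buffer itself; on encryption
 * the IV is written out unless CRD_F_IV_PRESENT says it is already
 * there, and the last ciphertext block is saved afterwards as the IV
 * for the next request.
 */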
int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
    struct cryptop *crp)
{
	struct thread *td;
	uint8_t *buf;
	int error, allocated, saved_ctx;

	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
	if (buf == NULL)
		return (ENOMEM);

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		if (error != 0)
			goto out;
		saved_ctx = 1;
	} else {
		saved_ctx = 0;
		error = 0;
	}

	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
		    enccrd->crd_klen);
		if (error != 0)
			goto out;
	}

	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
			crypto_copyback(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
			    enccrd->crd_len, buf, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	} else {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		else
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
			    enccrd->crd_len, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	}
	if (saved_ctx)
		fpu_kern_leave(td, ses->fpu_ctx);
	if (allocated)
		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
		    enccrd->crd_len, buf);
	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
		crypto_copydata(crp->crp_flags, crp->crp_buf,
		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
		    AES_BLOCK_LEN, ses->iv);
out:
	if (allocated) {
		bzero(buf, enccrd->crd_len);
		free(buf, M_AESNI);
	}
	return (error);
}