/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);

struct blocks8 {
        __m128i blk[8];
} __packed;

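/*
 * CBC encryption: each plaintext block is XORed with the previous
 * ciphertext block (the IV for the first block) before going through
 * the AES rounds.  The chaining dependency keeps this loop strictly
 * sequential, one block per iteration.
 */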
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
        __m128i tot, ivreg;
        size_t i;

        len /= AES_BLOCK_LEN;
        ivreg = _mm_loadu_si128((const __m128i *)iv);
        for (i = 0; i < len; i++) {
                tot = aesni_enc(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
                ivreg = tot;
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}

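/*
 * CBC decryption, in place.  Each output block depends only on already
 * available ciphertext, so eight blocks are run through aesni_dec8() at
 * a time and the chaining XOR is applied afterwards; any remaining
 * blocks are handled one at a time.
 */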
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
        __m128i blocks[8];
        struct blocks8 *blks;
        __m128i ivreg, nextiv;
        size_t i, j, cnt;

        ivreg = _mm_loadu_si128((const __m128i *)iv);
        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                blks = (struct blocks8 *)buf;
                aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
                    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
                    blks->blk[6], blks->blk[7], &blocks[0]);
                for (j = 0; j < 8; j++) {
                        nextiv = blks->blk[j];
                        blks->blk[j] = blocks[j] ^ ivreg;
                        ivreg = nextiv;
                }
                buf += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                nextiv = _mm_loadu_si128((void *)buf);
                _mm_storeu_si128((void *)buf,
                    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
                ivreg = nextiv;
                buf += AES_BLOCK_LEN;
        }
}

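/*
 * ECB encryption: blocks are independent, so groups of eight are fed to
 * aesni_enc8() to keep the AES-NI units busy, with a single-block loop
 * for the tail.
 */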
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
        __m128i tot;
        __m128i tout[8];
        struct blocks8 *top;
        const struct blocks8 *blks;
        size_t i, cnt;

        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                blks = (const struct blocks8 *)from;
                top = (struct blocks8 *)to;
                aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
                    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
                    blks->blk[6], blks->blk[7], tout);
                top->blk[0] = tout[0];
                top->blk[1] = tout[1];
                top->blk[2] = tout[2];
                top->blk[3] = tout[3];
                top->blk[4] = tout[4];
                top->blk[5] = tout[5];
                top->blk[6] = tout[6];
                top->blk[7] = tout[7];
                from += AES_BLOCK_LEN * 8;
                to += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                tot = aesni_enc(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from));
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}

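/*
 * ECB decryption: mirrors aesni_encrypt_ecb(), using aesni_dec8() for
 * groups of eight blocks and aesni_dec() for the tail.
 */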
void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
        __m128i tot;
        __m128i tout[8];
        const struct blocks8 *blks;
        struct blocks8 *top;
        size_t i, cnt;

        cnt = len / AES_BLOCK_LEN / 8;
        for (i = 0; i < cnt; i++) {
                blks = (const struct blocks8 *)from;
                top = (struct blocks8 *)to;
                aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
                    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
                    blks->blk[6], blks->blk[7], tout);
                top->blk[0] = tout[0];
                top->blk[1] = tout[1];
                top->blk[2] = tout[2];
                top->blk[3] = tout[3];
                top->blk[4] = tout[4];
                top->blk[5] = tout[5];
                top->blk[6] = tout[6];
                top->blk[7] = tout[7];
                from += AES_BLOCK_LEN * 8;
                to += AES_BLOCK_LEN * 8;
        }
        i *= 8;
        cnt = len / AES_BLOCK_LEN;
        for (; i < cnt; i++) {
                tot = aesni_dec(rounds - 1, key_schedule,
                    _mm_loadu_si128((const __m128i *)from));
                _mm_storeu_si128((__m128i *)to, tot);
                from += AES_BLOCK_LEN;
                to += AES_BLOCK_LEN;
        }
}

#define AES_XTS_BLOCKSIZE       16
#define AES_XTS_IVSIZE          8
#define AES_XTS_ALPHA           0x87    /* GF(2^128) generator polynomial */

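/*
 * Advance the XTS tweak by multiplying it by x in GF(2^128).  Each
 * 32-bit lane is shifted left by one; the shuffle/srai mask re-creates
 * the carry out of each lane as the low bit of the next lane, and a
 * carry out of the top lane folds back into the low lane as the
 * reduction constant AES_XTS_ALPHA.
 */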
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
        const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
        __m128i xtweak, ret;

        /* set up xor mask */
        xtweak = _mm_shuffle_epi32(inp, 0x93);
        xtweak = _mm_srai_epi32(xtweak, 31);
        xtweak &= alphamask;

        /* next term */
        ret = _mm_slli_epi32(inp, 1);
        ret ^= xtweak;

        return ret;
}

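/*
 * Encrypt or decrypt a single XTS block: whiten with the current tweak,
 * run the AES rounds, whiten again with the same tweak, then advance
 * the tweak for the next block.
 */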
static void
aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
        __m128i block;

        block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;

        if (do_encrypt)
                block = aesni_enc(rounds - 1, key_schedule, block);
        else
                block = aesni_dec(rounds - 1, key_schedule, block);

        _mm_storeu_si128((__m128i *)to, block ^ *tweak);

        *tweak = xts_crank_lfsr(*tweak);
}

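/*
 * Eight-block XTS step: the eight tweak values are generated up front
 * (see PREPINP below), the whitened blocks are processed as a group by
 * aesni_enc8()/aesni_dec8(), and the saved tweaks are applied again on
 * output.
 */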
static void
aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
        __m128i tmptweak;
        __m128i a, b, c, d, e, f, g, h;
        __m128i tweaks[8];
        __m128i tmp[8];
        __m128i *top;
        const __m128i *fromp;

        tmptweak = *tweak;

        /*
         * Unroll the loop.  This lets gcc keep the values in registers
         * and saves memory accesses.
         */
        fromp = (const __m128i *)from;
#define PREPINP(v, pos)                                         \
                do {                                            \
                        tweaks[(pos)] = tmptweak;               \
                        (v) = _mm_loadu_si128(&fromp[pos]) ^    \
                            tmptweak;                           \
                        tmptweak = xts_crank_lfsr(tmptweak);    \
                } while (0)
        PREPINP(a, 0);
        PREPINP(b, 1);
        PREPINP(c, 2);
        PREPINP(d, 3);
        PREPINP(e, 4);
        PREPINP(f, 5);
        PREPINP(g, 6);
        PREPINP(h, 7);
        *tweak = tmptweak;

        if (do_encrypt)
                aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
                    tmp);
        else
                aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
                    tmp);

        top = (__m128i *)to;
        _mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
        _mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
        _mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
        _mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
        _mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
        _mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
        _mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
        _mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
}

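/*
 * XTS driver: the initial tweak is E_k2(IV), where the IV is the
 * little-endian 64-bit block number zero-extended to 128 bits.  Data is
 * processed eight blocks at a time, with a single-block loop for the
 * tail.
 */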
static void
aesni_crypt_xts(int rounds, const __m128i *data_schedule,
    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
        __m128i tweakreg;
        uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
        size_t i, cnt;

        /*
         * Prepare tweak as E_k2(IV). IV is specified as LE representation
         * of a 64-bit block number which we allow to be passed in directly.
         */
#if BYTE_ORDER == LITTLE_ENDIAN
        bcopy(iv, tweak, AES_XTS_IVSIZE);
        /* Last 64 bits of IV are always zero. */
        bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
        tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
        tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

        cnt = len / AES_XTS_BLOCKSIZE / 8;
        for (i = 0; i < cnt; i++) {
                aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
                    from, to, do_encrypt);
                from += AES_XTS_BLOCKSIZE * 8;
                to += AES_XTS_BLOCKSIZE * 8;
        }
        i *= 8;
        cnt = len / AES_XTS_BLOCKSIZE;
        for (; i < cnt; i++) {
                aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
                    from, to, do_encrypt);
                from += AES_XTS_BLOCKSIZE;
                to += AES_XTS_BLOCKSIZE;
        }
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

        aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
            iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

        aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
            iv, 0);
}

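/*
 * Expand the session key schedules.  keylen is in bits; an XTS key is
 * twice the AES key size, with the first half used as the data key and
 * the second half (at byte offset keylen / 16) as the tweak key.
 */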
static int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

        switch (ses->algo) {
        case CRYPTO_AES_CBC:
                switch (keylen) {
                case 128:
                        ses->rounds = AES128_ROUNDS;
                        break;
                case 192:
                        ses->rounds = AES192_ROUNDS;
                        break;
                case 256:
                        ses->rounds = AES256_ROUNDS;
                        break;
                default:
                        return (EINVAL);
                }
                break;
        case CRYPTO_AES_XTS:
                switch (keylen) {
                case 256:
                        ses->rounds = AES128_ROUNDS;
                        break;
                case 512:
                        ses->rounds = AES256_ROUNDS;
                        break;
                default:
                        return (EINVAL);
                }
                break;
        default:
                return (EINVAL);
        }

        aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
        aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
        if (ses->algo == CRYPTO_AES_CBC)
                arc4rand(ses->iv, sizeof(ses->iv), 0);
        else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
                    ses->rounds);
        }

        return (0);
}

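/*
 * Session key setup.  The AES-NI key expansion uses FPU/SSE state, so
 * the FPU context is entered with fpu_kern_enter() unless we are
 * already running on an FPU-enabled kernel thread.
 */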
int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
        struct thread *td;
        int error, saved_ctx;

        td = curthread;
        if (!is_fpu_kern_thread(0)) {
                error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
                saved_ctx = 1;
        } else {
                error = 0;
                saved_ctx = 0;
        }
        if (error == 0) {
                error = aesni_cipher_setup_common(ses, encini->cri_key,
                    encini->cri_klen);
                if (saved_ctx)
                        fpu_kern_leave(td, ses->fpu_ctx);
        }
        return (error);
}

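/*
 * Process one encryption/decryption request: gather the payload into a
 * contiguous buffer if needed, enter FPU context, honor explicit keys
 * and IVs, run the CBC or XTS transform in place, and copy the result
 * (and, for encryption, the next IV) back into the request.
 */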
int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
    struct cryptop *crp)
{
        struct thread *td;
        uint8_t *buf;
        int error, allocated, saved_ctx;

        buf = aesni_cipher_alloc(enccrd, crp, &allocated);
        if (buf == NULL)
                return (ENOMEM);

        td = curthread;
        if (!is_fpu_kern_thread(0)) {
                error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
                if (error != 0)
                        goto out;
                saved_ctx = 1;
        } else {
                saved_ctx = 0;
                error = 0;
        }

        if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
                error = aesni_cipher_setup_common(ses, enccrd->crd_key,
                    enccrd->crd_klen);
                if (error != 0)
                        goto out;
        }

        if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
                if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
                        bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
                if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
                        crypto_copyback(crp->crp_flags, crp->crp_buf,
                            enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
                if (ses->algo == CRYPTO_AES_CBC) {
                        aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
                            enccrd->crd_len, buf, buf, ses->iv);
                } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                        aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
                            ses->xts_schedule, enccrd->crd_len, buf, buf,
                            ses->iv);
                }
        } else {
                if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
                        bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
                else
                        crypto_copydata(crp->crp_flags, crp->crp_buf,
                            enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
                if (ses->algo == CRYPTO_AES_CBC) {
                        aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
                            enccrd->crd_len, buf, ses->iv);
                } else /* if (ses->algo == CRYPTO_AES_XTS) */ {
                        aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
                            ses->xts_schedule, enccrd->crd_len, buf, buf,
                            ses->iv);
                }
        }
        if (saved_ctx)
                fpu_kern_leave(td, ses->fpu_ctx);
        if (allocated)
                crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
                    enccrd->crd_len, buf);
        if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
                crypto_copydata(crp->crp_flags, crp->crp_buf,
                    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
                    AES_BLOCK_LEN, ses->iv);
out:
        if (allocated) {
                bzero(buf, enccrd->crd_len);
                free(buf, M_AESNI);
        }
        return (error);
}