/* sys/crypto/aesni/aesni_wrap.c */
/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by John-Mark Gurney
 * under sponsorship of the FreeBSD Foundation and
 * Rubicon Communications, LLC (Netgate).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include <opencrypto/gmac.h>

#include "aesencdec.h"
#include <smmintrin.h>

MALLOC_DECLARE(M_AESNI);

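/*
 * Where the cipher mode allows it, eight AES blocks are processed per
 * call: the AESNI rounds of independent blocks can overlap in the
 * pipeline, so batching hides most of the per-instruction latency.
 */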
struct blocks8 {
	__m128i blk[8];
} __packed;

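/*
 * CBC encryption is inherently serial (each block's input depends on
 * the previous ciphertext block), so no eight-way batching is possible
 * here.
 */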
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

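/*
 * CBC decryption does parallelize: each plaintext block depends only on
 * two ciphertext blocks, so eight blocks are decrypted per iteration
 * and the XOR chaining is applied afterwards.
 */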
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i blocks[8];
	struct blocks8 *blks;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (struct blocks8 *)buf;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = blks->blk[j];
			blks->blk[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		nextiv = _mm_loadu_si128((void *)buf);
		_mm_storeu_si128((void *)buf,
		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

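/*
 * ECB has no chaining at all, so both directions use the eight-block
 * fast path.
 */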
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	__m128i tout[8];
	const struct blocks8 *blks;
	struct blocks8 *top;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
		    blks->blk[6], blks->blk[7], tout);
		top->blk[0] = tout[0];
		top->blk[1] = tout[1];
		top->blk[2] = tout[2];
		top->blk[3] = tout[3];
		top->blk[4] = tout[4];
		top->blk[5] = tout[5];
		top->blk[6] = tout[6];
		top->blk[7] = tout[7];
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

/*
 * Mixed-endian increment: the counter's low 64 bits are kept in the
 * register's high quadword so that the layout matches the BSWAP_EPI64
 * shuffle used by aesni_encrypt_icm() below.
 */
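/*
 * Worked example: if the counter holds high=0x0, low=0xffffffffffffffff,
 * the add below wraps the low word to zero and the borrow trick then
 * carries into the high word, leaving high=0x1, low=0x0.
 */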
static inline __m128i
nextc(__m128i x)
{
	const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0);
	const __m128i ZERO = _mm_setzero_si128();
	__m128i t;

	/* Add 1 to the high quadword, which holds the counter's low word. */
	x = _mm_add_epi64(x, ONE);
	/* t's high quadword is all-ones iff the addition wrapped to zero. */
	t = _mm_cmpeq_epi64(x, ZERO);
	/* Move the wrap mask down into the low quadword; zero the rest. */
	t = _mm_unpackhi_epi64(t, ZERO);
	/* Subtracting -1 (the mask) carries into the counter's high word. */
	x = _mm_sub_epi64(x, t);

	return (x);
}

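/*
 * ICM (counter mode): the counter is kept byte-swapped in ctr1 so that
 * nextc() can increment it with 64-bit adds; each counter value is
 * swapped back, encrypted, and XORed with the payload.  Decryption is
 * the same operation, so callers use this function for both directions.
 */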
void
aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	__m128i tot;
	__m128i tmp1, tmp2, tmp3, tmp4;
	__m128i tmp5, tmp6, tmp7, tmp8;
	__m128i ctr1, ctr2, ctr3, ctr4;
	__m128i ctr5, ctr6, ctr7, ctr8;
	__m128i BSWAP_EPI64;
	__m128i tout[8];
	uint8_t block[AES_BLOCK_LEN];
	struct blocks8 *top;
	const struct blocks8 *blks;
	size_t i, cnt;

	BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);

	ctr1 = _mm_loadu_si128((const __m128i *)iv);
	ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		ctr2 = nextc(ctr1);
		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
		ctr3 = nextc(ctr2);
		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
		ctr4 = nextc(ctr3);
		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
		ctr5 = nextc(ctr4);
		tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
		ctr6 = nextc(ctr5);
		tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
		ctr7 = nextc(ctr6);
		tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
		ctr8 = nextc(ctr7);
		tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
		ctr1 = nextc(ctr8);

		blks = (const struct blocks8 *)from;
		top = (struct blocks8 *)to;
		aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
		    tmp5, tmp6, tmp7, tmp8, tout);

		top->blk[0] = blks->blk[0] ^ tout[0];
		top->blk[1] = blks->blk[1] ^ tout[1];
		top->blk[2] = blks->blk[2] ^ tout[2];
		top->blk[3] = blks->blk[3] ^ tout[3];
		top->blk[4] = blks->blk[4] ^ tout[4];
		top->blk[5] = blks->blk[5] ^ tout[5];
		top->blk[6] = blks->blk[6] ^ tout[6];
		top->blk[7] = blks->blk[7] ^ tout[7];

		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		ctr1 = nextc(ctr1);

		tot = aesni_enc(rounds - 1, key_schedule, tmp1);

		tot = tot ^ _mm_loadu_si128((const __m128i *)from);
		_mm_storeu_si128((__m128i *)to, tot);

		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}

	/*
	 * Handle a remaining partial block.  Copy the tail of the payload
	 * onto a zeroed stack buffer first, so that the full 16-byte load
	 * below never reads past the end of the input.
	 */
	if (len % AES_BLOCK_LEN != 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, from, len % AES_BLOCK_LEN);
		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
		tot = tot ^ _mm_loadu_si128((const __m128i *)block);
		memcpy(to, &tot, len % AES_BLOCK_LEN);
		explicit_bzero(block, sizeof(block));
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

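/*
 * Multiply the tweak by x in GF(2^128): shift the 128-bit value left by
 * one bit and, on carry out of the top bit, XOR in the reduction
 * polynomial 0x87.  There is no 128-bit shift instruction, so the
 * inter-lane carries are computed first: _mm_srai_epi32 broadcasts each
 * 32-bit lane's top bit, the 0x93 shuffle rotates those masks up one
 * lane, and alphamask turns them into 0x87 for the lowest lane (the
 * reduction) and 1 for the others (the carries).
 */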
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return (ret);
}

static void
aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i block;

	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	_mm_storeu_si128((__m128i *)to, block ^ *tweak);

	*tweak = xts_crank_lfsr(*tweak);
}

static void
aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
    const uint8_t *from, uint8_t *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];
	__m128i *top;
	const __m128i *fromp;

	tmptweak = *tweak;

	/*
	 * Unroll the loop.  This lets the compiler keep the values in
	 * registers and saves memory accesses.
	 */
	fromp = (const __m128i *)from;
#define	PREPINP(v, pos)						\
		do {						\
			tweaks[(pos)] = tmptweak;		\
			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
			    tmptweak;				\
			tmptweak = xts_crank_lfsr(tmptweak);	\
		} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	top = (__m128i *)to;
	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
}

static void
aesni_crypt_xts(int rounds, const __m128i *data_schedule,
    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare the tweak as E_k2(IV).  The IV is specified as the LE
	 * representation of a 64-bit block number, which we allow to be
	 * passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}

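/*
 * Key lengths below are in bits.  XTS keys are twice the AES key size
 * because they concatenate the data key and the tweak key, so a 256-bit
 * XTS key selects AES-128 and a 512-bit key selects AES-256.
 * Counter-based modes (ICM, GCM, CCM) only ever run the cipher in the
 * encrypt direction, so no decryption schedule is needed for them.
 */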
int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{
	int decsched;

	decsched = 1;

	switch (ses->algo) {
	case CRYPTO_AES_ICM:
	case CRYPTO_AES_NIST_GCM_16:
	case CRYPTO_AES_CCM_16:
		decsched = 0;
		/* FALLTHROUGH */
	case CRYPTO_AES_CBC:
		switch (keylen) {
		case 128:
			ses->rounds = AES128_ROUNDS;
			break;
		case 192:
			ses->rounds = AES192_ROUNDS;
			break;
		case 256:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			CRYPTDEB("invalid CBC/ICM/GCM/CCM key length");
			return (EINVAL);
		}
		break;
	case CRYPTO_AES_XTS:
		switch (keylen) {
		case 256:
			ses->rounds = AES128_ROUNDS;
			break;
		case 512:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			CRYPTDEB("invalid XTS key length");
			return (EINVAL);
		}
		break;
	default:
		return (EINVAL);
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	if (decsched)
		aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
		    ses->rounds);

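	/*
	 * keylen is in bits, so keylen / 16 == (keylen / 8) / 2 bytes:
	 * the second half of the combined key, which seeds the XTS tweak
	 * schedule.
	 */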
	if (ses->algo == CRYPTO_AES_XTS)
		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
		    ses->rounds);

	return (0);
}
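
#if 0
/*
 * Illustration only, never compiled: a minimal sketch of driving the
 * CBC wrapper above directly.  It assumes AES_SCHED_LEN, AES128_ROUNDS,
 * and aesni_set_enckey() from <crypto/aesni/aesni.h>, and a caller that
 * already owns FPU context (the in-tree consumer is the aesni(4)
 * driver, which brackets these calls with fpu_kern_enter() and
 * fpu_kern_leave()).  The key and IV are zero-filled placeholders.
 */
static void
aesni_wrap_example(void)
{
	uint8_t sched[AES_SCHED_LEN] __aligned(16);
	const uint8_t key[16] = { 0 };
	const uint8_t iv[AES_BLOCK_LEN] = { 0 };
	uint8_t buf[4 * AES_BLOCK_LEN] = { 0 };

	/* Expand the raw key into the encryption schedule. */
	aesni_set_enckey(key, sched, AES128_ROUNDS);
	/* Encrypt four blocks in place under the given IV. */
	aesni_encrypt_cbc(AES128_ROUNDS, sched, sizeof(buf), buf, buf, iv);
}
#endif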