2 * Copyright (c) 2016 The FreeBSD Foundation
5 * This software was developed by Andrew Turner under
6 * sponsorship from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * This code is built with floating-point enabled. Make sure to have entered
32 * into floating-point context before calling any of these functions.
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
43 #include <opencrypto/cryptodev.h>
44 #include <crypto/armv8/armv8_crypto.h>
/*
 * Encrypt a single 16-byte AES block using the ARMv8 Cryptography
 * Extensions.  keysched points to the expanded encryption round keys,
 * "from" is the plaintext block.  Callers below pass "rounds - 1", so
 * "rounds" here is one less than the AES round count for the key size.
 * NOTE(review): the return type, opening brace, locals (tmp, i) and the
 * return statement are elided from this view — confirm against the
 * original file.
 */
49 armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
	/*
	 * Two rounds per iteration: AESE performs AddRoundKey, SubBytes
	 * and ShiftRows; AESMC performs MixColumns.
	 */
55 for (i = 0; i < rounds - 1; i += 2) {
56 tmp = vaeseq_u8(tmp, keysched[i]);
57 tmp = vaesmcq_u8(tmp);
58 tmp = vaeseq_u8(tmp, keysched[i + 1]);
59 tmp = vaesmcq_u8(tmp);
	/* Odd leftover full round when (rounds - 1) is odd. */
62 tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
63 tmp = vaesmcq_u8(tmp);
	/* Final round: AESE without MixColumns, then XOR the last round key. */
64 tmp = vaeseq_u8(tmp, keysched[rounds]);
65 tmp = veorq_u8(tmp, keysched[rounds + 1]);
/*
 * Decrypt a single 16-byte AES block using the ARMv8 Cryptography
 * Extensions.  Mirror image of armv8_aes_enc(): AESD performs the
 * inverse SubBytes/ShiftRows with AddRoundKey, AESIMC the inverse
 * MixColumns.  keysched must be the decryption (inverse) key schedule.
 * NOTE(review): return type, braces, locals and the return statement
 * are elided from this view.
 */
71 armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
	/* Two inverse rounds per iteration, as in the encrypt path. */
77 for (i = 0; i < rounds - 1; i += 2) {
78 tmp = vaesdq_u8(tmp, keysched[i]);
79 tmp = vaesimcq_u8(tmp);
80 tmp = vaesdq_u8(tmp, keysched[i+1]);
81 tmp = vaesimcq_u8(tmp);
	/* Odd leftover full inverse round. */
84 tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
85 tmp = vaesimcq_u8(tmp);
	/* Final inverse round without InvMixColumns, then last round key. */
86 tmp = vaesdq_u8(tmp, keysched[rounds]);
87 tmp = veorq_u8(tmp, keysched[rounds + 1]);
/*
 * AES-CBC encrypt "len" bytes from "from" into "to".  Each plaintext
 * block is XORed with the previous ciphertext block (the IV for the
 * first block) before encryption.  len must be a multiple of
 * AES_BLOCK_LEN.
 * NOTE(review): lines converting len to a block count, storing "tot"
 * to "to", chaining ivreg = tot and advancing "to" are elided from this
 * view — presumably they mirror the decrypt path below; confirm against
 * the original file.
 */
93 armv8_aes_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
94 const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
96 uint8x16_t tot, ivreg, tmp;
	/* Load the IV as the initial chaining value. */
100 ivreg = vld1q_u8(iv);
101 for (i = 0; i < len; i++) {
102 tmp = vld1q_u8(from);
	/* CBC: encrypt (plaintext XOR previous ciphertext/IV). */
103 tot = armv8_aes_enc(rounds - 1, key_schedule,
104 veorq_u8(tmp, ivreg));
107 from += AES_BLOCK_LEN;
/*
 * AES-CBC decrypt "len" bytes in place in "buf".  Each decrypted block
 * is XORed with the previous ciphertext block (the IV for the first
 * block).  len must be a multiple of AES_BLOCK_LEN.
 * NOTE(review): the line carrying the chaining value forward
 * (ivreg = nextiv) and the loop closer are elided from this view —
 * confirm against the original file.
 */
113 armv8_aes_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
114 uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
116 uint8x16_t ivreg, nextiv, tmp;
	/* Work in whole blocks. */
119 len /= AES_BLOCK_LEN;
120 ivreg = vld1q_u8(iv);
121 for (i = 0; i < len; i++) {
	/* Save the ciphertext before overwriting: it is the next IV. */
122 nextiv = vld1q_u8(buf);
123 tmp = armv8_aes_dec(rounds - 1, key_schedule, nextiv);
	/* Plaintext = D_k(ciphertext) XOR previous ciphertext/IV. */
124 vst1q_u8(buf, veorq_u8(tmp, ivreg));
126 buf += AES_BLOCK_LEN;
130 #define AES_XTS_BLOCKSIZE 16
131 #define AES_XTS_IVSIZE 8
132 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
/*
 * Advance the XTS tweak: multiply it by alpha (x) in GF(2^128), i.e.
 * shift the 128-bit value left by one and, when a carry falls out of
 * the top, XOR the low byte with the reduction constant 0x87
 * (AES_XTS_ALPHA).  Operates on the tweak as four 32-bit lanes.
 * NOTE(review): the lines ANDing xtweak with alphamask, shifting the
 * carry into place and XORing it into ret, plus the return, are elided
 * from this view — confirm against the original file.
 */
134 static inline int32x4_t
135 xts_crank_lfsr(int32x4_t inp)
	/* Per-lane carry-in mask: 0x87 for lane 0, 1 for the others. */
137 const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
138 int32x4_t xtweak, ret;
140 /* set up xor mask */
	/* Rotate lanes so each lane sees its carry-source neighbour. */
141 xtweak = vextq_s32(inp, inp, 3);
	/* Arithmetic shift right by 31: all-ones where the top bit was set. */
142 xtweak = vshrq_n_s32(xtweak, 31);
	/* Shift the whole tweak left by one bit (per lane; carries via xtweak). */
146 ret = vshlq_n_s32(inp, 1);
/*
 * Process one XTS block: XOR with the tweak, encrypt or decrypt with
 * the data key, XOR with the tweak again, then advance the tweak by
 * multiplying it by alpha in GF(2^128) for the next block.
 * NOTE(review): the local "block" declaration and the if/else wrapping
 * the encrypt/decrypt choice (keyed on do_encrypt) are elided from this
 * view — confirm against the original file.
 */
153 armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
154 uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
	/* Pre-whitening: input XOR tweak. */
158 block = vld1q_u8(from) ^ *tweak;
161 block = armv8_aes_enc(rounds - 1, key_schedule, block);
163 block = armv8_aes_dec(rounds - 1, key_schedule, block);
	/* Post-whitening: output XOR tweak. */
165 vst1q_u8(to, block ^ *tweak);
	/* Next block uses tweak * alpha. */
167 *tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
/*
 * AES-XTS encrypt or decrypt "len" bytes from "from" into "to".
 * The initial tweak is computed as E_k2(IV) with the tweak key
 * schedule, then each AES_XTS_BLOCKSIZE block is handled by
 * armv8_aes_crypt_xts_block(), which also advances the tweak.
 * len is truncated to whole blocks (cnt = len / AES_XTS_BLOCKSIZE).
 * NOTE(review): local declarations (tweakreg, cnt, i), the #else and
 * #endif of the endianness conditional, the loop closer and any final
 * cleanup are elided from this view — confirm against the original
 * file.
 */
171 armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
172 const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
173 uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
176 uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
180 * Prepare tweak as E_k2(IV). IV is specified as LE representation
181 * of a 64-bit block number which we allow to be passed in directly.
183 #if BYTE_ORDER == LITTLE_ENDIAN
	/* Low 64 bits of the tweak come straight from the caller's IV. */
184 bcopy(iv, tweak, AES_XTS_IVSIZE);
185 /* Last 64 bits of IV are always zero. */
186 bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
188 #error Only LITTLE_ENDIAN architectures are supported.
	/* Encrypt the sector number under the tweak key: tweak = E_k2(IV). */
190 tweakreg = vld1q_u8(tweak);
191 tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);
193 cnt = len / AES_XTS_BLOCKSIZE;
194 for (i = 0; i < cnt; i++) {
195 armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
196 from, to, do_encrypt);
197 from += AES_XTS_BLOCKSIZE;
198 to += AES_XTS_BLOCKSIZE;
/*
 * Public AES-XTS encrypt entry point: thin wrapper that forwards to
 * armv8_aes_crypt_xts() in encrypt mode.
 * NOTE(review): the return type, braces and the tail of the call (the
 * iv and do_encrypt arguments) are elided from this view — confirm
 * against the original file.
 */
203 armv8_aes_encrypt_xts(int rounds, const void *data_schedule,
204 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
205 const uint8_t iv[static AES_BLOCK_LEN])
208 armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
213 armv8_aes_decrypt_xts(int rounds, const void *data_schedule,
214 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
215 const uint8_t iv[static AES_BLOCK_LEN])
218 armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,