2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 br_aes_ct_bitslice_Sbox(uint32_t *q)
32 * This S-box implementation is a straightforward translation of
33 * the circuit described by Boyar and Peralta in "A new
34 * combinational logic minimization technique with applications
35 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
37 * Note that variables x* (input) and s* (output) are numbered
38 * in "reverse" order (x0 is the high bit, x7 is the low bit).
41 uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
42 uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
43 uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
45 uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
46 uint32_t z10, z11, z12, z13, z14, z15, z16, z17;
47 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
48 uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
49 uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
50 uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
51 uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
52 uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
53 uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
54 uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
66 * Top linear transformation.
161 * Bottom linear transformation.
206 br_aes_ct_ortho(uint32_t *q)
208 #define SWAPN(cl, ch, s, x, y) do { \
212 (x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \
213 (y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \
216 #define SWAP2(x, y) SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
217 #define SWAP4(x, y) SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
218 #define SWAP8(x, y) SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)
236 static const unsigned char Rcon[] = {
237 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
246 for (i = 0; i < 8; i ++) {
250 br_aes_ct_bitslice_Sbox(q);
257 br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
260 int i, j, k, nk, nkf;
278 nk = (int)(key_len >> 2);
279 nkf = (int)((num_rounds + 1) << 2);
281 for (i = 0; i < nk; i ++) {
282 tmp = br_dec32le((const unsigned char *)key + (i << 2));
283 skey[(i << 1) + 0] = tmp;
284 skey[(i << 1) + 1] = tmp;
286 for (i = nk, j = 0, k = 0; i < nkf; i ++) {
288 tmp = (tmp << 24) | (tmp >> 8);
289 tmp = sub_word(tmp) ^ Rcon[k];
290 } else if (nk > 6 && j == 4) {
293 tmp ^= skey[(i - nk) << 1];
294 skey[(i << 1) + 0] = tmp;
295 skey[(i << 1) + 1] = tmp;
301 for (i = 0; i < nkf; i += 4) {
302 br_aes_ct_ortho(skey + (i << 1));
304 for (i = 0, j = 0; i < nkf; i ++, j += 2) {
305 comp_skey[i] = (skey[j + 0] & 0x55555555)
306 | (skey[j + 1] & 0xAAAAAAAA);
313 br_aes_ct_skey_expand(uint32_t *skey,
314 unsigned num_rounds, const uint32_t *comp_skey)
318 n = (num_rounds + 1) << 2;
319 for (u = 0, v = 0; u < n; u ++, v += 2) {
322 x = y = comp_skey[u];
324 skey[v + 0] = x | (x << 1);
326 skey[v + 1] = y | (y >> 1);