/*
 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/*
 * Bitsliced AES S-box: applies the S-box to 64 byte instances in
 * parallel. On input, q[i] holds bit i of each byte (q[7] = high bit);
 * output is written back into q[] in the same layout.
 */
void
br_aes_ct64_bitslice_Sbox(uint64_t *q)
{
	/*
	 * This S-box implementation is a straightforward translation of
	 * the circuit described by Boyar and Peralta in "A new
	 * combinational logic minimization technique with applications
	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
	 *
	 * Note that variables x* (input) and s* (output) are numbered
	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
	 */

	uint64_t x0, x1, x2, x3, x4, x5, x6, x7;
	uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
	uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
	uint64_t y20, y21;
	uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
	uint64_t z10, z11, z12, z13, z14, z15, z16, z17;
	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
	uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
	uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
	uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
	uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
	uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
	uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
	uint64_t s0, s1, s2, s3, s4, s5, s6, s7;

	x0 = q[7];
	x1 = q[6];
	x2 = q[5];
	x3 = q[4];
	x4 = q[3];
	x5 = q[2];
	x6 = q[1];
	x7 = q[0];

	/*
	 * Top linear transformation.
	 */
	y14 = x3 ^ x5;
	y13 = x0 ^ x6;
	y9 = x0 ^ x3;
	y8 = x0 ^ x5;
	t0 = x1 ^ x2;
	y1 = t0 ^ x7;
	y4 = y1 ^ x3;
	y12 = y13 ^ y14;
	y2 = y1 ^ x0;
	y5 = y1 ^ x6;
	y3 = y5 ^ y8;
	t1 = x4 ^ y12;
	y15 = t1 ^ x5;
	y20 = t1 ^ x1;
	y6 = y15 ^ x7;
	y10 = y15 ^ t0;
	y11 = y20 ^ y9;
	y7 = x7 ^ y11;
	y17 = y10 ^ y11;
	y19 = y10 ^ y8;
	y16 = t0 ^ y11;
	y21 = y13 ^ y16;
	y18 = x0 ^ y16;

	/*
	 * Non-linear section.
	 */
	t2 = y12 & y15;
	t3 = y3 & y6;
	t4 = t3 ^ t2;
	t5 = y4 & x7;
	t6 = t5 ^ t2;
	t7 = y13 & y16;
	t8 = y5 & y1;
	t9 = t8 ^ t7;
	t10 = y2 & y7;
	t11 = t10 ^ t7;
	t12 = y9 & y11;
	t13 = y14 & y17;
	t14 = t13 ^ t12;
	t15 = y8 & y10;
	t16 = t15 ^ t12;
	t17 = t4 ^ t14;
	t18 = t6 ^ t16;
	t19 = t9 ^ t14;
	t20 = t11 ^ t16;
	t21 = t17 ^ y20;
	t22 = t18 ^ y19;
	t23 = t19 ^ y21;
	t24 = t20 ^ y18;

	t25 = t21 ^ t22;
	t26 = t21 & t23;
	t27 = t24 ^ t26;
	t28 = t25 & t27;
	t29 = t28 ^ t22;
	t30 = t23 ^ t24;
	t31 = t22 ^ t26;
	t32 = t31 & t30;
	t33 = t32 ^ t24;
	t34 = t23 ^ t33;
	t35 = t27 ^ t33;
	t36 = t24 & t35;
	t37 = t36 ^ t34;
	t38 = t27 ^ t36;
	t39 = t29 & t38;
	t40 = t25 ^ t39;

	t41 = t40 ^ t37;
	t42 = t29 ^ t33;
	t43 = t29 ^ t40;
	t44 = t33 ^ t37;
	t45 = t42 ^ t41;
	z0 = t44 & y15;
	z1 = t37 & y6;
	z2 = t33 & x7;
	z3 = t43 & y16;
	z4 = t40 & y1;
	z5 = t29 & y7;
	z6 = t42 & y11;
	z7 = t45 & y17;
	z8 = t41 & y10;
	z9 = t44 & y12;
	z10 = t37 & y3;
	z11 = t33 & y4;
	z12 = t43 & y13;
	z13 = t40 & y5;
	z14 = t29 & y2;
	z15 = t42 & y9;
	z16 = t45 & y14;
	z17 = t41 & y8;

	/*
	 * Bottom linear transformation.
	 */
	t46 = z15 ^ z16;
	t47 = z10 ^ z11;
	t48 = z5 ^ z13;
	t49 = z9 ^ z10;
	t50 = z2 ^ z12;
	t51 = z2 ^ z5;
	t52 = z7 ^ z8;
	t53 = z0 ^ z3;
	t54 = z6 ^ z7;
	t55 = z16 ^ z17;
	t56 = z12 ^ t48;
	t57 = t50 ^ t53;
	t58 = z4 ^ t46;
	t59 = z3 ^ t54;
	t60 = t46 ^ t57;
	t61 = z14 ^ t57;
	t62 = t52 ^ t58;
	t63 = t49 ^ t58;
	t64 = z4 ^ t59;
	t65 = t61 ^ t62;
	t66 = z1 ^ t63;
	s0 = t59 ^ t63;
	s6 = t56 ^ ~t62;
	s7 = t48 ^ ~t60;
	t67 = t64 ^ t65;
	s3 = t53 ^ t66;
	s4 = t51 ^ t66;
	s5 = t47 ^ t65;
	s1 = t64 ^ ~s3;
	s2 = t55 ^ ~t67;

	q[7] = s0;
	q[6] = s1;
	q[5] = s2;
	q[4] = s3;
	q[3] = s4;
	q[2] = s5;
	q[1] = s6;
	q[0] = s7;
}
/*
 * Orthogonalize the eight 64-bit words: within each aligned group of
 * eight bytes, the 8x8 bit matrix (one row per word) is transposed.
 * This transform is its own inverse, so it converts between the normal
 * and bitsliced representations in both directions.
 */
void
br_aes_ct64_ortho(uint64_t *q)
{
	/*
	 * SWAPN exchanges, between words x and y, the bit groups selected
	 * by masks cl (low/kept in x) and ch (high/kept in y), over a
	 * distance of s bits.
	 */
#define SWAPN(cl, ch, s, x, y)   do { \
		uint64_t a, b; \
		a = (x); \
		b = (y); \
		(x) = (a & (uint64_t)cl) | ((b & (uint64_t)cl) << (s)); \
		(y) = ((a & (uint64_t)ch) >> (s)) | (b & (uint64_t)ch); \
	} while (0)

#define SWAP2(x, y)    SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA,  1, x, y)
#define SWAP4(x, y)    SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC,  2, x, y)
#define SWAP8(x, y)    SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0,  4, x, y)

	SWAP2(q[0], q[1]);
	SWAP2(q[2], q[3]);
	SWAP2(q[4], q[5]);
	SWAP2(q[6], q[7]);

	SWAP4(q[0], q[2]);
	SWAP4(q[1], q[3]);
	SWAP4(q[4], q[6]);
	SWAP4(q[5], q[7]);

	SWAP8(q[0], q[4]);
	SWAP8(q[1], q[5]);
	SWAP8(q[2], q[6]);
	SWAP8(q[3], q[7]);
}
/*
 * Interleave four 32-bit words into two 64-bit words. Each source byte
 * w[n] byte k lands at bit offset 16*k (for w[0]/w[1]) or 16*k+8 (for
 * w[2]/w[3]) of *q0 (words 0 and 2) or *q1 (words 1 and 3).
 */
void
br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
{
	uint64_t x0, x1, x2, x3;

	x0 = w[0];
	x1 = w[1];
	x2 = w[2];
	x3 = w[3];
	/* Spread each 32-bit word over 64 bits: first separate the two
	   16-bit halves, then separate the bytes within each half. */
	x0 |= (x0 << 16);
	x1 |= (x1 << 16);
	x2 |= (x2 << 16);
	x3 |= (x3 << 16);
	x0 &= (uint64_t)0x0000FFFF0000FFFF;
	x1 &= (uint64_t)0x0000FFFF0000FFFF;
	x2 &= (uint64_t)0x0000FFFF0000FFFF;
	x3 &= (uint64_t)0x0000FFFF0000FFFF;
	x0 |= (x0 << 8);
	x1 |= (x1 << 8);
	x2 |= (x2 << 8);
	x3 |= (x3 << 8);
	x0 &= (uint64_t)0x00FF00FF00FF00FF;
	x1 &= (uint64_t)0x00FF00FF00FF00FF;
	x2 &= (uint64_t)0x00FF00FF00FF00FF;
	x3 &= (uint64_t)0x00FF00FF00FF00FF;
	*q0 = x0 | (x2 << 8);
	*q1 = x1 | (x3 << 8);
}
/*
 * De-interleave two 64-bit words back into four 32-bit words; this is
 * the inverse of br_aes_ct64_interleave_in(). Even-positioned bytes of
 * q0/q1 rebuild w[0]/w[1], odd-positioned bytes rebuild w[2]/w[3].
 */
void
br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
{
	uint64_t x0, x1, x2, x3;

	x0 = q0 & (uint64_t)0x00FF00FF00FF00FF;
	x1 = q1 & (uint64_t)0x00FF00FF00FF00FF;
	x2 = (q0 >> 8) & (uint64_t)0x00FF00FF00FF00FF;
	x3 = (q1 >> 8) & (uint64_t)0x00FF00FF00FF00FF;
	/* Gather spread bytes back together: first merge adjacent bytes
	   into 16-bit halves, then merge the halves into 32 bits. */
	x0 |= (x0 >> 8);
	x1 |= (x1 >> 8);
	x2 |= (x2 >> 8);
	x3 |= (x3 >> 8);
	x0 &= (uint64_t)0x0000FFFF0000FFFF;
	x1 &= (uint64_t)0x0000FFFF0000FFFF;
	x2 &= (uint64_t)0x0000FFFF0000FFFF;
	x3 &= (uint64_t)0x0000FFFF0000FFFF;
	w[0] = (uint32_t)x0 | (uint32_t)(x0 >> 16);
	w[1] = (uint32_t)x1 | (uint32_t)(x1 >> 16);
	w[2] = (uint32_t)x2 | (uint32_t)(x2 >> 16);
	w[3] = (uint32_t)x3 | (uint32_t)(x3 >> 16);
}
/*
 * AES key schedule round constants: Rcon[i] = x^i in GF(2^8)
 * (reduction polynomial 0x11B).
 */
static const unsigned char Rcon[] = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
};
/*
 * Apply the AES S-box to each byte of a 32-bit word (the key schedule
 * "SubWord" operation), reusing the bitsliced S-box: the word is
 * placed alone in slot 0, orthogonalized, transformed, and converted
 * back.
 */
static uint32_t
sub_word(uint32_t x)
{
	uint64_t q[8];

	memset(q, 0, sizeof q);
	q[0] = x;
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_Sbox(q);
	br_aes_ct64_ortho(q);
	return (uint32_t)q[0];
}
309 br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
312 int i, j, k, nk, nkf;
330 nk = (int)(key_len >> 2);
331 nkf = (int)((num_rounds + 1) << 2);
332 br_range_dec32le(skey, (key_len >> 2), key);
333 tmp = skey[(key_len >> 2) - 1];
334 for (i = nk, j = 0, k = 0; i < nkf; i ++) {
336 tmp = (tmp << 24) | (tmp >> 8);
337 tmp = sub_word(tmp) ^ Rcon[k];
338 } else if (nk > 6 && j == 4) {
349 for (i = 0, j = 0; i < nkf; i += 4, j += 2) {
352 br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
359 br_aes_ct64_ortho(q);
361 (q[0] & (uint64_t)0x1111111111111111)
362 | (q[1] & (uint64_t)0x2222222222222222)
363 | (q[2] & (uint64_t)0x4444444444444444)
364 | (q[3] & (uint64_t)0x8888888888888888);
366 (q[4] & (uint64_t)0x1111111111111111)
367 | (q[5] & (uint64_t)0x2222222222222222)
368 | (q[6] & (uint64_t)0x4444444444444444)
369 | (q[7] & (uint64_t)0x8888888888888888);
/*
 * Expand compressed bitslice subkeys back to the full eight-slice
 * format: each word of comp_skey yields four words of skey, with every
 * retained bit replicated over its whole nibble. skey receives
 * 8*(num_rounds+1) 64-bit words.
 */
void
br_aes_ct64_skey_expand(uint64_t *skey,
	unsigned num_rounds, const uint64_t *comp_skey)
{
	unsigned u, v, n;

	n = (num_rounds + 1) << 1;
	for (u = 0, v = 0; u < n; u ++, v += 4) {
		uint64_t x0, x1, x2, x3;

		x0 = x1 = x2 = x3 = comp_skey[u];
		x0 &= (uint64_t)0x1111111111111111;
		x1 &= (uint64_t)0x2222222222222222;
		x2 &= (uint64_t)0x4444444444444444;
		x3 &= (uint64_t)0x8888888888888888;
		x1 >>= 1;
		x2 >>= 2;
		x3 >>= 3;
		/*
		 * Each xN now has at most one bit per nibble, in the low
		 * position; (x << 4) - x turns every set bit into a full
		 * 0xF nibble (and leaves clear nibbles at zero).
		 */
		skey[v + 0] = (x0 << 4) - x0;
		skey[v + 1] = (x1 << 4) - x1;
		skey[v + 2] = (x2 << 4) - x2;
		skey[v + 3] = (x3 << 4) - x3;
	}
}