2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/*
 * Table of 24 64-bit constants, one per round. process_block() XORs
 * RC[j + 0] and RC[j + 1] into A[0] for j = 0, 2, ..., 22, so every
 * entry is used exactly once per call.
 * NOTE(review): these are expected to be the Keccak-f[1600] round
 * constants from FIPS 202; verify against the standard before editing.
 */
30 static const uint64_t RC[] = {
31 0x0000000000000001, 0x0000000000008082,
32 0x800000000000808A, 0x8000000080008000,
33 0x000000000000808B, 0x0000000080000001,
34 0x8000000080008081, 0x8000000000008009,
35 0x000000000000008A, 0x0000000000000088,
36 0x0000000080008009, 0x000000008000000A,
37 0x000000008000808B, 0x800000000000008B,
38 0x8000000000008089, 0x8000000000008003,
39 0x8000000000008002, 0x8000000000000080,
40 0x000000000000800A, 0x800000008000000A,
41 0x8000000080008081, 0x8000000000008080,
42 0x0000000080000001, 0x8000000080008008
46 * XOR a block of data into the provided state. This supports only
47 * blocks whose length is a multiple of 64 bits.
/*
 * A    - state words, modified in place: word (u / 8) is XORed with the
 *        64-bit value decoded from input bytes u .. u+7.
 * data - input bytes; read in 8-byte steps at offsets 0, 8, 16, ...
 * rate - number of input bytes to absorb. The loop always reads a full
 *        8 bytes per step, so a value that is not a multiple of 8 would
 *        read past `rate`.
 * NOTE(review): br_dec64le() is defined elsewhere; presumably it decodes
 * 8 bytes as a little-endian 64-bit integer — confirm.
 */
50 xor_block(uint64_t *A, const void *data, size_t rate)
54 for (u = 0; u < rate; u += 8) {
/* u >> 3 turns the byte offset into a 64-bit word index. */
55 A[u >> 3] ^= br_dec64le((const unsigned char *)data + u);
60 * Run the 24 rounds on the provided state A, in place. No input data is
61 * read here; callers XOR it into the state first (see xor_block()).
/*
 * A - state words A[0] .. A[24], transformed in place. The function takes
 *     no input data; the visible callers XOR data into A with xor_block()
 *     before calling it.
 */
64 process_block(uint64_t *A)
66 uint64_t t0, t1, t2, t3, t4;
67 uint64_t tt0, tt1, tt2, tt3;
69 uint64_t c0, c1, c2, c3, c4, bnn;
73 * Compute the 24 rounds. This loop is partially unrolled (each
74 * iteration computes two rounds).
76 for (j = 0; j < 24; j += 2) {
/*
 * (x << 1) | (x >> 63) is a rotate-left by one bit of a 64-bit
 * value; it is applied to tt0 five times in each round.
 */
81 tt0 = (tt0 << 1) | (tt0 >> 63);
91 tt0 = (tt0 << 1) | (tt0 >> 63);
101 tt0 = (tt0 << 1) | (tt0 >> 63);
111 tt0 = (tt0 << 1) | (tt0 >> 63);
121 tt0 = (tt0 << 1) | (tt0 >> 63);
/*
 * First round of the pair: rotate each of A[1] .. A[24] left by a
 * fixed, per-word bit count, written as (x << n) | (x >> (64 - n)).
 * A[0] is not rotated. Every n is in 1..62, so no shift reaches 64.
 * NOTE(review): presumably the Keccak "rho" step — confirm.
 */
153 A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
154 A[10] = (A[10] << 3) | (A[10] >> (64 - 3));
155 A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
156 A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
157 A[ 1] = (A[ 1] << 1) | (A[ 1] >> (64 - 1));
158 A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
159 A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
160 A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
161 A[21] = (A[21] << 2) | (A[21] >> (64 - 2));
162 A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
163 A[ 7] = (A[ 7] << 6) | (A[ 7] >> (64 - 6));
164 A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
165 A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
166 A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
167 A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
168 A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
169 A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
170 A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
171 A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
172 A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
173 A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
174 A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
175 A[19] = (A[19] << 8) | (A[19] >> (64 - 8));
176 A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
/* End of the first round: fold in the even-indexed round constant. */
257 A[ 0] = A[ 0] ^ RC[j + 0];
262 tt0 = (tt0 << 1) | (tt0 >> 63);
272 tt0 = (tt0 << 1) | (tt0 >> 63);
282 tt0 = (tt0 << 1) | (tt0 >> 63);
292 tt0 = (tt0 << 1) | (tt0 >> 63);
302 tt0 = (tt0 << 1) | (tt0 >> 63);
/*
 * Second round of the pair: the same 24 rotation counts, in the same
 * order as in the first round, but applied to different word indices.
 * NOTE(review): presumably the first round leaves the words in
 * permuted positions and this indexing accounts for it — confirm
 * against the surrounding round code before changing any index.
 */
334 A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
335 A[ 1] = (A[ 1] << 3) | (A[ 1] >> (64 - 3));
336 A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
337 A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
338 A[ 6] = (A[ 6] << 1) | (A[ 6] >> (64 - 1));
339 A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
340 A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
341 A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
342 A[ 8] = (A[ 8] << 2) | (A[ 8] >> (64 - 2));
343 A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
344 A[10] = (A[10] << 6) | (A[10] >> (64 - 6));
345 A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
346 A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
347 A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
348 A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
349 A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
350 A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
351 A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
352 A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
353 A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
354 A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
355 A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
356 A[23] = (A[23] << 8) | (A[23] >> (64 - 8));
357 A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
/* End of the second round: fold in the odd-indexed round constant. */
438 A[ 0] = A[ 0] ^ RC[j + 1];
/*
 * Reset a SHAKE context: derive the rate from the requested security
 * level and load the initial state words.
 *
 * sc             - context to initialise (rate and A are written here).
 * security_level - security level in bits; only (security_level >> 2)
 *                  is used.
 */
468 /* see bearssl_kdf.h */
470 br_shake_init(br_shake_context *sc, int security_level)
/*
 * rate = 200 - (security_level >> 2), e.g. 128 -> 168 and 256 -> 136.
 * 200 matches the 25 words * 8 bytes that br_shake_produce() writes out.
 */
472 sc->rate = 200 - (size_t)(security_level >> 2);
474 memset(sc->A, 0, sizeof sc->A);
/*
 * Six words start as all-ones instead of zero. The same six indices
 * (1, 2, 8, 12, 17, 20) are written out complemented (~A[i]) by
 * br_shake_produce(), so these words are held inverted in the state.
 * NOTE(review): presumably a lane-complementing trick that saves NOT
 * operations in the round function — confirm.
 */
475 sc->A[ 1] = ~(uint64_t)0;
476 sc->A[ 2] = ~(uint64_t)0;
477 sc->A[ 8] = ~(uint64_t)0;
478 sc->A[12] = ~(uint64_t)0;
479 sc->A[17] = ~(uint64_t)0;
480 sc->A[20] = ~(uint64_t)0;
/*
 * Feed input bytes into the context.
 *
 * sc   - SHAKE context; its dbuf staging buffer and state A are updated.
 * data - input bytes.
 * len  - number of input bytes.
 */
483 /* see bearssl_kdf.h */
485 br_shake_inject(br_shake_context *sc, const void *data, size_t len)
487 const unsigned char *buf;
/* Stage clen input bytes in the context buffer at offset dptr. */
500 memcpy(sc->dbuf + dptr, buf, clen);
/* Absorb the staged buffer: XOR `rate` bytes into the state, then run the rounds. */
505 xor_block(sc->A, sc->dbuf, rate);
506 process_block(sc->A);
/*
 * Finish the input phase: write the padding bytes into sc->dbuf and XOR
 * the padded buffer into the state. process_block() is not called here.
 * NOTE(review): the byte values look like the SHAKE domain-separation
 * suffix plus pad10*1 padding from FIPS 202 — confirm.
 *
 * sc - SHAKE context; dbuf, dptr and A are updated.
 */
513 /* see bearssl_kdf.h */
515 br_shake_flip(br_shake_context *sc)
518 * We apply padding and pre-XOR the value into the state. We
519 * set dptr to the end of the buffer, so that first call to
520 * shake_extract() will process the block.
522 if ((sc->dptr + 1) == sc->rate) {
/*
 * Exactly one byte of room is left before `rate`, so the first
 * padding byte (0x1F) and the last one (0x80) fall on the same byte:
 * 0x9F == 0x1F | 0x80.
 */
523 sc->dbuf[sc->dptr ++] = 0x9F;
/*
 * These three statements write 0x1F at dptr, zero-fill up to but not
 * including the last byte of the block, and put 0x80 in that last byte.
 */
525 sc->dbuf[sc->dptr ++] = 0x1F;
526 memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1);
527 sc->dbuf[sc->rate - 1] = 0x80;
/* Pre-XOR the padded block into the state. */
530 xor_block(sc->A, sc->dbuf, sc->rate);
/*
 * Produce output bytes from the context.
 *
 * sc  - SHAKE context.
 * out - destination for the output bytes.
 * len - number of output bytes requested.
 */
533 /* see bearssl_kdf.h */
535 br_shake_produce(br_shake_context *sc, void *out, size_t len)
/*
 * Serialise the 25 state words into dbuf, 8 bytes per word, at offsets
 * 0 .. 192. Words 1, 2, 8, 12, 17 and 20 are complemented on the way
 * out; these are the same six words br_shake_init() sets to all-ones.
 * NOTE(review): br_enc64le() is defined elsewhere; presumably it stores
 * a 64-bit value as 8 little-endian bytes — confirm.
 */
553 br_enc64le(dbuf + 0, A[ 0]);
554 br_enc64le(dbuf + 8, ~A[ 1]);
555 br_enc64le(dbuf + 16, ~A[ 2]);
556 br_enc64le(dbuf + 24, A[ 3]);
557 br_enc64le(dbuf + 32, A[ 4]);
558 br_enc64le(dbuf + 40, A[ 5]);
559 br_enc64le(dbuf + 48, A[ 6]);
560 br_enc64le(dbuf + 56, A[ 7]);
561 br_enc64le(dbuf + 64, ~A[ 8]);
562 br_enc64le(dbuf + 72, A[ 9]);
563 br_enc64le(dbuf + 80, A[10]);
564 br_enc64le(dbuf + 88, A[11]);
565 br_enc64le(dbuf + 96, ~A[12]);
566 br_enc64le(dbuf + 104, A[13]);
567 br_enc64le(dbuf + 112, A[14]);
568 br_enc64le(dbuf + 120, A[15]);
569 br_enc64le(dbuf + 128, A[16]);
570 br_enc64le(dbuf + 136, ~A[17]);
571 br_enc64le(dbuf + 144, A[18]);
572 br_enc64le(dbuf + 152, A[19]);
573 br_enc64le(dbuf + 160, ~A[20]);
574 br_enc64le(dbuf + 168, A[21]);
575 br_enc64le(dbuf + 176, A[22]);
576 br_enc64le(dbuf + 184, A[23]);
577 br_enc64le(dbuf + 192, A[24]);
/* Copy clen bytes from the context buffer at offset dptr to the output cursor. */
584 memcpy(buf, sc->dbuf + dptr, clen);