2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #define BR_POWER_ASM_MACROS 1
30 /* see bearssl_block.h */
/*
 * Initialize the CTR key context: set the vtable pointer and expand
 * the AES key. br_aes_pwr8_keysched() returns the number of AES rounds
 * (10/12/14) implied by the key length, which is stored for dispatch
 * in br_aes_pwr8_ctr_run().
 * NOTE(review): this extraction is missing source lines (the function
 * qualifier/return type and the braces are not visible); only comments
 * were added here, all visible code is byte-identical.
 */
32 br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
33 const void *key, size_t len)
35 ctx->vtable = &br_aes_pwr8_ctr_vtable;
36 ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
/*
 * ctr_128(): CTR-mode keystream application with an AES-128 (10-round)
 * key schedule, processing four 16-byte blocks per loop iteration
 * (ivbuf holds four parallel counter blocks).
 * NOTE(review): this extraction is missing source lines (the function
 * qualifier, braces, the asm volatile wrapper, the loop label/branch
 * and the per-round vcipher sequence are not visible); only comments
 * were added here, all visible code is byte-identical.
 */
40 ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
41 unsigned char *buf, size_t num_blocks)
43 long cc0, cc1, cc2, cc3;
/* Permutation mask used (via vperm) to byteswap 32-bit words. */
46 static const uint32_t idx2be[] = {
47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/* Per-lane counter increment vector (values missing from extraction). */
50 static const uint32_t ctrinc[] = {
61 * Load subkeys into v0..v10
63 lxvw4x(32, %[cc0], %[sk])
64 addi(%[cc0], %[cc0], 16)
65 lxvw4x(33, %[cc0], %[sk])
66 addi(%[cc0], %[cc0], 16)
67 lxvw4x(34, %[cc0], %[sk])
68 addi(%[cc0], %[cc0], 16)
69 lxvw4x(35, %[cc0], %[sk])
70 addi(%[cc0], %[cc0], 16)
71 lxvw4x(36, %[cc0], %[sk])
72 addi(%[cc0], %[cc0], 16)
73 lxvw4x(37, %[cc0], %[sk])
74 addi(%[cc0], %[cc0], 16)
75 lxvw4x(38, %[cc0], %[sk])
76 addi(%[cc0], %[cc0], 16)
77 lxvw4x(39, %[cc0], %[sk])
78 addi(%[cc0], %[cc0], 16)
79 lxvw4x(40, %[cc0], %[sk])
80 addi(%[cc0], %[cc0], 16)
81 lxvw4x(41, %[cc0], %[sk])
82 addi(%[cc0], %[cc0], 16)
83 lxvw4x(42, %[cc0], %[sk])
88 * v15 = constant for byteswapping words
90 lxvw4x(47, 0, %[idx2be])
93 * v28 = increment for IV counter.
95 lxvw4x(60, 0, %[ctrinc])
98 * Load IV into v16..v19
100 lxvw4x(48, %[cc0], %[ivbuf])
101 lxvw4x(49, %[cc1], %[ivbuf])
102 lxvw4x(50, %[cc2], %[ivbuf])
103 lxvw4x(51, %[cc3], %[ivbuf])
/* Byteswap the four IV lanes to big-endian word order (mask in v15). */
105 vperm(16, 16, 16, 15)
106 vperm(17, 17, 17, 15)
107 vperm(18, 18, 18, 15)
108 vperm(19, 19, 19, 15)
114 * Compute next IV into v24..v27
122 * Load next data blocks. We do this early on but we
123 * won't need them until IV encryption is done.
125 lxvw4x(52, %[cc0], %[buf])
126 lxvw4x(53, %[cc1], %[buf])
127 lxvw4x(54, %[cc2], %[buf])
128 lxvw4x(55, %[cc3], %[buf])
131 * Encrypt the current IV.
/* Final AES round on each lane; v10 is the last subkey loaded above. */
173 vcipherlast(16, 16, 10)
174 vcipherlast(17, 17, 10)
175 vcipherlast(18, 18, 10)
176 vcipherlast(19, 19, 10)
/* Swap the encrypted counters back to memory byte order. */
179 vperm(16, 16, 16, 15)
180 vperm(17, 17, 17, 15)
181 vperm(18, 18, 18, 15)
182 vperm(19, 19, 19, 15)
186 * Load next plaintext word and XOR with encrypted IV.
192 stxvw4x(48, %[cc0], %[buf])
193 stxvw4x(49, %[cc1], %[buf])
194 stxvw4x(50, %[cc2], %[buf])
195 stxvw4x(51, %[cc3], %[buf])
/* Advance to the next group of four 16-byte blocks. */
197 addi(%[buf], %[buf], 64)
/* Output operands: the four base-register offsets are read/written. */
209 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
/* Inputs; num_blocks >> 2 = loop count (four blocks per iteration). */
211 : [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
212 [ctrinc] "b" (ctrinc)
214 , [idx2be] "b" (idx2be)
/* Clobbered vector registers. */
216 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
217 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
218 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * ctr_192(): CTR-mode keystream application with an AES-192 (12-round)
 * key schedule; identical structure to ctr_128() but with subkeys
 * v0..v12 and vcipherlast using register 13... NOTE(review): per the
 * visible code the last subkey used is v12 — the extra two subkey
 * loads (v11, v12) vs. ctr_128 match the 12-round schedule.
 * NOTE(review): this extraction is missing source lines (function
 * qualifier, braces, asm volatile wrapper, loop label and per-round
 * vcipher sequence); only comments were added, visible code unchanged.
 */
224 ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
225 unsigned char *buf, size_t num_blocks)
227 long cc0, cc1, cc2, cc3;
/* Permutation mask used (via vperm) to byteswap 32-bit words. */
230 static const uint32_t idx2be[] = {
231 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/* Per-lane counter increment vector (values missing from extraction). */
234 static const uint32_t ctrinc[] = {
245 * Load subkeys into v0..v12
247 lxvw4x(32, %[cc0], %[sk])
248 addi(%[cc0], %[cc0], 16)
249 lxvw4x(33, %[cc0], %[sk])
250 addi(%[cc0], %[cc0], 16)
251 lxvw4x(34, %[cc0], %[sk])
252 addi(%[cc0], %[cc0], 16)
253 lxvw4x(35, %[cc0], %[sk])
254 addi(%[cc0], %[cc0], 16)
255 lxvw4x(36, %[cc0], %[sk])
256 addi(%[cc0], %[cc0], 16)
257 lxvw4x(37, %[cc0], %[sk])
258 addi(%[cc0], %[cc0], 16)
259 lxvw4x(38, %[cc0], %[sk])
260 addi(%[cc0], %[cc0], 16)
261 lxvw4x(39, %[cc0], %[sk])
262 addi(%[cc0], %[cc0], 16)
263 lxvw4x(40, %[cc0], %[sk])
264 addi(%[cc0], %[cc0], 16)
265 lxvw4x(41, %[cc0], %[sk])
266 addi(%[cc0], %[cc0], 16)
267 lxvw4x(42, %[cc0], %[sk])
268 addi(%[cc0], %[cc0], 16)
269 lxvw4x(43, %[cc0], %[sk])
270 addi(%[cc0], %[cc0], 16)
271 lxvw4x(44, %[cc0], %[sk])
276 * v15 = constant for byteswapping words
278 lxvw4x(47, 0, %[idx2be])
281 * v28 = increment for IV counter.
283 lxvw4x(60, 0, %[ctrinc])
286 * Load IV into v16..v19
288 lxvw4x(48, %[cc0], %[ivbuf])
289 lxvw4x(49, %[cc1], %[ivbuf])
290 lxvw4x(50, %[cc2], %[ivbuf])
291 lxvw4x(51, %[cc3], %[ivbuf])
/* Byteswap the four IV lanes to big-endian word order (mask in v15). */
293 vperm(16, 16, 16, 15)
294 vperm(17, 17, 17, 15)
295 vperm(18, 18, 18, 15)
296 vperm(19, 19, 19, 15)
302 * Compute next IV into v24..v27
310 * Load next data blocks. We do this early on but we
311 * won't need them until IV encryption is done.
313 lxvw4x(52, %[cc0], %[buf])
314 lxvw4x(53, %[cc1], %[buf])
315 lxvw4x(54, %[cc2], %[buf])
316 lxvw4x(55, %[cc3], %[buf])
319 * Encrypt the current IV.
/* Final AES round on each lane; v12 is the last subkey loaded above. */
369 vcipherlast(16, 16, 12)
370 vcipherlast(17, 17, 12)
371 vcipherlast(18, 18, 12)
372 vcipherlast(19, 19, 12)
/* Swap the encrypted counters back to memory byte order. */
375 vperm(16, 16, 16, 15)
376 vperm(17, 17, 17, 15)
377 vperm(18, 18, 18, 15)
378 vperm(19, 19, 19, 15)
382 * Load next plaintext word and XOR with encrypted IV.
388 stxvw4x(48, %[cc0], %[buf])
389 stxvw4x(49, %[cc1], %[buf])
390 stxvw4x(50, %[cc2], %[buf])
391 stxvw4x(51, %[cc3], %[buf])
/* Advance to the next group of four 16-byte blocks. */
393 addi(%[buf], %[buf], 64)
/* Output operands: the four base-register offsets are read/written. */
405 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
/* Inputs; num_blocks >> 2 = loop count (four blocks per iteration). */
407 : [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
408 [ctrinc] "b" (ctrinc)
410 , [idx2be] "b" (idx2be)
/* Clobbered vector registers. */
412 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
413 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
414 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * ctr_256(): CTR-mode keystream application with an AES-256 (14-round)
 * key schedule; identical structure to ctr_128()/ctr_192() but with
 * subkeys loaded into v0..v14.
 * NOTE(review): this extraction is missing source lines (function
 * qualifier, braces, asm volatile wrapper, loop label and per-round
 * vcipher sequence); only comments were added, visible code unchanged.
 */
420 ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
421 unsigned char *buf, size_t num_blocks)
423 long cc0, cc1, cc2, cc3;
/* Permutation mask used (via vperm) to byteswap 32-bit words. */
426 static const uint32_t idx2be[] = {
427 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/* Per-lane counter increment vector (values missing from extraction). */
430 static const uint32_t ctrinc[] = {
441 * Load subkeys into v0..v14
443 lxvw4x(32, %[cc0], %[sk])
444 addi(%[cc0], %[cc0], 16)
445 lxvw4x(33, %[cc0], %[sk])
446 addi(%[cc0], %[cc0], 16)
447 lxvw4x(34, %[cc0], %[sk])
448 addi(%[cc0], %[cc0], 16)
449 lxvw4x(35, %[cc0], %[sk])
450 addi(%[cc0], %[cc0], 16)
451 lxvw4x(36, %[cc0], %[sk])
452 addi(%[cc0], %[cc0], 16)
453 lxvw4x(37, %[cc0], %[sk])
454 addi(%[cc0], %[cc0], 16)
455 lxvw4x(38, %[cc0], %[sk])
456 addi(%[cc0], %[cc0], 16)
457 lxvw4x(39, %[cc0], %[sk])
458 addi(%[cc0], %[cc0], 16)
459 lxvw4x(40, %[cc0], %[sk])
460 addi(%[cc0], %[cc0], 16)
461 lxvw4x(41, %[cc0], %[sk])
462 addi(%[cc0], %[cc0], 16)
463 lxvw4x(42, %[cc0], %[sk])
464 addi(%[cc0], %[cc0], 16)
465 lxvw4x(43, %[cc0], %[sk])
466 addi(%[cc0], %[cc0], 16)
467 lxvw4x(44, %[cc0], %[sk])
468 addi(%[cc0], %[cc0], 16)
469 lxvw4x(45, %[cc0], %[sk])
470 addi(%[cc0], %[cc0], 16)
471 lxvw4x(46, %[cc0], %[sk])
476 * v15 = constant for byteswapping words
478 lxvw4x(47, 0, %[idx2be])
481 * v28 = increment for IV counter.
483 lxvw4x(60, 0, %[ctrinc])
486 * Load IV into v16..v19
488 lxvw4x(48, %[cc0], %[ivbuf])
489 lxvw4x(49, %[cc1], %[ivbuf])
490 lxvw4x(50, %[cc2], %[ivbuf])
491 lxvw4x(51, %[cc3], %[ivbuf])
/* Byteswap the four IV lanes to big-endian word order (mask in v15). */
493 vperm(16, 16, 16, 15)
494 vperm(17, 17, 17, 15)
495 vperm(18, 18, 18, 15)
496 vperm(19, 19, 19, 15)
502 * Compute next IV into v24..v27
510 * Load next data blocks. We do this early on but we
511 * won't need them until IV encryption is done.
513 lxvw4x(52, %[cc0], %[buf])
514 lxvw4x(53, %[cc1], %[buf])
515 lxvw4x(54, %[cc2], %[buf])
516 lxvw4x(55, %[cc3], %[buf])
519 * Encrypt the current IV.
/* Final AES round on each lane; v14 is the last subkey loaded above. */
577 vcipherlast(16, 16, 14)
578 vcipherlast(17, 17, 14)
579 vcipherlast(18, 18, 14)
580 vcipherlast(19, 19, 14)
/* Swap the encrypted counters back to memory byte order. */
583 vperm(16, 16, 16, 15)
584 vperm(17, 17, 17, 15)
585 vperm(18, 18, 18, 15)
586 vperm(19, 19, 19, 15)
590 * Load next plaintext word and XOR with encrypted IV.
596 stxvw4x(48, %[cc0], %[buf])
597 stxvw4x(49, %[cc1], %[buf])
598 stxvw4x(50, %[cc2], %[buf])
599 stxvw4x(51, %[cc3], %[buf])
/* Advance to the next group of four 16-byte blocks. */
601 addi(%[buf], %[buf], 64)
/* Output operands: the four base-register offsets are read/written. */
613 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
/* Inputs; num_blocks >> 2 = loop count (four blocks per iteration). */
615 : [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
616 [ctrinc] "b" (ctrinc)
618 , [idx2be] "b" (idx2be)
/* Clobbered vector registers. */
620 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
621 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
622 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
627 /* see bearssl_block.h */
/*
 * br_aes_pwr8_ctr_run(): CTR encryption/decryption entry point.
 * Replicates the 12-byte IV into four 16-byte lanes of ivbuf and
 * appends four consecutive 32-bit big-endian counters, so the asm
 * routines above can process four blocks in parallel.
 * NOTE(review): source lines are missing from this extraction (return
 * type, braces, "buf = data;", the if/while framing, the case/break
 * labels of both switch statements, and the final "return cc;" are not
 * visible); only comments were added, visible code is byte-identical.
 */
629 br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
630 const void *iv, uint32_t cc, void *data, size_t len)
633 unsigned char ivbuf[64];
/* Copy the 12-byte nonce into each of the four 16-byte IV lanes. */
636 memcpy(ivbuf + 0, iv, 12);
637 memcpy(ivbuf + 16, iv, 12);
638 memcpy(ivbuf + 32, iv, 12);
639 memcpy(ivbuf + 48, iv, 12);
/* Per-lane big-endian counter words: cc, cc+1, cc+2, cc+3. */
641 br_enc32be(ivbuf + 12, cc + 0);
642 br_enc32be(ivbuf + 28, cc + 1);
643 br_enc32be(ivbuf + 44, cc + 2);
644 br_enc32be(ivbuf + 60, cc + 3);
/*
 * Dispatch on key-schedule size; process the block count rounded
 * down to a multiple of 4 (i.e. whole 64-byte groups).
 */
645 switch (ctx->num_rounds) {
647 ctr_128(ctx->skey.skni, ivbuf, buf,
648 (len >> 4) & ~(size_t)3);
651 ctr_192(ctx->skey.skni, ivbuf, buf,
652 (len >> 4) & ~(size_t)3);
655 ctr_256(ctx->skey.skni, ivbuf, buf,
656 (len >> 4) & ~(size_t)3);
/* Advance counter and data pointer past the processed 64-byte groups. */
659 cc += (len >> 4) & ~(size_t)3;
660 buf += len & ~(size_t)63;
/*
 * Tail (< 64 bytes): pad the remainder into a 64-byte temporary
 * buffer, run one 4-block batch, then copy back only len bytes.
 */
664 unsigned char tmp[64];
666 memcpy(tmp, buf, len);
667 memset(tmp + len, 0, (sizeof tmp) - len);
668 br_enc32be(ivbuf + 12, cc + 0);
669 br_enc32be(ivbuf + 28, cc + 1);
670 br_enc32be(ivbuf + 44, cc + 2);
671 br_enc32be(ivbuf + 60, cc + 3);
672 switch (ctx->num_rounds) {
674 ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
677 ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
680 ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
683 memcpy(buf, tmp, len);
/* Counter consumed by the tail: one per started 16-byte block. */
684 cc += (len + 15) >> 4;
689 /* see bearssl_block.h */
/*
 * CTR vtable for the POWER8 implementation: context size plus the
 * init and run entry points cast to the generic br_block_ctr_class
 * function-pointer signatures.
 * NOTE(review): the run function-pointer line and the closing brace
 * are not visible in this extraction; comments added only.
 */
690 const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
691 sizeof(br_aes_pwr8_ctr_keys),
694 (void (*)(const br_block_ctr_class **, const void *, size_t))
695 &br_aes_pwr8_ctr_init,
696 (uint32_t (*)(const br_block_ctr_class *const *,
697 const void *, uint32_t, void *, size_t))
701 /* see bearssl_block.h */
/*
 * Return the POWER8 CTR vtable if the CPU supports the required
 * in-core crypto instructions (runtime check via
 * br_aes_pwr8_supported()), or NULL otherwise.
 * NOTE(review): the function braces are not visible in this
 * extraction; comments added only.
 */
702 const br_block_ctr_class *
703 br_aes_pwr8_ctr_get_vtable(void)
705 return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
710 /* see bearssl_block.h */
711 const br_block_ctr_class *
712 br_aes_pwr8_ctr_get_vtable(void)