2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/*
 * NOTE(review): presumably read by a project header to make the assembly
 * helper macros used further down (lxvw4x, stxvw4x, addi, vperm,
 * vncipherlast) available; the include itself is not visible here, so
 * confirm it comes after this definition.
 */
25 #define BR_POWER_ASM_MACROS 1
/*
 * Initialise a CBC-decryption key context.
 *
 *   ctx   context to fill in
 *   key   key bytes, passed through to br_aes_pwr8_keysched()
 *   len   key length, passed through to br_aes_pwr8_keysched()
 *
 * Sets ctx->vtable to br_aes_pwr8_cbcdec_vtable and stores the value
 * returned by br_aes_pwr8_keysched() in ctx->num_rounds. The key schedule
 * routine is given ctx->skey.skni, which is the same area that
 * br_aes_pwr8_cbcdec_run() later hands to the decryption kernels as their
 * subkey pointer (presumably keysched writes the subkeys there).
 */
30 /* see bearssl_block.h */
32 br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
33 const void *key, size_t len)
35 ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
36 ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
/*
 * CBC decryption kernel for an 11-subkey schedule: the subkeys are read
 * from sk into v0..v10 (VSX registers 32..42).
 *
 *   sk          subkeys, read 16 bytes at a time
 *   iv          IV for the first block (input-only operand)
 *   buf         data buffer; blocks are loaded from it and the results
 *               are stored back at the same offsets
 *   num_blocks  block count; the assembly is given num_blocks >> 2 and
 *               buf moves forward by 64 bytes per step, so blocks are
 *               handled four at a time (callers in this file pass a
 *               multiple of 4)
 */
40 cbcdec_128(const unsigned char *sk,
41 const unsigned char *iv, unsigned char *buf, size_t num_blocks)
43 long cc0, cc1, cc2, cc3;
/* Selector loaded into v15 below and used by every vperm in this function. */
46 static const uint32_t idx2be[] = {
47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
58 * Load subkeys into v0..v10
60 lxvw4x(32, %[cc0], %[sk])
/* cc0 is the byte offset into sk; it is bumped by 16 after each load. */
61 addi(%[cc0], %[cc0], 16)
62 lxvw4x(33, %[cc0], %[sk])
63 addi(%[cc0], %[cc0], 16)
64 lxvw4x(34, %[cc0], %[sk])
65 addi(%[cc0], %[cc0], 16)
66 lxvw4x(35, %[cc0], %[sk])
67 addi(%[cc0], %[cc0], 16)
68 lxvw4x(36, %[cc0], %[sk])
69 addi(%[cc0], %[cc0], 16)
70 lxvw4x(37, %[cc0], %[sk])
71 addi(%[cc0], %[cc0], 16)
72 lxvw4x(38, %[cc0], %[sk])
73 addi(%[cc0], %[cc0], 16)
74 lxvw4x(39, %[cc0], %[sk])
75 addi(%[cc0], %[cc0], 16)
76 lxvw4x(40, %[cc0], %[sk])
77 addi(%[cc0], %[cc0], 16)
78 lxvw4x(41, %[cc0], %[sk])
79 addi(%[cc0], %[cc0], 16)
80 lxvw4x(42, %[cc0], %[sk])
85 * v15 = constant for byteswapping words
87 lxvw4x(47, 0, %[idx2be])
100 * Load next ciphertext words in v16..v19. Also save them
103 lxvw4x(48, %[cc0], %[buf])
104 lxvw4x(49, %[cc1], %[buf])
105 lxvw4x(50, %[cc2], %[buf])
106 lxvw4x(51, %[cc3], %[buf])
/* Byteswap the words of the four blocks just loaded (selector in v15). */
108 vperm(16, 16, 16, 15)
109 vperm(17, 17, 17, 15)
110 vperm(18, 18, 18, 15)
111 vperm(19, 19, 19, 15)
119 * Decrypt the blocks.
161 vncipherlast(16, 16, 0)
162 vncipherlast(17, 17, 0)
163 vncipherlast(18, 18, 0)
164 vncipherlast(19, 19, 0)
/* The four vncipherlast steps above all use the subkey held in v0. */
167 * XOR decrypted blocks with IV / previous block.
175 * Store back result (with byteswap)
178 vperm(16, 16, 16, 15)
179 vperm(17, 17, 17, 15)
180 vperm(18, 18, 18, 15)
181 vperm(19, 19, 19, 15)
/* Results go back to the same buf offsets the ciphertext was read from. */
183 stxvw4x(48, %[cc0], %[buf])
184 stxvw4x(49, %[cc1], %[buf])
185 stxvw4x(50, %[cc2], %[buf])
186 stxvw4x(51, %[cc3], %[buf])
189 * Fourth encrypted block is IV for next run.
193 addi(%[buf], %[buf], 64)
/*
 * Operand lists. "+b" marks a read-write operand held in a base register;
 * the plain "b" operands are inputs. num_blocks is passed as a count of
 * four-block groups.
 */
197 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
199 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
201 , [idx2be] "b" (idx2be)
/* Vector registers declared as clobbered by the assembly. */
203 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
204 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
205 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * CBC decryption kernel for a 13-subkey schedule: the subkeys are read
 * from sk into v0..v12 (VSX registers 32..44).
 *
 *   sk          subkeys, read 16 bytes at a time
 *   iv          IV for the first block (input-only operand)
 *   buf         data buffer; blocks are loaded from it and the results
 *               are stored back at the same offsets
 *   num_blocks  block count; the assembly is given num_blocks >> 2 and
 *               buf moves forward by 64 bytes per step, so blocks are
 *               handled four at a time (callers in this file pass a
 *               multiple of 4)
 */
211 cbcdec_192(const unsigned char *sk,
212 const unsigned char *iv, unsigned char *buf, size_t num_blocks)
214 long cc0, cc1, cc2, cc3;
/* Selector loaded into v15 below and used by every vperm in this function. */
217 static const uint32_t idx2be[] = {
218 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
229 * Load subkeys into v0..v12
231 lxvw4x(32, %[cc0], %[sk])
/* cc0 is the byte offset into sk; it is bumped by 16 after each load. */
232 addi(%[cc0], %[cc0], 16)
233 lxvw4x(33, %[cc0], %[sk])
234 addi(%[cc0], %[cc0], 16)
235 lxvw4x(34, %[cc0], %[sk])
236 addi(%[cc0], %[cc0], 16)
237 lxvw4x(35, %[cc0], %[sk])
238 addi(%[cc0], %[cc0], 16)
239 lxvw4x(36, %[cc0], %[sk])
240 addi(%[cc0], %[cc0], 16)
241 lxvw4x(37, %[cc0], %[sk])
242 addi(%[cc0], %[cc0], 16)
243 lxvw4x(38, %[cc0], %[sk])
244 addi(%[cc0], %[cc0], 16)
245 lxvw4x(39, %[cc0], %[sk])
246 addi(%[cc0], %[cc0], 16)
247 lxvw4x(40, %[cc0], %[sk])
248 addi(%[cc0], %[cc0], 16)
249 lxvw4x(41, %[cc0], %[sk])
250 addi(%[cc0], %[cc0], 16)
251 lxvw4x(42, %[cc0], %[sk])
252 addi(%[cc0], %[cc0], 16)
253 lxvw4x(43, %[cc0], %[sk])
254 addi(%[cc0], %[cc0], 16)
255 lxvw4x(44, %[cc0], %[sk])
260 * v15 = constant for byteswapping words
262 lxvw4x(47, 0, %[idx2be])
/* Byteswap the words of v24 with the selector now held in v15. */
269 vperm(24, 24, 24, 15)
275 * Load next ciphertext words in v16..v19. Also save them
278 lxvw4x(48, %[cc0], %[buf])
279 lxvw4x(49, %[cc1], %[buf])
280 lxvw4x(50, %[cc2], %[buf])
281 lxvw4x(51, %[cc3], %[buf])
/* Byteswap the words of the four blocks just loaded (selector in v15). */
283 vperm(16, 16, 16, 15)
284 vperm(17, 17, 17, 15)
285 vperm(18, 18, 18, 15)
286 vperm(19, 19, 19, 15)
294 * Decrypt the blocks.
344 vncipherlast(16, 16, 0)
345 vncipherlast(17, 17, 0)
346 vncipherlast(18, 18, 0)
347 vncipherlast(19, 19, 0)
/* The four vncipherlast steps above all use the subkey held in v0. */
350 * XOR decrypted blocks with IV / previous block.
358 * Store back result (with byteswap)
361 vperm(16, 16, 16, 15)
362 vperm(17, 17, 17, 15)
363 vperm(18, 18, 18, 15)
364 vperm(19, 19, 19, 15)
/* Results go back to the same buf offsets the ciphertext was read from. */
366 stxvw4x(48, %[cc0], %[buf])
367 stxvw4x(49, %[cc1], %[buf])
368 stxvw4x(50, %[cc2], %[buf])
369 stxvw4x(51, %[cc3], %[buf])
372 * Fourth encrypted block is IV for next run.
376 addi(%[buf], %[buf], 64)
/*
 * Operand lists. "+b" marks a read-write operand held in a base register;
 * the plain "b" operands are inputs. num_blocks is passed as a count of
 * four-block groups.
 */
380 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
382 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
384 , [idx2be] "b" (idx2be)
/* Vector registers declared as clobbered by the assembly. */
386 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
387 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
388 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * CBC decryption kernel for a 15-subkey schedule: the subkeys are read
 * from sk into v0..v14 (VSX registers 32..46).
 *
 *   sk          subkeys, read 16 bytes at a time
 *   iv          IV for the first block (input-only operand)
 *   buf         data buffer; blocks are loaded from it and the results
 *               are stored back at the same offsets
 *   num_blocks  block count; the assembly is given num_blocks >> 2 and
 *               buf moves forward by 64 bytes per step, so blocks are
 *               handled four at a time (callers in this file pass a
 *               multiple of 4)
 */
394 cbcdec_256(const unsigned char *sk,
395 const unsigned char *iv, unsigned char *buf, size_t num_blocks)
397 long cc0, cc1, cc2, cc3;
/* Selector loaded into v15 below and used by every vperm in this function. */
400 static const uint32_t idx2be[] = {
401 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
412 * Load subkeys into v0..v14
414 lxvw4x(32, %[cc0], %[sk])
/* cc0 is the byte offset into sk; it is bumped by 16 after each load. */
415 addi(%[cc0], %[cc0], 16)
416 lxvw4x(33, %[cc0], %[sk])
417 addi(%[cc0], %[cc0], 16)
418 lxvw4x(34, %[cc0], %[sk])
419 addi(%[cc0], %[cc0], 16)
420 lxvw4x(35, %[cc0], %[sk])
421 addi(%[cc0], %[cc0], 16)
422 lxvw4x(36, %[cc0], %[sk])
423 addi(%[cc0], %[cc0], 16)
424 lxvw4x(37, %[cc0], %[sk])
425 addi(%[cc0], %[cc0], 16)
426 lxvw4x(38, %[cc0], %[sk])
427 addi(%[cc0], %[cc0], 16)
428 lxvw4x(39, %[cc0], %[sk])
429 addi(%[cc0], %[cc0], 16)
430 lxvw4x(40, %[cc0], %[sk])
431 addi(%[cc0], %[cc0], 16)
432 lxvw4x(41, %[cc0], %[sk])
433 addi(%[cc0], %[cc0], 16)
434 lxvw4x(42, %[cc0], %[sk])
435 addi(%[cc0], %[cc0], 16)
436 lxvw4x(43, %[cc0], %[sk])
437 addi(%[cc0], %[cc0], 16)
438 lxvw4x(44, %[cc0], %[sk])
439 addi(%[cc0], %[cc0], 16)
440 lxvw4x(45, %[cc0], %[sk])
441 addi(%[cc0], %[cc0], 16)
442 lxvw4x(46, %[cc0], %[sk])
447 * v15 = constant for byteswapping words
449 lxvw4x(47, 0, %[idx2be])
/* Byteswap the words of v24 with the selector now held in v15. */
456 vperm(24, 24, 24, 15)
462 * Load next ciphertext words in v16..v19. Also save them
465 lxvw4x(48, %[cc0], %[buf])
466 lxvw4x(49, %[cc1], %[buf])
467 lxvw4x(50, %[cc2], %[buf])
468 lxvw4x(51, %[cc3], %[buf])
/* Byteswap the words of the four blocks just loaded (selector in v15). */
470 vperm(16, 16, 16, 15)
471 vperm(17, 17, 17, 15)
472 vperm(18, 18, 18, 15)
473 vperm(19, 19, 19, 15)
481 * Decrypt the blocks.
539 vncipherlast(16, 16, 0)
540 vncipherlast(17, 17, 0)
541 vncipherlast(18, 18, 0)
542 vncipherlast(19, 19, 0)
/* The four vncipherlast steps above all use the subkey held in v0. */
545 * XOR decrypted blocks with IV / previous block.
553 * Store back result (with byteswap)
556 vperm(16, 16, 16, 15)
557 vperm(17, 17, 17, 15)
558 vperm(18, 18, 18, 15)
559 vperm(19, 19, 19, 15)
/* Results go back to the same buf offsets the ciphertext was read from. */
561 stxvw4x(48, %[cc0], %[buf])
562 stxvw4x(49, %[cc1], %[buf])
563 stxvw4x(50, %[cc2], %[buf])
564 stxvw4x(51, %[cc3], %[buf])
567 * Fourth encrypted block is IV for next run.
571 addi(%[buf], %[buf], 64)
/*
 * Operand lists. "+b" marks a read-write operand held in a base register;
 * the plain "b" operands are inputs. num_blocks is passed as a count of
 * four-block groups.
 */
575 : [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
577 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
579 , [idx2be] "b" (idx2be)
/* Vector registers declared as clobbered by the assembly. */
581 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
582 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
583 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * CBC-decrypt len bytes in place and update iv.
 *
 *   ctx   key context; ctx->num_rounds selects the kernel and
 *         ctx->skey.skni supplies the subkeys
 *   iv    16-byte IV; on exit it receives the last 16 bytes the buffer
 *         held on entry
 *   data  buffer to decrypt (buf presumably aliases it -- TODO confirm)
 *   len   number of bytes to process
 *
 * The part of the buffer that is a multiple of four 16-byte blocks is
 * handed to a kernel directly. Whatever remains is staged in a zero-padded
 * 64-byte scratch buffer, because the kernels only work on groups of four
 * blocks.
 */
588 /* see bearssl_block.h */
590 br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
591 void *iv, void *data, size_t len)
593 unsigned char nextiv[16];
/*
 * Copy out the final 16 bytes now: decryption overwrites the buffer, and
 * these bytes are written into iv at the very end.
 */
600 memcpy(nextiv, buf + len - 16, 16);
603 unsigned char tmp[16];
/* Count of whole 16-byte blocks, rounded down to a multiple of four. */
605 num_blocks = (len >> 4) & ~(size_t)3;
/* Keep the last 16 bytes of that region before the kernel runs over buf. */
606 memcpy(tmp, buf + (num_blocks << 4) - 16, 16);
/* Pick the kernel according to the round count stored at init time. */
607 switch (ctx->num_rounds) {
609 cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
612 cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
615 cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
/* Step past the region that was just processed. */
618 buf += num_blocks << 4;
623 unsigned char tmp[64];
/*
 * Tail handling: stage the remaining len bytes in a zero-padded 64-byte
 * scratch buffer, run the kernel on exactly 4 blocks, then copy only len
 * bytes back.
 * NOTE(review): (sizeof tmp) - len is only valid when len <= 64; len is
 * presumably reduced before this point -- confirm.
 */
625 memcpy(tmp, buf, len);
626 memset(tmp + len, 0, (sizeof tmp) - len);
627 switch (ctx->num_rounds) {
629 cbcdec_128(ctx->skey.skni, iv, tmp, 4);
632 cbcdec_192(ctx->skey.skni, iv, tmp, 4);
635 cbcdec_256(ctx->skey.skni, iv, tmp, 4);
638 memcpy(buf, tmp, len);
/* Hand the bytes saved at the top back to the caller as the new IV. */
640 memcpy(iv, nextiv, 16);
/*
 * Vtable for this CBC-decryption implementation. The first entry is the
 * size of the context structure. The function entries are
 * br_aes_pwr8_cbcdec_init and br_aes_pwr8_cbcdec_run, cast to signatures
 * that take br_block_cbcdec_class pointers in place of the concrete
 * context type.
 */
643 /* see bearssl_block.h */
644 const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
645 sizeof(br_aes_pwr8_cbcdec_keys),
648 (void (*)(const br_block_cbcdec_class **, const void *, size_t))
649 &br_aes_pwr8_cbcdec_init,
650 (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
651 &br_aes_pwr8_cbcdec_run
/*
 * Return a pointer to br_aes_pwr8_cbcdec_vtable when
 * br_aes_pwr8_supported() returns nonzero, and NULL otherwise.
 */
654 /* see bearssl_block.h */
655 const br_block_cbcdec_class *
656 br_aes_pwr8_cbcdec_get_vtable(void)
658 return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable : NULL;
663 /* see bearssl_block.h */
664 const br_block_cbcdec_class *
665 br_aes_pwr8_cbcdec_get_vtable(void)