2 /* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
7 #if defined(__thumb2__)
/*
 * iotas32 - table of 24 entries, each emitted as two 32-bit words.
 *
 * KeccakF1600_int annotates its reads of this table as iotas[i] (with
 * .lo/.hi halves) and xors the entry into R[0][0].  Spot-checking the
 * leading entries against iotas64 shows that each pair is the
 * bit-interleaved form of the matching 64-bit constant: the first word
 * collects the even-numbered bits, the second word the odd-numbered
 * bits (e.g. 0x0000000000008082 becomes 0x00000000, 0x00000089).
 * NOTE(review): the iotas32 label and any section/alignment directives
 * are not visible in this listing.
 */
14 .type iotas32, %object
17 .long 0x00000001, 0x00000000
18 .long 0x00000000, 0x00000089
19 .long 0x00000000, 0x8000008b
20 .long 0x00000000, 0x80008080
21 .long 0x00000001, 0x0000008b
22 .long 0x00000001, 0x00008000
23 .long 0x00000001, 0x80008088
24 .long 0x00000001, 0x80000082
25 .long 0x00000000, 0x0000000b
26 .long 0x00000000, 0x0000000a
27 .long 0x00000001, 0x00008082
28 .long 0x00000000, 0x00008003
29 .long 0x00000001, 0x0000808b
30 .long 0x00000001, 0x8000000b
31 .long 0x00000001, 0x8000008a
32 .long 0x00000001, 0x80000081
33 .long 0x00000000, 0x80000081
34 .long 0x00000000, 0x80000008
35 .long 0x00000000, 0x00000083
36 .long 0x00000000, 0x80008003
37 .long 0x00000001, 0x80008088
38 .long 0x00000000, 0x80000088
39 .long 0x00000001, 0x00008000
40 .long 0x00000000, 0x80008082
/* Size covers everything emitted since the iotas32 symbol. */
41 .size iotas32,.-iotas32
/*
 * KeccakF1600_int - core of the Keccak permutation working on a state
 * that lives in the caller's stack frame.
 *
 * What the visible instructions establish:
 *   - every 64-bit lane is handled as two 32-bit registers;
 *   - lane A[i][j] of the input buffer sits at sp + 8*(5*i+j)
 *     (A[0][2] at sp+16, A[4][1] at sp+168, ...);
 *   - D[0..4] are spilled to sp+200 .. sp+232;
 *   - the first round writes R[i][j] to sp + 240 + 8*(5*i+j), the
 *     second round reads its A[][] from that area and writes R[][]
 *     back to sp+0 .. sp+192, so two rounds ping-pong between the
 *     two buffers;
 *   - a round counter is kept at [sp,#444];
 *   - commented-out "mov ...,ror#..." lines record rotations that are
 *     not issued as separate instructions; the same rotation amounts
 *     show up in the ror operands of the bic/eor instructions that
 *     consume those registers.
 * Many loads/stores appear twice in a row (ldr/ldrd, str/strd) with the
 * same annotation.
 * NOTE(review): presumably these are alternative encodings selected by
 * preprocessor conditionals; the conditionals, the labels, the address
 * set-up for the ldmia instructions, the loop branch and the return are
 * not visible in this listing - confirm against the generator.
 */
43 .type KeccakF1600_int, %function
/* First round: fetch state lanes from the buffer at sp+0. */
49 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4]
58 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
59 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
82 ldrd r10,r11,[sp,#104]
84 ldrd r12,r14,[sp,#112]
88 ldrd r10,r11,[sp,#120]
90 ldrd r12,r14,[sp,#128]
94 ldrd r10,r11,[sp,#136]
96 ldrd r12,r14,[sp,#144]
100 ldrd r10,r11,[sp,#152]
102 ldrd r12,r14,[sp,#160]
106 ldrd r10,r11,[sp,#168]
108 ldrd r12,r14,[sp,#16]
112 ldrd r10,r11,[sp,#24]
114 ldrd r12,r14,[sp,#32]
121 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
127 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
133 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
139 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
145 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
151 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
157 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
159 ldr r10,[sp,#168] @ A[4][1]
163 ldr r12,[sp,#16] @ A[0][2]
171 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
/*
 * Combine the column values C[] (in r0-r9) into E[]/D[] and spill
 * D[0..4] to sp+200 .. sp+232.  The visible half of each ROL64(x, 1)
 * is an eor whose second operand carries ror#32-1.
 */
178 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
180 str r10,[sp,#208] @ D[1] = E[0]
186 strd r10,r11,[sp,#208] @ D[1] = E[0]
188 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
191 str r12,[sp,#232] @ D[4] = E[1]
193 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
197 strd r12,r14,[sp,#232] @ D[4] = E[1]
201 str r0,[sp,#200] @ D[0] = C[0]
203 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
211 strd r0,r1,[sp,#200] @ D[0] = C[0]
219 str r2,[sp,#216] @ D[2] = C[1]
221 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
225 strd r2,r3,[sp,#216] @ D[2] = C[1]
238 str r4,[sp,#224] @ D[3] = C[2]
244 strd r4,r5,[sp,#224] @ D[3] = C[2]
/*
 * Output row 0: inputs are the diagonal lanes A[0][0], A[1][1],
 * A[2][2], A[3][3], A[4][4]; results R[0][0..4] go to sp+240 .. sp+272.
 * R[0][0] additionally receives iotas[i].
 */
250 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
251 @ mov r6,r6,ror#32-11
266 ldrd r12,r14,[sp,#96]
268 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
270 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
281 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
282 ldr r12,[sp,#444] @ load counter
288 ldmia r14,{r10,r11} @ iotas[i]
289 bic r12,r4,r2,ror#32-22
290 bic r14,r5,r3,ror#32-22
291 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
298 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
304 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
307 bic r10,r8,r6,ror#32-(11-7)
308 bic r11,r9,r7,ror#32-(10-7)
309 eor r12,r2,r12,ror#32-11
311 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
313 eor r14,r3,r14,ror#32-10
317 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
319 eor r10,r4,r10,ror#32-7
320 eor r11,r5,r11,ror#32-7
322 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
324 bic r12,r0,r8,ror#32-7
328 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
330 bic r14,r1,r9,ror#32-7
331 eor r12,r12,r6,ror#32-11
333 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
335 eor r14,r14,r7,ror#32-10
339 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
344 ldr r0,[sp,#24] @ A[0][3]
350 ldrd r0,r1,[sp,#24] @ A[0][3]
352 eor r10,r10,r8,ror#32-7
353 eor r11,r11,r9,ror#32-7
355 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
361 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
/*
 * Output row 1: inputs A[0][3], A[1][4], A[2][0], A[3][1], A[4][2];
 * results R[1][0..4] go to sp+280 .. sp+312.
 */
364 ldmia r14,{r10,r11,r12,r14} @ D[3..4]
365 ldmia r9,{r6,r7,r8,r9} @ D[0..1]
368 ldr r2,[sp,#72] @ A[1][4]
374 ldrd r2,r3,[sp,#72] @ A[1][4]
377 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
379 ldr r10,[sp,#128] @ A[3][1]
381 @ mov r1,r1,ror#32-14
385 ldrd r10,r11,[sp,#128] @ A[3][1]
390 ldr r4,[sp,#80] @ A[2][0]
396 ldrd r4,r5,[sp,#80] @ A[2][0]
398 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
399 @ mov r3,r3,ror#32-10
403 ldr r12,[sp,#216] @ D[2]
409 ldrd r12,r14,[sp,#216] @ D[2]
411 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
416 ldr r8,[sp,#176] @ A[4][2]
422 ldrd r8,r9,[sp,#176] @ A[4][2]
424 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
427 bic r10,r4,r2,ror#32-10
428 bic r11,r5,r3,ror#32-10
431 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
433 eor r10,r10,r0,ror#32-14
434 eor r11,r11,r1,ror#32-14
436 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
442 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
445 eor r12,r12,r2,ror#32-10
447 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
449 eor r14,r14,r3,ror#32-10
453 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
462 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
464 bic r2,r2,r0,ror#32-(14-10)
468 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
470 eor r12,r6,r12,ror#32-14
471 bic r11,r3,r1,ror#32-(14-10)
473 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
475 eor r14,r7,r14,ror#32-14
479 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
483 ldr r1,[sp,#8] @ A[0][1]
485 eor r10,r8,r2,ror#32-10
489 ldrd r1,r0,[sp,#8] @ A[0][1]
491 eor r11,r9,r11,ror#32-10
493 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
498 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
/*
 * Output row 2: inputs A[0][1], A[1][2], A[2][3], A[3][4], A[4][0];
 * results R[2][0..4] go to sp+320 .. sp+352.
 */
502 ldmia r12,{r10,r11,r12,r14} @ D[1..2]
504 ldr r2,[sp,#56] @ A[1][2]
509 ldrd r2,r3,[sp,#56] @ A[1][2]
511 ldmia r9,{r6,r7,r8,r9} @ D[3..4]
515 ldr r4,[sp,#104] @ A[2][3]
521 ldrd r4,r5,[sp,#104] @ A[2][3]
523 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
527 ldr r10,[sp,#152] @ A[3][4]
533 ldrd r10,r11,[sp,#152] @ A[3][4]
535 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
537 ldr r12,[sp,#200] @ D[0]
543 ldrd r12,r14,[sp,#200] @ D[0]
548 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
549 @ mov r4,r7,ror#32-13 @ [track reverse order below]
553 ldr r8,[sp,#160] @ A[4][0]
559 ldrd r8,r9,[sp,#160] @ A[4][0]
561 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
566 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
569 bic r10,r5,r2,ror#13-3
570 bic r11,r4,r3,ror#12-3
571 bic r12,r6,r5,ror#32-13
572 bic r14,r7,r4,ror#32-12
573 eor r10,r0,r10,ror#32-13
574 eor r11,r1,r11,ror#32-12
576 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
578 eor r12,r12,r2,ror#32-3
582 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
584 eor r14,r14,r3,ror#32-3
586 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
593 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
595 eor r10,r10,r5,ror#32-13
596 eor r11,r11,r4,ror#32-12
598 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
604 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
610 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
616 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
620 ldr r1,[sp,#32] @ A[0][4] [in reverse order]
622 eor r10,r8,r10,ror#32-3
626 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order]
628 eor r11,r9,r11,ror#32-3
630 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
636 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
/*
 * Output row 3: inputs A[0][4], A[1][0], A[2][1], A[3][2], A[4][3];
 * results R[3][0..4] go to sp+360 .. sp+392.
 */
640 ldr r10,[sp,#232] @ D[4]
645 ldrd r10,r11,[sp,#232] @ D[4]
648 ldr r12,[sp,#200] @ D[0]
653 ldrd r12,r14,[sp,#200] @ D[0]
656 ldmia r9,{r6,r7,r8,r9} @ D[1..2]
660 ldr r2,[sp,#40] @ A[1][0]
666 ldrd r2,r3,[sp,#40] @ A[1][0]
668 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
670 ldr r4,[sp,#88] @ A[2][1]
672 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
676 ldrd r4,r5,[sp,#88] @ A[2][1]
681 ldr r10,[sp,#136] @ A[3][2]
687 ldrd r10,r11,[sp,#136] @ A[3][2]
689 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
691 ldr r12,[sp,#224] @ D[3]
693 @ mov r3,r3,ror#32-18
697 ldrd r12,r14,[sp,#224] @ D[3]
702 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
707 ldr r8,[sp,#184] @ A[4][3]
713 ldrd r8,r9,[sp,#184] @ A[4][3]
715 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
720 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
723 bic r10,r4,r2,ror#32-18
724 bic r11,r5,r3,ror#32-18
725 eor r10,r10,r0,ror#32-14
726 eor r11,r11,r1,ror#32-13
728 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
734 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
737 eor r12,r12,r2,ror#32-18
739 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
741 eor r14,r14,r3,ror#32-18
745 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
754 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
756 bic r2,r2,r0,ror#18-14
760 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
762 eor r12,r6,r12,ror#32-14
763 bic r11,r3,r1,ror#18-13
764 eor r14,r7,r14,ror#32-13
766 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
771 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
775 ldr r0,[sp,#16] @ A[0][2]
777 eor r10,r8,r2,ror#32-18
781 ldrd r0,r1,[sp,#16] @ A[0][2]
783 eor r11,r9,r11,ror#32-18
785 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
790 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
/*
 * Output row 4: inputs A[0][2], A[1][3], A[2][4], A[3][0], A[4][1];
 * results R[4][0..4] go to sp+400 .. sp+432.
 */
793 ldmia r14,{r10,r11,r12,r14} @ D[2..3]
795 ldr r2,[sp,#64] @ A[1][3]
800 ldrd r2,r3,[sp,#64] @ A[1][3]
803 ldr r6,[sp,#232] @ D[4]
808 ldrd r6,r7,[sp,#232] @ D[4]
813 ldr r4,[sp,#112] @ A[2][4]
819 ldrd r4,r5,[sp,#112] @ A[2][4]
821 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
823 ldr r8,[sp,#200] @ D[0]
825 @ mov r1,r1,ror#32-31
829 ldrd r8,r9,[sp,#200] @ D[0]
834 ldr r10,[sp,#120] @ A[3][0]
840 ldrd r10,r11,[sp,#120] @ A[3][0]
842 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
844 ldr r12,[sp,#208] @ D[1]
850 ldrd r12,r14,[sp,#208] @ D[1]
855 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
860 ldr r8,[sp,#168] @ A[4][1]
866 ldrd r8,r9,[sp,#168] @ A[4][1]
868 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
873 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
878 eor r10,r10,r0,ror#32-31
880 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
882 eor r11,r11,r1,ror#32-31
886 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
893 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
899 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
902 bic r12,r0,r8,ror#31-1
903 bic r14,r1,r9,ror#31-1
904 eor r4,r4,r10,ror#32-1
906 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
908 eor r5,r5,r11,ror#32-1
912 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
914 eor r6,r6,r12,ror#32-31
915 eor r7,r7,r14,ror#32-31
917 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
919 bic r10,r2,r0,ror#32-31
923 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
925 bic r11,r3,r1,ror#32-31
927 eor r8,r10,r8,ror#32-1
929 eor r9,r11,r9,ror#32-1
931 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
936 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
/*
 * Second round: same structure as above, but the lanes are now read
 * from the buffer at sp+240 (the R[][] just written) and the results
 * are written to the buffer at sp+0.
 */
938 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
939 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
944 ldrd r10,r11,[sp,#296]
946 ldrd r12,r14,[sp,#304]
950 ldrd r10,r11,[sp,#312]
952 ldrd r12,r14,[sp,#320]
956 ldrd r10,r11,[sp,#328]
958 ldrd r12,r14,[sp,#336]
962 ldrd r10,r11,[sp,#344]
964 ldrd r12,r14,[sp,#352]
968 ldrd r10,r11,[sp,#360]
970 ldrd r12,r14,[sp,#368]
974 ldrd r10,r11,[sp,#376]
976 ldrd r12,r14,[sp,#384]
980 ldrd r10,r11,[sp,#392]
982 ldrd r12,r14,[sp,#400]
986 ldrd r10,r11,[sp,#408]
988 ldrd r12,r14,[sp,#256]
992 ldrd r10,r11,[sp,#264]
994 ldrd r12,r14,[sp,#272]
1001 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
1007 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
1013 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
1019 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
1025 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
1031 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
1037 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
1039 ldr r10,[sp,#408] @ A[4][1]
1043 ldr r12,[sp,#256] @ A[0][2]
1051 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
/*
 * E[]/D[] for the second round; D[0..4] reuse sp+200 .. sp+232.  The
 * ldrd instructions mixed in here fetch second-buffer lanes at
 * sp+384, sp+432, sp+240, sp+336 and sp+288 for the row that follows.
 */
1058 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
1060 str r10,[sp,#208] @ D[1] = E[0]
1066 strd r10,r11,[sp,#208] @ D[1] = E[0]
1068 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
1071 str r12,[sp,#232] @ D[4] = E[1]
1073 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
1077 strd r12,r14,[sp,#232] @ D[4] = E[1]
1081 str r0,[sp,#200] @ D[0] = C[0]
1083 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
1091 strd r0,r1,[sp,#200] @ D[0] = C[0]
1096 ldrd r7,r6,[sp,#384]
1099 str r2,[sp,#216] @ D[2] = C[1]
1101 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
1105 strd r2,r3,[sp,#216] @ D[2] = C[1]
1115 ldrd r8,r9,[sp,#432]
1118 str r4,[sp,#224] @ D[3] = C[2]
1124 strd r4,r5,[sp,#224] @ D[3] = C[2]
/*
 * Output row 0 of the second round: results R[0][0..4] go to
 * sp+0 .. sp+32.  The round counter at [sp,#444] is loaded and stored
 * back here, and the iotas entry is read at offset 8 from r14.
 * NOTE(review): presumably that is the entry following the one used by
 * the first round; the computation of r14 is not visible - confirm.
 */
1130 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
1131 @ mov r6,r6,ror#32-11
1135 ldrd r4,r5,[sp,#240]
1146 ldrd r12,r14,[sp,#336]
1148 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
1149 @ mov r9,r9,ror#32-7
1150 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
1159 ldrd r2,r3,[sp,#288]
1161 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1162 ldr r12,[sp,#444] @ load counter
1165 mov r4,r14,ror#32-22
1169 ldr r10,[r14,#8] @ iotas[i].lo
1173 ldr r11,[r14,#12] @ iotas[i].hi
1175 ldrd r10,r11,[r14,#8] @ iotas[i].lo
1178 str r12,[sp,#444] @ store counter
1179 bic r12,r4,r2,ror#32-22
1180 bic r14,r5,r3,ror#32-22
1181 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1188 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1190 bic r12,r6,r4,ror#11
1194 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1196 bic r14,r7,r5,ror#10
1197 bic r10,r8,r6,ror#32-(11-7)
1198 bic r11,r9,r7,ror#32-(10-7)
1199 eor r12,r2,r12,ror#32-11
1201 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1203 eor r14,r3,r14,ror#32-10
1207 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1209 eor r10,r4,r10,ror#32-7
1210 eor r11,r5,r11,ror#32-7
1212 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1214 bic r12,r0,r8,ror#32-7
1218 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1220 bic r14,r1,r9,ror#32-7
1221 eor r12,r12,r6,ror#32-11
1223 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1225 eor r14,r14,r7,ror#32-10
1229 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1234 ldr r0,[sp,#264] @ A[0][3]
1240 ldrd r0,r1,[sp,#264] @ A[0][3]
1242 eor r10,r10,r8,ror#32-7
1243 eor r11,r11,r9,ror#32-7
1245 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1251 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
/* Second round, output row 1: results R[1][0..4] go to sp+40 .. sp+72. */
1254 ldmia r14,{r10,r11,r12,r14} @ D[3..4]
1255 ldmia r9,{r6,r7,r8,r9} @ D[0..1]
1258 ldr r2,[sp,#312] @ A[1][4]
1264 ldrd r2,r3,[sp,#312] @ A[1][4]
1267 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1269 ldr r10,[sp,#368] @ A[3][1]
1271 @ mov r1,r1,ror#32-14
1275 ldrd r10,r11,[sp,#368] @ A[3][1]
1280 ldr r4,[sp,#320] @ A[2][0]
1286 ldrd r4,r5,[sp,#320] @ A[2][0]
1288 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1289 @ mov r3,r3,ror#32-10
1293 ldr r12,[sp,#216] @ D[2]
1299 ldrd r12,r14,[sp,#216] @ D[2]
1301 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1306 ldr r8,[sp,#416] @ A[4][2]
1312 ldrd r8,r9,[sp,#416] @ A[4][2]
1314 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1315 mov r6,r11,ror#32-23
1317 bic r10,r4,r2,ror#32-10
1318 bic r11,r5,r3,ror#32-10
1321 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1322 mov r8,r14,ror#32-31
1323 eor r10,r10,r0,ror#32-14
1324 eor r11,r11,r1,ror#32-14
1326 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1332 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1335 eor r12,r12,r2,ror#32-10
1337 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1339 eor r14,r14,r3,ror#32-10
1343 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1347 bic r12,r0,r8,ror#14
1348 bic r14,r1,r9,ror#14
1352 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1354 bic r2,r2,r0,ror#32-(14-10)
1358 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1360 eor r12,r6,r12,ror#32-14
1361 bic r11,r3,r1,ror#32-(14-10)
1363 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1365 eor r14,r7,r14,ror#32-14
1369 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1373 ldr r1,[sp,#248] @ A[0][1]
1375 eor r10,r8,r2,ror#32-10
1379 ldrd r1,r0,[sp,#248] @ A[0][1]
1381 eor r11,r9,r11,ror#32-10
1383 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1388 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
/* Second round, output row 2: results R[2][0..4] go to sp+80 .. sp+112. */
1392 ldmia r12,{r10,r11,r12,r14} @ D[1..2]
1394 ldr r2,[sp,#296] @ A[1][2]
1399 ldrd r2,r3,[sp,#296] @ A[1][2]
1401 ldmia r9,{r6,r7,r8,r9} @ D[3..4]
1405 ldr r4,[sp,#344] @ A[2][3]
1411 ldrd r4,r5,[sp,#344] @ A[2][3]
1413 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1417 ldr r10,[sp,#392] @ A[3][4]
1423 ldrd r10,r11,[sp,#392] @ A[3][4]
1425 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1427 ldr r12,[sp,#200] @ D[0]
1429 @ mov r3,r3,ror#32-3
1433 ldrd r12,r14,[sp,#200] @ D[0]
1438 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1439 @ mov r4,r7,ror#32-13 @ [track reverse order below]
1443 ldr r8,[sp,#400] @ A[4][0]
1449 ldrd r8,r9,[sp,#400] @ A[4][0]
1451 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1456 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1459 bic r10,r5,r2,ror#13-3
1460 bic r11,r4,r3,ror#12-3
1461 bic r12,r6,r5,ror#32-13
1462 bic r14,r7,r4,ror#32-12
1463 eor r10,r0,r10,ror#32-13
1464 eor r11,r1,r11,ror#32-12
1466 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1468 eor r12,r12,r2,ror#32-3
1472 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1474 eor r14,r14,r3,ror#32-3
1476 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1483 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1485 eor r10,r10,r5,ror#32-13
1486 eor r11,r11,r4,ror#32-12
1488 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1494 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1500 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1506 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1510 ldr r1,[sp,#272] @ A[0][4] [in reverse order]
1512 eor r10,r8,r10,ror#32-3
1516 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order]
1518 eor r11,r9,r11,ror#32-3
1520 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1526 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
/* Second round, output row 3: results R[3][0..4] go to sp+120 .. sp+152. */
1530 ldr r10,[sp,#232] @ D[4]
1535 ldrd r10,r11,[sp,#232] @ D[4]
1538 ldr r12,[sp,#200] @ D[0]
1543 ldrd r12,r14,[sp,#200] @ D[0]
1546 ldmia r9,{r6,r7,r8,r9} @ D[1..2]
1550 ldr r2,[sp,#280] @ A[1][0]
1556 ldrd r2,r3,[sp,#280] @ A[1][0]
1558 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1560 ldr r4,[sp,#328] @ A[2][1]
1562 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
1566 ldrd r4,r5,[sp,#328] @ A[2][1]
1571 ldr r10,[sp,#376] @ A[3][2]
1577 ldrd r10,r11,[sp,#376] @ A[3][2]
1579 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1581 ldr r12,[sp,#224] @ D[3]
1583 @ mov r3,r3,ror#32-18
1587 ldrd r12,r14,[sp,#224] @ D[3]
1592 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1597 ldr r8,[sp,#424] @ A[4][3]
1603 ldrd r8,r9,[sp,#424] @ A[4][3]
1605 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1610 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1611 mov r9,r14,ror#32-28
1613 bic r10,r4,r2,ror#32-18
1614 bic r11,r5,r3,ror#32-18
1615 eor r10,r10,r0,ror#32-14
1616 eor r11,r11,r1,ror#32-13
1618 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1624 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1627 eor r12,r12,r2,ror#32-18
1629 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1631 eor r14,r14,r3,ror#32-18
1635 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1639 bic r12,r0,r8,ror#14
1640 bic r14,r1,r9,ror#13
1644 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1646 bic r2,r2,r0,ror#18-14
1650 strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1652 eor r12,r6,r12,ror#32-14
1653 bic r11,r3,r1,ror#18-13
1654 eor r14,r7,r14,ror#32-13
1656 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1661 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1665 ldr r0,[sp,#256] @ A[0][2]
1667 eor r10,r8,r2,ror#32-18
1671 ldrd r0,r1,[sp,#256] @ A[0][2]
1673 eor r11,r9,r11,ror#32-18
1675 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1680 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
/* Second round, output row 4: results R[4][0..4] go to sp+160 .. sp+192. */
1683 ldmia r14,{r10,r11,r12,r14} @ D[2..3]
1685 ldr r2,[sp,#304] @ A[1][3]
1690 ldrd r2,r3,[sp,#304] @ A[1][3]
1693 ldr r6,[sp,#232] @ D[4]
1698 ldrd r6,r7,[sp,#232] @ D[4]
1703 ldr r4,[sp,#352] @ A[2][4]
1709 ldrd r4,r5,[sp,#352] @ A[2][4]
1711 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1713 ldr r8,[sp,#200] @ D[0]
1715 @ mov r1,r1,ror#32-31
1719 ldrd r8,r9,[sp,#200] @ D[0]
1724 ldr r10,[sp,#360] @ A[3][0]
1730 ldrd r10,r11,[sp,#360] @ A[3][0]
1732 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1734 ldr r12,[sp,#208] @ D[1]
1736 mov r2,r14,ror#32-28
1740 ldrd r12,r14,[sp,#208] @ D[1]
1745 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1750 ldr r8,[sp,#408] @ A[4][1]
1756 ldrd r8,r9,[sp,#408] @ A[4][1]
1758 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1759 mov r6,r11,ror#32-21
1763 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1764 @ mov r9,r3,ror#32-1
1768 eor r10,r10,r0,ror#32-31
1770 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1772 eor r11,r11,r1,ror#32-31
1776 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1783 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1789 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1792 bic r12,r0,r8,ror#31-1
1793 bic r14,r1,r9,ror#31-1
1794 eor r4,r4,r10,ror#32-1
1796 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1798 eor r5,r5,r11,ror#32-1
1802 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1804 eor r6,r6,r12,ror#32-31
1805 eor r7,r7,r14,ror#32-31
1807 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1809 bic r10,r2,r0,ror#32-31
1813 strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1815 bic r11,r3,r1,ror#32-31
1817 eor r8,r10,r8,ror#32-1
1819 eor r9,r11,r9,ror#32-1
1821 str r8,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1826 strd r8,r9,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
/* Size covers everything emitted since the KeccakF1600_int symbol. */
1831 .size KeccakF1600_int,.-KeccakF1600_int
/*
 * KeccakF1600 - wrapper that runs the permutation on a state whose
 * address arrives in r0.
 *
 * Visible steps:
 *   - push r0, r4-r11 and lr, then reserve 440+16 bytes of stack;
 *   - copy the state onto the stack in five 10-word (40-byte) chunks,
 *     200 bytes in total;
 *   - call KeccakF1600_enter;
 *   - reload the state pointer from [sp,#440+16] (the slot where r0
 *     was pushed) and copy the 200 bytes back to it;
 *   - return by popping r4-r11 and pc.
 * NOTE(review): the instructions that point r10/r11 at the source and
 * destination of the chunked copies, and the one that releases the
 * frame (including the saved r0 slot) before the final pop, are not
 * visible in this listing.
 */
1833 .type KeccakF1600, %function
1836 stmdb sp!,{r0,r4-r11,lr}
1837 sub sp,sp,#440+16 @ space for A[5][5],D[5],T[5][5],...
/* r0 is overwritten by the first load below; its pushed copy is used later. */
1841 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1842 stmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1843 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1844 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1845 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1846 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1847 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1848 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1849 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1852 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
/* Permute the stack copy. */
1854 bl KeccakF1600_enter
/* Copy the permuted state from the stack back to the caller's buffer. */
1856 ldr r11, [sp,#440+16] @ restore pointer to A
1857 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1858 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
1859 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1860 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1861 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1862 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1863 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1865 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
/* Loading pc from the stack performs the return. */
1869 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1870 .size KeccakF1600,.-KeccakF1600
/*
 * SHA3_absorb - xor input into the state in bit-interleaved form.
 *
 * Visible steps:
 *   - push r0-r12 and lr;
 *   - copy the 200-byte state from the pointer in r0 to the area
 *     addressed by r11, 40 bytes at a time;
 *   - build the masks 0x55555555 (r6), 0x33333333 (r7), 0x0f0f0f0f (r8)
 *     and 0x00ff00ff (r9);
 *   - per 8 input bytes: assemble lo/hi words in r0/r1, mask them in
 *     stages (1-, 2-, 4- then 8-bit groups) and store the result xor-ed
 *     into A_flat[i] through r10;
 *   - copy the state from the stack back through r10 and return the
 *     value of r12 in r0.
 * NOTE(review): argument roles, the shift/orr steps between the mask
 * stages, the loop control, any call into the permutation and the frame
 * release before the final pop are not visible in this listing.
 */
1872 .type SHA3_absorb,%function
1875 stmdb sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1886 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1887 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1888 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1889 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1890 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1891 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1892 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1893 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1894 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1895 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1897 ldr r11,[sp,#476] @ restore r11
/*
 * Mask construction.  The annotated values assume intermediate steps
 * that are not visible here (0x11 alone shifted by 16 would not give
 * 0x11111111).
 */
1904 mov r6,#0x11 @ compose constants
1909 orr r6,r6,r6,lsl#16 @ 0x11111111
1910 orr r9,r9,r9,lsl#16 @ 0x00ff00ff
1911 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
1912 orr r7,r6,r6,lsl#1 @ 0x33333333
1913 orr r6,r6,r6,lsl#2 @ 0x55555555
1926 str r0,[sp,#480] @ save len - bsz
/* Assemble the two 32-bit halves of the next 8 input bytes. */
1939 orr r0,r0,r3,lsl#24 @ lo
1943 orr r1,r1,r3,lsl#24 @ hi
/* Stage 1: split each word into its even-bit and odd-bit parts. */
1945 and r2,r0,r6 @ &=0x55555555
1946 and r0,r0,r6,lsl#1 @ &=0xaaaaaaaa
1947 and r3,r1,r6 @ &=0x55555555
1948 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
/* Stage 2: 2-bit groups. */
1953 and r2,r2,r7 @ &=0x33333333
1954 and r0,r0,r7,lsl#2 @ &=0xcccccccc
1955 and r3,r3,r7 @ &=0x33333333
1956 and r1,r1,r7,lsl#2 @ &=0xcccccccc
/* Stage 3: 4-bit groups. */
1961 and r2,r2,r8 @ &=0x0f0f0f0f
1962 and r0,r0,r8,lsl#4 @ &=0xf0f0f0f0
1963 and r3,r3,r8 @ &=0x0f0f0f0f
1964 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
1965 ldmia r10,{r4,r5} @ A_flat[i]
/* Stage 4: 8-bit groups. */
1970 and r2,r2,r9 @ &=0x00ff00ff
1971 and r0,r0,r9,lsl#8 @ &=0xff00ff00
1972 and r3,r3,r9 @ &=0x00ff00ff
1973 and r1,r1,r9,lsl#8 @ &=0xff00ff00
1985 stmia r10!,{r4,r5} @ A_flat[i++] ^= BitInterleave(inp[0..7])
1995 ldmia r14,{r6,r7,r8,r9,r10,r11,r12,r14} @ restore constants and variables
/* Exit path: write the stack copy of the state back through r10. */
2001 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2002 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
2003 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2004 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2005 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2006 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2007 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2008 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2009 ldmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2010 stmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2014 mov r0,r12 @ return value
/* Loading pc from the stack performs the return. */
2015 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2016 .size SHA3_absorb,.-SHA3_absorb
/*
 * SHA3_squeeze - read lanes out of the state and undo the bit
 * interleaving.
 *
 * Visible steps:
 *   - push r0, r3-r10 and lr;
 *   - build the masks 0x55555555 (r6), 0x33333333 (r7), 0x0f0f0f0f (r8)
 *     and 0x00ff00ff (r9) and push them;
 *   - per lane: load A_flat[i++] through r10, regroup its 16-bit halves
 *     and apply the masks in the order 8-, 4-, 2-, 1-bit groups (the
 *     reverse of the order used in SHA3_absorb);
 *   - decrement the remaining block size in r12 by 8 per lane;
 *   - return by popping r4-r10 and pc.
 * NOTE(review): argument roles, the shift/orr steps between the mask
 * stages, the output stores, the branches and the stack adjustments
 * that balance the pushes against the final pop are not visible in this
 * listing.
 */
2018 .type SHA3_squeeze,%function
2021 stmdb sp!,{r0,r3-r10,lr}
/*
 * Mask construction.  The annotated values assume intermediate steps
 * that are not visible here.
 */
2034 mov r6,#0x11 @ compose constants
2039 orr r6,r6,r6,lsl#16 @ 0x11111111
2040 orr r9,r9,r9,lsl#16 @ 0x00ff00ff
2041 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
2042 orr r7,r6,r6,lsl#1 @ 0x33333333
2043 orr r6,r6,r6,lsl#2 @ 0x55555555
/* Keep the masks on the stack so they can be reloaded later. */
2045 stmdb sp!,{r6,r7,r8,r9}
2052 ldmia r10!,{r0,r1} @ A_flat[i++]
/* Regroup the 16-bit halves of the two words. */
2055 mov r3,r1,lsl#16 @ r3 = r1 << 16
2056 mov r2,r2,lsr#16 @ r2 = r0 & 0x0000ffff
2058 mov r0,r0,lsr#16 @ r0 = r0 >> 16
2059 mov r1,r1,lsl#16 @ r1 = r1 & 0xffff0000
/* Stage 1: 8-bit groups. */
2065 and r2,r2,r9 @ &=0x00ff00ff
2066 and r3,r3,r9,lsl#8 @ &=0xff00ff00
2067 and r0,r0,r9 @ &=0x00ff00ff
2068 and r1,r1,r9,lsl#8 @ &=0xff00ff00
/* Stage 2: 4-bit groups. */
2073 and r2,r2,r8 @ &=0x0f0f0f0f
2074 and r3,r3,r8,lsl#4 @ &=0xf0f0f0f0
2075 and r0,r0,r8 @ &=0x0f0f0f0f
2076 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
/* Stage 3: 2-bit groups. */
2081 and r2,r2,r7 @ &=0x33333333
2082 and r3,r3,r7,lsl#2 @ &=0xcccccccc
2083 and r0,r0,r7 @ &=0x33333333
2084 and r1,r1,r7,lsl#2 @ &=0xcccccccc
/* Stage 4: single bits. */
2089 and r2,r2,r6 @ &=0x55555555
2090 and r3,r3,r6,lsl#1 @ &=0xaaaaaaaa
2091 and r0,r0,r6 @ &=0x55555555
2092 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
2117 subs r12,r12,#8 @ bsz -= 8
2120 mov r0,r14 @ original r10
2124 ldmia sp,{r6,r7,r8,r9,r10,r12} @ restore constants and variables
/* Loading pc from the stack performs the return. */
2160 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2161 .size SHA3_squeeze,.-SHA3_squeeze
2162 #if __ARM_MAX_ARCH__>=7
/*
 * iotas64 - table of 24 64-bit constants, one .quad per entry.
 *
 * The values are the Keccak-f[1600] round constants RC[0..23] in plain
 * (non-interleaved) 64-bit form; iotas32 holds the same constants split
 * into even-bit and odd-bit words.
 * NOTE(review): the iotas64 label, alignment directives and the code
 * that reads this table are not visible in this listing; presumably it
 * serves the NEON code path guarded by the same __ARM_MAX_ARCH__ check.
 */
2165 .type iotas64, %object
2168 .quad 0x0000000000000001
2169 .quad 0x0000000000008082
2170 .quad 0x800000000000808a
2171 .quad 0x8000000080008000
2172 .quad 0x000000000000808b
2173 .quad 0x0000000080000001
2174 .quad 0x8000000080008081
2175 .quad 0x8000000000008009
2176 .quad 0x000000000000008a
2177 .quad 0x0000000000000088
2178 .quad 0x0000000080008009
2179 .quad 0x000000008000000a
2180 .quad 0x000000008000808b
2181 .quad 0x800000000000008b
2182 .quad 0x8000000000008089
2183 .quad 0x8000000000008003
2184 .quad 0x8000000000008002
2185 .quad 0x8000000000000080
2186 .quad 0x000000000000800a
2187 .quad 0x800000008000000a
2188 .quad 0x8000000080008081
2189 .quad 0x8000000000008080
2190 .quad 0x0000000080000001
2191 .quad 0x8000000080008008
/* Size covers everything emitted since the iotas64 symbol. */
2192 .size iotas64,.-iotas64
2194 .type KeccakF1600_neon, %function
2199 mov r3, #24 @ loop counter
2205 vst1.64 {q4}, [r0,:64] @ offload A[0..1][4]
2206 veor q13, q0, q5 @ A[0..1][0]^A[2..3][0]
2207 vst1.64 {d18}, [r1,:64] @ offload A[2][4]
2208 veor q14, q1, q6 @ A[0..1][1]^A[2..3][1]
2209 veor q15, q2, q7 @ A[0..1][2]^A[2..3][2]
2210 veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2211 veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2212 veor q14, q3, q8 @ A[0..1][3]^A[2..3][3]
2213 veor q4, q4, q9 @ A[0..1][4]^A[2..3][4]
2214 veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2215 veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2216 veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2217 veor q13, q13, q10 @ C[0..1]^=A[4][0..1]
2218 veor q14, q15, q11 @ C[2..3]^=A[4][2..3]
2219 veor d25, d25, d24 @ C[4]^=A[4][4]
2221 vadd.u64 q4, q13, q13 @ C[0..1]<<1
2222 vadd.u64 q15, q14, q14 @ C[2..3]<<1
2223 vadd.u64 d18, d25, d25 @ C[4]<<1
2224 vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1)
2225 vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1)
2226 vsri.u64 d18, d25, #63 @ ROL64(C[4],1)
2227 veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1)
2228 veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2229 veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1)
2230 veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1)
2232 veor d0, d0, d25 @ A[0][0] ^= C[4]
2233 veor d1, d1, d25 @ A[1][0] ^= C[4]
2234 veor d10, d10, d25 @ A[2][0] ^= C[4]
2235 veor d11, d11, d25 @ A[3][0] ^= C[4]
2236 veor d20, d20, d25 @ A[4][0] ^= C[4]
2238 veor d2, d2, d26 @ A[0][1] ^= D[1]
2239 veor d3, d3, d26 @ A[1][1] ^= D[1]
2240 veor d12, d12, d26 @ A[2][1] ^= D[1]
2241 veor d13, d13, d26 @ A[3][1] ^= D[1]
2242 veor d21, d21, d26 @ A[4][1] ^= D[1]
2245 veor d6, d6, d28 @ A[0][3] ^= C[2]
2246 veor d7, d7, d28 @ A[1][3] ^= C[2]
2247 veor d16, d16, d28 @ A[2][3] ^= C[2]
2248 veor d17, d17, d28 @ A[3][3] ^= C[2]
2249 veor d23, d23, d28 @ A[4][3] ^= C[2]
2250 vld1.64 {q4}, [r0,:64] @ restore A[0..1][4]
2253 vld1.64 {d18}, [r1,:64] @ restore A[2][4]
2254 veor q2, q2, q13 @ A[0..1][2] ^= D[2]
2255 veor q7, q7, q13 @ A[2..3][2] ^= D[2]
2256 veor d22, d22, d27 @ A[4][2] ^= D[2]
2258 veor q4, q4, q14 @ A[0..1][4] ^= C[3]
2259 veor q9, q9, q14 @ A[2..3][4] ^= C[3]
2260 veor d24, d24, d29 @ A[4][4] ^= C[3]
2263 vmov d26, d2 @ C[1] = A[0][1]
2264 vshl.u64 d2, d3, #44
2265 vmov d27, d4 @ C[2] = A[0][2]
2266 vshl.u64 d4, d14, #43
2267 vmov d28, d6 @ C[3] = A[0][3]
2268 vshl.u64 d6, d17, #21
2269 vmov d29, d8 @ C[4] = A[0][4]
2270 vshl.u64 d8, d24, #14
2271 vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2272 vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2273 vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2274 vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2276 vshl.u64 d3, d9, #20
2277 vshl.u64 d14, d16, #25
2278 vshl.u64 d17, d15, #15
2279 vshl.u64 d24, d21, #2
2280 vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2281 vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2282 vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2283 vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2285 vshl.u64 d9, d22, #61
2286 @ vshl.u64 d16, d19, #8
2287 vshl.u64 d15, d12, #10
2288 vshl.u64 d21, d7, #55
2289 vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2290 vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2291 vsri.u64 d15, d12, #64-10 @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2292 vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2294 vshl.u64 d22, d18, #39
2295 @ vshl.u64 d19, d23, #56
2296 vshl.u64 d12, d5, #6
2297 vshl.u64 d7, d13, #45
2298 vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2299 vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2300 vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2301 vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2303 vshl.u64 d18, d20, #18
2304 vshl.u64 d23, d11, #41
2305 vshl.u64 d5, d10, #3
2306 vshl.u64 d13, d1, #36
2307 vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2308 vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2309 vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2310 vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2312 vshl.u64 d1, d28, #28
2313 vshl.u64 d10, d26, #1
2314 vshl.u64 d11, d29, #27
2315 vshl.u64 d20, d27, #62
2316 vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3])
2317 vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1])
2318 vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4])
2319 vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2])
2325 veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2326 veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2327 veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2328 vst1.64 {q13}, [r0,:64] @ offload A[0..1][0]
2331 vmov q1, q14 @ A[0..1][1]
2332 veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2333 veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2336 vmov q0, q5 @ A[2..3][0]
2338 vmov q15, q6 @ A[2..3][1]
2339 veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2341 veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2343 veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2345 veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
2346 vmov q14, q10 @ A[4][0..1]
2347 veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2349 vld1.64 d25, [r2,:64]! @ Iota[i++]
2352 vld1.64 {q0}, [r0,:64] @ restore A[0..1][0]
2353 veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2])
2355 veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3])
2357 veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4])
2359 veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0])
2360 veor d0, d0, d25 @ A[0][0] ^= Iota[i]
2361 veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1])
2367 .size KeccakF1600_neon,.-KeccakF1600_neon
@ SHA3_absorb_neon -- NEON flavour of the absorb step.
@
@ What the visible code establishes:
@   r0  points at the state of 25 64-bit lanes; it is read into d0-d24
@       on entry and written back before returning.
@   r4  is the byte pointer the input is fetched through (post-incremented).
@   r5  is len and r6 is bsz according to the comment on the subs below;
@       r5 is the value handed back in r0.
@ NOTE(review): the instructions that initialise r4-r6 and the labels and
@ branches that form the absorb loop are not part of this excerpt -- confirm
@ against the full listing before relying on the loop structure.
@
@ State lane to register mapping set up by the loads below:
@   A[0][0..4] = d0,d2,d4,d6,d8        A[1][0..4] = d1,d3,d5,d7,d9
@   A[2][0..4] = d10,d12,d14,d16,d18   A[3][0..4] = d11,d13,d15,d17,d19
@   A[4][0..4] = d20,d21,d22,d23,d24
@ Rows 0/1 and rows 2/3 are interleaved so that A[0][j]/A[1][j] and
@ A[2][j]/A[3][j] each share one q register (for example q2 = A[0..1][2]).
2369 .globl SHA3_absorb_neon
2370 .type SHA3_absorb_neon, %function
@ Save the core registers used as locals plus the return address.
2373 stmdb sp!, {r4,r5,r6,lr}
@ d8-d15 are callee-saved under the AAPCS and are overwritten by state lanes.
2374 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ Pull the whole state into NEON registers. Every load except the last one
@ post-increments r0 (8 bytes per single register, 32 for the four-register
@ load), 24*8 bytes in total. The :64 qualifier asserts an 8-byte aligned
@ address.
2380 vld1.32 {d0}, [r0,:64]! @ A[0][0]
2381 vld1.32 {d2}, [r0,:64]! @ A[0][1]
2382 vld1.32 {d4}, [r0,:64]! @ A[0][2]
2383 vld1.32 {d6}, [r0,:64]! @ A[0][3]
2384 vld1.32 {d8}, [r0,:64]! @ A[0][4]
2386 vld1.32 {d1}, [r0,:64]! @ A[1][0]
2387 vld1.32 {d3}, [r0,:64]! @ A[1][1]
2388 vld1.32 {d5}, [r0,:64]! @ A[1][2]
2389 vld1.32 {d7}, [r0,:64]! @ A[1][3]
2390 vld1.32 {d9}, [r0,:64]! @ A[1][4]
2392 vld1.32 {d10}, [r0,:64]! @ A[2][0]
2393 vld1.32 {d12}, [r0,:64]! @ A[2][1]
2394 vld1.32 {d14}, [r0,:64]! @ A[2][2]
2395 vld1.32 {d16}, [r0,:64]! @ A[2][3]
2396 vld1.32 {d18}, [r0,:64]! @ A[2][4]
2398 vld1.32 {d11}, [r0,:64]! @ A[3][0]
2399 vld1.32 {d13}, [r0,:64]! @ A[3][1]
2400 vld1.32 {d15}, [r0,:64]! @ A[3][2]
2401 vld1.32 {d17}, [r0,:64]! @ A[3][3]
2402 vld1.32 {d19}, [r0,:64]! @ A[3][4]
2404 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..3]
2405 vld1.32 {d24}, [r0,:64] @ A[4][4]
@ Undo the 24 post-increments so r0 points at A[0][0] again.
2406 sub r0, r0, #24*8 @ rewind
@ r12 = len - bsz, with flags set. The branch that consumes the flags is not
@ visible in this excerpt; presumably it leaves the loop when less than one
@ full block remains -- confirm.
2411 subs r12, r5, r6 @ len - bsz
@ XOR the input into the state one 64-bit lane at a time through d31.
@ The byte-element load (vld1.8) keeps the lane value independent of CPU
@ endianness. NOTE(review): only this first d31 load is visible here;
@ presumably d31 is reloaded from [r4] before each following veor, with
@ bsz checks in between -- confirm against the full listing.
2415 vld1.8 {d31}, [r4]! @ endian-neutral loads...
2417 veor d0, d0, d31 @ A[0][0] ^= *inp++
2420 veor d2, d2, d31 @ A[0][1] ^= *inp++
2424 veor d4, d4, d31 @ A[0][2] ^= *inp++
2427 veor d6, d6, d31 @ A[0][3] ^= *inp++
2431 veor d8, d8, d31 @ A[0][4] ^= *inp++
2435 veor d1, d1, d31 @ A[1][0] ^= *inp++
2439 veor d3, d3, d31 @ A[1][1] ^= *inp++
2442 veor d5, d5, d31 @ A[1][2] ^= *inp++
2446 veor d7, d7, d31 @ A[1][3] ^= *inp++
2449 veor d9, d9, d31 @ A[1][4] ^= *inp++
2454 veor d10, d10, d31 @ A[2][0] ^= *inp++
2457 veor d12, d12, d31 @ A[2][1] ^= *inp++
2461 veor d14, d14, d31 @ A[2][2] ^= *inp++
2464 veor d16, d16, d31 @ A[2][3] ^= *inp++
2468 veor d18, d18, d31 @ A[2][4] ^= *inp++
2472 veor d11, d11, d31 @ A[3][0] ^= *inp++
2476 veor d13, d13, d31 @ A[3][1] ^= *inp++
2479 veor d15, d15, d31 @ A[3][2] ^= *inp++
2483 veor d17, d17, d31 @ A[3][3] ^= *inp++
2486 veor d19, d19, d31 @ A[3][4] ^= *inp++
2491 veor d20, d20, d31 @ A[4][0] ^= *inp++
2494 veor d21, d21, d31 @ A[4][1] ^= *inp++
2498 veor d22, d22, d31 @ A[4][2] ^= *inp++
2501 veor d23, d23, d31 @ A[4][3] ^= *inp++
2504 veor d24, d24, d31 @ A[4][4] ^= *inp++
@ Write all 25 lanes back to the state buffer, in the same memory order in
@ which they were loaded (A[0][0] first, A[4][4] last).
2512 vst1.32 {d0}, [r0,:64]! @ A[0][0..4]
2513 vst1.32 {d2}, [r0,:64]!
2514 vst1.32 {d4}, [r0,:64]!
2515 vst1.32 {d6}, [r0,:64]!
2516 vst1.32 {d8}, [r0,:64]!
2518 vst1.32 {d1}, [r0,:64]! @ A[1][0..4]
2519 vst1.32 {d3}, [r0,:64]!
2520 vst1.32 {d5}, [r0,:64]!
2521 vst1.32 {d7}, [r0,:64]!
2522 vst1.32 {d9}, [r0,:64]!
2524 vst1.32 {d10}, [r0,:64]! @ A[2][0..4]
2525 vst1.32 {d12}, [r0,:64]!
2526 vst1.32 {d14}, [r0,:64]!
2527 vst1.32 {d16}, [r0,:64]!
2528 vst1.32 {d18}, [r0,:64]!
2530 vst1.32 {d11}, [r0,:64]! @ A[3][0..4]
2531 vst1.32 {d13}, [r0,:64]!
2532 vst1.32 {d15}, [r0,:64]!
2533 vst1.32 {d17}, [r0,:64]!
2534 vst1.32 {d19}, [r0,:64]!
2536 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2537 vst1.32 {d24}, [r0,:64]
@ Hand back r5 (len, see the subs above) in r0, then unwind the stack in the
@ reverse order of the prologue. Loading pc from the saved lr returns.
2539 mov r0, r5 @ return value
2540 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2541 ldmia sp!, {r4,r5,r6,pc}
2542 .size SHA3_absorb_neon,.-SHA3_absorb_neon
@ SHA3_squeeze_neon -- NEON flavour of the squeeze step.
@
@ What the visible code establishes:
@   r0   points at the state (A_flat); r12 is a running read cursor into it.
@   r4   is the output byte pointer (post-incremented by every store).
@   r5   is len (bytes still to produce) and r14 is bsz (bytes left in the
@        current block), per the comments on the two subs instructions.
@ NOTE(review): the entry label, the set-up of r4/r5/r14, the
@ .Loop_squeeze_neon label with its compare, and most of the tail sequence
@ are not part of this excerpt -- confirm against the full listing.
2544 .globl SHA3_squeeze_neon
2545 .type SHA3_squeeze_neon, %function
@ Save the core registers used as locals plus the return address.
2548 stmdb sp!, {r4,r5,r6,lr}
@ r12 walks through the state while r0 keeps pointing at its start.
2553 mov r12, r0 @ A_flat
2555 b .Loop_squeeze_neon
@ Presumably taken when fewer than 8 bytes are still wanted (the compare
@ that sets the flags is not visible here) -- confirm.
2560 blo .Lsqueeze_neon_tail
@ Copy one 64-bit lane from the state to the output. Only d0 is used here,
@ so d8-d15 do not need saving on this path. The byte-element store keeps
@ the output byte order independent of CPU endianness.
2561 vld1.32 {d0}, [r12]!
2562 vst1.8 {d0}, [r4]! @ endian-neutral store
2564 subs r5, r5, #8 @ len -= 8
@ Exactly len bytes have been produced.
2565 beq .Lsqueeze_neon_done
2567 subs r14, r14, #8 @ bsz -= 8
@ Keep copying while bytes remain in the current block.
2568 bhi .Loop_squeeze_neon
@ Fall-through: the current block is used up. d8-d15 are callee-saved under
@ the AAPCS and are about to be overwritten by state lanes, so save them.
2570 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ Load all 25 lanes into d0-d24; r0 is post-incremented by 24*8 in total.
2572 vld1.32 {d0}, [r0,:64]! @ A[0][0..4]
2573 vld1.32 {d2}, [r0,:64]!
2574 vld1.32 {d4}, [r0,:64]!
2575 vld1.32 {d6}, [r0,:64]!
2576 vld1.32 {d8}, [r0,:64]!
2578 vld1.32 {d1}, [r0,:64]! @ A[1][0..4]
2579 vld1.32 {d3}, [r0,:64]!
2580 vld1.32 {d5}, [r0,:64]!
2581 vld1.32 {d7}, [r0,:64]!
2582 vld1.32 {d9}, [r0,:64]!
2584 vld1.32 {d10}, [r0,:64]! @ A[2][0..4]
2585 vld1.32 {d12}, [r0,:64]!
2586 vld1.32 {d14}, [r0,:64]!
2587 vld1.32 {d16}, [r0,:64]!
2588 vld1.32 {d18}, [r0,:64]!
2590 vld1.32 {d11}, [r0,:64]! @ A[3][0..4]
2591 vld1.32 {d13}, [r0,:64]!
2592 vld1.32 {d15}, [r0,:64]!
2593 vld1.32 {d17}, [r0,:64]!
2594 vld1.32 {d19}, [r0,:64]!
2596 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2597 vld1.32 {d24}, [r0,:64]
2598 sub r0, r0, #24*8 @ rewind
@ NOTE(review): nothing between the rewind above and the store-back below is
@ visible in this excerpt; presumably the permutation (KeccakF1600_neon) is
@ invoked at this point -- confirm against the full listing.
@ r12 keeps the state base: it survives the post-incrementing stores below,
@ is used to restore r0 afterwards, and serves as the fresh read cursor for
@ the copy loop.
2602 mov r12, r0 @ A_flat
2603 vst1.32 {d0}, [r0,:64]! @ A[0][0..4]
2604 vst1.32 {d2}, [r0,:64]!
2605 vst1.32 {d4}, [r0,:64]!
2606 vst1.32 {d6}, [r0,:64]!
2607 vst1.32 {d8}, [r0,:64]!
2609 vst1.32 {d1}, [r0,:64]! @ A[1][0..4]
2610 vst1.32 {d3}, [r0,:64]!
2611 vst1.32 {d5}, [r0,:64]!
2612 vst1.32 {d7}, [r0,:64]!
2613 vst1.32 {d9}, [r0,:64]!
2615 vst1.32 {d10}, [r0,:64]! @ A[2][0..4]
2616 vst1.32 {d12}, [r0,:64]!
2617 vst1.32 {d14}, [r0,:64]!
2618 vst1.32 {d16}, [r0,:64]!
2619 vst1.32 {d18}, [r0,:64]!
2621 vst1.32 {d11}, [r0,:64]! @ A[3][0..4]
2622 vst1.32 {d13}, [r0,:64]!
2623 vst1.32 {d15}, [r0,:64]!
2624 vst1.32 {d17}, [r0,:64]!
2625 vst1.32 {d19}, [r0,:64]!
2627 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4]
2629 vst1.32 {d24}, [r0,:64]
2630 mov r0, r12 @ rewind
@ State registers are no longer needed; give d8-d15 back before looping.
2632 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2633 b .Loop_squeeze_neon
@ Tail: emits the final partial lane byte by byte through r4.
@ NOTE(review): the load of r2 and the compare/shift/store steps between the
@ branches below are not visible in this excerpt; presumably each branch
@ follows a length check so that exactly the remaining len bytes are
@ written -- confirm against the full listing.
2636 .Lsqueeze_neon_tail:
2639 strb r2, [r4],#1 @ endian-neutral store
2641 blo .Lsqueeze_neon_done
2644 beq .Lsqueeze_neon_done
2648 blo .Lsqueeze_neon_done
2650 beq .Lsqueeze_neon_done
2655 blo .Lsqueeze_neon_done
2658 beq .Lsqueeze_neon_done
@ Common exit: restore r4-r6 and return by loading pc from the saved lr.
2661 .Lsqueeze_neon_done:
2662 ldmia sp!, {r4,r5,r6,pc}
2663 .size SHA3_squeeze_neon,.-SHA3_squeeze_neon
@ NUL-terminated ASCII identification string embedded in the object:
@ Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>
2665 .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0