CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - secure/lib/libcrypto/arm/keccak1600-armv4.S
Use a template assembly file to generate the embedded MFS.
[FreeBSD/FreeBSD.git] / secure / lib / libcrypto / arm / keccak1600-armv4.S
1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
3 #include "arm_arch.h"
4
/* Switch to the .text section for the table and code below. */
5 .text
6
/*
 * Select the instruction set: if __thumb2__ is defined the file is assembled
 * as Thumb code using unified syntax, otherwise as 32-bit ARM code.
 * The same macro is tested again inside KeccakF1600_int to choose between
 * ldrd/strd pairs and single-word ldr/str sequences.
 */
7 #if defined(__thumb2__)
8 .syntax unified
9 .thumb
10 #else
11 .code   32
12 #endif
13
/*
 * iotas32: read-only table of 24 entries, each entry a pair of 32-bit words
 * (8 bytes per entry, 192 bytes in total).
 *
 * KeccakF1600_int takes the table address with "adr r10,iotas32", adds the
 * counter it keeps at [sp,#444] and loads one pair with
 * "ldmia r14,{r10,r11}" (annotated there as iotas[i]); the pair is then
 * XORed into the value stored as R[0][0].
 *
 * NOTE(review): the values look like the 64-bit Keccak-f[1600] iota round
 * constants in bit-interleaved form (first word = even-numbered bits,
 * second word = odd-numbered bits), e.g. 0x0000000000008082 becomes
 * 0x00000000, 0x00000089. Confirm against keccak1600-armv4.pl.
 *
 * This file is generated; lasting changes belong in keccak1600-armv4.pl.
 */
14 .type   iotas32, %object     /* mark the symbol as a data object */
15 .align  5                    /* on ARM the operand is a power of two: 2^5 = 32-byte alignment */
16 iotas32:
17 .long   0x00000001, 0x00000000       /* entry  0 */
18 .long   0x00000000, 0x00000089       /* entry  1 */
19 .long   0x00000000, 0x8000008b       /* entry  2 */
20 .long   0x00000000, 0x80008080       /* entry  3 */
21 .long   0x00000001, 0x0000008b       /* entry  4 */
22 .long   0x00000001, 0x00008000       /* entry  5 */
23 .long   0x00000001, 0x80008088       /* entry  6 */
24 .long   0x00000001, 0x80000082       /* entry  7 */
25 .long   0x00000000, 0x0000000b       /* entry  8 */
26 .long   0x00000000, 0x0000000a       /* entry  9 */
27 .long   0x00000001, 0x00008082       /* entry 10 */
28 .long   0x00000000, 0x00008003       /* entry 11 */
29 .long   0x00000001, 0x0000808b       /* entry 12 */
30 .long   0x00000001, 0x8000000b       /* entry 13 */
31 .long   0x00000001, 0x8000008a       /* entry 14 */
32 .long   0x00000001, 0x80000081       /* entry 15 */
33 .long   0x00000000, 0x80000081       /* entry 16 */
34 .long   0x00000000, 0x80000008       /* entry 17 */
35 .long   0x00000000, 0x00000083       /* entry 18 */
36 .long   0x00000000, 0x80008003       /* entry 19 */
37 .long   0x00000001, 0x80008088       /* entry 20 */
38 .long   0x00000000, 0x80000088       /* entry 21 */
39 .long   0x00000001, 0x00008000       /* entry 22 */
40 .long   0x00000000, 0x80008082       /* entry 23 */
41 .size   iotas32,.-iotas32    /* symbol size = bytes emitted since the iotas32 label */
42
43 .type   KeccakF1600_int, %function
44 .align  5
45 KeccakF1600_int:
46         add     r9,sp,#176
47         add     r12,sp,#0
48         add     r10,sp,#40
49         ldmia   r9,{r4,r5,r6,r7,r8,r9}          @ A[4][2..4]
50 KeccakF1600_enter:
51         str     lr,[sp,#440]
52         eor     r11,r11,r11
53         str     r11,[sp,#444]
54         b       .Lround2x
55
56 .align  4
57 .Lround2x:
58         ldmia   r12,{r0,r1,r2,r3}               @ A[0][0..1]
59         ldmia   r10,{r10,r11,r12,r14}   @ A[1][0..1]
60 #ifdef  __thumb2__
61         eor     r0,r0,r10
62         eor     r1,r1,r11
63         eor     r2,r2,r12
64         ldrd    r10,r11,[sp,#56]
65         eor     r3,r3,r14
66         ldrd    r12,r14,[sp,#64]
67         eor     r4,r4,r10
68         eor     r5,r5,r11
69         eor     r6,r6,r12
70         ldrd    r10,r11,[sp,#72]
71         eor     r7,r7,r14
72         ldrd    r12,r14,[sp,#80]
73         eor     r8,r8,r10
74         eor     r9,r9,r11
75         eor     r0,r0,r12
76         ldrd    r10,r11,[sp,#88]
77         eor     r1,r1,r14
78         ldrd    r12,r14,[sp,#96]
79         eor     r2,r2,r10
80         eor     r3,r3,r11
81         eor     r4,r4,r12
82         ldrd    r10,r11,[sp,#104]
83         eor     r5,r5,r14
84         ldrd    r12,r14,[sp,#112]
85         eor     r6,r6,r10
86         eor     r7,r7,r11
87         eor     r8,r8,r12
88         ldrd    r10,r11,[sp,#120]
89         eor     r9,r9,r14
90         ldrd    r12,r14,[sp,#128]
91         eor     r0,r0,r10
92         eor     r1,r1,r11
93         eor     r2,r2,r12
94         ldrd    r10,r11,[sp,#136]
95         eor     r3,r3,r14
96         ldrd    r12,r14,[sp,#144]
97         eor     r4,r4,r10
98         eor     r5,r5,r11
99         eor     r6,r6,r12
100         ldrd    r10,r11,[sp,#152]
101         eor     r7,r7,r14
102         ldrd    r12,r14,[sp,#160]
103         eor     r8,r8,r10
104         eor     r9,r9,r11
105         eor     r0,r0,r12
106         ldrd    r10,r11,[sp,#168]
107         eor     r1,r1,r14
108         ldrd    r12,r14,[sp,#16]
109         eor     r2,r2,r10
110         eor     r3,r3,r11
111         eor     r4,r4,r12
112         ldrd    r10,r11,[sp,#24]
113         eor     r5,r5,r14
114         ldrd    r12,r14,[sp,#32]
115 #else
116         eor     r0,r0,r10
117         add     r10,sp,#56
118         eor     r1,r1,r11
119         eor     r2,r2,r12
120         eor     r3,r3,r14
121         ldmia   r10,{r10,r11,r12,r14}   @ A[1][2..3]
122         eor     r4,r4,r10
123         add     r10,sp,#72
124         eor     r5,r5,r11
125         eor     r6,r6,r12
126         eor     r7,r7,r14
127         ldmia   r10,{r10,r11,r12,r14}   @ A[1][4]..A[2][0]
128         eor     r8,r8,r10
129         add     r10,sp,#88
130         eor     r9,r9,r11
131         eor     r0,r0,r12
132         eor     r1,r1,r14
133         ldmia   r10,{r10,r11,r12,r14}   @ A[2][1..2]
134         eor     r2,r2,r10
135         add     r10,sp,#104
136         eor     r3,r3,r11
137         eor     r4,r4,r12
138         eor     r5,r5,r14
139         ldmia   r10,{r10,r11,r12,r14}   @ A[2][3..4]
140         eor     r6,r6,r10
141         add     r10,sp,#120
142         eor     r7,r7,r11
143         eor     r8,r8,r12
144         eor     r9,r9,r14
145         ldmia   r10,{r10,r11,r12,r14}   @ A[3][0..1]
146         eor     r0,r0,r10
147         add     r10,sp,#136
148         eor     r1,r1,r11
149         eor     r2,r2,r12
150         eor     r3,r3,r14
151         ldmia   r10,{r10,r11,r12,r14}   @ A[3][2..3]
152         eor     r4,r4,r10
153         add     r10,sp,#152
154         eor     r5,r5,r11
155         eor     r6,r6,r12
156         eor     r7,r7,r14
157         ldmia   r10,{r10,r11,r12,r14}   @ A[3][4]..A[4][0]
158         eor     r8,r8,r10
159         ldr     r10,[sp,#168]           @ A[4][1]
160         eor     r9,r9,r11
161         ldr     r11,[sp,#168+4]
162         eor     r0,r0,r12
163         ldr     r12,[sp,#16]            @ A[0][2]
164         eor     r1,r1,r14
165         ldr     r14,[sp,#16+4]
166         eor     r2,r2,r10
167         add     r10,sp,#24
168         eor     r3,r3,r11
169         eor     r4,r4,r12
170         eor     r5,r5,r14
171         ldmia   r10,{r10,r11,r12,r14}   @ A[0][3..4]
172 #endif
173         eor     r6,r6,r10
174         eor     r7,r7,r11
175         eor     r8,r8,r12
176         eor     r9,r9,r14
177
178         eor     r10,r0,r5,ror#32-1      @ E[0] = ROL64(C[2], 1) ^ C[0];
179 #ifndef __thumb2__
180         str     r10,[sp,#208]           @ D[1] = E[0]
181 #endif
182         eor     r11,r1,r4
183 #ifndef __thumb2__
184         str     r11,[sp,#208+4]
185 #else
186         strd    r10,r11,[sp,#208]               @ D[1] = E[0]
187 #endif
188         eor     r12,r6,r1,ror#32-1      @ E[1] = ROL64(C[0], 1) ^ C[3];
189         eor     r14,r7,r0
190 #ifndef __thumb2__
191         str     r12,[sp,#232]           @ D[4] = E[1]
192 #endif
193         eor     r0,r8,r3,ror#32-1       @ C[0] = ROL64(C[1], 1) ^ C[4];
194 #ifndef __thumb2__
195         str     r14,[sp,#232+4]
196 #else
197         strd    r12,r14,[sp,#232]               @ D[4] = E[1]
198 #endif
199         eor     r1,r9,r2
200 #ifndef __thumb2__
201         str     r0,[sp,#200]            @ D[0] = C[0]
202 #endif
203         eor     r2,r2,r7,ror#32-1       @ C[1] = ROL64(C[3], 1) ^ C[1];
204 #ifndef __thumb2__
205         ldr     r7,[sp,#144]
206 #endif
207         eor     r3,r3,r6
208 #ifndef __thumb2__
209         str     r1,[sp,#200+4]
210 #else
211         strd    r0,r1,[sp,#200]         @ D[0] = C[0]
212 #endif
213 #ifndef __thumb2__
214         ldr     r6,[sp,#144+4]
215 #else
216         ldrd    r7,r6,[sp,#144]
217 #endif
218 #ifndef __thumb2__
219         str     r2,[sp,#216]            @ D[2] = C[1]
220 #endif
221         eor     r4,r4,r9,ror#32-1       @ C[2] = ROL64(C[4], 1) ^ C[2];
222 #ifndef __thumb2__
223         str     r3,[sp,#216+4]
224 #else
225         strd    r2,r3,[sp,#216]         @ D[2] = C[1]
226 #endif
227         eor     r5,r5,r8
228
229 #ifndef __thumb2__
230         ldr     r8,[sp,#192]
231 #endif
232 #ifndef __thumb2__
233         ldr     r9,[sp,#192+4]
234 #else
235         ldrd    r8,r9,[sp,#192]
236 #endif
237 #ifndef __thumb2__
238         str     r4,[sp,#224]            @ D[3] = C[2]
239 #endif
240         eor     r7,r7,r4
241 #ifndef __thumb2__
242         str     r5,[sp,#224+4]
243 #else
244         strd    r4,r5,[sp,#224]         @ D[3] = C[2]
245 #endif
246         eor     r6,r6,r5
247 #ifndef __thumb2__
248         ldr     r4,[sp,#0]
249 #endif
250         @ mov   r7,r7,ror#32-10         @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
251         @ mov   r6,r6,ror#32-11
252 #ifndef __thumb2__
253         ldr     r5,[sp,#0+4]
254 #else
255         ldrd    r4,r5,[sp,#0]
256 #endif
257         eor     r8,r8,r12
258         eor     r9,r9,r14
259 #ifndef __thumb2__
260         ldr     r12,[sp,#96]
261 #endif
262         eor     r0,r0,r4
263 #ifndef __thumb2__
264         ldr     r14,[sp,#96+4]
265 #else
266         ldrd    r12,r14,[sp,#96]
267 #endif
268         @ mov   r8,r8,ror#32-7          @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
269         @ mov   r9,r9,ror#32-7
270         eor     r1,r1,r5                @ C[0] =       A[0][0] ^ C[0];
271         eor     r12,r12,r2
272 #ifndef __thumb2__
273         ldr     r2,[sp,#48]
274 #endif
275         eor     r14,r14,r3
276 #ifndef __thumb2__
277         ldr     r3,[sp,#48+4]
278 #else
279         ldrd    r2,r3,[sp,#48]
280 #endif
281         mov     r5,r12,ror#32-21                @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
282         ldr     r12,[sp,#444]                   @ load counter
283         eor     r2,r2,r10
284         adr     r10,iotas32
285         mov     r4,r14,ror#32-22
286         add     r14,r10,r12
287         eor     r3,r3,r11
288         ldmia   r14,{r10,r11}           @ iotas[i]
289         bic     r12,r4,r2,ror#32-22
290         bic     r14,r5,r3,ror#32-22
291         mov     r2,r2,ror#32-22         @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
292         mov     r3,r3,ror#32-22
293         eor     r12,r12,r0
294         eor     r14,r14,r1
295         eor     r10,r10,r12
296         eor     r11,r11,r14
297 #ifndef __thumb2__
298         str     r10,[sp,#240]           @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
299 #endif
300         bic     r12,r6,r4,ror#11
301 #ifndef __thumb2__
302         str     r11,[sp,#240+4]
303 #else
304         strd    r10,r11,[sp,#240]               @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
305 #endif
306         bic     r14,r7,r5,ror#10
307         bic     r10,r8,r6,ror#32-(11-7)
308         bic     r11,r9,r7,ror#32-(10-7)
309         eor     r12,r2,r12,ror#32-11
310 #ifndef __thumb2__
311         str     r12,[sp,#248]           @ R[0][1] = C[1] ^ (~C[2] & C[3]);
312 #endif
313         eor     r14,r3,r14,ror#32-10
314 #ifndef __thumb2__
315         str     r14,[sp,#248+4]
316 #else
317         strd    r12,r14,[sp,#248]               @ R[0][1] = C[1] ^ (~C[2] & C[3]);
318 #endif
319         eor     r10,r4,r10,ror#32-7
320         eor     r11,r5,r11,ror#32-7
321 #ifndef __thumb2__
322         str     r10,[sp,#256]           @ R[0][2] = C[2] ^ (~C[3] & C[4]);
323 #endif
324         bic     r12,r0,r8,ror#32-7
325 #ifndef __thumb2__
326         str     r11,[sp,#256+4]
327 #else
328         strd    r10,r11,[sp,#256]               @ R[0][2] = C[2] ^ (~C[3] & C[4]);
329 #endif
330         bic     r14,r1,r9,ror#32-7
331         eor     r12,r12,r6,ror#32-11
332 #ifndef __thumb2__
333         str     r12,[sp,#264]           @ R[0][3] = C[3] ^ (~C[4] & C[0]);
334 #endif
335         eor     r14,r14,r7,ror#32-10
336 #ifndef __thumb2__
337         str     r14,[sp,#264+4]
338 #else
339         strd    r12,r14,[sp,#264]               @ R[0][3] = C[3] ^ (~C[4] & C[0]);
340 #endif
341         bic     r10,r2,r0
342         add     r14,sp,#224
343 #ifndef __thumb2__
344         ldr     r0,[sp,#24]             @ A[0][3]
345 #endif
346         bic     r11,r3,r1
347 #ifndef __thumb2__
348         ldr     r1,[sp,#24+4]
349 #else
350         ldrd    r0,r1,[sp,#24]          @ A[0][3]
351 #endif
352         eor     r10,r10,r8,ror#32-7
353         eor     r11,r11,r9,ror#32-7
354 #ifndef __thumb2__
355         str     r10,[sp,#272]           @ R[0][4] = C[4] ^ (~C[0] & C[1]);
356 #endif
357         add     r9,sp,#200
358 #ifndef __thumb2__
359         str     r11,[sp,#272+4]
360 #else
361         strd    r10,r11,[sp,#272]               @ R[0][4] = C[4] ^ (~C[0] & C[1]);
362 #endif
363
364         ldmia   r14,{r10,r11,r12,r14}   @ D[3..4]
365         ldmia   r9,{r6,r7,r8,r9}                @ D[0..1]
366
367 #ifndef __thumb2__
368         ldr     r2,[sp,#72]             @ A[1][4]
369 #endif
370         eor     r0,r0,r10
371 #ifndef __thumb2__
372         ldr     r3,[sp,#72+4]
373 #else
374         ldrd    r2,r3,[sp,#72]          @ A[1][4]
375 #endif
376         eor     r1,r1,r11
377         @ mov   r0,r0,ror#32-14         @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
378 #ifndef __thumb2__
379         ldr     r10,[sp,#128]           @ A[3][1]
380 #endif
381         @ mov   r1,r1,ror#32-14
382 #ifndef __thumb2__
383         ldr     r11,[sp,#128+4]
384 #else
385         ldrd    r10,r11,[sp,#128]               @ A[3][1]
386 #endif
387
388         eor     r2,r2,r12
389 #ifndef __thumb2__
390         ldr     r4,[sp,#80]             @ A[2][0]
391 #endif
392         eor     r3,r3,r14
393 #ifndef __thumb2__
394         ldr     r5,[sp,#80+4]
395 #else
396         ldrd    r4,r5,[sp,#80]          @ A[2][0]
397 #endif
398         @ mov   r2,r2,ror#32-10         @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
399         @ mov   r3,r3,ror#32-10
400
401         eor     r6,r6,r4
402 #ifndef __thumb2__
403         ldr     r12,[sp,#216]           @ D[2]
404 #endif
405         eor     r7,r7,r5
406 #ifndef __thumb2__
407         ldr     r14,[sp,#216+4]
408 #else
409         ldrd    r12,r14,[sp,#216]               @ D[2]
410 #endif
411         mov     r5,r6,ror#32-1          @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
412         mov     r4,r7,ror#32-2
413
414         eor     r10,r10,r8
415 #ifndef __thumb2__
416         ldr     r8,[sp,#176]            @ A[4][2]
417 #endif
418         eor     r11,r11,r9
419 #ifndef __thumb2__
420         ldr     r9,[sp,#176+4]
421 #else
422         ldrd    r8,r9,[sp,#176]         @ A[4][2]
423 #endif
424         mov     r7,r10,ror#32-22                @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
425         mov     r6,r11,ror#32-23
426
427         bic     r10,r4,r2,ror#32-10
428         bic     r11,r5,r3,ror#32-10
429         eor     r12,r12,r8
430         eor     r14,r14,r9
431         mov     r9,r12,ror#32-30                @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
432         mov     r8,r14,ror#32-31
433         eor     r10,r10,r0,ror#32-14
434         eor     r11,r11,r1,ror#32-14
435 #ifndef __thumb2__
436         str     r10,[sp,#280]           @ R[1][0] = C[0] ^ (~C[1] & C[2])
437 #endif
438         bic     r12,r6,r4
439 #ifndef __thumb2__
440         str     r11,[sp,#280+4]
441 #else
442         strd    r10,r11,[sp,#280]               @ R[1][0] = C[0] ^ (~C[1] & C[2])
443 #endif
444         bic     r14,r7,r5
445         eor     r12,r12,r2,ror#32-10
446 #ifndef __thumb2__
447         str     r12,[sp,#288]           @ R[1][1] = C[1] ^ (~C[2] & C[3]);
448 #endif
449         eor     r14,r14,r3,ror#32-10
450 #ifndef __thumb2__
451         str     r14,[sp,#288+4]
452 #else
453         strd    r12,r14,[sp,#288]               @ R[1][1] = C[1] ^ (~C[2] & C[3]);
454 #endif
455         bic     r10,r8,r6
456         bic     r11,r9,r7
457         bic     r12,r0,r8,ror#14
458         bic     r14,r1,r9,ror#14
459         eor     r10,r10,r4
460         eor     r11,r11,r5
461 #ifndef __thumb2__
462         str     r10,[sp,#296]           @ R[1][2] = C[2] ^ (~C[3] & C[4]);
463 #endif
464         bic     r2,r2,r0,ror#32-(14-10)
465 #ifndef __thumb2__
466         str     r11,[sp,#296+4]
467 #else
468         strd    r10,r11,[sp,#296]               @ R[1][2] = C[2] ^ (~C[3] & C[4]);
469 #endif
470         eor     r12,r6,r12,ror#32-14
471         bic     r11,r3,r1,ror#32-(14-10)
472 #ifndef __thumb2__
473         str     r12,[sp,#304]           @ R[1][3] = C[3] ^ (~C[4] & C[0]);
474 #endif
475         eor     r14,r7,r14,ror#32-14
476 #ifndef __thumb2__
477         str     r14,[sp,#304+4]
478 #else
479         strd    r12,r14,[sp,#304]               @ R[1][3] = C[3] ^ (~C[4] & C[0]);
480 #endif
481         add     r12,sp,#208
482 #ifndef __thumb2__
483         ldr     r1,[sp,#8]              @ A[0][1]
484 #endif
485         eor     r10,r8,r2,ror#32-10
486 #ifndef __thumb2__
487         ldr     r0,[sp,#8+4]
488 #else
489         ldrd    r1,r0,[sp,#8]           @ A[0][1]
490 #endif
491         eor     r11,r9,r11,ror#32-10
492 #ifndef __thumb2__
493         str     r10,[sp,#312]           @ R[1][4] = C[4] ^ (~C[0] & C[1]);
494 #endif
495 #ifndef __thumb2__
496         str     r11,[sp,#312+4]
497 #else
498         strd    r10,r11,[sp,#312]               @ R[1][4] = C[4] ^ (~C[0] & C[1]);
499 #endif
500
501         add     r9,sp,#224
502         ldmia   r12,{r10,r11,r12,r14}   @ D[1..2]
503 #ifndef __thumb2__
504         ldr     r2,[sp,#56]             @ A[1][2]
505 #endif
506 #ifndef __thumb2__
507         ldr     r3,[sp,#56+4]
508 #else
509         ldrd    r2,r3,[sp,#56]          @ A[1][2]
510 #endif
511         ldmia   r9,{r6,r7,r8,r9}                @ D[3..4]
512
513         eor     r1,r1,r10
514 #ifndef __thumb2__
515         ldr     r4,[sp,#104]            @ A[2][3]
516 #endif
517         eor     r0,r0,r11
518 #ifndef __thumb2__
519         ldr     r5,[sp,#104+4]
520 #else
521         ldrd    r4,r5,[sp,#104]         @ A[2][3]
522 #endif
523         mov     r0,r0,ror#32-1          @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
524
525         eor     r2,r2,r12
526 #ifndef __thumb2__
527         ldr     r10,[sp,#152]           @ A[3][4]
528 #endif
529         eor     r3,r3,r14
530 #ifndef __thumb2__
531         ldr     r11,[sp,#152+4]
532 #else
533         ldrd    r10,r11,[sp,#152]               @ A[3][4]
534 #endif
535         @ mov   r2,r2,ror#32-3          @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
536 #ifndef __thumb2__
537         ldr     r12,[sp,#200]           @ D[0]
538 #endif
539         @ mov   r3,r3,ror#32-3
540 #ifndef __thumb2__
541         ldr     r14,[sp,#200+4]
542 #else
543         ldrd    r12,r14,[sp,#200]               @ D[0]
544 #endif
545
546         eor     r4,r4,r6
547         eor     r5,r5,r7
548         @ mov   r5,r6,ror#32-12         @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
549         @ mov   r4,r7,ror#32-13         @ [track reverse order below]
550
551         eor     r10,r10,r8
552 #ifndef __thumb2__
553         ldr     r8,[sp,#160]            @ A[4][0]
554 #endif
555         eor     r11,r11,r9
556 #ifndef __thumb2__
557         ldr     r9,[sp,#160+4]
558 #else
559         ldrd    r8,r9,[sp,#160]         @ A[4][0]
560 #endif
561         mov     r6,r10,ror#32-4         @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
562         mov     r7,r11,ror#32-4
563
564         eor     r12,r12,r8
565         eor     r14,r14,r9
566         mov     r8,r12,ror#32-9         @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
567         mov     r9,r14,ror#32-9
568
569         bic     r10,r5,r2,ror#13-3
570         bic     r11,r4,r3,ror#12-3
571         bic     r12,r6,r5,ror#32-13
572         bic     r14,r7,r4,ror#32-12
573         eor     r10,r0,r10,ror#32-13
574         eor     r11,r1,r11,ror#32-12
575 #ifndef __thumb2__
576         str     r10,[sp,#320]           @ R[2][0] = C[0] ^ (~C[1] & C[2])
577 #endif
578         eor     r12,r12,r2,ror#32-3
579 #ifndef __thumb2__
580         str     r11,[sp,#320+4]
581 #else
582         strd    r10,r11,[sp,#320]               @ R[2][0] = C[0] ^ (~C[1] & C[2])
583 #endif
584         eor     r14,r14,r3,ror#32-3
585 #ifndef __thumb2__
586         str     r12,[sp,#328]           @ R[2][1] = C[1] ^ (~C[2] & C[3]);
587 #endif
588         bic     r10,r8,r6
589         bic     r11,r9,r7
590 #ifndef __thumb2__
591         str     r14,[sp,#328+4]
592 #else
593         strd    r12,r14,[sp,#328]               @ R[2][1] = C[1] ^ (~C[2] & C[3]);
594 #endif
595         eor     r10,r10,r5,ror#32-13
596         eor     r11,r11,r4,ror#32-12
597 #ifndef __thumb2__
598         str     r10,[sp,#336]           @ R[2][2] = C[2] ^ (~C[3] & C[4]);
599 #endif
600         bic     r12,r0,r8
601 #ifndef __thumb2__
602         str     r11,[sp,#336+4]
603 #else
604         strd    r10,r11,[sp,#336]               @ R[2][2] = C[2] ^ (~C[3] & C[4]);
605 #endif
606         bic     r14,r1,r9
607         eor     r12,r12,r6
608         eor     r14,r14,r7
609 #ifndef __thumb2__
610         str     r12,[sp,#344]           @ R[2][3] = C[3] ^ (~C[4] & C[0]);
611 #endif
612         bic     r10,r2,r0,ror#3
613 #ifndef __thumb2__
614         str     r14,[sp,#344+4]
615 #else
616         strd    r12,r14,[sp,#344]               @ R[2][3] = C[3] ^ (~C[4] & C[0]);
617 #endif
618         bic     r11,r3,r1,ror#3
619 #ifndef __thumb2__
620         ldr     r1,[sp,#32]             @ A[0][4] [in reverse order]
621 #endif
622         eor     r10,r8,r10,ror#32-3
623 #ifndef __thumb2__
624         ldr     r0,[sp,#32+4]
625 #else
626         ldrd    r1,r0,[sp,#32]          @ A[0][4] [in reverse order]
627 #endif
628         eor     r11,r9,r11,ror#32-3
629 #ifndef __thumb2__
630         str     r10,[sp,#352]           @ R[2][4] = C[4] ^ (~C[0] & C[1]);
631 #endif
632         add     r9,sp,#208
633 #ifndef __thumb2__
634         str     r11,[sp,#352+4]
635 #else
636         strd    r10,r11,[sp,#352]               @ R[2][4] = C[4] ^ (~C[0] & C[1]);
637 #endif
638
639 #ifndef __thumb2__
640         ldr     r10,[sp,#232]           @ D[4]
641 #endif
642 #ifndef __thumb2__
643         ldr     r11,[sp,#232+4]
644 #else
645         ldrd    r10,r11,[sp,#232]               @ D[4]
646 #endif
647 #ifndef __thumb2__
648         ldr     r12,[sp,#200]           @ D[0]
649 #endif
650 #ifndef __thumb2__
651         ldr     r14,[sp,#200+4]
652 #else
653         ldrd    r12,r14,[sp,#200]               @ D[0]
654 #endif
655
656         ldmia   r9,{r6,r7,r8,r9}                @ D[1..2]
657
658         eor     r1,r1,r10
659 #ifndef __thumb2__
660         ldr     r2,[sp,#40]             @ A[1][0]
661 #endif
662         eor     r0,r0,r11
663 #ifndef __thumb2__
664         ldr     r3,[sp,#40+4]
665 #else
666         ldrd    r2,r3,[sp,#40]          @ A[1][0]
667 #endif
668         @ mov   r1,r10,ror#32-13                @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
669 #ifndef __thumb2__
670         ldr     r4,[sp,#88]             @ A[2][1]
671 #endif
672         @ mov   r0,r11,ror#32-14                @ [was loaded in reverse order]
673 #ifndef __thumb2__
674         ldr     r5,[sp,#88+4]
675 #else
676         ldrd    r4,r5,[sp,#88]          @ A[2][1]
677 #endif
678
679         eor     r2,r2,r12
680 #ifndef __thumb2__
681         ldr     r10,[sp,#136]           @ A[3][2]
682 #endif
683         eor     r3,r3,r14
684 #ifndef __thumb2__
685         ldr     r11,[sp,#136+4]
686 #else
687         ldrd    r10,r11,[sp,#136]               @ A[3][2]
688 #endif
689         @ mov   r2,r2,ror#32-18         @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
690 #ifndef __thumb2__
691         ldr     r12,[sp,#224]           @ D[3]
692 #endif
693         @ mov   r3,r3,ror#32-18
694 #ifndef __thumb2__
695         ldr     r14,[sp,#224+4]
696 #else
697         ldrd    r12,r14,[sp,#224]               @ D[3]
698 #endif
699
700         eor     r6,r6,r4
701         eor     r7,r7,r5
702         mov     r4,r6,ror#32-5          @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
703         mov     r5,r7,ror#32-5
704
705         eor     r10,r10,r8
706 #ifndef __thumb2__
707         ldr     r8,[sp,#184]            @ A[4][3]
708 #endif
709         eor     r11,r11,r9
710 #ifndef __thumb2__
711         ldr     r9,[sp,#184+4]
712 #else
713         ldrd    r8,r9,[sp,#184]         @ A[4][3]
714 #endif
715         mov     r7,r10,ror#32-7         @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
716         mov     r6,r11,ror#32-8
717
718         eor     r12,r12,r8
719         eor     r14,r14,r9
720         mov     r8,r12,ror#32-28                @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
721         mov     r9,r14,ror#32-28
722
723         bic     r10,r4,r2,ror#32-18
724         bic     r11,r5,r3,ror#32-18
725         eor     r10,r10,r0,ror#32-14
726         eor     r11,r11,r1,ror#32-13
727 #ifndef __thumb2__
728         str     r10,[sp,#360]           @ R[3][0] = C[0] ^ (~C[1] & C[2])
729 #endif
730         bic     r12,r6,r4
731 #ifndef __thumb2__
732         str     r11,[sp,#360+4]
733 #else
734         strd    r10,r11,[sp,#360]               @ R[3][0] = C[0] ^ (~C[1] & C[2])
735 #endif
736         bic     r14,r7,r5
737         eor     r12,r12,r2,ror#32-18
738 #ifndef __thumb2__
739         str     r12,[sp,#368]           @ R[3][1] = C[1] ^ (~C[2] & C[3]);
740 #endif
741         eor     r14,r14,r3,ror#32-18
742 #ifndef __thumb2__
743         str     r14,[sp,#368+4]
744 #else
745         strd    r12,r14,[sp,#368]               @ R[3][1] = C[1] ^ (~C[2] & C[3]);
746 #endif
747         bic     r10,r8,r6
748         bic     r11,r9,r7
749         bic     r12,r0,r8,ror#14
750         bic     r14,r1,r9,ror#13
751         eor     r10,r10,r4
752         eor     r11,r11,r5
753 #ifndef __thumb2__
754         str     r10,[sp,#376]           @ R[3][2] = C[2] ^ (~C[3] & C[4]);
755 #endif
756         bic     r2,r2,r0,ror#18-14
757 #ifndef __thumb2__
758         str     r11,[sp,#376+4]
759 #else
760         strd    r10,r11,[sp,#376]               @ R[3][2] = C[2] ^ (~C[3] & C[4]);
761 #endif
762         eor     r12,r6,r12,ror#32-14
763         bic     r11,r3,r1,ror#18-13
764         eor     r14,r7,r14,ror#32-13
765 #ifndef __thumb2__
766         str     r12,[sp,#384]           @ R[3][3] = C[3] ^ (~C[4] & C[0]);
767 #endif
768 #ifndef __thumb2__
769         str     r14,[sp,#384+4]
770 #else
771         strd    r12,r14,[sp,#384]               @ R[3][3] = C[3] ^ (~C[4] & C[0]);
772 #endif
773         add     r14,sp,#216
774 #ifndef __thumb2__
775         ldr     r0,[sp,#16]             @ A[0][2]
776 #endif
777         eor     r10,r8,r2,ror#32-18
778 #ifndef __thumb2__
779         ldr     r1,[sp,#16+4]
780 #else
781         ldrd    r0,r1,[sp,#16]          @ A[0][2]
782 #endif
783         eor     r11,r9,r11,ror#32-18
784 #ifndef __thumb2__
785         str     r10,[sp,#392]           @ R[3][4] = C[4] ^ (~C[0] & C[1]);
786 #endif
787 #ifndef __thumb2__
788         str     r11,[sp,#392+4]
789 #else
790         strd    r10,r11,[sp,#392]               @ R[3][4] = C[4] ^ (~C[0] & C[1]);
791 #endif
792
793         ldmia   r14,{r10,r11,r12,r14}   @ D[2..3]
794 #ifndef __thumb2__
795         ldr     r2,[sp,#64]             @ A[1][3]
796 #endif
797 #ifndef __thumb2__
798         ldr     r3,[sp,#64+4]
799 #else
800         ldrd    r2,r3,[sp,#64]          @ A[1][3]
801 #endif
802 #ifndef __thumb2__
803         ldr     r6,[sp,#232]            @ D[4]
804 #endif
805 #ifndef __thumb2__
806         ldr     r7,[sp,#232+4]
807 #else
808         ldrd    r6,r7,[sp,#232]         @ D[4]
809 #endif
810
811         eor     r0,r0,r10
812 #ifndef __thumb2__
813         ldr     r4,[sp,#112]            @ A[2][4]
814 #endif
815         eor     r1,r1,r11
816 #ifndef __thumb2__
817         ldr     r5,[sp,#112+4]
818 #else
819         ldrd    r4,r5,[sp,#112]         @ A[2][4]
820 #endif
821         @ mov   r0,r0,ror#32-31         @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
822 #ifndef __thumb2__
823         ldr     r8,[sp,#200]            @ D[0]
824 #endif
825         @ mov   r1,r1,ror#32-31
826 #ifndef __thumb2__
827         ldr     r9,[sp,#200+4]
828 #else
829         ldrd    r8,r9,[sp,#200]         @ D[0]
830 #endif
831
832         eor     r12,r12,r2
833 #ifndef __thumb2__
834         ldr     r10,[sp,#120]           @ A[3][0]
835 #endif
836         eor     r14,r14,r3
837 #ifndef __thumb2__
838         ldr     r11,[sp,#120+4]
839 #else
840         ldrd    r10,r11,[sp,#120]               @ A[3][0]
841 #endif
842         mov     r3,r12,ror#32-27                @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
843 #ifndef __thumb2__
844         ldr     r12,[sp,#208]           @ D[1]
845 #endif
846         mov     r2,r14,ror#32-28
847 #ifndef __thumb2__
848         ldr     r14,[sp,#208+4]
849 #else
850         ldrd    r12,r14,[sp,#208]               @ D[1]
851 #endif
852
853         eor     r6,r6,r4
854         eor     r7,r7,r5
855         mov     r5,r6,ror#32-19         @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
856         mov     r4,r7,ror#32-20
857
858         eor     r10,r10,r8
859 #ifndef __thumb2__
860         ldr     r8,[sp,#168]            @ A[4][1]
861 #endif
862         eor     r11,r11,r9
863 #ifndef __thumb2__
864         ldr     r9,[sp,#168+4]
865 #else
866         ldrd    r8,r9,[sp,#168]         @ A[4][1]
867 #endif
868         mov     r7,r10,ror#32-20                @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
869         mov     r6,r11,ror#32-21
870
871         eor     r8,r8,r12
872         eor     r9,r9,r14
873         @ mov   r8,r2,ror#32-1          @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
874         @ mov   r9,r3,ror#32-1
875
876         bic     r10,r4,r2
877         bic     r11,r5,r3
878         eor     r10,r10,r0,ror#32-31
879 #ifndef __thumb2__
880         str     r10,[sp,#400]           @ R[4][0] = C[0] ^ (~C[1] & C[2])
881 #endif
882         eor     r11,r11,r1,ror#32-31
883 #ifndef __thumb2__
884         str     r11,[sp,#400+4]
885 #else
886         strd    r10,r11,[sp,#400]               @ R[4][0] = C[0] ^ (~C[1] & C[2])
887 #endif
888         bic     r12,r6,r4
889         bic     r14,r7,r5
890         eor     r12,r12,r2
891         eor     r14,r14,r3
892 #ifndef __thumb2__
893         str     r12,[sp,#408]           @ R[4][1] = C[1] ^ (~C[2] & C[3]);
894 #endif
895         bic     r10,r8,r6,ror#1
896 #ifndef __thumb2__
897         str     r14,[sp,#408+4]
898 #else
899         strd    r12,r14,[sp,#408]               @ R[4][1] = C[1] ^ (~C[2] & C[3]);
900 #endif
901         bic     r11,r9,r7,ror#1
902         bic     r12,r0,r8,ror#31-1
903         bic     r14,r1,r9,ror#31-1
904         eor     r4,r4,r10,ror#32-1
905 #ifndef __thumb2__
906         str     r4,[sp,#416]            @ R[4][2] = C[2] ^= (~C[3] & C[4]);
907 #endif
908         eor     r5,r5,r11,ror#32-1
909 #ifndef __thumb2__
910         str     r5,[sp,#416+4]
911 #else
912         strd    r4,r5,[sp,#416]         @ R[4][2] = C[2] ^= (~C[3] & C[4]);
913 #endif
914         eor     r6,r6,r12,ror#32-31
915         eor     r7,r7,r14,ror#32-31
916 #ifndef __thumb2__
917         str     r6,[sp,#424]            @ R[4][3] = C[3] ^= (~C[4] & C[0]);
918 #endif
919         bic     r10,r2,r0,ror#32-31
920 #ifndef __thumb2__
921         str     r7,[sp,#424+4]
922 #else
923         strd    r6,r7,[sp,#424]         @ R[4][3] = C[3] ^= (~C[4] & C[0]);
924 #endif
925         bic     r11,r3,r1,ror#32-31
926         add     r12,sp,#240
927         eor     r8,r10,r8,ror#32-1
928         add     r10,sp,#280
929         eor     r9,r11,r9,ror#32-1
930 #ifndef __thumb2__
931         str     r8,[sp,#432]            @ R[4][4] = C[4] ^= (~C[0] & C[1]);
932 #endif
933 #ifndef __thumb2__
934         str     r9,[sp,#432+4]
935 #else
936         strd    r8,r9,[sp,#432]         @ R[4][4] = C[4] ^= (~C[0] & C[1]);
937 #endif
938         ldmia   r12,{r0,r1,r2,r3}               @ A[0][0..1]
939         ldmia   r10,{r10,r11,r12,r14}   @ A[1][0..1]
940 #ifdef  __thumb2__
941         eor     r0,r0,r10
942         eor     r1,r1,r11
943         eor     r2,r2,r12
944         ldrd    r10,r11,[sp,#296]
945         eor     r3,r3,r14
946         ldrd    r12,r14,[sp,#304]
947         eor     r4,r4,r10
948         eor     r5,r5,r11
949         eor     r6,r6,r12
950         ldrd    r10,r11,[sp,#312]
951         eor     r7,r7,r14
952         ldrd    r12,r14,[sp,#320]
953         eor     r8,r8,r10
954         eor     r9,r9,r11
955         eor     r0,r0,r12
956         ldrd    r10,r11,[sp,#328]
957         eor     r1,r1,r14
958         ldrd    r12,r14,[sp,#336]
959         eor     r2,r2,r10
960         eor     r3,r3,r11
961         eor     r4,r4,r12
962         ldrd    r10,r11,[sp,#344]
963         eor     r5,r5,r14
964         ldrd    r12,r14,[sp,#352]
965         eor     r6,r6,r10
966         eor     r7,r7,r11
967         eor     r8,r8,r12
968         ldrd    r10,r11,[sp,#360]
969         eor     r9,r9,r14
970         ldrd    r12,r14,[sp,#368]
971         eor     r0,r0,r10
972         eor     r1,r1,r11
973         eor     r2,r2,r12
974         ldrd    r10,r11,[sp,#376]
975         eor     r3,r3,r14
976         ldrd    r12,r14,[sp,#384]
977         eor     r4,r4,r10
978         eor     r5,r5,r11
979         eor     r6,r6,r12
980         ldrd    r10,r11,[sp,#392]
981         eor     r7,r7,r14
982         ldrd    r12,r14,[sp,#400]
983         eor     r8,r8,r10
984         eor     r9,r9,r11
985         eor     r0,r0,r12
986         ldrd    r10,r11,[sp,#408]
987         eor     r1,r1,r14
988         ldrd    r12,r14,[sp,#256]
989         eor     r2,r2,r10
990         eor     r3,r3,r11
991         eor     r4,r4,r12
992         ldrd    r10,r11,[sp,#264]
993         eor     r5,r5,r14
994         ldrd    r12,r14,[sp,#272]
995 #else
996         eor     r0,r0,r10
997         add     r10,sp,#296
998         eor     r1,r1,r11
999         eor     r2,r2,r12
1000         eor     r3,r3,r14
1001         ldmia   r10,{r10,r11,r12,r14}   @ A[1][2..3]
1002         eor     r4,r4,r10
1003         add     r10,sp,#312
1004         eor     r5,r5,r11
1005         eor     r6,r6,r12
1006         eor     r7,r7,r14
1007         ldmia   r10,{r10,r11,r12,r14}   @ A[1][4]..A[2][0]
1008         eor     r8,r8,r10
1009         add     r10,sp,#328
1010         eor     r9,r9,r11
1011         eor     r0,r0,r12
1012         eor     r1,r1,r14
1013         ldmia   r10,{r10,r11,r12,r14}   @ A[2][1..2]
1014         eor     r2,r2,r10
1015         add     r10,sp,#344
1016         eor     r3,r3,r11
1017         eor     r4,r4,r12
1018         eor     r5,r5,r14
1019         ldmia   r10,{r10,r11,r12,r14}   @ A[2][3..4]
1020         eor     r6,r6,r10
1021         add     r10,sp,#360
1022         eor     r7,r7,r11
1023         eor     r8,r8,r12
1024         eor     r9,r9,r14
1025         ldmia   r10,{r10,r11,r12,r14}   @ A[3][0..1]
1026         eor     r0,r0,r10
1027         add     r10,sp,#376
1028         eor     r1,r1,r11
1029         eor     r2,r2,r12
1030         eor     r3,r3,r14
1031         ldmia   r10,{r10,r11,r12,r14}   @ A[3][2..3]
1032         eor     r4,r4,r10
1033         add     r10,sp,#392
1034         eor     r5,r5,r11
1035         eor     r6,r6,r12
1036         eor     r7,r7,r14
1037         ldmia   r10,{r10,r11,r12,r14}   @ A[3][4]..A[4][0]
1038         eor     r8,r8,r10
1039         ldr     r10,[sp,#408]           @ A[4][1]
1040         eor     r9,r9,r11
1041         ldr     r11,[sp,#408+4]
1042         eor     r0,r0,r12
1043         ldr     r12,[sp,#256]           @ A[0][2]
1044         eor     r1,r1,r14
1045         ldr     r14,[sp,#256+4]
1046         eor     r2,r2,r10
1047         add     r10,sp,#264
1048         eor     r3,r3,r11
1049         eor     r4,r4,r12
1050         eor     r5,r5,r14
1051         ldmia   r10,{r10,r11,r12,r14}   @ A[0][3..4]
1052 #endif
1053         eor     r6,r6,r10
1054         eor     r7,r7,r11
1055         eor     r8,r8,r12
1056         eor     r9,r9,r14
1057
1058         eor     r10,r0,r5,ror#32-1      @ E[0] = ROL64(C[2], 1) ^ C[0];
1059 #ifndef __thumb2__
1060         str     r10,[sp,#208]           @ D[1] = E[0]
1061 #endif
1062         eor     r11,r1,r4
1063 #ifndef __thumb2__
1064         str     r11,[sp,#208+4]
1065 #else
1066         strd    r10,r11,[sp,#208]               @ D[1] = E[0]
1067 #endif
1068         eor     r12,r6,r1,ror#32-1      @ E[1] = ROL64(C[0], 1) ^ C[3];
1069         eor     r14,r7,r0
1070 #ifndef __thumb2__
1071         str     r12,[sp,#232]           @ D[4] = E[1]
1072 #endif
1073         eor     r0,r8,r3,ror#32-1       @ C[0] = ROL64(C[1], 1) ^ C[4];
1074 #ifndef __thumb2__
1075         str     r14,[sp,#232+4]
1076 #else
1077         strd    r12,r14,[sp,#232]               @ D[4] = E[1]
1078 #endif
1079         eor     r1,r9,r2
1080 #ifndef __thumb2__
1081         str     r0,[sp,#200]            @ D[0] = C[0]
1082 #endif
1083         eor     r2,r2,r7,ror#32-1       @ C[1] = ROL64(C[3], 1) ^ C[1];
1084 #ifndef __thumb2__
1085         ldr     r7,[sp,#384]
1086 #endif
1087         eor     r3,r3,r6
1088 #ifndef __thumb2__
1089         str     r1,[sp,#200+4]
1090 #else
1091         strd    r0,r1,[sp,#200]         @ D[0] = C[0]
1092 #endif
1093 #ifndef __thumb2__
1094         ldr     r6,[sp,#384+4]
1095 #else
1096         ldrd    r7,r6,[sp,#384]
1097 #endif
1098 #ifndef __thumb2__
1099         str     r2,[sp,#216]            @ D[2] = C[1]
1100 #endif
1101         eor     r4,r4,r9,ror#32-1       @ C[2] = ROL64(C[4], 1) ^ C[2];
1102 #ifndef __thumb2__
1103         str     r3,[sp,#216+4]
1104 #else
1105         strd    r2,r3,[sp,#216]         @ D[2] = C[1]
1106 #endif
1107         eor     r5,r5,r8
1108
1109 #ifndef __thumb2__
1110         ldr     r8,[sp,#432]
1111 #endif
1112 #ifndef __thumb2__
1113         ldr     r9,[sp,#432+4]
1114 #else
1115         ldrd    r8,r9,[sp,#432]
1116 #endif
1117 #ifndef __thumb2__
1118         str     r4,[sp,#224]            @ D[3] = C[2]
1119 #endif
1120         eor     r7,r7,r4
1121 #ifndef __thumb2__
1122         str     r5,[sp,#224+4]
1123 #else
1124         strd    r4,r5,[sp,#224]         @ D[3] = C[2]
1125 #endif
1126         eor     r6,r6,r5
1127 #ifndef __thumb2__
1128         ldr     r4,[sp,#240]
1129 #endif
1130         @ mov   r7,r7,ror#32-10         @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
1131         @ mov   r6,r6,ror#32-11
1132 #ifndef __thumb2__
1133         ldr     r5,[sp,#240+4]
1134 #else
1135         ldrd    r4,r5,[sp,#240]
1136 #endif
1137         eor     r8,r8,r12
1138         eor     r9,r9,r14
1139 #ifndef __thumb2__
1140         ldr     r12,[sp,#336]
1141 #endif
1142         eor     r0,r0,r4
1143 #ifndef __thumb2__
1144         ldr     r14,[sp,#336+4]
1145 #else
1146         ldrd    r12,r14,[sp,#336]
1147 #endif
1148         @ mov   r8,r8,ror#32-7          @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
1149         @ mov   r9,r9,ror#32-7
1150         eor     r1,r1,r5                @ C[0] =       A[0][0] ^ C[0];
1151         eor     r12,r12,r2
1152 #ifndef __thumb2__
1153         ldr     r2,[sp,#288]
1154 #endif
1155         eor     r14,r14,r3
1156 #ifndef __thumb2__
1157         ldr     r3,[sp,#288+4]
1158 #else
1159         ldrd    r2,r3,[sp,#288]
1160 #endif
1161         mov     r5,r12,ror#32-21                @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1162         ldr     r12,[sp,#444]                   @ load counter
1163         eor     r2,r2,r10
1164         adr     r10,iotas32
1165         mov     r4,r14,ror#32-22
1166         add     r14,r10,r12
1167         eor     r3,r3,r11
1168 #ifndef __thumb2__
1169         ldr     r10,[r14,#8]            @ iotas[i].lo
1170 #endif
1171         add     r12,r12,#16
1172 #ifndef __thumb2__
1173         ldr     r11,[r14,#12]           @ iotas[i].hi
1174 #else
1175         ldrd    r10,r11,[r14,#8]                @ iotas[i].lo
1176 #endif
1177         cmp     r12,#192
1178         str     r12,[sp,#444]                   @ store counter
1179         bic     r12,r4,r2,ror#32-22
1180         bic     r14,r5,r3,ror#32-22
1181         mov     r2,r2,ror#32-22         @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1182         mov     r3,r3,ror#32-22
1183         eor     r12,r12,r0
1184         eor     r14,r14,r1
1185         eor     r10,r10,r12
1186         eor     r11,r11,r14
1187 #ifndef __thumb2__
1188         str     r10,[sp,#0]             @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1189 #endif
1190         bic     r12,r6,r4,ror#11
1191 #ifndef __thumb2__
1192         str     r11,[sp,#0+4]
1193 #else
1194         strd    r10,r11,[sp,#0]         @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1195 #endif
1196         bic     r14,r7,r5,ror#10
1197         bic     r10,r8,r6,ror#32-(11-7)
1198         bic     r11,r9,r7,ror#32-(10-7)
1199         eor     r12,r2,r12,ror#32-11
1200 #ifndef __thumb2__
1201         str     r12,[sp,#8]             @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1202 #endif
1203         eor     r14,r3,r14,ror#32-10
1204 #ifndef __thumb2__
1205         str     r14,[sp,#8+4]
1206 #else
1207         strd    r12,r14,[sp,#8]         @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1208 #endif
1209         eor     r10,r4,r10,ror#32-7
1210         eor     r11,r5,r11,ror#32-7
1211 #ifndef __thumb2__
1212         str     r10,[sp,#16]            @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1213 #endif
1214         bic     r12,r0,r8,ror#32-7
1215 #ifndef __thumb2__
1216         str     r11,[sp,#16+4]
1217 #else
1218         strd    r10,r11,[sp,#16]                @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1219 #endif
1220         bic     r14,r1,r9,ror#32-7
1221         eor     r12,r12,r6,ror#32-11
1222 #ifndef __thumb2__
1223         str     r12,[sp,#24]            @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1224 #endif
1225         eor     r14,r14,r7,ror#32-10
1226 #ifndef __thumb2__
1227         str     r14,[sp,#24+4]
1228 #else
1229         strd    r12,r14,[sp,#24]                @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1230 #endif
1231         bic     r10,r2,r0
1232         add     r14,sp,#224
1233 #ifndef __thumb2__
1234         ldr     r0,[sp,#264]            @ A[0][3]
1235 #endif
1236         bic     r11,r3,r1
1237 #ifndef __thumb2__
1238         ldr     r1,[sp,#264+4]
1239 #else
1240         ldrd    r0,r1,[sp,#264]         @ A[0][3]
1241 #endif
1242         eor     r10,r10,r8,ror#32-7
1243         eor     r11,r11,r9,ror#32-7
1244 #ifndef __thumb2__
1245         str     r10,[sp,#32]            @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1246 #endif
1247         add     r9,sp,#200
1248 #ifndef __thumb2__
1249         str     r11,[sp,#32+4]
1250 #else
1251         strd    r10,r11,[sp,#32]                @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1252 #endif
1253
1254         ldmia   r14,{r10,r11,r12,r14}   @ D[3..4]
1255         ldmia   r9,{r6,r7,r8,r9}                @ D[0..1]
1256
1257 #ifndef __thumb2__
1258         ldr     r2,[sp,#312]            @ A[1][4]
1259 #endif
1260         eor     r0,r0,r10
1261 #ifndef __thumb2__
1262         ldr     r3,[sp,#312+4]
1263 #else
1264         ldrd    r2,r3,[sp,#312]         @ A[1][4]
1265 #endif
1266         eor     r1,r1,r11
1267         @ mov   r0,r0,ror#32-14         @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1268 #ifndef __thumb2__
1269         ldr     r10,[sp,#368]           @ A[3][1]
1270 #endif
1271         @ mov   r1,r1,ror#32-14
1272 #ifndef __thumb2__
1273         ldr     r11,[sp,#368+4]
1274 #else
1275         ldrd    r10,r11,[sp,#368]               @ A[3][1]
1276 #endif
1277
1278         eor     r2,r2,r12
1279 #ifndef __thumb2__
1280         ldr     r4,[sp,#320]            @ A[2][0]
1281 #endif
1282         eor     r3,r3,r14
1283 #ifndef __thumb2__
1284         ldr     r5,[sp,#320+4]
1285 #else
1286         ldrd    r4,r5,[sp,#320]         @ A[2][0]
1287 #endif
1288         @ mov   r2,r2,ror#32-10         @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1289         @ mov   r3,r3,ror#32-10
1290
1291         eor     r6,r6,r4
1292 #ifndef __thumb2__
1293         ldr     r12,[sp,#216]           @ D[2]
1294 #endif
1295         eor     r7,r7,r5
1296 #ifndef __thumb2__
1297         ldr     r14,[sp,#216+4]
1298 #else
1299         ldrd    r12,r14,[sp,#216]               @ D[2]
1300 #endif
1301         mov     r5,r6,ror#32-1          @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1302         mov     r4,r7,ror#32-2
1303
1304         eor     r10,r10,r8
1305 #ifndef __thumb2__
1306         ldr     r8,[sp,#416]            @ A[4][2]
1307 #endif
1308         eor     r11,r11,r9
1309 #ifndef __thumb2__
1310         ldr     r9,[sp,#416+4]
1311 #else
1312         ldrd    r8,r9,[sp,#416]         @ A[4][2]
1313 #endif
1314         mov     r7,r10,ror#32-22                @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1315         mov     r6,r11,ror#32-23
1316
1317         bic     r10,r4,r2,ror#32-10
1318         bic     r11,r5,r3,ror#32-10
1319         eor     r12,r12,r8
1320         eor     r14,r14,r9
1321         mov     r9,r12,ror#32-30                @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1322         mov     r8,r14,ror#32-31
1323         eor     r10,r10,r0,ror#32-14
1324         eor     r11,r11,r1,ror#32-14
1325 #ifndef __thumb2__
1326         str     r10,[sp,#40]            @ R[1][0] = C[0] ^ (~C[1] & C[2])
1327 #endif
1328         bic     r12,r6,r4
1329 #ifndef __thumb2__
1330         str     r11,[sp,#40+4]
1331 #else
1332         strd    r10,r11,[sp,#40]                @ R[1][0] = C[0] ^ (~C[1] & C[2])
1333 #endif
1334         bic     r14,r7,r5
1335         eor     r12,r12,r2,ror#32-10
1336 #ifndef __thumb2__
1337         str     r12,[sp,#48]            @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1338 #endif
1339         eor     r14,r14,r3,ror#32-10
1340 #ifndef __thumb2__
1341         str     r14,[sp,#48+4]
1342 #else
1343         strd    r12,r14,[sp,#48]                @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1344 #endif
1345         bic     r10,r8,r6
1346         bic     r11,r9,r7
1347         bic     r12,r0,r8,ror#14
1348         bic     r14,r1,r9,ror#14
1349         eor     r10,r10,r4
1350         eor     r11,r11,r5
1351 #ifndef __thumb2__
1352         str     r10,[sp,#56]            @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1353 #endif
1354         bic     r2,r2,r0,ror#32-(14-10)
1355 #ifndef __thumb2__
1356         str     r11,[sp,#56+4]
1357 #else
1358         strd    r10,r11,[sp,#56]                @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1359 #endif
1360         eor     r12,r6,r12,ror#32-14
1361         bic     r11,r3,r1,ror#32-(14-10)
1362 #ifndef __thumb2__
1363         str     r12,[sp,#64]            @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1364 #endif
1365         eor     r14,r7,r14,ror#32-14
1366 #ifndef __thumb2__
1367         str     r14,[sp,#64+4]
1368 #else
1369         strd    r12,r14,[sp,#64]                @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1370 #endif
1371         add     r12,sp,#208
1372 #ifndef __thumb2__
1373         ldr     r1,[sp,#248]            @ A[0][1]
1374 #endif
1375         eor     r10,r8,r2,ror#32-10
1376 #ifndef __thumb2__
1377         ldr     r0,[sp,#248+4]
1378 #else
1379         ldrd    r1,r0,[sp,#248]         @ A[0][1]
1380 #endif
1381         eor     r11,r9,r11,ror#32-10
1382 #ifndef __thumb2__
1383         str     r10,[sp,#72]            @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1384 #endif
1385 #ifndef __thumb2__
1386         str     r11,[sp,#72+4]
1387 #else
1388         strd    r10,r11,[sp,#72]                @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1389 #endif
1390
1391         add     r9,sp,#224
1392         ldmia   r12,{r10,r11,r12,r14}   @ D[1..2]
1393 #ifndef __thumb2__
1394         ldr     r2,[sp,#296]            @ A[1][2]
1395 #endif
1396 #ifndef __thumb2__
1397         ldr     r3,[sp,#296+4]
1398 #else
1399         ldrd    r2,r3,[sp,#296]         @ A[1][2]
1400 #endif
1401         ldmia   r9,{r6,r7,r8,r9}                @ D[3..4]
1402
1403         eor     r1,r1,r10
1404 #ifndef __thumb2__
1405         ldr     r4,[sp,#344]            @ A[2][3]
1406 #endif
1407         eor     r0,r0,r11
1408 #ifndef __thumb2__
1409         ldr     r5,[sp,#344+4]
1410 #else
1411         ldrd    r4,r5,[sp,#344]         @ A[2][3]
1412 #endif
1413         mov     r0,r0,ror#32-1          @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1414
1415         eor     r2,r2,r12
1416 #ifndef __thumb2__
1417         ldr     r10,[sp,#392]           @ A[3][4]
1418 #endif
1419         eor     r3,r3,r14
1420 #ifndef __thumb2__
1421         ldr     r11,[sp,#392+4]
1422 #else
1423         ldrd    r10,r11,[sp,#392]               @ A[3][4]
1424 #endif
1425         @ mov   r2,r2,ror#32-3          @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1426 #ifndef __thumb2__
1427         ldr     r12,[sp,#200]           @ D[0]
1428 #endif
1429         @ mov   r3,r3,ror#32-3
1430 #ifndef __thumb2__
1431         ldr     r14,[sp,#200+4]
1432 #else
1433         ldrd    r12,r14,[sp,#200]               @ D[0]
1434 #endif
1435
1436         eor     r4,r4,r6
1437         eor     r5,r5,r7
1438         @ mov   r5,r6,ror#32-12         @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1439         @ mov   r4,r7,ror#32-13         @ [track reverse order below]
1440
1441         eor     r10,r10,r8
1442 #ifndef __thumb2__
1443         ldr     r8,[sp,#400]            @ A[4][0]
1444 #endif
1445         eor     r11,r11,r9
1446 #ifndef __thumb2__
1447         ldr     r9,[sp,#400+4]
1448 #else
1449         ldrd    r8,r9,[sp,#400]         @ A[4][0]
1450 #endif
1451         mov     r6,r10,ror#32-4         @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1452         mov     r7,r11,ror#32-4
1453
1454         eor     r12,r12,r8
1455         eor     r14,r14,r9
1456         mov     r8,r12,ror#32-9         @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1457         mov     r9,r14,ror#32-9
1458
1459         bic     r10,r5,r2,ror#13-3
1460         bic     r11,r4,r3,ror#12-3
1461         bic     r12,r6,r5,ror#32-13
1462         bic     r14,r7,r4,ror#32-12
1463         eor     r10,r0,r10,ror#32-13
1464         eor     r11,r1,r11,ror#32-12
1465 #ifndef __thumb2__
1466         str     r10,[sp,#80]            @ R[2][0] = C[0] ^ (~C[1] & C[2])
1467 #endif
1468         eor     r12,r12,r2,ror#32-3
1469 #ifndef __thumb2__
1470         str     r11,[sp,#80+4]
1471 #else
1472         strd    r10,r11,[sp,#80]                @ R[2][0] = C[0] ^ (~C[1] & C[2])
1473 #endif
1474         eor     r14,r14,r3,ror#32-3
1475 #ifndef __thumb2__
1476         str     r12,[sp,#88]            @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1477 #endif
1478         bic     r10,r8,r6
1479         bic     r11,r9,r7
1480 #ifndef __thumb2__
1481         str     r14,[sp,#88+4]
1482 #else
1483         strd    r12,r14,[sp,#88]                @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1484 #endif
1485         eor     r10,r10,r5,ror#32-13
1486         eor     r11,r11,r4,ror#32-12
1487 #ifndef __thumb2__
1488         str     r10,[sp,#96]            @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1489 #endif
1490         bic     r12,r0,r8
1491 #ifndef __thumb2__
1492         str     r11,[sp,#96+4]
1493 #else
1494         strd    r10,r11,[sp,#96]                @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1495 #endif
1496         bic     r14,r1,r9
1497         eor     r12,r12,r6
1498         eor     r14,r14,r7
1499 #ifndef __thumb2__
1500         str     r12,[sp,#104]           @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1501 #endif
1502         bic     r10,r2,r0,ror#3
1503 #ifndef __thumb2__
1504         str     r14,[sp,#104+4]
1505 #else
1506         strd    r12,r14,[sp,#104]               @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1507 #endif
1508         bic     r11,r3,r1,ror#3
1509 #ifndef __thumb2__
1510         ldr     r1,[sp,#272]            @ A[0][4] [in reverse order]
1511 #endif
1512         eor     r10,r8,r10,ror#32-3
1513 #ifndef __thumb2__
1514         ldr     r0,[sp,#272+4]
1515 #else
1516         ldrd    r1,r0,[sp,#272]         @ A[0][4] [in reverse order]
1517 #endif
1518         eor     r11,r9,r11,ror#32-3
1519 #ifndef __thumb2__
1520         str     r10,[sp,#112]           @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1521 #endif
1522         add     r9,sp,#208
1523 #ifndef __thumb2__
1524         str     r11,[sp,#112+4]
1525 #else
1526         strd    r10,r11,[sp,#112]               @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1527 #endif
1528
1529 #ifndef __thumb2__
1530         ldr     r10,[sp,#232]           @ D[4]
1531 #endif
1532 #ifndef __thumb2__
1533         ldr     r11,[sp,#232+4]
1534 #else
1535         ldrd    r10,r11,[sp,#232]               @ D[4]
1536 #endif
1537 #ifndef __thumb2__
1538         ldr     r12,[sp,#200]           @ D[0]
1539 #endif
1540 #ifndef __thumb2__
1541         ldr     r14,[sp,#200+4]
1542 #else
1543         ldrd    r12,r14,[sp,#200]               @ D[0]
1544 #endif
1545
1546         ldmia   r9,{r6,r7,r8,r9}                @ D[1..2]
1547
1548         eor     r1,r1,r10
1549 #ifndef __thumb2__
1550         ldr     r2,[sp,#280]            @ A[1][0]
1551 #endif
1552         eor     r0,r0,r11
1553 #ifndef __thumb2__
1554         ldr     r3,[sp,#280+4]
1555 #else
1556         ldrd    r2,r3,[sp,#280]         @ A[1][0]
1557 #endif
1558         @ mov   r1,r10,ror#32-13                @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1559 #ifndef __thumb2__
1560         ldr     r4,[sp,#328]            @ A[2][1]
1561 #endif
1562         @ mov   r0,r11,ror#32-14                @ [was loaded in reverse order]
1563 #ifndef __thumb2__
1564         ldr     r5,[sp,#328+4]
1565 #else
1566         ldrd    r4,r5,[sp,#328]         @ A[2][1]
1567 #endif
1568
1569         eor     r2,r2,r12
1570 #ifndef __thumb2__
1571         ldr     r10,[sp,#376]           @ A[3][2]
1572 #endif
1573         eor     r3,r3,r14
1574 #ifndef __thumb2__
1575         ldr     r11,[sp,#376+4]
1576 #else
1577         ldrd    r10,r11,[sp,#376]               @ A[3][2]
1578 #endif
1579         @ mov   r2,r2,ror#32-18         @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1580 #ifndef __thumb2__
1581         ldr     r12,[sp,#224]           @ D[3]
1582 #endif
1583         @ mov   r3,r3,ror#32-18
1584 #ifndef __thumb2__
1585         ldr     r14,[sp,#224+4]
1586 #else
1587         ldrd    r12,r14,[sp,#224]               @ D[3]
1588 #endif
1589
1590         eor     r6,r6,r4
1591         eor     r7,r7,r5
1592         mov     r4,r6,ror#32-5          @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1593         mov     r5,r7,ror#32-5
1594
1595         eor     r10,r10,r8
1596 #ifndef __thumb2__
1597         ldr     r8,[sp,#424]            @ A[4][3]
1598 #endif
1599         eor     r11,r11,r9
1600 #ifndef __thumb2__
1601         ldr     r9,[sp,#424+4]
1602 #else
1603         ldrd    r8,r9,[sp,#424]         @ A[4][3]
1604 #endif
1605         mov     r7,r10,ror#32-7         @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1606         mov     r6,r11,ror#32-8
1607
1608         eor     r12,r12,r8
1609         eor     r14,r14,r9
1610         mov     r8,r12,ror#32-28                @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1611         mov     r9,r14,ror#32-28
1612
1613         bic     r10,r4,r2,ror#32-18
1614         bic     r11,r5,r3,ror#32-18
1615         eor     r10,r10,r0,ror#32-14
1616         eor     r11,r11,r1,ror#32-13
1617 #ifndef __thumb2__
1618         str     r10,[sp,#120]           @ R[3][0] = C[0] ^ (~C[1] & C[2])
1619 #endif
1620         bic     r12,r6,r4
1621 #ifndef __thumb2__
1622         str     r11,[sp,#120+4]
1623 #else
1624         strd    r10,r11,[sp,#120]               @ R[3][0] = C[0] ^ (~C[1] & C[2])
1625 #endif
1626         bic     r14,r7,r5
1627         eor     r12,r12,r2,ror#32-18
1628 #ifndef __thumb2__
1629         str     r12,[sp,#128]           @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1630 #endif
1631         eor     r14,r14,r3,ror#32-18
1632 #ifndef __thumb2__
1633         str     r14,[sp,#128+4]
1634 #else
1635         strd    r12,r14,[sp,#128]               @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1636 #endif
1637         bic     r10,r8,r6
1638         bic     r11,r9,r7
1639         bic     r12,r0,r8,ror#14
1640         bic     r14,r1,r9,ror#13
1641         eor     r10,r10,r4
1642         eor     r11,r11,r5
1643 #ifndef __thumb2__
1644         str     r10,[sp,#136]           @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1645 #endif
1646         bic     r2,r2,r0,ror#18-14
1647 #ifndef __thumb2__
1648         str     r11,[sp,#136+4]
1649 #else
1650         strd    r10,r11,[sp,#136]               @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1651 #endif
1652         eor     r12,r6,r12,ror#32-14
1653         bic     r11,r3,r1,ror#18-13
1654         eor     r14,r7,r14,ror#32-13
1655 #ifndef __thumb2__
1656         str     r12,[sp,#144]           @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1657 #endif
1658 #ifndef __thumb2__
1659         str     r14,[sp,#144+4]
1660 #else
1661         strd    r12,r14,[sp,#144]               @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1662 #endif
1663         add     r14,sp,#216
1664 #ifndef __thumb2__
1665         ldr     r0,[sp,#256]            @ A[0][2]
1666 #endif
1667         eor     r10,r8,r2,ror#32-18
1668 #ifndef __thumb2__
1669         ldr     r1,[sp,#256+4]
1670 #else
1671         ldrd    r0,r1,[sp,#256]         @ A[0][2]
1672 #endif
1673         eor     r11,r9,r11,ror#32-18
1674 #ifndef __thumb2__
1675         str     r10,[sp,#152]           @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1676 #endif
1677 #ifndef __thumb2__
1678         str     r11,[sp,#152+4]
1679 #else
1680         strd    r10,r11,[sp,#152]               @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1681 #endif
1682
1683         ldmia   r14,{r10,r11,r12,r14}   @ D[2..3]
1684 #ifndef __thumb2__
1685         ldr     r2,[sp,#304]            @ A[1][3]
1686 #endif
1687 #ifndef __thumb2__
1688         ldr     r3,[sp,#304+4]
1689 #else
1690         ldrd    r2,r3,[sp,#304]         @ A[1][3]
1691 #endif
1692 #ifndef __thumb2__
1693         ldr     r6,[sp,#232]            @ D[4]
1694 #endif
1695 #ifndef __thumb2__
1696         ldr     r7,[sp,#232+4]
1697 #else
1698         ldrd    r6,r7,[sp,#232]         @ D[4]
1699 #endif
1700
1701         eor     r0,r0,r10
1702 #ifndef __thumb2__
1703         ldr     r4,[sp,#352]            @ A[2][4]
1704 #endif
1705         eor     r1,r1,r11
1706 #ifndef __thumb2__
1707         ldr     r5,[sp,#352+4]
1708 #else
1709         ldrd    r4,r5,[sp,#352]         @ A[2][4]
1710 #endif
1711         @ mov   r0,r0,ror#32-31         @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1712 #ifndef __thumb2__
1713         ldr     r8,[sp,#200]            @ D[0]
1714 #endif
1715         @ mov   r1,r1,ror#32-31
1716 #ifndef __thumb2__
1717         ldr     r9,[sp,#200+4]
1718 #else
1719         ldrd    r8,r9,[sp,#200]         @ D[0]
1720 #endif
1721
1722         eor     r12,r12,r2
1723 #ifndef __thumb2__
1724         ldr     r10,[sp,#360]           @ A[3][0]
1725 #endif
1726         eor     r14,r14,r3
1727 #ifndef __thumb2__
1728         ldr     r11,[sp,#360+4]
1729 #else
1730         ldrd    r10,r11,[sp,#360]               @ A[3][0]
1731 #endif
1732         mov     r3,r12,ror#32-27                @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1733 #ifndef __thumb2__
1734         ldr     r12,[sp,#208]           @ D[1]
1735 #endif
1736         mov     r2,r14,ror#32-28
1737 #ifndef __thumb2__
1738         ldr     r14,[sp,#208+4]
1739 #else
1740         ldrd    r12,r14,[sp,#208]               @ D[1]
1741 #endif
1742
1743         eor     r6,r6,r4
1744         eor     r7,r7,r5
1745         mov     r5,r6,ror#32-19         @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1746         mov     r4,r7,ror#32-20
1747
1748         eor     r10,r10,r8
1749 #ifndef __thumb2__
1750         ldr     r8,[sp,#408]            @ A[4][1]
1751 #endif
1752         eor     r11,r11,r9
1753 #ifndef __thumb2__
1754         ldr     r9,[sp,#408+4]
1755 #else
1756         ldrd    r8,r9,[sp,#408]         @ A[4][1]
1757 #endif
1758         mov     r7,r10,ror#32-20                @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1759         mov     r6,r11,ror#32-21
1760
1761         eor     r8,r8,r12
1762         eor     r9,r9,r14
1763         @ mov   r8,r2,ror#32-1          @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1764         @ mov   r9,r3,ror#32-1
1765
1766         bic     r10,r4,r2
1767         bic     r11,r5,r3
1768         eor     r10,r10,r0,ror#32-31
1769 #ifndef __thumb2__
1770         str     r10,[sp,#160]           @ R[4][0] = C[0] ^ (~C[1] & C[2])
1771 #endif
1772         eor     r11,r11,r1,ror#32-31
1773 #ifndef __thumb2__
1774         str     r11,[sp,#160+4]
1775 #else
1776         strd    r10,r11,[sp,#160]               @ R[4][0] = C[0] ^ (~C[1] & C[2])
1777 #endif
1778         bic     r12,r6,r4
1779         bic     r14,r7,r5
1780         eor     r12,r12,r2
1781         eor     r14,r14,r3
1782 #ifndef __thumb2__
1783         str     r12,[sp,#168]           @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1784 #endif
1785         bic     r10,r8,r6,ror#1
1786 #ifndef __thumb2__
1787         str     r14,[sp,#168+4]
1788 #else
1789         strd    r12,r14,[sp,#168]               @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1790 #endif
1791         bic     r11,r9,r7,ror#1
1792         bic     r12,r0,r8,ror#31-1
1793         bic     r14,r1,r9,ror#31-1
1794         eor     r4,r4,r10,ror#32-1
1795 #ifndef __thumb2__
1796         str     r4,[sp,#176]            @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1797 #endif
1798         eor     r5,r5,r11,ror#32-1
1799 #ifndef __thumb2__
1800         str     r5,[sp,#176+4]
1801 #else
1802         strd    r4,r5,[sp,#176]         @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1803 #endif
1804         eor     r6,r6,r12,ror#32-31
1805         eor     r7,r7,r14,ror#32-31
1806 #ifndef __thumb2__
1807         str     r6,[sp,#184]            @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1808 #endif
1809         bic     r10,r2,r0,ror#32-31
1810 #ifndef __thumb2__
1811         str     r7,[sp,#184+4]
1812 #else
1813         strd    r6,r7,[sp,#184]         @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1814 #endif
1815         bic     r11,r3,r1,ror#32-31
1816         add     r12,sp,#0
1817         eor     r8,r10,r8,ror#32-1
1818         add     r10,sp,#40
1819         eor     r9,r11,r9,ror#32-1
1820 #ifndef __thumb2__
1821         str     r8,[sp,#192]            @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1822 #endif
1823 #ifndef __thumb2__
1824         str     r9,[sp,#192+4]
1825 #else
1826         strd    r8,r9,[sp,#192]         @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1827 #endif
1828         blo     .Lround2x
1829
1830         ldr     pc,[sp,#440]
1831 .size   KeccakF1600_int,.-KeccakF1600_int
1832
@ KeccakF1600: apply the Keccak round loop (KeccakF1600_enter) to a
@ caller-supplied state, working on a copy held on the stack.
@
@ In:    r0 = pointer to the 200-byte state A[5][5] (50 32-bit words)
@ Out:   the memory at the original r0 is overwritten with the new state
@ Saved: r4-r11 are pushed on entry and restored on return
@ Lost:  r0-r3, r12, lr and the condition flags
@
@ Stack frame after the prologue (456 bytes below the 40 bytes of pushed
@ registers):
@   [sp,#0..#199]    working copy of A[5][5]
@   [sp,#200..#239]  D[0..4], written by the round code
@   [sp,#240..#439]  second 5x5 buffer used by the round code (T[5][5])
@   [sp,#440]        return address, stored by KeccakF1600_enter
@   [sp,#444]        round counter, zeroed by KeccakF1600_enter
@   [sp,#456]        saved r0 (pointer to A)
@ NOTE(review): 40 + 456 = 496 bytes keeps sp 8-byte aligned for the
@ ldrd/strd accesses in the round code, assuming sp is 8-byte aligned on
@ entry -- confirm against the callers.
1833 .type   KeccakF1600, %function
1834 .align  5                                       @ align entry to 2^5 = 32 bytes
1835 KeccakF1600:
1836         stmdb   sp!,{r0,r4-r11,lr}              @ 10 words; r0 lands at the lowest address
1837         sub     sp,sp,#440+16                   @ space for A[5][5],D[5],T[5][5],...
1838
1839         add     r10,r0,#40                      @ r10 = source cursor, second 40-byte row of A
1840         add     r11,sp,#40                      @ r11 = destination cursor in the stack copy
1841         ldmia   r0,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}          @ copy A[5][5] to stack
1842         stmia   sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}          @ row 0; r0 is overwritten, hence the saved copy
1843         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}            @ rows 1..3: 40 bytes each, with writeback
1844         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1845         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1846         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1847         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1848         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1849         ldmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}            @ row 4; r4-r9 keep A[4][2..4] for the callee
1850         add     r12,sp,#0                       @ r12 = &A[0][0], as KeccakF1600_int sets it up
1851         add     r10,sp,#40                      @ r10 = &A[1][0], as KeccakF1600_int sets it up
1852         stmia   r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1853
1854         bl      KeccakF1600_enter               @ saves lr at [sp,#440], returns via ldr pc,[sp,#440]
1855
1856         ldr     r11, [sp,#440+16]               @ restore pointer to A
1857         ldmia   sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}          @ row 0 of the result
1858         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}            @ return A[5][5]
1859         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}            @ rows 1..4; relies on r10 = sp+40 left by the round loop
1860         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1861         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1862         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1863         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864         stmia   r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1865         ldmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866         stmia   r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1867
1868         add     sp,sp,#440+20                   @ drop the 456-byte frame plus the saved r0 slot
1869         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}      @ restore r4-r11; saved lr is loaded into pc to return
1870 .size   KeccakF1600,.-KeccakF1600
1871 .globl  SHA3_absorb
1872 .type   SHA3_absorb,%function
1873 .align  5
     @ SHA3_absorb: XOR whole bsz-byte blocks of input into the state, calling
     @ KeccakF1600_int after each block, until fewer than bsz bytes remain.
     @
     @ In:   r0 = A   (state; 50 words are read from it and written back)
     @       r1 = inp (input bytes, read one byte at a time)
     @       r2 = len (bytes available at inp)
     @       r3 = bsz (block size in bytes)
     @ Out:  r0 = number of input bytes left unprocessed (less than bsz)
     @
     @ Frame after the prologue, as offsets from sp:
     @   0..199    working copy of the state (50 words)
     @   200..455  left for KeccakF1600_int, which addresses its own slots
     @             relative to this sp (it stores lr at sp+440, for example)
     @   456..468  masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
     @   472..484  saved r0-r3; 476 is updated with the advancing inp and
     @             480 is overwritten with len - bsz
     @   488..     saved r4-r12 and lr
1874 SHA3_absorb:
1875         stmdb   sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1876         sub     sp,sp,#456+16
1877
1878         add     r10,r0,#40              @ r10 = A + 40 bytes, read cursor for the copy below
     @ r1 (inp) is not moved here; it is reloaded from its stack slot further down
1879         @ mov   r11,r1
1880         mov     r12,r2                  @ r12 = len
1881         mov     r14,r3                  @ r14 = bsz
1882         cmp     r2,r3
1883         blo     .Labsorb_abort          @ len < bsz: return len without touching A
1884
1885         add     r11,sp,#0               @ r11 = write cursor in the stack copy
1886         ldmia   r0,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}        @ copy A[5][5] to stack
1887         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1888         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1889         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1890         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1891         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1892         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1893         stmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1894         ldmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1895         stmia   r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1896
1897         ldr     r11,[sp,#476]           @ restore r11 = inp (the saved r1)
1898 #ifdef  __thumb2__
1899         mov     r9,#0x00ff00ff
1900         mov     r8,#0x0f0f0f0f
1901         mov     r7,#0x33333333
1902         mov     r6,#0x55555555
1903 #else
1904         mov     r6,#0x11                @ compose constants
1905         mov     r8,#0x0f
1906         mov     r9,#0xff
1907         orr     r6,r6,r6,lsl#8
1908         orr     r8,r8,r8,lsl#8
1909         orr     r6,r6,r6,lsl#16         @ 0x11111111
1910         orr     r9,r9,r9,lsl#16         @ 0x00ff00ff
1911         orr     r8,r8,r8,lsl#16         @ 0x0f0f0f0f
1912         orr     r7,r6,r6,lsl#1          @ 0x33333333
1913         orr     r6,r6,r6,lsl#2          @ 0x55555555
1914 #endif
     @ park the masks where the reload after KeccakF1600_int picks them up
1915         str     r9,[sp,#468]
1916         str     r8,[sp,#464]
1917         str     r7,[sp,#460]
1918         str     r6,[sp,#456]
1919         b       .Loop_absorb
1920
1921 .align  4
1922 .Loop_absorb:
1923         subs    r0,r12,r14              @ r0 = len - bsz
1924         blo     .Labsorbed              @ fewer than bsz bytes left: finish
1925         add     r10,sp,#0               @ r10 = cursor over the stack copy of the state
1926         str     r0,[sp,#480]            @ save len - bsz
1927
1928 .align  4
1929 .Loop_block:
     @ gather 8 input bytes, least significant first, into r0 (lo) and r1 (hi)
1930         ldrb    r0,[r11],#1
1931         ldrb    r1,[r11],#1
1932         ldrb    r2,[r11],#1
1933         ldrb    r3,[r11],#1
1934         ldrb    r4,[r11],#1
1935         orr     r0,r0,r1,lsl#8
1936         ldrb    r1,[r11],#1
1937         orr     r0,r0,r2,lsl#16
1938         ldrb    r2,[r11],#1
1939         orr     r0,r0,r3,lsl#24         @ lo
1940         ldrb    r3,[r11],#1
1941         orr     r1,r4,r1,lsl#8
1942         orr     r1,r1,r2,lsl#16
1943         orr     r1,r1,r3,lsl#24         @ hi
1944
     @ r2/r3 take the even-numbered bits of lo/hi and squeeze them towards
     @ bit 0; r0/r1 keep the odd-numbered bits and squeeze them towards bit 31
1945         and     r2,r0,r6                @ &=0x55555555
1946         and     r0,r0,r6,lsl#1          @ &=0xaaaaaaaa
1947         and     r3,r1,r6                @ &=0x55555555
1948         and     r1,r1,r6,lsl#1          @ &=0xaaaaaaaa
1949         orr     r2,r2,r2,lsr#1
1950         orr     r0,r0,r0,lsl#1
1951         orr     r3,r3,r3,lsr#1
1952         orr     r1,r1,r1,lsl#1
1953         and     r2,r2,r7                @ &=0x33333333
1954         and     r0,r0,r7,lsl#2          @ &=0xcccccccc
1955         and     r3,r3,r7                @ &=0x33333333
1956         and     r1,r1,r7,lsl#2          @ &=0xcccccccc
1957         orr     r2,r2,r2,lsr#2
1958         orr     r0,r0,r0,lsl#2
1959         orr     r3,r3,r3,lsr#2
1960         orr     r1,r1,r1,lsl#2
1961         and     r2,r2,r8                @ &=0x0f0f0f0f
1962         and     r0,r0,r8,lsl#4          @ &=0xf0f0f0f0
1963         and     r3,r3,r8                @ &=0x0f0f0f0f
1964         and     r1,r1,r8,lsl#4          @ &=0xf0f0f0f0
1965         ldmia   r10,{r4,r5}             @ A_flat[i]
1966         orr     r2,r2,r2,lsr#4
1967         orr     r0,r0,r0,lsl#4
1968         orr     r3,r3,r3,lsr#4
1969         orr     r1,r1,r1,lsl#4
1970         and     r2,r2,r9                @ &=0x00ff00ff
1971         and     r0,r0,r9,lsl#8          @ &=0xff00ff00
1972         and     r3,r3,r9                @ &=0x00ff00ff
1973         and     r1,r1,r9,lsl#8          @ &=0xff00ff00
1974         orr     r2,r2,r2,lsr#8
1975         orr     r0,r0,r0,lsl#8
1976         orr     r3,r3,r3,lsr#8
1977         orr     r1,r1,r1,lsl#8
1978
     @ only the low half of r2/r3 and the high half of r0/r1 are meaningful now;
     @ the shifts below discard the other halves while merging:
     @   r4 ^= even bits (from lo in bits 0-15, from hi in bits 16-31)
     @   r5 ^= odd bits  (from lo in bits 0-15, from hi in bits 16-31)
1979         mov     r2,r2,lsl#16
1980         mov     r1,r1,lsr#16
1981         eor     r4,r4,r3,lsl#16
1982         eor     r5,r5,r0,lsr#16
1983         eor     r4,r4,r2,lsr#16
1984         eor     r5,r5,r1,lsl#16
1985         stmia   r10!,{r4,r5}    @ A_flat[i++] ^= BitInterleave(inp[0..7])
1986
1987         subs    r14,r14,#8              @ 8 more bytes of this block consumed
1988         bhi     .Loop_block             @ loop while part of the block remains
1989
1990         str     r11,[sp,#476]           @ save the advanced inp for the reload below
1991
1992         bl      KeccakF1600_int
1993
     @ reload r6-r9 = masks, r10 = A (saved r0), r11 = inp, r12 = len - bsz,
     @ r14 = bsz (saved r3) from eight consecutive stack words
1994         add     r14,sp,#456
1995         ldmia   r14,{r6,r7,r8,r9,r10,r11,r12,r14}       @ restore constants and variables
1996         b       .Loop_absorb
1997
1998 .align  4
1999 .Labsorbed:
     @ r10 holds the caller's A again (reloaded after the last permutation)
2000         add     r11,sp,#40              @ r11 = read cursor, 40 bytes into the stack copy
2001         ldmia   sp,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2002         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}    @ return A[5][5]
2003         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2004         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2005         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2006         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2007         ldmia   r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2008         stmia   r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2009         ldmia   r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2010         stmia   r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2011
2012 .Labsorb_abort:
2013         add     sp,sp,#456+32           @ drop the frame and the saved r0-r3
2014         mov     r0,r12                  @ return value
2015         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2016 .size   SHA3_absorb,.-SHA3_absorb
2017 .globl  SHA3_squeeze
2018 .type   SHA3_squeeze,%function
2019 .align  5
     @ SHA3_squeeze: undo the bit-interleaved lane layout and write state bytes
     @ to out, calling KeccakF1600 each time bsz bytes have been produced and
     @ more output is still wanted.
     @
     @ In:   r0 = A (state), r1 = out, r2 = len (bytes wanted), r3 = bsz
     @ Out:  nothing is returned in r0
     @
     @ Register roles in the loop:
     @   r4 = out cursor, r5 = bytes still wanted, r10 = read cursor in A,
     @   r12 = bytes left in the current block, r14 = A (for the next
     @   permutation), r6-r9 = masks
     @ Stack after the prologue: sp+0..15 masks, sp+16 saved r0 (A),
     @ sp+20 saved r3 (bsz), sp+24.. saved r4-r10 and lr.
     @
     @ A request for zero bytes returns immediately. The tail path stores a byte
     @ before it tests the counter, so without the early exit a zero len would
     @ write up to 7 bytes past out.
2020 SHA3_squeeze:
2021         stmdb   sp!,{r0,r3-r10,lr}
2022
2023         mov     r10,r0
2024         mov     r4,r1
2025         mov     r5,r2
2026         mov     r12,r3
2027
2028 #ifdef  __thumb2__
2029         mov     r9,#0x00ff00ff
2030         mov     r8,#0x0f0f0f0f
2031         mov     r7,#0x33333333
2032         mov     r6,#0x55555555
2033 #else
2034         mov     r6,#0x11                @ compose constants
2035         mov     r8,#0x0f
2036         mov     r9,#0xff
2037         orr     r6,r6,r6,lsl#8
2038         orr     r8,r8,r8,lsl#8
2039         orr     r6,r6,r6,lsl#16         @ 0x11111111
2040         orr     r9,r9,r9,lsl#16         @ 0x00ff00ff
2041         orr     r8,r8,r8,lsl#16         @ 0x0f0f0f0f
2042         orr     r7,r6,r6,lsl#1          @ 0x33333333
2043         orr     r6,r6,r6,lsl#2          @ 0x55555555
2044 #endif
2045         stmdb   sp!,{r6,r7,r8,r9}
2046
             cmp     r5,#0                   @ len == 0: nothing to write
             beq     .Lsqueeze_done          @ both pushes are in place, so the shared epilogue fits

2047         mov     r14,r10
2048         b       .Loop_squeeze
2049
2050 .align  4
2051 .Loop_squeeze:
2052         ldmia   r10!,{r0,r1}    @ A_flat[i++]
2053
     @ r0 carries the even-numbered bits of the lane, r1 the odd-numbered ones;
     @ split both into halves belonging to the low and the high output word
2054         mov     r2,r0,lsl#16
2055         mov     r3,r1,lsl#16            @ r3 = r1 << 16
2056         mov     r2,r2,lsr#16            @ r2 = r0 & 0x0000ffff
2057         mov     r1,r1,lsr#16
2058         mov     r0,r0,lsr#16            @ r0 = r0 >> 16
2059         mov     r1,r1,lsl#16            @ r1 = r1 & 0xffff0000
2060
     @ spread the packed bits back out to every other bit position
2061         orr     r2,r2,r2,lsl#8
2062         orr     r3,r3,r3,lsr#8
2063         orr     r0,r0,r0,lsl#8
2064         orr     r1,r1,r1,lsr#8
2065         and     r2,r2,r9                @ &=0x00ff00ff
2066         and     r3,r3,r9,lsl#8          @ &=0xff00ff00
2067         and     r0,r0,r9                @ &=0x00ff00ff
2068         and     r1,r1,r9,lsl#8          @ &=0xff00ff00
2069         orr     r2,r2,r2,lsl#4
2070         orr     r3,r3,r3,lsr#4
2071         orr     r0,r0,r0,lsl#4
2072         orr     r1,r1,r1,lsr#4
2073         and     r2,r2,r8                @ &=0x0f0f0f0f
2074         and     r3,r3,r8,lsl#4          @ &=0xf0f0f0f0
2075         and     r0,r0,r8                @ &=0x0f0f0f0f
2076         and     r1,r1,r8,lsl#4          @ &=0xf0f0f0f0
2077         orr     r2,r2,r2,lsl#2
2078         orr     r3,r3,r3,lsr#2
2079         orr     r0,r0,r0,lsl#2
2080         orr     r1,r1,r1,lsr#2
2081         and     r2,r2,r7                @ &=0x33333333
2082         and     r3,r3,r7,lsl#2          @ &=0xcccccccc
2083         and     r0,r0,r7                @ &=0x33333333
2084         and     r1,r1,r7,lsl#2          @ &=0xcccccccc
2085         orr     r2,r2,r2,lsl#1
2086         orr     r3,r3,r3,lsr#1
2087         orr     r0,r0,r0,lsl#1
2088         orr     r1,r1,r1,lsr#1
2089         and     r2,r2,r6                @ &=0x55555555
2090         and     r3,r3,r6,lsl#1          @ &=0xaaaaaaaa
2091         and     r0,r0,r6                @ &=0x55555555
2092         and     r1,r1,r6,lsl#1          @ &=0xaaaaaaaa
2093
2094         orr     r2,r2,r3                @ r2 = low 32 bits of the lane
2095         orr     r0,r0,r1                @ r0 = high 32 bits of the lane
2096
2097         cmp     r5,#8
2098         blo     .Lsqueeze_tail          @ 1..7 bytes wanted: partial lane
     @ full lane: 8 byte stores, least significant byte first
2099         mov     r1,r2,lsr#8
2100         strb    r2,[r4],#1
2101         mov     r3,r2,lsr#16
2102         strb    r1,[r4],#1
2103         mov     r2,r2,lsr#24
2104         strb    r3,[r4],#1
2105         strb    r2,[r4],#1
2106
2107         mov     r1,r0,lsr#8
2108         strb    r0,[r4],#1
2109         mov     r3,r0,lsr#16
2110         strb    r1,[r4],#1
2111         mov     r0,r0,lsr#24
2112         strb    r3,[r4],#1
2113         strb    r0,[r4],#1
2114         subs    r5,r5,#8                @ len -= 8
2115         beq     .Lsqueeze_done
2116
2117         subs    r12,r12,#8              @ bsz -= 8
2118         bhi     .Loop_squeeze
2119
2120         mov     r0,r14                  @ original r10
2121
2122         bl      KeccakF1600
2123
     @ r6-r9 = masks, r10 = A (saved r0), r12 = bsz (saved r3)
2124         ldmia   sp,{r6,r7,r8,r9,r10,r12}                @ restore constants and variables
2125         mov     r14,r10
2126         b       .Loop_squeeze
2127
2128 .align  4
2129 .Lsqueeze_tail:
     @ entered with 1 <= r5 <= 7: emit bytes of r2, then of r0, until r5 hits 0
2130         strb    r2,[r4],#1
2131         mov     r2,r2,lsr#8
2132         subs    r5,r5,#1
2133         beq     .Lsqueeze_done
2134         strb    r2,[r4],#1
2135         mov     r2,r2,lsr#8
2136         subs    r5,r5,#1
2137         beq     .Lsqueeze_done
2138         strb    r2,[r4],#1
2139         mov     r2,r2,lsr#8
2140         subs    r5,r5,#1
2141         beq     .Lsqueeze_done
2142         strb    r2,[r4],#1
2143         subs    r5,r5,#1
2144         beq     .Lsqueeze_done
2145
2146         strb    r0,[r4],#1
2147         mov     r0,r0,lsr#8
2148         subs    r5,r5,#1
2149         beq     .Lsqueeze_done
2150         strb    r0,[r4],#1
2151         mov     r0,r0,lsr#8
2152         subs    r5,r5,#1
2153         beq     .Lsqueeze_done
2154         strb    r0,[r4]
2155         b       .Lsqueeze_done
2156
2157 .align  4
2158 .Lsqueeze_done:
2159         add     sp,sp,#24               @ skip the masks and the saved r0, r3
2160         ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2161 .size   SHA3_squeeze,.-SHA3_squeeze
2162 #if __ARM_MAX_ARCH__>=7
2163 .fpu    neon
2164
2165 .type   iotas64, %object
2166 .align  5
     @ iotas64: 24 64-bit constants. KeccakF1600_neon points r2 at this table
     @ and loads one entry per loop iteration with post-increment, XORing it
     @ into A[0][0].
2167 iotas64:
2168 .quad   0x0000000000000001
2169 .quad   0x0000000000008082
2170 .quad   0x800000000000808a
2171 .quad   0x8000000080008000
2172 .quad   0x000000000000808b
2173 .quad   0x0000000080000001
2174 .quad   0x8000000080008081
2175 .quad   0x8000000000008009
2176 .quad   0x000000000000008a
2177 .quad   0x0000000000000088
2178 .quad   0x0000000080008009
2179 .quad   0x000000008000000a
2180 .quad   0x000000008000808b
2181 .quad   0x800000000000008b
2182 .quad   0x8000000000008089
2183 .quad   0x8000000000008003
2184 .quad   0x8000000000008002
2185 .quad   0x8000000000000080
2186 .quad   0x000000000000800a
2187 .quad   0x800000008000000a
2188 .quad   0x8000000080008081
2189 .quad   0x8000000000008080
2190 .quad   0x0000000080000001
2191 .quad   0x8000000080008008
2192 .size   iotas64,.-iotas64
2193
2194 .type   KeccakF1600_neon, %function
2195 .align  5
     @ KeccakF1600_neon: 24 iterations of Theta, Rho+Pi and Chi+Iota over a
     @ state that lives entirely in NEON registers.
     @
     @ In:   r0      = 64-bit aligned buffer; its first 24 bytes are used as
     @                 spill space and overwritten
     @       q0-q4   = A[0..1][0..4]  (row 0 in the even d register, row 1 in the odd)
     @       q5-q9   = A[2..3][0..4]  (row 2 in the even d register, row 3 in the odd)
     @       d20-d24 = A[4][0..4]
     @ Out:  d0-d24  = updated state
     @ Clobbers: r1, r2, r3, d25-d31, flags
     @
     @ The return is "bx lr". ARM-state builds keep the hand-encoded word;
     @ when __thumb2__ is defined the file is assembled as Thumb code, where
     @ that word is not a return instruction, so the mnemonic is emitted instead.
2196 KeccakF1600_neon:
2197         add     r1, r0, #16             @ second spill slot
2198         adr     r2, iotas64
2199         mov     r3, #24                 @ loop counter
2200         b       .Loop_neon
2201
2202 .align  4
2203 .Loop_neon:
2204         @ Theta
2205         vst1.64 {q4},  [r0,:64]         @ offload A[0..1][4]
2206         veor    q13, q0,  q5            @ A[0..1][0]^A[2..3][0]
2207         vst1.64 {d18}, [r1,:64]         @ offload A[2][4]
2208         veor    q14, q1,  q6            @ A[0..1][1]^A[2..3][1]
2209         veor    q15, q2,  q7            @ A[0..1][2]^A[2..3][2]
2210         veor    d26, d26, d27           @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2211         veor    d27, d28, d29           @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2212         veor    q14, q3,  q8            @ A[0..1][3]^A[2..3][3]
2213         veor    q4,  q4,  q9            @ A[0..1][4]^A[2..3][4]
2214         veor    d30, d30, d31           @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2215         veor    d31, d28, d29           @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2216         veor    d25, d8,  d9            @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2217         veor    q13, q13, q10           @ C[0..1]^=A[4][0..1]
2218         veor    q14, q15, q11           @ C[2..3]^=A[4][2..3]
2219         veor    d25, d25, d24           @ C[4]^=A[4][4]
2220
     @ rotate left by one: x+x supplies bits 1..63, vsri inserts old bit 63 at bit 0
2221         vadd.u64        q4,  q13, q13           @ C[0..1]<<1
2222         vadd.u64        q15, q14, q14           @ C[2..3]<<1
2223         vadd.u64        d18, d25, d25           @ C[4]<<1
2224         vsri.u64        q4,  q13, #63           @ ROL64(C[0..1],1)
2225         vsri.u64        q15, q14, #63           @ ROL64(C[2..3],1)
2226         vsri.u64        d18, d25, #63           @ ROL64(C[4],1)
2227         veor    d25, d25, d9            @ D[0] = C[4] ^= ROL64(C[1],1)
2228         veor    q13, q13, q15           @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2229         veor    d28, d28, d18           @ D[3] = C[2] ^= ROL64(C[4],1)
2230         veor    d29, d29, d8            @ D[4] = C[3] ^= ROL64(C[0],1)
2231
     @ from here on the "C[n]" named in a comment is the register that was
     @ just turned into the corresponding D value above
2232         veor    d0,  d0,  d25           @ A[0][0] ^= C[4]
2233         veor    d1,  d1,  d25           @ A[1][0] ^= C[4]
2234         veor    d10, d10, d25           @ A[2][0] ^= C[4]
2235         veor    d11, d11, d25           @ A[3][0] ^= C[4]
2236         veor    d20, d20, d25           @ A[4][0] ^= C[4]
2237
2238         veor    d2,  d2,  d26           @ A[0][1] ^= D[1]
2239         veor    d3,  d3,  d26           @ A[1][1] ^= D[1]
2240         veor    d12, d12, d26           @ A[2][1] ^= D[1]
2241         veor    d13, d13, d26           @ A[3][1] ^= D[1]
2242         veor    d21, d21, d26           @ A[4][1] ^= D[1]
2243         vmov    d26, d27                @ q13 = {D[2], D[2]} for the q-wide XORs below
2244
2245         veor    d6,  d6,  d28           @ A[0][3] ^= C[2]
2246         veor    d7,  d7,  d28           @ A[1][3] ^= C[2]
2247         veor    d16, d16, d28           @ A[2][3] ^= C[2]
2248         veor    d17, d17, d28           @ A[3][3] ^= C[2]
2249         veor    d23, d23, d28           @ A[4][3] ^= C[2]
2250         vld1.64 {q4},  [r0,:64]         @ restore A[0..1][4]
2251         vmov    d28, d29                @ q14 = {D[4], D[4]} for the q-wide XORs below
2252
2253         vld1.64 {d18}, [r1,:64]         @ restore A[2][4]
2254         veor    q2,  q2,  q13           @ A[0..1][2] ^= D[2]
2255         veor    q7,  q7,  q13           @ A[2..3][2] ^= D[2]
2256         veor    d22, d22, d27           @ A[4][2]    ^= D[2]
2257
2258         veor    q4,  q4,  q14           @ A[0..1][4] ^= C[3]
2259         veor    q9,  q9,  q14           @ A[2..3][4] ^= C[3]
2260         veor    d24, d24, d29           @ A[4][4]    ^= C[3]
2261
2262         @ Rho + Pi
     @ each rotation is a vshl by n followed by a vsri by 64-n into the same
     @ destination; rotations by 8 and 56 use vext.8 on a doubled source
2263         vmov    d26, d2                 @ C[1] = A[0][1]
2264         vshl.u64        d2,  d3,  #44
2265         vmov    d27, d4                 @ C[2] = A[0][2]
2266         vshl.u64        d4,  d14, #43
2267         vmov    d28, d6                 @ C[3] = A[0][3]
2268         vshl.u64        d6,  d17, #21
2269         vmov    d29, d8                 @ C[4] = A[0][4]
2270         vshl.u64        d8,  d24, #14
2271         vsri.u64        d2,  d3,  #64-44        @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2272         vsri.u64        d4,  d14, #64-43        @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2273         vsri.u64        d6,  d17, #64-21        @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2274         vsri.u64        d8,  d24, #64-14        @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2275
2276         vshl.u64        d3,  d9,  #20
2277         vshl.u64        d14, d16, #25
2278         vshl.u64        d17, d15, #15
2279         vshl.u64        d24, d21, #2
2280         vsri.u64        d3,  d9,  #64-20        @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2281         vsri.u64        d14, d16, #64-25        @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2282         vsri.u64        d17, d15, #64-15        @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2283         vsri.u64        d24, d21, #64-2         @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2284
2285         vshl.u64        d9,  d22, #61
2286         @ vshl.u64      d16, d19, #8
2287         vshl.u64        d15, d12, #10
2288         vshl.u64        d21, d7,  #55
2289         vsri.u64        d9,  d22, #64-61        @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2290         vext.8  d16, d19, d19, #8-1     @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2291         vsri.u64        d15, d12, #64-10        @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2292         vsri.u64        d21, d7,  #64-55        @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2293
2294         vshl.u64        d22, d18, #39
2295         @ vshl.u64      d19, d23, #56
2296         vshl.u64        d12, d5,  #6
2297         vshl.u64        d7,  d13, #45
2298         vsri.u64        d22, d18, #64-39        @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2299         vext.8  d19, d23, d23, #8-7     @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2300         vsri.u64        d12, d5,  #64-6         @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2301         vsri.u64        d7,  d13, #64-45        @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2302
2303         vshl.u64        d18, d20, #18
2304         vshl.u64        d23, d11, #41
2305         vshl.u64        d5,  d10, #3
2306         vshl.u64        d13, d1,  #36
2307         vsri.u64        d18, d20, #64-18        @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2308         vsri.u64        d23, d11, #64-41        @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2309         vsri.u64        d5,  d10, #64-3         @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2310         vsri.u64        d13, d1,  #64-36        @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2311
2312         vshl.u64        d1,  d28, #28
2313         vshl.u64        d10, d26, #1
2314         vshl.u64        d11, d29, #27
2315         vshl.u64        d20, d27, #62
2316         vsri.u64        d1,  d28, #64-28        @ A[1][0] = ROL64(C[3],    rhotates[0][3])
2317         vsri.u64        d10, d26, #64-1         @ A[2][0] = ROL64(C[1],    rhotates[0][1])
2318         vsri.u64        d11, d29, #64-27        @ A[3][0] = ROL64(C[4],    rhotates[0][4])
2319         vsri.u64        d20, d27, #64-62        @ A[4][0] = ROL64(C[2],    rhotates[0][2])
2320
2321         @ Chi + Iota
2322         vbic    q13, q2,  q1
2323         vbic    q14, q3,  q2
2324         vbic    q15, q4,  q3
2325         veor    q13, q13, q0            @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2326         veor    q14, q14, q1            @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2327         veor    q2,  q2,  q15           @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2328         vst1.64 {q13}, [r0,:64]         @ offload A[0..1][0]
2329         vbic    q13, q0,  q4
2330         vbic    q15, q1,  q0
2331         vmov    q1,  q14                @ A[0..1][1]
2332         veor    q3,  q3,  q13           @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2333         veor    q4,  q4,  q15           @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2334
2335         vbic    q13, q7,  q6
2336         vmov    q0,  q5                 @ A[2..3][0]
2337         vbic    q14, q8,  q7
2338         vmov    q15, q6                 @ A[2..3][1]
2339         veor    q5,  q5,  q13           @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2340         vbic    q13, q9,  q8
2341         veor    q6,  q6,  q14           @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2342         vbic    q14, q0,  q9
2343         veor    q7,  q7,  q13           @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2344         vbic    q13, q15, q0
2345         veor    q8,  q8,  q14           @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
2346         vmov    q14, q10                @ A[4][0..1]
2347         veor    q9,  q9,  q13           @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2348
2349         vld1.64 d25, [r2,:64]!          @ Iota[i++]
2350         vbic    d26, d22, d21
2351         vbic    d27, d23, d22
2352         vld1.64 {q0}, [r0,:64]          @ restore A[0..1][0]
2353         veor    d20, d20, d26           @ A[4][0] ^= (~A[4][1] & A[4][2])
2354         vbic    d26, d24, d23
2355         veor    d21, d21, d27           @ A[4][1] ^= (~A[4][2] & A[4][3])
2356         vbic    d27, d28, d24
2357         veor    d22, d22, d26           @ A[4][2] ^= (~A[4][3] & A[4][4])
2358         vbic    d26, d29, d28
2359         veor    d23, d23, d27           @ A[4][3] ^= (~A[4][4] & A[4][0])
2360         veor    d0,  d0,  d25           @ A[0][0] ^= Iota[i]
2361         veor    d24, d24, d26           @ A[4][4] ^= (~A[4][0] & A[4][1])
2362
2363         subs    r3, r3, #1
2364         bne     .Loop_neon
2365
     #if defined(__thumb2__)
             bx      lr                      @ Thumb state: let the assembler pick the encoding
     #else
2366 .word   0xe12fff1e                      @ bx lr, ARM-state encoding written out by hand
     #endif
2367 .size   KeccakF1600_neon,.-KeccakF1600_neon
2368
2369 .globl  SHA3_absorb_neon
2370 .type   SHA3_absorb_neon, %function
2371 .align  5
     @ SHA3_absorb_neon: XOR whole bsz-byte blocks of input into the state and
     @ call KeccakF1600_neon after each one, until fewer than bsz bytes remain.
     @
     @ In:   r0 = A   (25 lanes of 8 bytes, accessed with 64-bit alignment hints)
     @       r1 = inp, r2 = len, r3 = bsz
     @ Out:  r0 = number of input bytes left unprocessed (less than bsz)
     @
     @ The state stays in d0-d24 for the whole loop; memory at A is only
     @ rewritten at the end. d8-d15 are saved on entry and restored on exit.
     @ NOTE(review): the lane ladder below treats bsz as a non-zero multiple
     @ of 8 -- confirm that callers guarantee this.
2372 SHA3_absorb_neon:
2373         stmdb   sp!, {r4,r5,r6,lr}
2374         vstmdb  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2375
2376         mov     r4, r1                  @ inp
2377         mov     r5, r2                  @ len
2378         mov     r6, r3                  @ bsz
2379
     @ rows 0/1 and rows 2/3 are interleaved across even/odd d registers so
     @ that each q register holds the same column of two rows
2380         vld1.32 {d0}, [r0,:64]!         @ A[0][0]
2381         vld1.32 {d2}, [r0,:64]!         @ A[0][1]
2382         vld1.32 {d4}, [r0,:64]!         @ A[0][2]
2383         vld1.32 {d6}, [r0,:64]!         @ A[0][3]
2384         vld1.32 {d8}, [r0,:64]!         @ A[0][4]
2385
2386         vld1.32 {d1}, [r0,:64]!         @ A[1][0]
2387         vld1.32 {d3}, [r0,:64]!         @ A[1][1]
2388         vld1.32 {d5}, [r0,:64]!         @ A[1][2]
2389         vld1.32 {d7}, [r0,:64]!         @ A[1][3]
2390         vld1.32 {d9}, [r0,:64]!         @ A[1][4]
2391
2392         vld1.32 {d10}, [r0,:64]!                @ A[2][0]
2393         vld1.32 {d12}, [r0,:64]!                @ A[2][1]
2394         vld1.32 {d14}, [r0,:64]!                @ A[2][2]
2395         vld1.32 {d16}, [r0,:64]!                @ A[2][3]
2396         vld1.32 {d18}, [r0,:64]!                @ A[2][4]
2397
2398         vld1.32 {d11}, [r0,:64]!                @ A[3][0]
2399         vld1.32 {d13}, [r0,:64]!                @ A[3][1]
2400         vld1.32 {d15}, [r0,:64]!                @ A[3][2]
2401         vld1.32 {d17}, [r0,:64]!                @ A[3][3]
2402         vld1.32 {d19}, [r0,:64]!                @ A[3][4]
2403
2404         vld1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..3]
2405         vld1.32 {d24}, [r0,:64]         @ A[4][4]
2406         sub     r0, r0, #24*8           @ rewind
2407         b       .Loop_absorb_neon
2408
2409 .align  4
2410 .Loop_absorb_neon:
2411         subs    r12, r5, r6             @ len - bsz
2412         blo     .Labsorbed_neon         @ fewer than bsz bytes left: finish
2413         mov     r5, r12                 @ len -= bsz
2414
     @ Lane ladder: each "cmp r6, #8*k" feeds two branches -- the blo right
     @ after it (bsz < 8*k) and the beq after the following lane (bsz == 8*k).
     @ The vld1/veor instructions in between do not change the flags.
2415         vld1.8  {d31}, [r4]!            @ endian-neutral loads...
2416         cmp     r6, #8*2
2417         veor    d0, d0, d31             @ A[0][0] ^= *inp++
2418         blo     .Lprocess_neon
2419         vld1.8  {d31}, [r4]!
2420         veor    d2, d2, d31             @ A[0][1] ^= *inp++
2421         beq     .Lprocess_neon
2422         vld1.8  {d31}, [r4]!
2423         cmp     r6, #8*4
2424         veor    d4, d4, d31             @ A[0][2] ^= *inp++
2425         blo     .Lprocess_neon
2426         vld1.8  {d31}, [r4]!
2427         veor    d6, d6, d31             @ A[0][3] ^= *inp++
2428         beq     .Lprocess_neon
2429         vld1.8  {d31},[r4]!
2430         cmp     r6, #8*6
2431         veor    d8, d8, d31             @ A[0][4] ^= *inp++
2432         blo     .Lprocess_neon
2433
2434         vld1.8  {d31}, [r4]!
2435         veor    d1, d1, d31             @ A[1][0] ^= *inp++
2436         beq     .Lprocess_neon
2437         vld1.8  {d31}, [r4]!
2438         cmp     r6, #8*8
2439         veor    d3, d3, d31             @ A[1][1] ^= *inp++
2440         blo     .Lprocess_neon
2441         vld1.8  {d31}, [r4]!
2442         veor    d5, d5, d31             @ A[1][2] ^= *inp++
2443         beq     .Lprocess_neon
2444         vld1.8  {d31}, [r4]!
2445         cmp     r6, #8*10
2446         veor    d7, d7, d31             @ A[1][3] ^= *inp++
2447         blo     .Lprocess_neon
2448         vld1.8  {d31}, [r4]!
2449         veor    d9, d9, d31             @ A[1][4] ^= *inp++
2450         beq     .Lprocess_neon
2451
2452         vld1.8  {d31}, [r4]!
2453         cmp     r6, #8*12
2454         veor    d10, d10, d31           @ A[2][0] ^= *inp++
2455         blo     .Lprocess_neon
2456         vld1.8  {d31}, [r4]!
2457         veor    d12, d12, d31           @ A[2][1] ^= *inp++
2458         beq     .Lprocess_neon
2459         vld1.8  {d31}, [r4]!
2460         cmp     r6, #8*14
2461         veor    d14, d14, d31           @ A[2][2] ^= *inp++
2462         blo     .Lprocess_neon
2463         vld1.8  {d31}, [r4]!
2464         veor    d16, d16, d31           @ A[2][3] ^= *inp++
2465         beq     .Lprocess_neon
2466         vld1.8  {d31}, [r4]!
2467         cmp     r6, #8*16
2468         veor    d18, d18, d31           @ A[2][4] ^= *inp++
2469         blo     .Lprocess_neon
2470
2471         vld1.8  {d31}, [r4]!
2472         veor    d11, d11, d31           @ A[3][0] ^= *inp++
2473         beq     .Lprocess_neon
2474         vld1.8  {d31}, [r4]!
2475         cmp     r6, #8*18
2476         veor    d13, d13, d31           @ A[3][1] ^= *inp++
2477         blo     .Lprocess_neon
2478         vld1.8  {d31}, [r4]!
2479         veor    d15, d15, d31           @ A[3][2] ^= *inp++
2480         beq     .Lprocess_neon
2481         vld1.8  {d31}, [r4]!
2482         cmp     r6, #8*20
2483         veor    d17, d17, d31           @ A[3][3] ^= *inp++
2484         blo     .Lprocess_neon
2485         vld1.8  {d31}, [r4]!
2486         veor    d19, d19, d31           @ A[3][4] ^= *inp++
2487         beq     .Lprocess_neon
2488
2489         vld1.8  {d31}, [r4]!
2490         cmp     r6, #8*22
2491         veor    d20, d20, d31           @ A[4][0] ^= *inp++
2492         blo     .Lprocess_neon
2493         vld1.8  {d31}, [r4]!
2494         veor    d21, d21, d31           @ A[4][1] ^= *inp++
2495         beq     .Lprocess_neon
2496         vld1.8  {d31}, [r4]!
2497         cmp     r6, #8*24
2498         veor    d22, d22, d31           @ A[4][2] ^= *inp++
2499         blo     .Lprocess_neon
2500         vld1.8  {d31}, [r4]!
2501         veor    d23, d23, d31           @ A[4][3] ^= *inp++
2502         beq     .Lprocess_neon
2503         vld1.8  {d31}, [r4]!
2504         veor    d24, d24, d31           @ A[4][4] ^= *inp++
2505
2506 .Lprocess_neon:
     @ r0 still points at A; KeccakF1600_neon spills to the start of that buffer
2507         bl      KeccakF1600_neon
2508         b       .Loop_absorb_neon
2509
2510 .align  4
2511 .Labsorbed_neon:
     @ write the register-held state back in the order it was loaded
2512         vst1.32 {d0}, [r0,:64]!         @ A[0][0..4]
2513         vst1.32 {d2}, [r0,:64]!
2514         vst1.32 {d4}, [r0,:64]!
2515         vst1.32 {d6}, [r0,:64]!
2516         vst1.32 {d8}, [r0,:64]!
2517
2518         vst1.32 {d1}, [r0,:64]!         @ A[1][0..4]
2519         vst1.32 {d3}, [r0,:64]!
2520         vst1.32 {d5}, [r0,:64]!
2521         vst1.32 {d7}, [r0,:64]!
2522         vst1.32 {d9}, [r0,:64]!
2523
2524         vst1.32 {d10}, [r0,:64]!                @ A[2][0..4]
2525         vst1.32 {d12}, [r0,:64]!
2526         vst1.32 {d14}, [r0,:64]!
2527         vst1.32 {d16}, [r0,:64]!
2528         vst1.32 {d18}, [r0,:64]!
2529
2530         vst1.32 {d11}, [r0,:64]!                @ A[3][0..4]
2531         vst1.32 {d13}, [r0,:64]!
2532         vst1.32 {d15}, [r0,:64]!
2533         vst1.32 {d17}, [r0,:64]!
2534         vst1.32 {d19}, [r0,:64]!
2535
2536         vst1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..3]
2537         vst1.32 {d24}, [r0,:64]                 @ A[4][4]
2538
2539         mov     r0, r5                  @ return value
2540         vldmia  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2541         ldmia   sp!, {r4,r5,r6,pc}
2542 .size   SHA3_absorb_neon,.-SHA3_absorb_neon
2543
2544 .globl  SHA3_squeeze_neon
2545 .type   SHA3_squeeze_neon, %function
2546 .align  5
     @ SHA3_squeeze_neon: copy state bytes from A to out, running
     @ KeccakF1600_neon each time bsz bytes have been produced and more output
     @ is still wanted.
     @
     @ In:   r0 = A (state), r1 = out, r2 = len (bytes wanted), r3 = bsz
     @ Out:  nothing is returned in r0
     @
     @ Register roles in the loop:
     @   r4 = out cursor, r5 = bytes still wanted, r6 = bsz,
     @   r12 = read cursor in A, r14 = bytes left in the current block
     @
     @ A request for zero bytes returns immediately. The tail path stores its
     @ first byte before any branch, so without the early exit a zero len
     @ would write one byte past out.
2547 SHA3_squeeze_neon:
2548         stmdb   sp!, {r4,r5,r6,lr}
2549
2550         mov     r4, r1                  @ out
2551         mov     r5, r2                  @ len
2552         mov     r6, r3                  @ bsz
2553         mov     r12, r0                 @ A_flat
2554         mov     r14, r3                 @ bsz
             cmp     r5, #0                  @ len == 0: nothing to write
             beq     .Lsqueeze_neon_done
2555         b       .Loop_squeeze_neon
2556
2557 .align  4
2558 .Loop_squeeze_neon:
2559         cmp     r5, #8
2560         blo     .Lsqueeze_neon_tail     @ 1..7 bytes wanted: partial lane
2561         vld1.32 {d0}, [r12]!
2562         vst1.8  {d0}, [r4]!             @ endian-neutral store
2563
2564         subs    r5, r5, #8              @ len -= 8
2565         beq     .Lsqueeze_neon_done
2566
2567         subs    r14, r14, #8            @ bsz -= 8
2568         bhi     .Loop_squeeze_neon
2569
     @ block exhausted: bring the state into registers, permute, write it back
2570         vstmdb  sp!,  {d8,d9,d10,d11,d12,d13,d14,d15}
2571
2572         vld1.32 {d0}, [r0,:64]!         @ A[0][0..4]
2573         vld1.32 {d2}, [r0,:64]!
2574         vld1.32 {d4}, [r0,:64]!
2575         vld1.32 {d6}, [r0,:64]!
2576         vld1.32 {d8}, [r0,:64]!
2577
2578         vld1.32 {d1}, [r0,:64]!         @ A[1][0..4]
2579         vld1.32 {d3}, [r0,:64]!
2580         vld1.32 {d5}, [r0,:64]!
2581         vld1.32 {d7}, [r0,:64]!
2582         vld1.32 {d9}, [r0,:64]!
2583
2584         vld1.32 {d10}, [r0,:64]!                @ A[2][0..4]
2585         vld1.32 {d12}, [r0,:64]!
2586         vld1.32 {d14}, [r0,:64]!
2587         vld1.32 {d16}, [r0,:64]!
2588         vld1.32 {d18}, [r0,:64]!
2589
2590         vld1.32 {d11}, [r0,:64]!                @ A[3][0..4]
2591         vld1.32 {d13}, [r0,:64]!
2592         vld1.32 {d15}, [r0,:64]!
2593         vld1.32 {d17}, [r0,:64]!
2594         vld1.32 {d19}, [r0,:64]!
2595
2596         vld1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..3]
2597         vld1.32 {d24}, [r0,:64]                 @ A[4][4]
2598         sub     r0, r0, #24*8           @ rewind
2599
2600         bl      KeccakF1600_neon
2601
2602         mov     r12, r0                 @ A_flat
2603         vst1.32 {d0}, [r0,:64]!         @ A[0][0..4]
2604         vst1.32 {d2}, [r0,:64]!
2605         vst1.32 {d4}, [r0,:64]!
2606         vst1.32 {d6}, [r0,:64]!
2607         vst1.32 {d8}, [r0,:64]!
2608
2609         vst1.32 {d1}, [r0,:64]!         @ A[1][0..4]
2610         vst1.32 {d3}, [r0,:64]!
2611         vst1.32 {d5}, [r0,:64]!
2612         vst1.32 {d7}, [r0,:64]!
2613         vst1.32 {d9}, [r0,:64]!
2614
2615         vst1.32 {d10}, [r0,:64]!                @ A[2][0..4]
2616         vst1.32 {d12}, [r0,:64]!
2617         vst1.32 {d14}, [r0,:64]!
2618         vst1.32 {d16}, [r0,:64]!
2619         vst1.32 {d18}, [r0,:64]!
2620
2621         vst1.32 {d11}, [r0,:64]!                @ A[3][0..4]
2622         vst1.32 {d13}, [r0,:64]!
2623         vst1.32 {d15}, [r0,:64]!
2624         vst1.32 {d17}, [r0,:64]!
2625         vst1.32 {d19}, [r0,:64]!
2626
2627         vst1.32 {d20,d21,d22,d23}, [r0,:64]!    @ A[4][0..3]
2628         mov     r14, r6                 @ bsz
2629         vst1.32 {d24}, [r0,:64]                 @ A[4][4]
2630         mov     r0,  r12                @ rewind
2631
2632         vldmia  sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2633         b       .Loop_squeeze_neon
2634
2635 .align  4
2636 .Lsqueeze_neon_tail:
     @ entered with 1 <= r5 <= 7; each cmp serves the blo right after it and
     @ the beq after the following store
2637         ldmia   r12, {r2,r3}
2638         cmp     r5, #2
2639         strb    r2, [r4],#1             @ endian-neutral store
2640         mov     r2, r2, lsr#8
2641         blo     .Lsqueeze_neon_done
2642         strb    r2, [r4], #1
2643         mov     r2, r2, lsr#8
2644         beq     .Lsqueeze_neon_done
2645         strb    r2, [r4], #1
2646         mov     r2, r2, lsr#8
2647         cmp     r5, #4
2648         blo     .Lsqueeze_neon_done
2649         strb    r2, [r4], #1
2650         beq     .Lsqueeze_neon_done
2651
2652         strb    r3, [r4], #1
2653         mov     r3, r3, lsr#8
2654         cmp     r5, #6
2655         blo     .Lsqueeze_neon_done
2656         strb    r3, [r4], #1
2657         mov     r3, r3, lsr#8
2658         beq     .Lsqueeze_neon_done
2659         strb    r3, [r4], #1
2660
2661 .Lsqueeze_neon_done:
2662         ldmia   sp!, {r4,r5,r6,pc}
2663 .size   SHA3_squeeze_neon,.-SHA3_squeeze_neon
2664 #endif
2665 .byte   75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2666 .align  2
2667 .align  2