CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - secure/lib/libcrypto/aarch64/sha256-armv8.S
Merge ^/vendor/NetBSD/tests/dist@r312294
[FreeBSD/FreeBSD.git] / secure / lib / libcrypto / aarch64 / sha256-armv8.S
1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from sha512-armv8.pl. */
3 #include "arm_arch.h"
4
5 .text
6
7 .globl  sha256_block_data_order
8 .type   sha256_block_data_order,%function
9 .align  6
10 sha256_block_data_order:
11         ldr     x16,.LOPENSSL_armcap_P
12         adr     x17,.LOPENSSL_armcap_P
13         add     x16,x16,x17
14         ldr     w16,[x16]
15         tst     w16,#ARMV8_SHA256
16         b.ne    .Lv8_entry
17         stp     x29,x30,[sp,#-128]!
18         add     x29,sp,#0
19
20         stp     x19,x20,[sp,#16]
21         stp     x21,x22,[sp,#32]
22         stp     x23,x24,[sp,#48]
23         stp     x25,x26,[sp,#64]
24         stp     x27,x28,[sp,#80]
25         sub     sp,sp,#4*4
26
27         ldp     w20,w21,[x0]                            // load context
28         ldp     w22,w23,[x0,#2*4]
29         ldp     w24,w25,[x0,#4*4]
30         add     x2,x1,x2,lsl#6  // end of input
31         ldp     w26,w27,[x0,#6*4]
32         adr     x30,K256
33         stp     x0,x2,[x29,#96]
34
35 .Loop:
36         ldp     w3,w4,[x1],#2*4
37         ldr     w19,[x30],#4                    // *K++
38         eor     w28,w21,w22                             // magic seed
39         str     x1,[x29,#112]
40 #ifndef __ARMEB__
41         rev     w3,w3                   // 0
42 #endif
43         ror     w16,w24,#6
44         add     w27,w27,w19                     // h+=K[i]
45         eor     w6,w24,w24,ror#14
46         and     w17,w25,w24
47         bic     w19,w26,w24
48         add     w27,w27,w3                      // h+=X[i]
49         orr     w17,w17,w19                     // Ch(e,f,g)
50         eor     w19,w20,w21                     // a^b, b^c in next round
51         eor     w16,w16,w6,ror#11       // Sigma1(e)
52         ror     w6,w20,#2
53         add     w27,w27,w17                     // h+=Ch(e,f,g)
54         eor     w17,w20,w20,ror#9
55         add     w27,w27,w16                     // h+=Sigma1(e)
56         and     w28,w28,w19                     // (b^c)&=(a^b)
57         add     w23,w23,w27                     // d+=h
58         eor     w28,w28,w21                     // Maj(a,b,c)
59         eor     w17,w6,w17,ror#13       // Sigma0(a)
60         add     w27,w27,w28                     // h+=Maj(a,b,c)
61         ldr     w28,[x30],#4            // *K++, w19 in next round
62         //add   w27,w27,w17                     // h+=Sigma0(a)
63 #ifndef __ARMEB__
64         rev     w4,w4                   // 1
65 #endif
66         ldp     w5,w6,[x1],#2*4
67         add     w27,w27,w17                     // h+=Sigma0(a)
68         ror     w16,w23,#6
69         add     w26,w26,w28                     // h+=K[i]
70         eor     w7,w23,w23,ror#14
71         and     w17,w24,w23
72         bic     w28,w25,w23
73         add     w26,w26,w4                      // h+=X[i]
74         orr     w17,w17,w28                     // Ch(e,f,g)
75         eor     w28,w27,w20                     // a^b, b^c in next round
76         eor     w16,w16,w7,ror#11       // Sigma1(e)
77         ror     w7,w27,#2
78         add     w26,w26,w17                     // h+=Ch(e,f,g)
79         eor     w17,w27,w27,ror#9
80         add     w26,w26,w16                     // h+=Sigma1(e)
81         and     w19,w19,w28                     // (b^c)&=(a^b)
82         add     w22,w22,w26                     // d+=h
83         eor     w19,w19,w20                     // Maj(a,b,c)
84         eor     w17,w7,w17,ror#13       // Sigma0(a)
85         add     w26,w26,w19                     // h+=Maj(a,b,c)
86         ldr     w19,[x30],#4            // *K++, w28 in next round
87         //add   w26,w26,w17                     // h+=Sigma0(a)
88 #ifndef __ARMEB__
89         rev     w5,w5                   // 2
90 #endif
91         add     w26,w26,w17                     // h+=Sigma0(a)
92         ror     w16,w22,#6
93         add     w25,w25,w19                     // h+=K[i]
94         eor     w8,w22,w22,ror#14
95         and     w17,w23,w22
96         bic     w19,w24,w22
97         add     w25,w25,w5                      // h+=X[i]
98         orr     w17,w17,w19                     // Ch(e,f,g)
99         eor     w19,w26,w27                     // a^b, b^c in next round
100         eor     w16,w16,w8,ror#11       // Sigma1(e)
101         ror     w8,w26,#2
102         add     w25,w25,w17                     // h+=Ch(e,f,g)
103         eor     w17,w26,w26,ror#9
104         add     w25,w25,w16                     // h+=Sigma1(e)
105         and     w28,w28,w19                     // (b^c)&=(a^b)
106         add     w21,w21,w25                     // d+=h
107         eor     w28,w28,w27                     // Maj(a,b,c)
108         eor     w17,w8,w17,ror#13       // Sigma0(a)
109         add     w25,w25,w28                     // h+=Maj(a,b,c)
110         ldr     w28,[x30],#4            // *K++, w19 in next round
111         //add   w25,w25,w17                     // h+=Sigma0(a)
112 #ifndef __ARMEB__
113         rev     w6,w6                   // 3
114 #endif
115         ldp     w7,w8,[x1],#2*4
116         add     w25,w25,w17                     // h+=Sigma0(a)
117         ror     w16,w21,#6
118         add     w24,w24,w28                     // h+=K[i]
119         eor     w9,w21,w21,ror#14
120         and     w17,w22,w21
121         bic     w28,w23,w21
122         add     w24,w24,w6                      // h+=X[i]
123         orr     w17,w17,w28                     // Ch(e,f,g)
124         eor     w28,w25,w26                     // a^b, b^c in next round
125         eor     w16,w16,w9,ror#11       // Sigma1(e)
126         ror     w9,w25,#2
127         add     w24,w24,w17                     // h+=Ch(e,f,g)
128         eor     w17,w25,w25,ror#9
129         add     w24,w24,w16                     // h+=Sigma1(e)
130         and     w19,w19,w28                     // (b^c)&=(a^b)
131         add     w20,w20,w24                     // d+=h
132         eor     w19,w19,w26                     // Maj(a,b,c)
133         eor     w17,w9,w17,ror#13       // Sigma0(a)
134         add     w24,w24,w19                     // h+=Maj(a,b,c)
135         ldr     w19,[x30],#4            // *K++, w28 in next round
136         //add   w24,w24,w17                     // h+=Sigma0(a)
137 #ifndef __ARMEB__
138         rev     w7,w7                   // 4
139 #endif
140         add     w24,w24,w17                     // h+=Sigma0(a)
141         ror     w16,w20,#6
142         add     w23,w23,w19                     // h+=K[i]
143         eor     w10,w20,w20,ror#14
144         and     w17,w21,w20
145         bic     w19,w22,w20
146         add     w23,w23,w7                      // h+=X[i]
147         orr     w17,w17,w19                     // Ch(e,f,g)
148         eor     w19,w24,w25                     // a^b, b^c in next round
149         eor     w16,w16,w10,ror#11      // Sigma1(e)
150         ror     w10,w24,#2
151         add     w23,w23,w17                     // h+=Ch(e,f,g)
152         eor     w17,w24,w24,ror#9
153         add     w23,w23,w16                     // h+=Sigma1(e)
154         and     w28,w28,w19                     // (b^c)&=(a^b)
155         add     w27,w27,w23                     // d+=h
156         eor     w28,w28,w25                     // Maj(a,b,c)
157         eor     w17,w10,w17,ror#13      // Sigma0(a)
158         add     w23,w23,w28                     // h+=Maj(a,b,c)
159         ldr     w28,[x30],#4            // *K++, w19 in next round
160         //add   w23,w23,w17                     // h+=Sigma0(a)
161 #ifndef __ARMEB__
162         rev     w8,w8                   // 5
163 #endif
164         ldp     w9,w10,[x1],#2*4
165         add     w23,w23,w17                     // h+=Sigma0(a)
166         ror     w16,w27,#6
167         add     w22,w22,w28                     // h+=K[i]
168         eor     w11,w27,w27,ror#14
169         and     w17,w20,w27
170         bic     w28,w21,w27
171         add     w22,w22,w8                      // h+=X[i]
172         orr     w17,w17,w28                     // Ch(e,f,g)
173         eor     w28,w23,w24                     // a^b, b^c in next round
174         eor     w16,w16,w11,ror#11      // Sigma1(e)
175         ror     w11,w23,#2
176         add     w22,w22,w17                     // h+=Ch(e,f,g)
177         eor     w17,w23,w23,ror#9
178         add     w22,w22,w16                     // h+=Sigma1(e)
179         and     w19,w19,w28                     // (b^c)&=(a^b)
180         add     w26,w26,w22                     // d+=h
181         eor     w19,w19,w24                     // Maj(a,b,c)
182         eor     w17,w11,w17,ror#13      // Sigma0(a)
183         add     w22,w22,w19                     // h+=Maj(a,b,c)
184         ldr     w19,[x30],#4            // *K++, w28 in next round
185         //add   w22,w22,w17                     // h+=Sigma0(a)
186 #ifndef __ARMEB__
187         rev     w9,w9                   // 6
188 #endif
189         add     w22,w22,w17                     // h+=Sigma0(a)
190         ror     w16,w26,#6
191         add     w21,w21,w19                     // h+=K[i]
192         eor     w12,w26,w26,ror#14
193         and     w17,w27,w26
194         bic     w19,w20,w26
195         add     w21,w21,w9                      // h+=X[i]
196         orr     w17,w17,w19                     // Ch(e,f,g)
197         eor     w19,w22,w23                     // a^b, b^c in next round
198         eor     w16,w16,w12,ror#11      // Sigma1(e)
199         ror     w12,w22,#2
200         add     w21,w21,w17                     // h+=Ch(e,f,g)
201         eor     w17,w22,w22,ror#9
202         add     w21,w21,w16                     // h+=Sigma1(e)
203         and     w28,w28,w19                     // (b^c)&=(a^b)
204         add     w25,w25,w21                     // d+=h
205         eor     w28,w28,w23                     // Maj(a,b,c)
206         eor     w17,w12,w17,ror#13      // Sigma0(a)
207         add     w21,w21,w28                     // h+=Maj(a,b,c)
208         ldr     w28,[x30],#4            // *K++, w19 in next round
209         //add   w21,w21,w17                     // h+=Sigma0(a)
210 #ifndef __ARMEB__
211         rev     w10,w10                 // 7
212 #endif
213         ldp     w11,w12,[x1],#2*4
214         add     w21,w21,w17                     // h+=Sigma0(a)
215         ror     w16,w25,#6
216         add     w20,w20,w28                     // h+=K[i]
217         eor     w13,w25,w25,ror#14
218         and     w17,w26,w25
219         bic     w28,w27,w25
220         add     w20,w20,w10                     // h+=X[i]
221         orr     w17,w17,w28                     // Ch(e,f,g)
222         eor     w28,w21,w22                     // a^b, b^c in next round
223         eor     w16,w16,w13,ror#11      // Sigma1(e)
224         ror     w13,w21,#2
225         add     w20,w20,w17                     // h+=Ch(e,f,g)
226         eor     w17,w21,w21,ror#9
227         add     w20,w20,w16                     // h+=Sigma1(e)
228         and     w19,w19,w28                     // (b^c)&=(a^b)
229         add     w24,w24,w20                     // d+=h
230         eor     w19,w19,w22                     // Maj(a,b,c)
231         eor     w17,w13,w17,ror#13      // Sigma0(a)
232         add     w20,w20,w19                     // h+=Maj(a,b,c)
233         ldr     w19,[x30],#4            // *K++, w28 in next round
234         //add   w20,w20,w17                     // h+=Sigma0(a)
235 #ifndef __ARMEB__
236         rev     w11,w11                 // 8
237 #endif
238         add     w20,w20,w17                     // h+=Sigma0(a)
239         ror     w16,w24,#6
240         add     w27,w27,w19                     // h+=K[i]
241         eor     w14,w24,w24,ror#14
242         and     w17,w25,w24
243         bic     w19,w26,w24
244         add     w27,w27,w11                     // h+=X[i]
245         orr     w17,w17,w19                     // Ch(e,f,g)
246         eor     w19,w20,w21                     // a^b, b^c in next round
247         eor     w16,w16,w14,ror#11      // Sigma1(e)
248         ror     w14,w20,#2
249         add     w27,w27,w17                     // h+=Ch(e,f,g)
250         eor     w17,w20,w20,ror#9
251         add     w27,w27,w16                     // h+=Sigma1(e)
252         and     w28,w28,w19                     // (b^c)&=(a^b)
253         add     w23,w23,w27                     // d+=h
254         eor     w28,w28,w21                     // Maj(a,b,c)
255         eor     w17,w14,w17,ror#13      // Sigma0(a)
256         add     w27,w27,w28                     // h+=Maj(a,b,c)
257         ldr     w28,[x30],#4            // *K++, w19 in next round
258         //add   w27,w27,w17                     // h+=Sigma0(a)
259 #ifndef __ARMEB__
260         rev     w12,w12                 // 9
261 #endif
262         ldp     w13,w14,[x1],#2*4
263         add     w27,w27,w17                     // h+=Sigma0(a)
264         ror     w16,w23,#6
265         add     w26,w26,w28                     // h+=K[i]
266         eor     w15,w23,w23,ror#14
267         and     w17,w24,w23
268         bic     w28,w25,w23
269         add     w26,w26,w12                     // h+=X[i]
270         orr     w17,w17,w28                     // Ch(e,f,g)
271         eor     w28,w27,w20                     // a^b, b^c in next round
272         eor     w16,w16,w15,ror#11      // Sigma1(e)
273         ror     w15,w27,#2
274         add     w26,w26,w17                     // h+=Ch(e,f,g)
275         eor     w17,w27,w27,ror#9
276         add     w26,w26,w16                     // h+=Sigma1(e)
277         and     w19,w19,w28                     // (b^c)&=(a^b)
278         add     w22,w22,w26                     // d+=h
279         eor     w19,w19,w20                     // Maj(a,b,c)
280         eor     w17,w15,w17,ror#13      // Sigma0(a)
281         add     w26,w26,w19                     // h+=Maj(a,b,c)
282         ldr     w19,[x30],#4            // *K++, w28 in next round
283         //add   w26,w26,w17                     // h+=Sigma0(a)
284 #ifndef __ARMEB__
285         rev     w13,w13                 // 10
286 #endif
287         add     w26,w26,w17                     // h+=Sigma0(a)
288         ror     w16,w22,#6
289         add     w25,w25,w19                     // h+=K[i]
290         eor     w0,w22,w22,ror#14
291         and     w17,w23,w22
292         bic     w19,w24,w22
293         add     w25,w25,w13                     // h+=X[i]
294         orr     w17,w17,w19                     // Ch(e,f,g)
295         eor     w19,w26,w27                     // a^b, b^c in next round
296         eor     w16,w16,w0,ror#11       // Sigma1(e)
297         ror     w0,w26,#2
298         add     w25,w25,w17                     // h+=Ch(e,f,g)
299         eor     w17,w26,w26,ror#9
300         add     w25,w25,w16                     // h+=Sigma1(e)
301         and     w28,w28,w19                     // (b^c)&=(a^b)
302         add     w21,w21,w25                     // d+=h
303         eor     w28,w28,w27                     // Maj(a,b,c)
304         eor     w17,w0,w17,ror#13       // Sigma0(a)
305         add     w25,w25,w28                     // h+=Maj(a,b,c)
306         ldr     w28,[x30],#4            // *K++, w19 in next round
307         //add   w25,w25,w17                     // h+=Sigma0(a)
308 #ifndef __ARMEB__
309         rev     w14,w14                 // 11
310 #endif
311         ldp     w15,w0,[x1],#2*4
312         add     w25,w25,w17                     // h+=Sigma0(a)
313         str     w6,[sp,#12]
314         ror     w16,w21,#6
315         add     w24,w24,w28                     // h+=K[i]
316         eor     w6,w21,w21,ror#14
317         and     w17,w22,w21
318         bic     w28,w23,w21
319         add     w24,w24,w14                     // h+=X[i]
320         orr     w17,w17,w28                     // Ch(e,f,g)
321         eor     w28,w25,w26                     // a^b, b^c in next round
322         eor     w16,w16,w6,ror#11       // Sigma1(e)
323         ror     w6,w25,#2
324         add     w24,w24,w17                     // h+=Ch(e,f,g)
325         eor     w17,w25,w25,ror#9
326         add     w24,w24,w16                     // h+=Sigma1(e)
327         and     w19,w19,w28                     // (b^c)&=(a^b)
328         add     w20,w20,w24                     // d+=h
329         eor     w19,w19,w26                     // Maj(a,b,c)
330         eor     w17,w6,w17,ror#13       // Sigma0(a)
331         add     w24,w24,w19                     // h+=Maj(a,b,c)
332         ldr     w19,[x30],#4            // *K++, w28 in next round
333         //add   w24,w24,w17                     // h+=Sigma0(a)
334 #ifndef __ARMEB__
335         rev     w15,w15                 // 12
336 #endif
337         add     w24,w24,w17                     // h+=Sigma0(a)
338         str     w7,[sp,#0]
339         ror     w16,w20,#6
340         add     w23,w23,w19                     // h+=K[i]
341         eor     w7,w20,w20,ror#14
342         and     w17,w21,w20
343         bic     w19,w22,w20
344         add     w23,w23,w15                     // h+=X[i]
345         orr     w17,w17,w19                     // Ch(e,f,g)
346         eor     w19,w24,w25                     // a^b, b^c in next round
347         eor     w16,w16,w7,ror#11       // Sigma1(e)
348         ror     w7,w24,#2
349         add     w23,w23,w17                     // h+=Ch(e,f,g)
350         eor     w17,w24,w24,ror#9
351         add     w23,w23,w16                     // h+=Sigma1(e)
352         and     w28,w28,w19                     // (b^c)&=(a^b)
353         add     w27,w27,w23                     // d+=h
354         eor     w28,w28,w25                     // Maj(a,b,c)
355         eor     w17,w7,w17,ror#13       // Sigma0(a)
356         add     w23,w23,w28                     // h+=Maj(a,b,c)
357         ldr     w28,[x30],#4            // *K++, w19 in next round
358         //add   w23,w23,w17                     // h+=Sigma0(a)
359 #ifndef __ARMEB__
360         rev     w0,w0                   // 13
361 #endif
362         ldp     w1,w2,[x1]
363         add     w23,w23,w17                     // h+=Sigma0(a)
364         str     w8,[sp,#4]
365         ror     w16,w27,#6
366         add     w22,w22,w28                     // h+=K[i]
367         eor     w8,w27,w27,ror#14
368         and     w17,w20,w27
369         bic     w28,w21,w27
370         add     w22,w22,w0                      // h+=X[i]
371         orr     w17,w17,w28                     // Ch(e,f,g)
372         eor     w28,w23,w24                     // a^b, b^c in next round
373         eor     w16,w16,w8,ror#11       // Sigma1(e)
374         ror     w8,w23,#2
375         add     w22,w22,w17                     // h+=Ch(e,f,g)
376         eor     w17,w23,w23,ror#9
377         add     w22,w22,w16                     // h+=Sigma1(e)
378         and     w19,w19,w28                     // (b^c)&=(a^b)
379         add     w26,w26,w22                     // d+=h
380         eor     w19,w19,w24                     // Maj(a,b,c)
381         eor     w17,w8,w17,ror#13       // Sigma0(a)
382         add     w22,w22,w19                     // h+=Maj(a,b,c)
383         ldr     w19,[x30],#4            // *K++, w28 in next round
384         //add   w22,w22,w17                     // h+=Sigma0(a)
385 #ifndef __ARMEB__
386         rev     w1,w1                   // 14
387 #endif
388         ldr     w6,[sp,#12]
389         add     w22,w22,w17                     // h+=Sigma0(a)
390         str     w9,[sp,#8]
391         ror     w16,w26,#6
392         add     w21,w21,w19                     // h+=K[i]
393         eor     w9,w26,w26,ror#14
394         and     w17,w27,w26
395         bic     w19,w20,w26
396         add     w21,w21,w1                      // h+=X[i]
397         orr     w17,w17,w19                     // Ch(e,f,g)
398         eor     w19,w22,w23                     // a^b, b^c in next round
399         eor     w16,w16,w9,ror#11       // Sigma1(e)
400         ror     w9,w22,#2
401         add     w21,w21,w17                     // h+=Ch(e,f,g)
402         eor     w17,w22,w22,ror#9
403         add     w21,w21,w16                     // h+=Sigma1(e)
404         and     w28,w28,w19                     // (b^c)&=(a^b)
405         add     w25,w25,w21                     // d+=h
406         eor     w28,w28,w23                     // Maj(a,b,c)
407         eor     w17,w9,w17,ror#13       // Sigma0(a)
408         add     w21,w21,w28                     // h+=Maj(a,b,c)
409         ldr     w28,[x30],#4            // *K++, w19 in next round
410         //add   w21,w21,w17                     // h+=Sigma0(a)
411 #ifndef __ARMEB__
412         rev     w2,w2                   // 15
413 #endif
414         ldr     w7,[sp,#0]
415         add     w21,w21,w17                     // h+=Sigma0(a)
416         str     w10,[sp,#12]
417         ror     w16,w25,#6
418         add     w20,w20,w28                     // h+=K[i]
419         ror     w9,w4,#7
420         and     w17,w26,w25
421         ror     w8,w1,#17
422         bic     w28,w27,w25
423         ror     w10,w21,#2
424         add     w20,w20,w2                      // h+=X[i]
425         eor     w16,w16,w25,ror#11
426         eor     w9,w9,w4,ror#18
427         orr     w17,w17,w28                     // Ch(e,f,g)
428         eor     w28,w21,w22                     // a^b, b^c in next round
429         eor     w16,w16,w25,ror#25      // Sigma1(e)
430         eor     w10,w10,w21,ror#13
431         add     w20,w20,w17                     // h+=Ch(e,f,g)
432         and     w19,w19,w28                     // (b^c)&=(a^b)
433         eor     w8,w8,w1,ror#19
434         eor     w9,w9,w4,lsr#3  // sigma0(X[i+1])
435         add     w20,w20,w16                     // h+=Sigma1(e)
436         eor     w19,w19,w22                     // Maj(a,b,c)
437         eor     w17,w10,w21,ror#22      // Sigma0(a)
438         eor     w8,w8,w1,lsr#10 // sigma1(X[i+14])
439         add     w3,w3,w12
440         add     w24,w24,w20                     // d+=h
441         add     w20,w20,w19                     // h+=Maj(a,b,c)
442         ldr     w19,[x30],#4            // *K++, w28 in next round
443         add     w3,w3,w9
444         add     w20,w20,w17                     // h+=Sigma0(a)
445         add     w3,w3,w8
446 .Loop_16_xx:
447         ldr     w8,[sp,#4]
448         str     w11,[sp,#0]
449         ror     w16,w24,#6
450         add     w27,w27,w19                     // h+=K[i]
451         ror     w10,w5,#7
452         and     w17,w25,w24
453         ror     w9,w2,#17
454         bic     w19,w26,w24
455         ror     w11,w20,#2
456         add     w27,w27,w3                      // h+=X[i]
457         eor     w16,w16,w24,ror#11
458         eor     w10,w10,w5,ror#18
459         orr     w17,w17,w19                     // Ch(e,f,g)
460         eor     w19,w20,w21                     // a^b, b^c in next round
461         eor     w16,w16,w24,ror#25      // Sigma1(e)
462         eor     w11,w11,w20,ror#13
463         add     w27,w27,w17                     // h+=Ch(e,f,g)
464         and     w28,w28,w19                     // (b^c)&=(a^b)
465         eor     w9,w9,w2,ror#19
466         eor     w10,w10,w5,lsr#3        // sigma0(X[i+1])
467         add     w27,w27,w16                     // h+=Sigma1(e)
468         eor     w28,w28,w21                     // Maj(a,b,c)
469         eor     w17,w11,w20,ror#22      // Sigma0(a)
470         eor     w9,w9,w2,lsr#10 // sigma1(X[i+14])
471         add     w4,w4,w13
472         add     w23,w23,w27                     // d+=h
473         add     w27,w27,w28                     // h+=Maj(a,b,c)
474         ldr     w28,[x30],#4            // *K++, w19 in next round
475         add     w4,w4,w10
476         add     w27,w27,w17                     // h+=Sigma0(a)
477         add     w4,w4,w9
478         ldr     w9,[sp,#8]
479         str     w12,[sp,#4]
480         ror     w16,w23,#6
481         add     w26,w26,w28                     // h+=K[i]
482         ror     w11,w6,#7
483         and     w17,w24,w23
484         ror     w10,w3,#17
485         bic     w28,w25,w23
486         ror     w12,w27,#2
487         add     w26,w26,w4                      // h+=X[i]
488         eor     w16,w16,w23,ror#11
489         eor     w11,w11,w6,ror#18
490         orr     w17,w17,w28                     // Ch(e,f,g)
491         eor     w28,w27,w20                     // a^b, b^c in next round
492         eor     w16,w16,w23,ror#25      // Sigma1(e)
493         eor     w12,w12,w27,ror#13
494         add     w26,w26,w17                     // h+=Ch(e,f,g)
495         and     w19,w19,w28                     // (b^c)&=(a^b)
496         eor     w10,w10,w3,ror#19
497         eor     w11,w11,w6,lsr#3        // sigma0(X[i+1])
498         add     w26,w26,w16                     // h+=Sigma1(e)
499         eor     w19,w19,w20                     // Maj(a,b,c)
500         eor     w17,w12,w27,ror#22      // Sigma0(a)
501         eor     w10,w10,w3,lsr#10       // sigma1(X[i+14])
502         add     w5,w5,w14
503         add     w22,w22,w26                     // d+=h
504         add     w26,w26,w19                     // h+=Maj(a,b,c)
505         ldr     w19,[x30],#4            // *K++, w28 in next round
506         add     w5,w5,w11
507         add     w26,w26,w17                     // h+=Sigma0(a)
508         add     w5,w5,w10
509         ldr     w10,[sp,#12]
510         str     w13,[sp,#8]
511         ror     w16,w22,#6
512         add     w25,w25,w19                     // h+=K[i]
513         ror     w12,w7,#7
514         and     w17,w23,w22
515         ror     w11,w4,#17
516         bic     w19,w24,w22
517         ror     w13,w26,#2
518         add     w25,w25,w5                      // h+=X[i]
519         eor     w16,w16,w22,ror#11
520         eor     w12,w12,w7,ror#18
521         orr     w17,w17,w19                     // Ch(e,f,g)
522         eor     w19,w26,w27                     // a^b, b^c in next round
523         eor     w16,w16,w22,ror#25      // Sigma1(e)
524         eor     w13,w13,w26,ror#13
525         add     w25,w25,w17                     // h+=Ch(e,f,g)
526         and     w28,w28,w19                     // (b^c)&=(a^b)
527         eor     w11,w11,w4,ror#19
528         eor     w12,w12,w7,lsr#3        // sigma0(X[i+1])
529         add     w25,w25,w16                     // h+=Sigma1(e)
530         eor     w28,w28,w27                     // Maj(a,b,c)
531         eor     w17,w13,w26,ror#22      // Sigma0(a)
532         eor     w11,w11,w4,lsr#10       // sigma1(X[i+14])
533         add     w6,w6,w15
534         add     w21,w21,w25                     // d+=h
535         add     w25,w25,w28                     // h+=Maj(a,b,c)
536         ldr     w28,[x30],#4            // *K++, w19 in next round
537         add     w6,w6,w12
538         add     w25,w25,w17                     // h+=Sigma0(a)
539         add     w6,w6,w11
540         ldr     w11,[sp,#0]
541         str     w14,[sp,#12]
542         ror     w16,w21,#6
543         add     w24,w24,w28                     // h+=K[i]
544         ror     w13,w8,#7
545         and     w17,w22,w21
546         ror     w12,w5,#17
547         bic     w28,w23,w21
548         ror     w14,w25,#2
549         add     w24,w24,w6                      // h+=X[i]
550         eor     w16,w16,w21,ror#11
551         eor     w13,w13,w8,ror#18
552         orr     w17,w17,w28                     // Ch(e,f,g)
553         eor     w28,w25,w26                     // a^b, b^c in next round
554         eor     w16,w16,w21,ror#25      // Sigma1(e)
555         eor     w14,w14,w25,ror#13
556         add     w24,w24,w17                     // h+=Ch(e,f,g)
557         and     w19,w19,w28                     // (b^c)&=(a^b)
558         eor     w12,w12,w5,ror#19
559         eor     w13,w13,w8,lsr#3        // sigma0(X[i+1])
560         add     w24,w24,w16                     // h+=Sigma1(e)
561         eor     w19,w19,w26                     // Maj(a,b,c)
562         eor     w17,w14,w25,ror#22      // Sigma0(a)
563         eor     w12,w12,w5,lsr#10       // sigma1(X[i+14])
564         add     w7,w7,w0
565         add     w20,w20,w24                     // d+=h
566         add     w24,w24,w19                     // h+=Maj(a,b,c)
567         ldr     w19,[x30],#4            // *K++, w28 in next round
568         add     w7,w7,w13
569         add     w24,w24,w17                     // h+=Sigma0(a)
570         add     w7,w7,w12
571         ldr     w12,[sp,#4]
572         str     w15,[sp,#0]
573         ror     w16,w20,#6
574         add     w23,w23,w19                     // h+=K[i]
575         ror     w14,w9,#7
576         and     w17,w21,w20
577         ror     w13,w6,#17
578         bic     w19,w22,w20
579         ror     w15,w24,#2
580         add     w23,w23,w7                      // h+=X[i]
581         eor     w16,w16,w20,ror#11
582         eor     w14,w14,w9,ror#18
583         orr     w17,w17,w19                     // Ch(e,f,g)
584         eor     w19,w24,w25                     // a^b, b^c in next round
585         eor     w16,w16,w20,ror#25      // Sigma1(e)
586         eor     w15,w15,w24,ror#13
587         add     w23,w23,w17                     // h+=Ch(e,f,g)
588         and     w28,w28,w19                     // (b^c)&=(a^b)
589         eor     w13,w13,w6,ror#19
590         eor     w14,w14,w9,lsr#3        // sigma0(X[i+1])
591         add     w23,w23,w16                     // h+=Sigma1(e)
592         eor     w28,w28,w25                     // Maj(a,b,c)
593         eor     w17,w15,w24,ror#22      // Sigma0(a)
594         eor     w13,w13,w6,lsr#10       // sigma1(X[i+14])
595         add     w8,w8,w1
596         add     w27,w27,w23                     // d+=h
597         add     w23,w23,w28                     // h+=Maj(a,b,c)
598         ldr     w28,[x30],#4            // *K++, w19 in next round
599         add     w8,w8,w14
600         add     w23,w23,w17                     // h+=Sigma0(a)
601         add     w8,w8,w13
602         ldr     w13,[sp,#8]
603         str     w0,[sp,#4]
604         ror     w16,w27,#6
605         add     w22,w22,w28                     // h+=K[i]
606         ror     w15,w10,#7
607         and     w17,w20,w27
608         ror     w14,w7,#17
609         bic     w28,w21,w27
610         ror     w0,w23,#2
611         add     w22,w22,w8                      // h+=X[i]
612         eor     w16,w16,w27,ror#11
613         eor     w15,w15,w10,ror#18
614         orr     w17,w17,w28                     // Ch(e,f,g)
615         eor     w28,w23,w24                     // a^b, b^c in next round
616         eor     w16,w16,w27,ror#25      // Sigma1(e)
617         eor     w0,w0,w23,ror#13
618         add     w22,w22,w17                     // h+=Ch(e,f,g)
619         and     w19,w19,w28                     // (b^c)&=(a^b)
620         eor     w14,w14,w7,ror#19
621         eor     w15,w15,w10,lsr#3       // sigma0(X[i+1])
622         add     w22,w22,w16                     // h+=Sigma1(e)
623         eor     w19,w19,w24                     // Maj(a,b,c)
624         eor     w17,w0,w23,ror#22       // Sigma0(a)
625         eor     w14,w14,w7,lsr#10       // sigma1(X[i+14])
626         add     w9,w9,w2
627         add     w26,w26,w22                     // d+=h
628         add     w22,w22,w19                     // h+=Maj(a,b,c)
629         ldr     w19,[x30],#4            // *K++, w28 in next round
630         add     w9,w9,w15
631         add     w22,w22,w17                     // h+=Sigma0(a)
632         add     w9,w9,w14
633         ldr     w14,[sp,#12]
634         str     w1,[sp,#8]
635         ror     w16,w26,#6
636         add     w21,w21,w19                     // h+=K[i]
637         ror     w0,w11,#7
638         and     w17,w27,w26
639         ror     w15,w8,#17
640         bic     w19,w20,w26
641         ror     w1,w22,#2
642         add     w21,w21,w9                      // h+=X[i]
643         eor     w16,w16,w26,ror#11
644         eor     w0,w0,w11,ror#18
645         orr     w17,w17,w19                     // Ch(e,f,g)
646         eor     w19,w22,w23                     // a^b, b^c in next round
647         eor     w16,w16,w26,ror#25      // Sigma1(e)
648         eor     w1,w1,w22,ror#13
649         add     w21,w21,w17                     // h+=Ch(e,f,g)
650         and     w28,w28,w19                     // (b^c)&=(a^b)
651         eor     w15,w15,w8,ror#19
652         eor     w0,w0,w11,lsr#3 // sigma0(X[i+1])
653         add     w21,w21,w16                     // h+=Sigma1(e)
654         eor     w28,w28,w23                     // Maj(a,b,c)
655         eor     w17,w1,w22,ror#22       // Sigma0(a)
656         eor     w15,w15,w8,lsr#10       // sigma1(X[i+14])
657         add     w10,w10,w3
658         add     w25,w25,w21                     // d+=h
659         add     w21,w21,w28                     // h+=Maj(a,b,c)
660         ldr     w28,[x30],#4            // *K++, w19 in next round
661         add     w10,w10,w0
662         add     w21,w21,w17                     // h+=Sigma0(a)
663         add     w10,w10,w15
664         ldr     w15,[sp,#0]
665         str     w2,[sp,#12]
666         ror     w16,w25,#6
667         add     w20,w20,w28                     // h+=K[i]
668         ror     w1,w12,#7
669         and     w17,w26,w25
670         ror     w0,w9,#17
671         bic     w28,w27,w25
672         ror     w2,w21,#2
673         add     w20,w20,w10                     // h+=X[i]
674         eor     w16,w16,w25,ror#11
675         eor     w1,w1,w12,ror#18
676         orr     w17,w17,w28                     // Ch(e,f,g)
677         eor     w28,w21,w22                     // a^b, b^c in next round
678         eor     w16,w16,w25,ror#25      // Sigma1(e)
679         eor     w2,w2,w21,ror#13
680         add     w20,w20,w17                     // h+=Ch(e,f,g)
681         and     w19,w19,w28                     // (b^c)&=(a^b)
682         eor     w0,w0,w9,ror#19
683         eor     w1,w1,w12,lsr#3 // sigma0(X[i+1])
684         add     w20,w20,w16                     // h+=Sigma1(e)
685         eor     w19,w19,w22                     // Maj(a,b,c)
686         eor     w17,w2,w21,ror#22       // Sigma0(a)
687         eor     w0,w0,w9,lsr#10 // sigma1(X[i+14])
688         add     w11,w11,w4
689         add     w24,w24,w20                     // d+=h
690         add     w20,w20,w19                     // h+=Maj(a,b,c)
691         ldr     w19,[x30],#4            // *K++, w28 in next round
692         add     w11,w11,w1
693         add     w20,w20,w17                     // h+=Sigma0(a)
694         add     w11,w11,w0
695         ldr     w0,[sp,#4]
696         str     w3,[sp,#0]
697         ror     w16,w24,#6
698         add     w27,w27,w19                     // h+=K[i]
699         ror     w2,w13,#7
700         and     w17,w25,w24
701         ror     w1,w10,#17
702         bic     w19,w26,w24
703         ror     w3,w20,#2
704         add     w27,w27,w11                     // h+=X[i]
705         eor     w16,w16,w24,ror#11
706         eor     w2,w2,w13,ror#18
707         orr     w17,w17,w19                     // Ch(e,f,g)
708         eor     w19,w20,w21                     // a^b, b^c in next round
709         eor     w16,w16,w24,ror#25      // Sigma1(e)
710         eor     w3,w3,w20,ror#13
711         add     w27,w27,w17                     // h+=Ch(e,f,g)
712         and     w28,w28,w19                     // (b^c)&=(a^b)
713         eor     w1,w1,w10,ror#19
714         eor     w2,w2,w13,lsr#3 // sigma0(X[i+1])
715         add     w27,w27,w16                     // h+=Sigma1(e)
716         eor     w28,w28,w21                     // Maj(a,b,c)
717         eor     w17,w3,w20,ror#22       // Sigma0(a)
718         eor     w1,w1,w10,lsr#10        // sigma1(X[i+14])
719         add     w12,w12,w5
720         add     w23,w23,w27                     // d+=h
721         add     w27,w27,w28                     // h+=Maj(a,b,c)
722         ldr     w28,[x30],#4            // *K++, w19 in next round
723         add     w12,w12,w2
724         add     w27,w27,w17                     // h+=Sigma0(a)
725         add     w12,w12,w1
726         ldr     w1,[sp,#8]
727         str     w4,[sp,#4]
728         ror     w16,w23,#6
729         add     w26,w26,w28                     // h+=K[i]
730         ror     w3,w14,#7
731         and     w17,w24,w23
732         ror     w2,w11,#17
733         bic     w28,w25,w23
734         ror     w4,w27,#2
735         add     w26,w26,w12                     // h+=X[i]
736         eor     w16,w16,w23,ror#11
737         eor     w3,w3,w14,ror#18
738         orr     w17,w17,w28                     // Ch(e,f,g)
739         eor     w28,w27,w20                     // a^b, b^c in next round
740         eor     w16,w16,w23,ror#25      // Sigma1(e)
741         eor     w4,w4,w27,ror#13
742         add     w26,w26,w17                     // h+=Ch(e,f,g)
743         and     w19,w19,w28                     // (b^c)&=(a^b)
744         eor     w2,w2,w11,ror#19
745         eor     w3,w3,w14,lsr#3 // sigma0(X[i+1])
746         add     w26,w26,w16                     // h+=Sigma1(e)
747         eor     w19,w19,w20                     // Maj(a,b,c)
748         eor     w17,w4,w27,ror#22       // Sigma0(a)
749         eor     w2,w2,w11,lsr#10        // sigma1(X[i+14])
750         add     w13,w13,w6
751         add     w22,w22,w26                     // d+=h
752         add     w26,w26,w19                     // h+=Maj(a,b,c)
753         ldr     w19,[x30],#4            // *K++, w28 in next round
754         add     w13,w13,w3
755         add     w26,w26,w17                     // h+=Sigma0(a)
756         add     w13,w13,w2
757         ldr     w2,[sp,#12]
758         str     w5,[sp,#8]
759         ror     w16,w22,#6
760         add     w25,w25,w19                     // h+=K[i]
761         ror     w4,w15,#7
762         and     w17,w23,w22
763         ror     w3,w12,#17
764         bic     w19,w24,w22
765         ror     w5,w26,#2
766         add     w25,w25,w13                     // h+=X[i]
767         eor     w16,w16,w22,ror#11
768         eor     w4,w4,w15,ror#18
769         orr     w17,w17,w19                     // Ch(e,f,g)
770         eor     w19,w26,w27                     // a^b, b^c in next round
771         eor     w16,w16,w22,ror#25      // Sigma1(e)
772         eor     w5,w5,w26,ror#13
773         add     w25,w25,w17                     // h+=Ch(e,f,g)
774         and     w28,w28,w19                     // (b^c)&=(a^b)
775         eor     w3,w3,w12,ror#19
776         eor     w4,w4,w15,lsr#3 // sigma0(X[i+1])
777         add     w25,w25,w16                     // h+=Sigma1(e)
778         eor     w28,w28,w27                     // Maj(a,b,c)
779         eor     w17,w5,w26,ror#22       // Sigma0(a)
780         eor     w3,w3,w12,lsr#10        // sigma1(X[i+14])
781         add     w14,w14,w7
782         add     w21,w21,w25                     // d+=h
783         add     w25,w25,w28                     // h+=Maj(a,b,c)
784         ldr     w28,[x30],#4            // *K++, w19 in next round
785         add     w14,w14,w4
786         add     w25,w25,w17                     // h+=Sigma0(a)
787         add     w14,w14,w3
788         ldr     w3,[sp,#0]
789         str     w6,[sp,#12]
790         ror     w16,w21,#6
791         add     w24,w24,w28                     // h+=K[i]
792         ror     w5,w0,#7
793         and     w17,w22,w21
794         ror     w4,w13,#17
795         bic     w28,w23,w21
796         ror     w6,w25,#2
797         add     w24,w24,w14                     // h+=X[i]
798         eor     w16,w16,w21,ror#11
799         eor     w5,w5,w0,ror#18
800         orr     w17,w17,w28                     // Ch(e,f,g)
801         eor     w28,w25,w26                     // a^b, b^c in next round
802         eor     w16,w16,w21,ror#25      // Sigma1(e)
803         eor     w6,w6,w25,ror#13
804         add     w24,w24,w17                     // h+=Ch(e,f,g)
805         and     w19,w19,w28                     // (b^c)&=(a^b)
806         eor     w4,w4,w13,ror#19
807         eor     w5,w5,w0,lsr#3  // sigma0(X[i+1])
808         add     w24,w24,w16                     // h+=Sigma1(e)
809         eor     w19,w19,w26                     // Maj(a,b,c)
810         eor     w17,w6,w25,ror#22       // Sigma0(a)
811         eor     w4,w4,w13,lsr#10        // sigma1(X[i+14])
812         add     w15,w15,w8
813         add     w20,w20,w24                     // d+=h
814         add     w24,w24,w19                     // h+=Maj(a,b,c)
815         ldr     w19,[x30],#4            // *K++, w28 in next round
816         add     w15,w15,w5
817         add     w24,w24,w17                     // h+=Sigma0(a)
818         add     w15,w15,w4
819         ldr     w4,[sp,#4]
820         str     w7,[sp,#0]
821         ror     w16,w20,#6
822         add     w23,w23,w19                     // h+=K[i]
823         ror     w6,w1,#7
824         and     w17,w21,w20
825         ror     w5,w14,#17
826         bic     w19,w22,w20
827         ror     w7,w24,#2
828         add     w23,w23,w15                     // h+=X[i]
829         eor     w16,w16,w20,ror#11
830         eor     w6,w6,w1,ror#18
831         orr     w17,w17,w19                     // Ch(e,f,g)
832         eor     w19,w24,w25                     // a^b, b^c in next round
833         eor     w16,w16,w20,ror#25      // Sigma1(e)
834         eor     w7,w7,w24,ror#13
835         add     w23,w23,w17                     // h+=Ch(e,f,g)
836         and     w28,w28,w19                     // (b^c)&=(a^b)
837         eor     w5,w5,w14,ror#19
838         eor     w6,w6,w1,lsr#3  // sigma0(X[i+1])
839         add     w23,w23,w16                     // h+=Sigma1(e)
840         eor     w28,w28,w25                     // Maj(a,b,c)
841         eor     w17,w7,w24,ror#22       // Sigma0(a)
842         eor     w5,w5,w14,lsr#10        // sigma1(X[i+14])
843         add     w0,w0,w9
844         add     w27,w27,w23                     // d+=h
845         add     w23,w23,w28                     // h+=Maj(a,b,c)
846         ldr     w28,[x30],#4            // *K++, w19 in next round
847         add     w0,w0,w6
848         add     w23,w23,w17                     // h+=Sigma0(a)
849         add     w0,w0,w5
850         ldr     w5,[sp,#8]
851         str     w8,[sp,#4]
852         ror     w16,w27,#6
853         add     w22,w22,w28                     // h+=K[i]
854         ror     w7,w2,#7
855         and     w17,w20,w27
856         ror     w6,w15,#17
857         bic     w28,w21,w27
858         ror     w8,w23,#2
859         add     w22,w22,w0                      // h+=X[i]
860         eor     w16,w16,w27,ror#11
861         eor     w7,w7,w2,ror#18
862         orr     w17,w17,w28                     // Ch(e,f,g)
863         eor     w28,w23,w24                     // a^b, b^c in next round
864         eor     w16,w16,w27,ror#25      // Sigma1(e)
865         eor     w8,w8,w23,ror#13
866         add     w22,w22,w17                     // h+=Ch(e,f,g)
867         and     w19,w19,w28                     // (b^c)&=(a^b)
868         eor     w6,w6,w15,ror#19
869         eor     w7,w7,w2,lsr#3  // sigma0(X[i+1])
870         add     w22,w22,w16                     // h+=Sigma1(e)
871         eor     w19,w19,w24                     // Maj(a,b,c)
872         eor     w17,w8,w23,ror#22       // Sigma0(a)
873         eor     w6,w6,w15,lsr#10        // sigma1(X[i+14])
874         add     w1,w1,w10
875         add     w26,w26,w22                     // d+=h
876         add     w22,w22,w19                     // h+=Maj(a,b,c)
877         ldr     w19,[x30],#4            // *K++, w28 in next round
878         add     w1,w1,w7
879         add     w22,w22,w17                     // h+=Sigma0(a)
880         add     w1,w1,w6
881         ldr     w6,[sp,#12]
882         str     w9,[sp,#8]
883         ror     w16,w26,#6
884         add     w21,w21,w19                     // h+=K[i]
885         ror     w8,w3,#7
886         and     w17,w27,w26
887         ror     w7,w0,#17
888         bic     w19,w20,w26
889         ror     w9,w22,#2
890         add     w21,w21,w1                      // h+=X[i]
891         eor     w16,w16,w26,ror#11
892         eor     w8,w8,w3,ror#18
893         orr     w17,w17,w19                     // Ch(e,f,g)
894         eor     w19,w22,w23                     // a^b, b^c in next round
895         eor     w16,w16,w26,ror#25      // Sigma1(e)
896         eor     w9,w9,w22,ror#13
897         add     w21,w21,w17                     // h+=Ch(e,f,g)
898         and     w28,w28,w19                     // (b^c)&=(a^b)
899         eor     w7,w7,w0,ror#19
900         eor     w8,w8,w3,lsr#3  // sigma0(X[i+1])
901         add     w21,w21,w16                     // h+=Sigma1(e)
902         eor     w28,w28,w23                     // Maj(a,b,c)
903         eor     w17,w9,w22,ror#22       // Sigma0(a)
904         eor     w7,w7,w0,lsr#10 // sigma1(X[i+14])
905         add     w2,w2,w11
906         add     w25,w25,w21                     // d+=h
907         add     w21,w21,w28                     // h+=Maj(a,b,c)
908         ldr     w28,[x30],#4            // *K++, w19 in next round
909         add     w2,w2,w8
910         add     w21,w21,w17                     // h+=Sigma0(a)
911         add     w2,w2,w7
912         ldr     w7,[sp,#0]
913         str     w10,[sp,#12]
914         ror     w16,w25,#6
915         add     w20,w20,w28                     // h+=K[i]
916         ror     w9,w4,#7
917         and     w17,w26,w25
918         ror     w8,w1,#17
919         bic     w28,w27,w25
920         ror     w10,w21,#2
921         add     w20,w20,w2                      // h+=X[i]
922         eor     w16,w16,w25,ror#11
923         eor     w9,w9,w4,ror#18
924         orr     w17,w17,w28                     // Ch(e,f,g)
925         eor     w28,w21,w22                     // a^b, b^c in next round
926         eor     w16,w16,w25,ror#25      // Sigma1(e)
927         eor     w10,w10,w21,ror#13
928         add     w20,w20,w17                     // h+=Ch(e,f,g)
929         and     w19,w19,w28                     // (b^c)&=(a^b)
930         eor     w8,w8,w1,ror#19
931         eor     w9,w9,w4,lsr#3  // sigma0(X[i+1])
932         add     w20,w20,w16                     // h+=Sigma1(e)
933         eor     w19,w19,w22                     // Maj(a,b,c)
934         eor     w17,w10,w21,ror#22      // Sigma0(a)
935         eor     w8,w8,w1,lsr#10 // sigma1(X[i+14])
936         add     w3,w3,w12
937         add     w24,w24,w20                     // d+=h
938         add     w20,w20,w19                     // h+=Maj(a,b,c)
939         ldr     w19,[x30],#4            // *K++, w28 in next round
940         add     w3,w3,w9
941         add     w20,w20,w17                     // h+=Sigma0(a)
942         add     w3,w3,w8
943         cbnz    w19,.Loop_16_xx
944
945         ldp     x0,x2,[x29,#96]
946         ldr     x1,[x29,#112]
947         sub     x30,x30,#260            // rewind
948
949         ldp     w3,w4,[x0]
950         ldp     w5,w6,[x0,#2*4]
951         add     x1,x1,#14*4                     // advance input pointer
952         ldp     w7,w8,[x0,#4*4]
953         add     w20,w20,w3
954         ldp     w9,w10,[x0,#6*4]
955         add     w21,w21,w4
956         add     w22,w22,w5
957         add     w23,w23,w6
958         stp     w20,w21,[x0]
959         add     w24,w24,w7
960         add     w25,w25,w8
961         stp     w22,w23,[x0,#2*4]
962         add     w26,w26,w9
963         add     w27,w27,w10
964         cmp     x1,x2
965         stp     w24,w25,[x0,#4*4]
966         stp     w26,w27,[x0,#6*4]
967         b.ne    .Loop
968
969         ldp     x19,x20,[x29,#16]
970         add     sp,sp,#4*4
971         ldp     x21,x22,[x29,#32]
972         ldp     x23,x24,[x29,#48]
973         ldp     x25,x26,[x29,#64]
974         ldp     x27,x28,[x29,#80]
975         ldp     x29,x30,[sp],#128
976         ret
977 .size   sha256_block_data_order,.-sha256_block_data_order
978
// K256: the 64 32-bit SHA-256 round constants followed by one zero word.
// The scalar round loop walks this table through x30 and leaves the loop
// when the word it just loaded is zero (cbnz w19,.Loop_16_xx), then rewinds
// x30 by 260 bytes = 65 words. The hardware path reads the first 256 bytes
// through x3, 16 bytes at a time.
979 .align  6
980 .type   K256,%object
981 K256:
982         .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
983         .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
984         .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
985         .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
986         .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
987         .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
988         .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
989         .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
990         .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
991         .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
992         .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
993         .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
994         .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
995         .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
996         .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
997         .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
998         .long   0       //terminator
999 .size   K256,.-K256
// Literal holding the 64-bit offset from this location to OPENSSL_armcap_P.
// sha256_block_data_order loads it (ldr), adds the address of the literal
// itself (adr) and reads the 32-bit capability word found at the result.
1000 .align  3
1001 .LOPENSSL_armcap_P:
1002         .quad   OPENSSL_armcap_P-.
// Identification string embedded in the object; the .align 2 that follows
// restores 4-byte alignment after the string.
1003 .asciz  "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1004 .align  2
// sha256_block_armv8 -- SHA-256 block transform built on the ARMv8 SHA256
// instructions (sha256h/sha256h2/sha256su0/sha256su1). They are emitted as
// raw .inst words; the trailing comment on each gives the mnemonic.
// Reached through .Lv8_entry from sha256_block_data_order when the
// ARMV8_SHA256 bit is set in the capability word.
//   x0 = pointer to the eight 32-bit state words (loaded into v0/v1 on
//        entry, stored back on exit)
//   x1 = input pointer, advanced by 64 bytes per iteration
//   x2 = number of 64-byte blocks. It is decremented at the top of the loop
//        and tested at the bottom, so a count of 0 is not handled here.
//        NOTE(review): presumably callers never pass 0 -- confirm.
//   x3 = cursor into K256 (scratch)
// Vector registers written: v0-v2, v4-v7, v16-v19.
1005 .type   sha256_block_armv8,%function
1006 .align  6
1007 sha256_block_armv8:
1008 .Lv8_entry:
1009         stp             x29,x30,[sp,#-16]!
1010         add             x29,sp,#0
1011
1012         ld1             {v0.4s,v1.4s},[x0]
1013         adr             x3,K256
1014
// One iteration per 64-byte input block.
1015 .Loop_hw:
1016         ld1             {v4.16b-v7.16b},[x1],#64
1017         sub             x2,x2,#1
1018         ld1             {v16.4s},[x3],#16
// Reverse the byte order inside every 32-bit word of the input block.
1019         rev32           v4.16b,v4.16b
1020         rev32           v5.16b,v5.16b
1021         rev32           v6.16b,v6.16b
1022         rev32           v7.16b,v7.16b
// Keep a copy of the incoming state in v18/v19; it is added back after the
// last sha256h/sha256h2 pair of this iteration.
1023         orr             v18.16b,v0.16b,v0.16b           // offload
1024         orr             v19.16b,v1.16b,v1.16b
// 16 sha256h/sha256h2 pairs follow. Each pair takes one K256 vector added
// to one message vector; v16 and v17 alternate as that operand. v2 receives
// a copy of v0 before every pair because sha256h overwrites v0 and sha256h2
// still needs the previous value. The first 12 pairs are interleaved with
// sha256su0/sha256su1, which update the message vectors v4-v7 in place.
1025         ld1             {v17.4s},[x3],#16
1026         add             v16.4s,v16.4s,v4.4s
1027         .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
1028         orr             v2.16b,v0.16b,v0.16b
1029         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1030         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1031         .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
1032         ld1             {v16.4s},[x3],#16
1033         add             v17.4s,v17.4s,v5.4s
1034         .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
1035         orr             v2.16b,v0.16b,v0.16b
1036         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1037         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1038         .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
1039         ld1             {v17.4s},[x3],#16
1040         add             v16.4s,v16.4s,v6.4s
1041         .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
1042         orr             v2.16b,v0.16b,v0.16b
1043         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1044         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1045         .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
1046         ld1             {v16.4s},[x3],#16
1047         add             v17.4s,v17.4s,v7.4s
1048         .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
1049         orr             v2.16b,v0.16b,v0.16b
1050         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1051         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1052         .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
1053         ld1             {v17.4s},[x3],#16
1054         add             v16.4s,v16.4s,v4.4s
1055         .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
1056         orr             v2.16b,v0.16b,v0.16b
1057         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1058         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1059         .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
1060         ld1             {v16.4s},[x3],#16
1061         add             v17.4s,v17.4s,v5.4s
1062         .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
1063         orr             v2.16b,v0.16b,v0.16b
1064         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1065         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1066         .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
1067         ld1             {v17.4s},[x3],#16
1068         add             v16.4s,v16.4s,v6.4s
1069         .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
1070         orr             v2.16b,v0.16b,v0.16b
1071         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1072         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1073         .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
1074         ld1             {v16.4s},[x3],#16
1075         add             v17.4s,v17.4s,v7.4s
1076         .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
1077         orr             v2.16b,v0.16b,v0.16b
1078         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1079         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1080         .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
1081         ld1             {v17.4s},[x3],#16
1082         add             v16.4s,v16.4s,v4.4s
1083         .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
1084         orr             v2.16b,v0.16b,v0.16b
1085         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1086         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1087         .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
1088         ld1             {v16.4s},[x3],#16
1089         add             v17.4s,v17.4s,v5.4s
1090         .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
1091         orr             v2.16b,v0.16b,v0.16b
1092         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1093         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1094         .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
1095         ld1             {v17.4s},[x3],#16
1096         add             v16.4s,v16.4s,v6.4s
1097         .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
1098         orr             v2.16b,v0.16b,v0.16b
1099         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1100         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1101         .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
1102         ld1             {v16.4s},[x3],#16
1103         add             v17.4s,v17.4s,v7.4s
1104         .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
1105         orr             v2.16b,v0.16b,v0.16b
1106         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1107         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1108         .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
// Last four pairs: v4-v7 are consumed as they are, with no further
// sha256su0/sha256su1 updates.
1109         ld1             {v17.4s},[x3],#16
1110         add             v16.4s,v16.4s,v4.4s
1111         orr             v2.16b,v0.16b,v0.16b
1112         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1113         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1114
1115         ld1             {v16.4s},[x3],#16
1116         add             v17.4s,v17.4s,v5.4s
1117         orr             v2.16b,v0.16b,v0.16b
1118         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1119         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1120
// The final K256 load has no post-increment, so x3 is 15*16 = 240 bytes
// past K256 here; subtracting 64*4-16 puts it back at K256 for the next
// block.
1121         ld1             {v17.4s},[x3]
1122         add             v16.4s,v16.4s,v6.4s
1123         sub             x3,x3,#64*4-16  // rewind
1124         orr             v2.16b,v0.16b,v0.16b
1125         .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
1126         .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
1127
1128         add             v17.4s,v17.4s,v7.4s
1129         orr             v2.16b,v0.16b,v0.16b
1130         .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
1131         .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
1132
// Add the state saved in v18/v19 at the top of this iteration.
1133         add             v0.4s,v0.4s,v18.4s
1134         add             v1.4s,v1.4s,v19.4s
1135
1136         cbnz            x2,.Loop_hw
1137
// All blocks done: write the eight state words back through x0.
1138         st1             {v0.4s,v1.4s},[x0]
1139
// Only x29 is reloaded. x30 is never written in this routine, so ret uses
// the link register exactly as it was on entry.
1140         ldr             x29,[sp],#16
1141         ret
1142 .size   sha256_block_armv8,.-sha256_block_armv8
// Common-symbol definition (size 4, alignment argument 4) of the capability
// word that sha256_block_data_order tests against ARMV8_SHA256.
1143 .comm   OPENSSL_armcap_P,4,4