1         # $FreeBSD$
2         # Do not modify. This file is auto-generated from sha512-armv4.pl.
3 #include "arm_arch.h"
4 #ifdef __ARMEL__
5 # define LO 0
6 # define HI 4
7 # define WORD64(hi0,lo0,hi1,lo1)        .word   lo0,hi0, lo1,hi1
8 #else
9 # define HI 0
10 # define LO 4
11 # define WORD64(hi0,lo0,hi1,lo1)        .word   hi0,lo0, hi1,lo1
12 #endif
13
14 .text
15 .code   32
16 .type   K512,%object
17 .align  5
18 K512:
19 WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
20 WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
21 WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
22 WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
23 WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
24 WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
25 WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
26 WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
27 WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
28 WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
29 WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
30 WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
31 WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
32 WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
33 WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
34 WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
35 WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
36 WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
37 WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
38 WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
39 WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
40 WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
41 WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
42 WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
43 WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
44 WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
45 WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
46 WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
47 WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
48 WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
49 WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
50 WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
51 WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
52 WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
53 WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
54 WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
55 WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
56 WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
57 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
58 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
59 .size   K512,.-K512
60 #if __ARM_MAX_ARCH__>=7
61 .LOPENSSL_armcap:
62 .word   OPENSSL_armcap_P-sha512_block_data_order
63 .skip   32-4
64 #else
65 .skip   32
66 #endif
67
68 .global sha512_block_data_order
69 .type   sha512_block_data_order,%function
70 sha512_block_data_order:
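        @ arguments: r0 = SHA-512 state (8 x 64-bit words),
        @            r1 = input data, r2 = number of 128-byte blocks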
71         sub     r3,pc,#8                @ sha512_block_data_order
72         add     r2,r1,r2,lsl#7  @ len to point at the end of inp
73 #if __ARM_MAX_ARCH__>=7
74         ldr     r12,.LOPENSSL_armcap
75         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
76         tst     r12,#1
77         bne     .LNEON
78 #endif
79         stmdb   sp!,{r4-r12,lr}
80         sub     r14,r3,#672             @ K512
81         sub     sp,sp,#9*8
82
83         ldr     r7,[r0,#32+LO]
84         ldr     r8,[r0,#32+HI]
85         ldr     r9, [r0,#48+LO]
86         ldr     r10, [r0,#48+HI]
87         ldr     r11, [r0,#56+LO]
88         ldr     r12, [r0,#56+HI]
89 .Loop:
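        @ each .Loop iteration compresses one 128-byte input block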
90         str     r9, [sp,#48+0]
91         str     r10, [sp,#48+4]
92         str     r11, [sp,#56+0]
93         str     r12, [sp,#56+4]
94         ldr     r5,[r0,#0+LO]
95         ldr     r6,[r0,#0+HI]
96         ldr     r3,[r0,#8+LO]
97         ldr     r4,[r0,#8+HI]
98         ldr     r9, [r0,#16+LO]
99         ldr     r10, [r0,#16+HI]
100         ldr     r11, [r0,#24+LO]
101         ldr     r12, [r0,#24+HI]
102         str     r3,[sp,#8+0]
103         str     r4,[sp,#8+4]
104         str     r9, [sp,#16+0]
105         str     r10, [sp,#16+4]
106         str     r11, [sp,#24+0]
107         str     r12, [sp,#24+4]
108         ldr     r3,[r0,#40+LO]
109         ldr     r4,[r0,#40+HI]
110         str     r3,[sp,#40+0]
111         str     r4,[sp,#40+4]
112
113 .L00_15:
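        @ rounds 0..15: 64-bit message words are assembled straight from the
        @ input block (byte loads on pre-ARMv7, ldr+rev on little-endian ARMv7)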
114 #if __ARM_ARCH__<7
115         ldrb    r3,[r1,#7]
116         ldrb    r9, [r1,#6]
117         ldrb    r10, [r1,#5]
118         ldrb    r11, [r1,#4]
119         ldrb    r4,[r1,#3]
120         ldrb    r12, [r1,#2]
121         orr     r3,r3,r9,lsl#8
122         ldrb    r9, [r1,#1]
123         orr     r3,r3,r10,lsl#16
124         ldrb    r10, [r1],#8
125         orr     r3,r3,r11,lsl#24
126         orr     r4,r4,r12,lsl#8
127         orr     r4,r4,r9,lsl#16
128         orr     r4,r4,r10,lsl#24
129 #else
130         ldr     r3,[r1,#4]
131         ldr     r4,[r1],#8
132 #ifdef __ARMEL__
133         rev     r3,r3
134         rev     r4,r4
135 #endif
136 #endif
137         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
138         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
139         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
140         mov     r9,r7,lsr#14
141         str     r3,[sp,#64+0]
142         mov     r10,r8,lsr#14
143         str     r4,[sp,#64+4]
144         eor     r9,r9,r8,lsl#18
145         ldr     r11,[sp,#56+0]  @ h.lo
146         eor     r10,r10,r7,lsl#18
147         ldr     r12,[sp,#56+4]  @ h.hi
148         eor     r9,r9,r7,lsr#18
149         eor     r10,r10,r8,lsr#18
150         eor     r9,r9,r8,lsl#14
151         eor     r10,r10,r7,lsl#14
152         eor     r9,r9,r8,lsr#9
153         eor     r10,r10,r7,lsr#9
154         eor     r9,r9,r7,lsl#23
155         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
156         adds    r3,r3,r9
157         ldr     r9,[sp,#40+0]   @ f.lo
158         adc     r4,r4,r10               @ T += Sigma1(e)
159         ldr     r10,[sp,#40+4]  @ f.hi
160         adds    r3,r3,r11
161         ldr     r11,[sp,#48+0]  @ g.lo
162         adc     r4,r4,r12               @ T += h
163         ldr     r12,[sp,#48+4]  @ g.hi
164
165         eor     r9,r9,r11
166         str     r7,[sp,#32+0]
167         eor     r10,r10,r12
168         str     r8,[sp,#32+4]
169         and     r9,r9,r7
170         str     r5,[sp,#0+0]
171         and     r10,r10,r8
172         str     r6,[sp,#0+4]
173         eor     r9,r9,r11
174         ldr     r11,[r14,#LO]   @ K[i].lo
175         eor     r10,r10,r12             @ Ch(e,f,g)
176         ldr     r12,[r14,#HI]   @ K[i].hi
177
178         adds    r3,r3,r9
179         ldr     r7,[sp,#24+0]   @ d.lo
180         adc     r4,r4,r10               @ T += Ch(e,f,g)
181         ldr     r8,[sp,#24+4]   @ d.hi
182         adds    r3,r3,r11
183         and     r9,r11,#0xff
184         adc     r4,r4,r12               @ T += K[i]
185         adds    r7,r7,r3
186         ldr     r11,[sp,#8+0]   @ b.lo
187         adc     r8,r8,r4                @ d += T
188         teq     r9,#148
189
190         ldr     r12,[sp,#16+0]  @ c.lo
191         orreq   r14,r14,#1
192         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
193         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
194         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
195         mov     r9,r5,lsr#28
196         mov     r10,r6,lsr#28
197         eor     r9,r9,r6,lsl#4
198         eor     r10,r10,r5,lsl#4
199         eor     r9,r9,r6,lsr#2
200         eor     r10,r10,r5,lsr#2
201         eor     r9,r9,r5,lsl#30
202         eor     r10,r10,r6,lsl#30
203         eor     r9,r9,r6,lsr#7
204         eor     r10,r10,r5,lsr#7
205         eor     r9,r9,r5,lsl#25
206         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
207         adds    r3,r3,r9
208         and     r9,r5,r11
209         adc     r4,r4,r10               @ T += Sigma0(a)
210
211         ldr     r10,[sp,#8+4]   @ b.hi
212         orr     r5,r5,r11
213         ldr     r11,[sp,#16+4]  @ c.hi
214         and     r5,r5,r12
215         and     r12,r6,r10
216         orr     r6,r6,r10
217         orr     r5,r5,r9                @ Maj(a,b,c).lo
218         and     r6,r6,r11
219         adds    r5,r5,r3
220         orr     r6,r6,r12               @ Maj(a,b,c).hi
221         sub     sp,sp,#8
222         adc     r6,r6,r4                @ h += T
223         tst     r14,#1
224         add     r14,r14,#8
225         tst     r14,#1
226         beq     .L00_15
227         ldr     r9,[sp,#184+0]
228         ldr     r10,[sp,#184+4]
229         bic     r14,r14,#1
230 .L16_79:
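        @ rounds 16..79: message words are recomputed on the fly from earlier
        @ schedule entries using sigma0/sigma1 (see the comments below)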
231         @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
232         @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
233         @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
234         mov     r3,r9,lsr#1
235         ldr     r11,[sp,#80+0]
236         mov     r4,r10,lsr#1
237         ldr     r12,[sp,#80+4]
238         eor     r3,r3,r10,lsl#31
239         eor     r4,r4,r9,lsl#31
240         eor     r3,r3,r9,lsr#8
241         eor     r4,r4,r10,lsr#8
242         eor     r3,r3,r10,lsl#24
243         eor     r4,r4,r9,lsl#24
244         eor     r3,r3,r9,lsr#7
245         eor     r4,r4,r10,lsr#7
246         eor     r3,r3,r10,lsl#25
247
248         @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
249         @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
250         @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
251         mov     r9,r11,lsr#19
252         mov     r10,r12,lsr#19
253         eor     r9,r9,r12,lsl#13
254         eor     r10,r10,r11,lsl#13
255         eor     r9,r9,r12,lsr#29
256         eor     r10,r10,r11,lsr#29
257         eor     r9,r9,r11,lsl#3
258         eor     r10,r10,r12,lsl#3
259         eor     r9,r9,r11,lsr#6
260         eor     r10,r10,r12,lsr#6
261         ldr     r11,[sp,#120+0]
262         eor     r9,r9,r12,lsl#26
263
264         ldr     r12,[sp,#120+4]
265         adds    r3,r3,r9
266         ldr     r9,[sp,#192+0]
267         adc     r4,r4,r10
268
269         ldr     r10,[sp,#192+4]
270         adds    r3,r3,r11
271         adc     r4,r4,r12
272         adds    r3,r3,r9
273         adc     r4,r4,r10
274         @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
275         @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
276         @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
277         mov     r9,r7,lsr#14
278         str     r3,[sp,#64+0]
279         mov     r10,r8,lsr#14
280         str     r4,[sp,#64+4]
281         eor     r9,r9,r8,lsl#18
282         ldr     r11,[sp,#56+0]  @ h.lo
283         eor     r10,r10,r7,lsl#18
284         ldr     r12,[sp,#56+4]  @ h.hi
285         eor     r9,r9,r7,lsr#18
286         eor     r10,r10,r8,lsr#18
287         eor     r9,r9,r8,lsl#14
288         eor     r10,r10,r7,lsl#14
289         eor     r9,r9,r8,lsr#9
290         eor     r10,r10,r7,lsr#9
291         eor     r9,r9,r7,lsl#23
292         eor     r10,r10,r8,lsl#23       @ Sigma1(e)
293         adds    r3,r3,r9
294         ldr     r9,[sp,#40+0]   @ f.lo
295         adc     r4,r4,r10               @ T += Sigma1(e)
296         ldr     r10,[sp,#40+4]  @ f.hi
297         adds    r3,r3,r11
298         ldr     r11,[sp,#48+0]  @ g.lo
299         adc     r4,r4,r12               @ T += h
300         ldr     r12,[sp,#48+4]  @ g.hi
301
302         eor     r9,r9,r11
303         str     r7,[sp,#32+0]
304         eor     r10,r10,r12
305         str     r8,[sp,#32+4]
306         and     r9,r9,r7
307         str     r5,[sp,#0+0]
308         and     r10,r10,r8
309         str     r6,[sp,#0+4]
310         eor     r9,r9,r11
311         ldr     r11,[r14,#LO]   @ K[i].lo
312         eor     r10,r10,r12             @ Ch(e,f,g)
313         ldr     r12,[r14,#HI]   @ K[i].hi
314
315         adds    r3,r3,r9
316         ldr     r7,[sp,#24+0]   @ d.lo
317         adc     r4,r4,r10               @ T += Ch(e,f,g)
318         ldr     r8,[sp,#24+4]   @ d.hi
319         adds    r3,r3,r11
320         and     r9,r11,#0xff
321         adc     r4,r4,r12               @ T += K[i]
322         adds    r7,r7,r3
323         ldr     r11,[sp,#8+0]   @ b.lo
324         adc     r8,r8,r4                @ d += T
325         teq     r9,#23
326
327         ldr     r12,[sp,#16+0]  @ c.lo
328         orreq   r14,r14,#1
329         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
330         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
331         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
332         mov     r9,r5,lsr#28
333         mov     r10,r6,lsr#28
334         eor     r9,r9,r6,lsl#4
335         eor     r10,r10,r5,lsl#4
336         eor     r9,r9,r6,lsr#2
337         eor     r10,r10,r5,lsr#2
338         eor     r9,r9,r5,lsl#30
339         eor     r10,r10,r6,lsl#30
340         eor     r9,r9,r6,lsr#7
341         eor     r10,r10,r5,lsr#7
342         eor     r9,r9,r5,lsl#25
343         eor     r10,r10,r6,lsl#25       @ Sigma0(a)
344         adds    r3,r3,r9
345         and     r9,r5,r11
346         adc     r4,r4,r10               @ T += Sigma0(a)
347
348         ldr     r10,[sp,#8+4]   @ b.hi
349         orr     r5,r5,r11
350         ldr     r11,[sp,#16+4]  @ c.hi
351         and     r5,r5,r12
352         and     r12,r6,r10
353         orr     r6,r6,r10
354         orr     r5,r5,r9                @ Maj(a,b,c).lo
355         and     r6,r6,r11
356         adds    r5,r5,r3
357         orr     r6,r6,r12               @ Maj(a,b,c).hi
358         sub     sp,sp,#8
359         adc     r6,r6,r4                @ h += T
360         tst     r14,#1
361         add     r14,r14,#8
362         ldreq   r9,[sp,#184+0]
363         ldreq   r10,[sp,#184+4]
364         beq     .L16_79
365         bic     r14,r14,#1
366
367         ldr     r3,[sp,#8+0]
368         ldr     r4,[sp,#8+4]
369         ldr     r9, [r0,#0+LO]
370         ldr     r10, [r0,#0+HI]
371         ldr     r11, [r0,#8+LO]
372         ldr     r12, [r0,#8+HI]
373         adds    r9,r5,r9
374         str     r9, [r0,#0+LO]
375         adc     r10,r6,r10
376         str     r10, [r0,#0+HI]
377         adds    r11,r3,r11
378         str     r11, [r0,#8+LO]
379         adc     r12,r4,r12
380         str     r12, [r0,#8+HI]
381
382         ldr     r5,[sp,#16+0]
383         ldr     r6,[sp,#16+4]
384         ldr     r3,[sp,#24+0]
385         ldr     r4,[sp,#24+4]
386         ldr     r9, [r0,#16+LO]
387         ldr     r10, [r0,#16+HI]
388         ldr     r11, [r0,#24+LO]
389         ldr     r12, [r0,#24+HI]
390         adds    r9,r5,r9
391         str     r9, [r0,#16+LO]
392         adc     r10,r6,r10
393         str     r10, [r0,#16+HI]
394         adds    r11,r3,r11
395         str     r11, [r0,#24+LO]
396         adc     r12,r4,r12
397         str     r12, [r0,#24+HI]
398
399         ldr     r3,[sp,#40+0]
400         ldr     r4,[sp,#40+4]
401         ldr     r9, [r0,#32+LO]
402         ldr     r10, [r0,#32+HI]
403         ldr     r11, [r0,#40+LO]
404         ldr     r12, [r0,#40+HI]
405         adds    r7,r7,r9
406         str     r7,[r0,#32+LO]
407         adc     r8,r8,r10
408         str     r8,[r0,#32+HI]
409         adds    r11,r3,r11
410         str     r11, [r0,#40+LO]
411         adc     r12,r4,r12
412         str     r12, [r0,#40+HI]
413
414         ldr     r5,[sp,#48+0]
415         ldr     r6,[sp,#48+4]
416         ldr     r3,[sp,#56+0]
417         ldr     r4,[sp,#56+4]
418         ldr     r9, [r0,#48+LO]
419         ldr     r10, [r0,#48+HI]
420         ldr     r11, [r0,#56+LO]
421         ldr     r12, [r0,#56+HI]
422         adds    r9,r5,r9
423         str     r9, [r0,#48+LO]
424         adc     r10,r6,r10
425         str     r10, [r0,#48+HI]
426         adds    r11,r3,r11
427         str     r11, [r0,#56+LO]
428         adc     r12,r4,r12
429         str     r12, [r0,#56+HI]
430
431         add     sp,sp,#640
432         sub     r14,r14,#640
433
434         teq     r1,r2
435         bne     .Loop
436
437         add     sp,sp,#8*9              @ destroy frame
438 #if __ARM_ARCH__>=5
439         ldmia   sp!,{r4-r12,pc}
440 #else
441         ldmia   sp!,{r4-r12,lr}
442         tst     lr,#1
443         moveq   pc,lr                   @ be binary compatible with V4, yet
444         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
445 #endif
446 #if __ARM_MAX_ARCH__>=7
447 .arch   armv7-a
448 .fpu    neon
449
450 .align  4
451 .LNEON:
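        @ NEON implementation, entered when the NEON bit of OPENSSL_armcap_P is set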
452         dmb                             @ errata #451034 on early Cortex A8
453         vstmdb  sp!,{d8-d15}            @ ABI specification says so
454         sub     r3,r3,#672              @ K512
455         vldmia  r0,{d16-d23}            @ load context
456 .Loop_neon:
457         vshr.u64        d24,d20,#14     @ 0
458 #if 0<16
459         vld1.64         {d0},[r1]!      @ handles unaligned
460 #endif
461         vshr.u64        d25,d20,#18
462 #if 0>0
463          vadd.i64       d16,d30                 @ h+=Maj from the past
464 #endif
465         vshr.u64        d26,d20,#41
466         vld1.64         {d28},[r3,:64]! @ K[i++]
467         vsli.64         d24,d20,#50
468         vsli.64         d25,d20,#46
469         vmov            d29,d20
470         vsli.64         d26,d20,#23
471 #if 0<16 && defined(__ARMEL__)
472         vrev64.8        d0,d0
473 #endif
474         veor            d25,d24
475         vbsl            d29,d21,d22             @ Ch(e,f,g)
476         vshr.u64        d24,d16,#28
477         veor            d26,d25                 @ Sigma1(e)
478         vadd.i64        d27,d29,d23
479         vshr.u64        d25,d16,#34
480         vsli.64         d24,d16,#36
481         vadd.i64        d27,d26
482         vshr.u64        d26,d16,#39
483         vadd.i64        d28,d0
484         vsli.64         d25,d16,#30
485         veor            d30,d16,d17
486         vsli.64         d26,d16,#25
487         veor            d23,d24,d25
488         vadd.i64        d27,d28
489         vbsl            d30,d18,d17             @ Maj(a,b,c)
490         veor            d23,d26                 @ Sigma0(a)
491         vadd.i64        d19,d27
492         vadd.i64        d30,d27
493         @ vadd.i64      d23,d30
494         vshr.u64        d24,d19,#14     @ 1
495 #if 1<16
496         vld1.64         {d1},[r1]!      @ handles unaligned
497 #endif
498         vshr.u64        d25,d19,#18
499 #if 1>0
500          vadd.i64       d23,d30                 @ h+=Maj from the past
501 #endif
502         vshr.u64        d26,d19,#41
503         vld1.64         {d28},[r3,:64]! @ K[i++]
504         vsli.64         d24,d19,#50
505         vsli.64         d25,d19,#46
506         vmov            d29,d19
507         vsli.64         d26,d19,#23
508 #if 1<16 && defined(__ARMEL__)
509         vrev64.8        d1,d1
510 #endif
511         veor            d25,d24
512         vbsl            d29,d20,d21             @ Ch(e,f,g)
513         vshr.u64        d24,d23,#28
514         veor            d26,d25                 @ Sigma1(e)
515         vadd.i64        d27,d29,d22
516         vshr.u64        d25,d23,#34
517         vsli.64         d24,d23,#36
518         vadd.i64        d27,d26
519         vshr.u64        d26,d23,#39
520         vadd.i64        d28,d1
521         vsli.64         d25,d23,#30
522         veor            d30,d23,d16
523         vsli.64         d26,d23,#25
524         veor            d22,d24,d25
525         vadd.i64        d27,d28
526         vbsl            d30,d17,d16             @ Maj(a,b,c)
527         veor            d22,d26                 @ Sigma0(a)
528         vadd.i64        d18,d27
529         vadd.i64        d30,d27
530         @ vadd.i64      d22,d30
531         vshr.u64        d24,d18,#14     @ 2
532 #if 2<16
533         vld1.64         {d2},[r1]!      @ handles unaligned
534 #endif
535         vshr.u64        d25,d18,#18
536 #if 2>0
537          vadd.i64       d22,d30                 @ h+=Maj from the past
538 #endif
539         vshr.u64        d26,d18,#41
540         vld1.64         {d28},[r3,:64]! @ K[i++]
541         vsli.64         d24,d18,#50
542         vsli.64         d25,d18,#46
543         vmov            d29,d18
544         vsli.64         d26,d18,#23
545 #if 2<16 && defined(__ARMEL__)
546         vrev64.8        d2,d2
547 #endif
548         veor            d25,d24
549         vbsl            d29,d19,d20             @ Ch(e,f,g)
550         vshr.u64        d24,d22,#28
551         veor            d26,d25                 @ Sigma1(e)
552         vadd.i64        d27,d29,d21
553         vshr.u64        d25,d22,#34
554         vsli.64         d24,d22,#36
555         vadd.i64        d27,d26
556         vshr.u64        d26,d22,#39
557         vadd.i64        d28,d2
558         vsli.64         d25,d22,#30
559         veor            d30,d22,d23
560         vsli.64         d26,d22,#25
561         veor            d21,d24,d25
562         vadd.i64        d27,d28
563         vbsl            d30,d16,d23             @ Maj(a,b,c)
564         veor            d21,d26                 @ Sigma0(a)
565         vadd.i64        d17,d27
566         vadd.i64        d30,d27
567         @ vadd.i64      d21,d30
568         vshr.u64        d24,d17,#14     @ 3
569 #if 3<16
570         vld1.64         {d3},[r1]!      @ handles unaligned
571 #endif
572         vshr.u64        d25,d17,#18
573 #if 3>0
574          vadd.i64       d21,d30                 @ h+=Maj from the past
575 #endif
576         vshr.u64        d26,d17,#41
577         vld1.64         {d28},[r3,:64]! @ K[i++]
578         vsli.64         d24,d17,#50
579         vsli.64         d25,d17,#46
580         vmov            d29,d17
581         vsli.64         d26,d17,#23
582 #if 3<16 && defined(__ARMEL__)
583         vrev64.8        d3,d3
584 #endif
585         veor            d25,d24
586         vbsl            d29,d18,d19             @ Ch(e,f,g)
587         vshr.u64        d24,d21,#28
588         veor            d26,d25                 @ Sigma1(e)
589         vadd.i64        d27,d29,d20
590         vshr.u64        d25,d21,#34
591         vsli.64         d24,d21,#36
592         vadd.i64        d27,d26
593         vshr.u64        d26,d21,#39
594         vadd.i64        d28,d3
595         vsli.64         d25,d21,#30
596         veor            d30,d21,d22
597         vsli.64         d26,d21,#25
598         veor            d20,d24,d25
599         vadd.i64        d27,d28
600         vbsl            d30,d23,d22             @ Maj(a,b,c)
601         veor            d20,d26                 @ Sigma0(a)
602         vadd.i64        d16,d27
603         vadd.i64        d30,d27
604         @ vadd.i64      d20,d30
605         vshr.u64        d24,d16,#14     @ 4
606 #if 4<16
607         vld1.64         {d4},[r1]!      @ handles unaligned
608 #endif
609         vshr.u64        d25,d16,#18
610 #if 4>0
611          vadd.i64       d20,d30                 @ h+=Maj from the past
612 #endif
613         vshr.u64        d26,d16,#41
614         vld1.64         {d28},[r3,:64]! @ K[i++]
615         vsli.64         d24,d16,#50
616         vsli.64         d25,d16,#46
617         vmov            d29,d16
618         vsli.64         d26,d16,#23
619 #if 4<16 && defined(__ARMEL__)
620         vrev64.8        d4,d4
621 #endif
622         veor            d25,d24
623         vbsl            d29,d17,d18             @ Ch(e,f,g)
624         vshr.u64        d24,d20,#28
625         veor            d26,d25                 @ Sigma1(e)
626         vadd.i64        d27,d29,d19
627         vshr.u64        d25,d20,#34
628         vsli.64         d24,d20,#36
629         vadd.i64        d27,d26
630         vshr.u64        d26,d20,#39
631         vadd.i64        d28,d4
632         vsli.64         d25,d20,#30
633         veor            d30,d20,d21
634         vsli.64         d26,d20,#25
635         veor            d19,d24,d25
636         vadd.i64        d27,d28
637         vbsl            d30,d22,d21             @ Maj(a,b,c)
638         veor            d19,d26                 @ Sigma0(a)
639         vadd.i64        d23,d27
640         vadd.i64        d30,d27
641         @ vadd.i64      d19,d30
642         vshr.u64        d24,d23,#14     @ 5
643 #if 5<16
644         vld1.64         {d5},[r1]!      @ handles unaligned
645 #endif
646         vshr.u64        d25,d23,#18
647 #if 5>0
648          vadd.i64       d19,d30                 @ h+=Maj from the past
649 #endif
650         vshr.u64        d26,d23,#41
651         vld1.64         {d28},[r3,:64]! @ K[i++]
652         vsli.64         d24,d23,#50
653         vsli.64         d25,d23,#46
654         vmov            d29,d23
655         vsli.64         d26,d23,#23
656 #if 5<16 && defined(__ARMEL__)
657         vrev64.8        d5,d5
658 #endif
659         veor            d25,d24
660         vbsl            d29,d16,d17             @ Ch(e,f,g)
661         vshr.u64        d24,d19,#28
662         veor            d26,d25                 @ Sigma1(e)
663         vadd.i64        d27,d29,d18
664         vshr.u64        d25,d19,#34
665         vsli.64         d24,d19,#36
666         vadd.i64        d27,d26
667         vshr.u64        d26,d19,#39
668         vadd.i64        d28,d5
669         vsli.64         d25,d19,#30
670         veor            d30,d19,d20
671         vsli.64         d26,d19,#25
672         veor            d18,d24,d25
673         vadd.i64        d27,d28
674         vbsl            d30,d21,d20             @ Maj(a,b,c)
675         veor            d18,d26                 @ Sigma0(a)
676         vadd.i64        d22,d27
677         vadd.i64        d30,d27
678         @ vadd.i64      d18,d30
679         vshr.u64        d24,d22,#14     @ 6
680 #if 6<16
681         vld1.64         {d6},[r1]!      @ handles unaligned
682 #endif
683         vshr.u64        d25,d22,#18
684 #if 6>0
685          vadd.i64       d18,d30                 @ h+=Maj from the past
686 #endif
687         vshr.u64        d26,d22,#41
688         vld1.64         {d28},[r3,:64]! @ K[i++]
689         vsli.64         d24,d22,#50
690         vsli.64         d25,d22,#46
691         vmov            d29,d22
692         vsli.64         d26,d22,#23
693 #if 6<16 && defined(__ARMEL__)
694         vrev64.8        d6,d6
695 #endif
696         veor            d25,d24
697         vbsl            d29,d23,d16             @ Ch(e,f,g)
698         vshr.u64        d24,d18,#28
699         veor            d26,d25                 @ Sigma1(e)
700         vadd.i64        d27,d29,d17
701         vshr.u64        d25,d18,#34
702         vsli.64         d24,d18,#36
703         vadd.i64        d27,d26
704         vshr.u64        d26,d18,#39
705         vadd.i64        d28,d6
706         vsli.64         d25,d18,#30
707         veor            d30,d18,d19
708         vsli.64         d26,d18,#25
709         veor            d17,d24,d25
710         vadd.i64        d27,d28
711         vbsl            d30,d20,d19             @ Maj(a,b,c)
712         veor            d17,d26                 @ Sigma0(a)
713         vadd.i64        d21,d27
714         vadd.i64        d30,d27
715         @ vadd.i64      d17,d30
716         vshr.u64        d24,d21,#14     @ 7
717 #if 7<16
718         vld1.64         {d7},[r1]!      @ handles unaligned
719 #endif
720         vshr.u64        d25,d21,#18
721 #if 7>0
722          vadd.i64       d17,d30                 @ h+=Maj from the past
723 #endif
724         vshr.u64        d26,d21,#41
725         vld1.64         {d28},[r3,:64]! @ K[i++]
726         vsli.64         d24,d21,#50
727         vsli.64         d25,d21,#46
728         vmov            d29,d21
729         vsli.64         d26,d21,#23
730 #if 7<16 && defined(__ARMEL__)
731         vrev64.8        d7,d7
732 #endif
733         veor            d25,d24
734         vbsl            d29,d22,d23             @ Ch(e,f,g)
735         vshr.u64        d24,d17,#28
736         veor            d26,d25                 @ Sigma1(e)
737         vadd.i64        d27,d29,d16
738         vshr.u64        d25,d17,#34
739         vsli.64         d24,d17,#36
740         vadd.i64        d27,d26
741         vshr.u64        d26,d17,#39
742         vadd.i64        d28,d7
743         vsli.64         d25,d17,#30
744         veor            d30,d17,d18
745         vsli.64         d26,d17,#25
746         veor            d16,d24,d25
747         vadd.i64        d27,d28
748         vbsl            d30,d19,d18             @ Maj(a,b,c)
749         veor            d16,d26                 @ Sigma0(a)
750         vadd.i64        d20,d27
751         vadd.i64        d30,d27
752         @ vadd.i64      d16,d30
753         vshr.u64        d24,d20,#14     @ 8
754 #if 8<16
755         vld1.64         {d8},[r1]!      @ handles unaligned
756 #endif
757         vshr.u64        d25,d20,#18
758 #if 8>0
759          vadd.i64       d16,d30                 @ h+=Maj from the past
760 #endif
761         vshr.u64        d26,d20,#41
762         vld1.64         {d28},[r3,:64]! @ K[i++]
763         vsli.64         d24,d20,#50
764         vsli.64         d25,d20,#46
765         vmov            d29,d20
766         vsli.64         d26,d20,#23
767 #if 8<16 && defined(__ARMEL__)
768         vrev64.8        d8,d8
769 #endif
770         veor            d25,d24
771         vbsl            d29,d21,d22             @ Ch(e,f,g)
772         vshr.u64        d24,d16,#28
773         veor            d26,d25                 @ Sigma1(e)
774         vadd.i64        d27,d29,d23
775         vshr.u64        d25,d16,#34
776         vsli.64         d24,d16,#36
777         vadd.i64        d27,d26
778         vshr.u64        d26,d16,#39
779         vadd.i64        d28,d8
780         vsli.64         d25,d16,#30
781         veor            d30,d16,d17
782         vsli.64         d26,d16,#25
783         veor            d23,d24,d25
784         vadd.i64        d27,d28
785         vbsl            d30,d18,d17             @ Maj(a,b,c)
786         veor            d23,d26                 @ Sigma0(a)
787         vadd.i64        d19,d27
788         vadd.i64        d30,d27
789         @ vadd.i64      d23,d30
790         vshr.u64        d24,d19,#14     @ 9
791 #if 9<16
792         vld1.64         {d9},[r1]!      @ handles unaligned
793 #endif
794         vshr.u64        d25,d19,#18
795 #if 9>0
796          vadd.i64       d23,d30                 @ h+=Maj from the past
797 #endif
798         vshr.u64        d26,d19,#41
799         vld1.64         {d28},[r3,:64]! @ K[i++]
800         vsli.64         d24,d19,#50
801         vsli.64         d25,d19,#46
802         vmov            d29,d19
803         vsli.64         d26,d19,#23
804 #if 9<16 && defined(__ARMEL__)
805         vrev64.8        d9,d9
806 #endif
807         veor            d25,d24
808         vbsl            d29,d20,d21             @ Ch(e,f,g)
809         vshr.u64        d24,d23,#28
810         veor            d26,d25                 @ Sigma1(e)
811         vadd.i64        d27,d29,d22
812         vshr.u64        d25,d23,#34
813         vsli.64         d24,d23,#36
814         vadd.i64        d27,d26
815         vshr.u64        d26,d23,#39
816         vadd.i64        d28,d9
817         vsli.64         d25,d23,#30
818         veor            d30,d23,d16
819         vsli.64         d26,d23,#25
820         veor            d22,d24,d25
821         vadd.i64        d27,d28
822         vbsl            d30,d17,d16             @ Maj(a,b,c)
823         veor            d22,d26                 @ Sigma0(a)
824         vadd.i64        d18,d27
825         vadd.i64        d30,d27
826         @ vadd.i64      d22,d30
827         vshr.u64        d24,d18,#14     @ 10
828 #if 10<16
829         vld1.64         {d10},[r1]!     @ handles unaligned
830 #endif
831         vshr.u64        d25,d18,#18
832 #if 10>0
833          vadd.i64       d22,d30                 @ h+=Maj from the past
834 #endif
835         vshr.u64        d26,d18,#41
836         vld1.64         {d28},[r3,:64]! @ K[i++]
837         vsli.64         d24,d18,#50
838         vsli.64         d25,d18,#46
839         vmov            d29,d18
840         vsli.64         d26,d18,#23
841 #if 10<16 && defined(__ARMEL__)
842         vrev64.8        d10,d10
843 #endif
844         veor            d25,d24
845         vbsl            d29,d19,d20             @ Ch(e,f,g)
846         vshr.u64        d24,d22,#28
847         veor            d26,d25                 @ Sigma1(e)
848         vadd.i64        d27,d29,d21
849         vshr.u64        d25,d22,#34
850         vsli.64         d24,d22,#36
851         vadd.i64        d27,d26
852         vshr.u64        d26,d22,#39
853         vadd.i64        d28,d10
854         vsli.64         d25,d22,#30
855         veor            d30,d22,d23
856         vsli.64         d26,d22,#25
857         veor            d21,d24,d25
858         vadd.i64        d27,d28
859         vbsl            d30,d16,d23             @ Maj(a,b,c)
860         veor            d21,d26                 @ Sigma0(a)
861         vadd.i64        d17,d27
862         vadd.i64        d30,d27
863         @ vadd.i64      d21,d30
864         vshr.u64        d24,d17,#14     @ 11
865 #if 11<16
866         vld1.64         {d11},[r1]!     @ handles unaligned
867 #endif
868         vshr.u64        d25,d17,#18
869 #if 11>0
870          vadd.i64       d21,d30                 @ h+=Maj from the past
871 #endif
872         vshr.u64        d26,d17,#41
873         vld1.64         {d28},[r3,:64]! @ K[i++]
874         vsli.64         d24,d17,#50
875         vsli.64         d25,d17,#46
876         vmov            d29,d17
877         vsli.64         d26,d17,#23
878 #if 11<16 && defined(__ARMEL__)
879         vrev64.8        d11,d11
880 #endif
881         veor            d25,d24
882         vbsl            d29,d18,d19             @ Ch(e,f,g)
883         vshr.u64        d24,d21,#28
884         veor            d26,d25                 @ Sigma1(e)
885         vadd.i64        d27,d29,d20
886         vshr.u64        d25,d21,#34
887         vsli.64         d24,d21,#36
888         vadd.i64        d27,d26
889         vshr.u64        d26,d21,#39
890         vadd.i64        d28,d11
891         vsli.64         d25,d21,#30
892         veor            d30,d21,d22
893         vsli.64         d26,d21,#25
894         veor            d20,d24,d25
895         vadd.i64        d27,d28
896         vbsl            d30,d23,d22             @ Maj(a,b,c)
897         veor            d20,d26                 @ Sigma0(a)
898         vadd.i64        d16,d27
899         vadd.i64        d30,d27
900         @ vadd.i64      d20,d30
901         vshr.u64        d24,d16,#14     @ 12
902 #if 12<16
903         vld1.64         {d12},[r1]!     @ handles unaligned
904 #endif
905         vshr.u64        d25,d16,#18
906 #if 12>0
907          vadd.i64       d20,d30                 @ h+=Maj from the past
908 #endif
909         vshr.u64        d26,d16,#41
910         vld1.64         {d28},[r3,:64]! @ K[i++]
911         vsli.64         d24,d16,#50
912         vsli.64         d25,d16,#46
913         vmov            d29,d16
914         vsli.64         d26,d16,#23
915 #if 12<16 && defined(__ARMEL__)
916         vrev64.8        d12,d12
917 #endif
918         veor            d25,d24
919         vbsl            d29,d17,d18             @ Ch(e,f,g)
920         vshr.u64        d24,d20,#28
921         veor            d26,d25                 @ Sigma1(e)
922         vadd.i64        d27,d29,d19
923         vshr.u64        d25,d20,#34
924         vsli.64         d24,d20,#36
925         vadd.i64        d27,d26
926         vshr.u64        d26,d20,#39
927         vadd.i64        d28,d12
928         vsli.64         d25,d20,#30
929         veor            d30,d20,d21
930         vsli.64         d26,d20,#25
931         veor            d19,d24,d25
932         vadd.i64        d27,d28
933         vbsl            d30,d22,d21             @ Maj(a,b,c)
934         veor            d19,d26                 @ Sigma0(a)
935         vadd.i64        d23,d27
936         vadd.i64        d30,d27
937         @ vadd.i64      d19,d30
938         vshr.u64        d24,d23,#14     @ 13
939 #if 13<16
940         vld1.64         {d13},[r1]!     @ handles unaligned
941 #endif
942         vshr.u64        d25,d23,#18
943 #if 13>0
944          vadd.i64       d19,d30                 @ h+=Maj from the past
945 #endif
946         vshr.u64        d26,d23,#41
947         vld1.64         {d28},[r3,:64]! @ K[i++]
948         vsli.64         d24,d23,#50
949         vsli.64         d25,d23,#46
950         vmov            d29,d23
951         vsli.64         d26,d23,#23
952 #if 13<16 && defined(__ARMEL__)
953         vrev64.8        d13,d13
954 #endif
955         veor            d25,d24
956         vbsl            d29,d16,d17             @ Ch(e,f,g)
957         vshr.u64        d24,d19,#28
958         veor            d26,d25                 @ Sigma1(e)
959         vadd.i64        d27,d29,d18
960         vshr.u64        d25,d19,#34
961         vsli.64         d24,d19,#36
962         vadd.i64        d27,d26
963         vshr.u64        d26,d19,#39
964         vadd.i64        d28,d13
965         vsli.64         d25,d19,#30
966         veor            d30,d19,d20
967         vsli.64         d26,d19,#25
968         veor            d18,d24,d25
969         vadd.i64        d27,d28
970         vbsl            d30,d21,d20             @ Maj(a,b,c)
971         veor            d18,d26                 @ Sigma0(a)
972         vadd.i64        d22,d27
973         vadd.i64        d30,d27
974         @ vadd.i64      d18,d30
975         vshr.u64        d24,d22,#14     @ 14
976 #if 14<16
977         vld1.64         {d14},[r1]!     @ handles unaligned
978 #endif
979         vshr.u64        d25,d22,#18
980 #if 14>0
981          vadd.i64       d18,d30                 @ h+=Maj from the past
982 #endif
983         vshr.u64        d26,d22,#41
984         vld1.64         {d28},[r3,:64]! @ K[i++]
985         vsli.64         d24,d22,#50
986         vsli.64         d25,d22,#46
987         vmov            d29,d22
988         vsli.64         d26,d22,#23
989 #if 14<16 && defined(__ARMEL__)
990         vrev64.8        d14,d14
991 #endif
992         veor            d25,d24
993         vbsl            d29,d23,d16             @ Ch(e,f,g)
994         vshr.u64        d24,d18,#28
995         veor            d26,d25                 @ Sigma1(e)
996         vadd.i64        d27,d29,d17
997         vshr.u64        d25,d18,#34
998         vsli.64         d24,d18,#36
999         vadd.i64        d27,d26
1000         vshr.u64        d26,d18,#39
1001         vadd.i64        d28,d14
1002         vsli.64         d25,d18,#30
1003         veor            d30,d18,d19
1004         vsli.64         d26,d18,#25
1005         veor            d17,d24,d25
1006         vadd.i64        d27,d28
1007         vbsl            d30,d20,d19             @ Maj(a,b,c)
1008         veor            d17,d26                 @ Sigma0(a)
1009         vadd.i64        d21,d27
1010         vadd.i64        d30,d27
1011         @ vadd.i64      d17,d30
1012         vshr.u64        d24,d21,#14     @ 15
1013 #if 15<16
1014         vld1.64         {d15},[r1]!     @ handles unaligned
1015 #endif
1016         vshr.u64        d25,d21,#18
1017 #if 15>0
1018          vadd.i64       d17,d30                 @ h+=Maj from the past
1019 #endif
1020         vshr.u64        d26,d21,#41
1021         vld1.64         {d28},[r3,:64]! @ K[i++]
1022         vsli.64         d24,d21,#50
1023         vsli.64         d25,d21,#46
1024         vmov            d29,d21
1025         vsli.64         d26,d21,#23
1026 #if 15<16 && defined(__ARMEL__)
1027         vrev64.8        d15,d15
1028 #endif
1029         veor            d25,d24
1030         vbsl            d29,d22,d23             @ Ch(e,f,g)
1031         vshr.u64        d24,d17,#28
1032         veor            d26,d25                 @ Sigma1(e)
1033         vadd.i64        d27,d29,d16
1034         vshr.u64        d25,d17,#34
1035         vsli.64         d24,d17,#36
1036         vadd.i64        d27,d26
1037         vshr.u64        d26,d17,#39
1038         vadd.i64        d28,d15
1039         vsli.64         d25,d17,#30
1040         veor            d30,d17,d18
1041         vsli.64         d26,d17,#25
1042         veor            d16,d24,d25
1043         vadd.i64        d27,d28
1044         vbsl            d30,d19,d18             @ Maj(a,b,c)
1045         veor            d16,d26                 @ Sigma0(a)
1046         vadd.i64        d20,d27
1047         vadd.i64        d30,d27
1048         @ vadd.i64      d16,d30
1049         mov             r12,#4
1050 .L16_79_neon:
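        @ rounds 16..79: four iterations of 16 rounds, with the message schedule
        @ held in q0-q7 and extended two 64-bit words at a time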
1051         subs            r12,#1
1052         vshr.u64        q12,q7,#19
1053         vshr.u64        q13,q7,#61
1054          vadd.i64       d16,d30                 @ h+=Maj from the past
1055         vshr.u64        q15,q7,#6
1056         vsli.64         q12,q7,#45
1057         vext.8          q14,q0,q1,#8    @ X[i+1]
1058         vsli.64         q13,q7,#3
1059         veor            q15,q12
1060         vshr.u64        q12,q14,#1
1061         veor            q15,q13                         @ sigma1(X[i+14])
1062         vshr.u64        q13,q14,#8
1063         vadd.i64        q0,q15
1064         vshr.u64        q15,q14,#7
1065         vsli.64         q12,q14,#63
1066         vsli.64         q13,q14,#56
1067         vext.8          q14,q4,q5,#8    @ X[i+9]
1068         veor            q15,q12
1069         vshr.u64        d24,d20,#14             @ from NEON_00_15
1070         vadd.i64        q0,q14
1071         vshr.u64        d25,d20,#18             @ from NEON_00_15
1072         veor            q15,q13                         @ sigma0(X[i+1])
1073         vshr.u64        d26,d20,#41             @ from NEON_00_15
1074         vadd.i64        q0,q15
1075         vld1.64         {d28},[r3,:64]! @ K[i++]
1076         vsli.64         d24,d20,#50
1077         vsli.64         d25,d20,#46
1078         vmov            d29,d20
1079         vsli.64         d26,d20,#23
1080 #if 16<16 && defined(__ARMEL__)
1081         vrev64.8        ,
1082 #endif
1083         veor            d25,d24
1084         vbsl            d29,d21,d22             @ Ch(e,f,g)
1085         vshr.u64        d24,d16,#28
1086         veor            d26,d25                 @ Sigma1(e)
1087         vadd.i64        d27,d29,d23
1088         vshr.u64        d25,d16,#34
1089         vsli.64         d24,d16,#36
1090         vadd.i64        d27,d26
1091         vshr.u64        d26,d16,#39
1092         vadd.i64        d28,d0
1093         vsli.64         d25,d16,#30
1094         veor            d30,d16,d17
1095         vsli.64         d26,d16,#25
1096         veor            d23,d24,d25
1097         vadd.i64        d27,d28
1098         vbsl            d30,d18,d17             @ Maj(a,b,c)
1099         veor            d23,d26                 @ Sigma0(a)
1100         vadd.i64        d19,d27
1101         vadd.i64        d30,d27
1102         @ vadd.i64      d23,d30
1103         vshr.u64        d24,d19,#14     @ 17
1104 #if 17<16
1105         vld1.64         {d1},[r1]!      @ handles unaligned
1106 #endif
1107         vshr.u64        d25,d19,#18
1108 #if 17>0
1109          vadd.i64       d23,d30                 @ h+=Maj from the past
1110 #endif
1111         vshr.u64        d26,d19,#41
1112         vld1.64         {d28},[r3,:64]! @ K[i++]
1113         vsli.64         d24,d19,#50
1114         vsli.64         d25,d19,#46
1115         vmov            d29,d19
1116         vsli.64         d26,d19,#23
1117 #if 17<16 && defined(__ARMEL__)
1118         vrev64.8        ,
1119 #endif
1120         veor            d25,d24
1121         vbsl            d29,d20,d21             @ Ch(e,f,g)
1122         vshr.u64        d24,d23,#28
1123         veor            d26,d25                 @ Sigma1(e)
1124         vadd.i64        d27,d29,d22
1125         vshr.u64        d25,d23,#34
1126         vsli.64         d24,d23,#36
1127         vadd.i64        d27,d26
1128         vshr.u64        d26,d23,#39
1129         vadd.i64        d28,d1
1130         vsli.64         d25,d23,#30
1131         veor            d30,d23,d16
1132         vsli.64         d26,d23,#25
1133         veor            d22,d24,d25
1134         vadd.i64        d27,d28
1135         vbsl            d30,d17,d16             @ Maj(a,b,c)
1136         veor            d22,d26                 @ Sigma0(a)
1137         vadd.i64        d18,d27
1138         vadd.i64        d30,d27
1139         @ vadd.i64      d22,d30
1140         vshr.u64        q12,q0,#19
1141         vshr.u64        q13,q0,#61
1142          vadd.i64       d22,d30                 @ h+=Maj from the past
1143         vshr.u64        q15,q0,#6
1144         vsli.64         q12,q0,#45
1145         vext.8          q14,q1,q2,#8    @ X[i+1]
1146         vsli.64         q13,q0,#3
1147         veor            q15,q12
1148         vshr.u64        q12,q14,#1
1149         veor            q15,q13                         @ sigma1(X[i+14])
1150         vshr.u64        q13,q14,#8
1151         vadd.i64        q1,q15
1152         vshr.u64        q15,q14,#7
1153         vsli.64         q12,q14,#63
1154         vsli.64         q13,q14,#56
1155         vext.8          q14,q5,q6,#8    @ X[i+9]
1156         veor            q15,q12
1157         vshr.u64        d24,d18,#14             @ from NEON_00_15
1158         vadd.i64        q1,q14
1159         vshr.u64        d25,d18,#18             @ from NEON_00_15
1160         veor            q15,q13                         @ sigma0(X[i+1])
1161         vshr.u64        d26,d18,#41             @ from NEON_00_15
1162         vadd.i64        q1,q15
1163         vld1.64         {d28},[r3,:64]! @ K[i++]
1164         vsli.64         d24,d18,#50
1165         vsli.64         d25,d18,#46
1166         vmov            d29,d18
1167         vsli.64         d26,d18,#23
1168 #if 18<16 && defined(__ARMEL__)
1169         vrev64.8        ,
1170 #endif
1171         veor            d25,d24
1172         vbsl            d29,d19,d20             @ Ch(e,f,g)
1173         vshr.u64        d24,d22,#28
1174         veor            d26,d25                 @ Sigma1(e)
1175         vadd.i64        d27,d29,d21
1176         vshr.u64        d25,d22,#34
1177         vsli.64         d24,d22,#36
1178         vadd.i64        d27,d26
1179         vshr.u64        d26,d22,#39
1180         vadd.i64        d28,d2
1181         vsli.64         d25,d22,#30
1182         veor            d30,d22,d23
1183         vsli.64         d26,d22,#25
1184         veor            d21,d24,d25
1185         vadd.i64        d27,d28
1186         vbsl            d30,d16,d23             @ Maj(a,b,c)
1187         veor            d21,d26                 @ Sigma0(a)
1188         vadd.i64        d17,d27
1189         vadd.i64        d30,d27
1190         @ vadd.i64      d21,d30
1191         vshr.u64        d24,d17,#14     @ 19
1192 #if 19<16
1193         vld1.64         {d3},[r1]!      @ handles unaligned
1194 #endif
1195         vshr.u64        d25,d17,#18
1196 #if 19>0
1197          vadd.i64       d21,d30                 @ h+=Maj from the past
1198 #endif
1199         vshr.u64        d26,d17,#41
1200         vld1.64         {d28},[r3,:64]! @ K[i++]
1201         vsli.64         d24,d17,#50
1202         vsli.64         d25,d17,#46
1203         vmov            d29,d17
1204         vsli.64         d26,d17,#23
1205 #if 19<16 && defined(__ARMEL__)
1206         vrev64.8        ,
1207 #endif
1208         veor            d25,d24
1209         vbsl            d29,d18,d19             @ Ch(e,f,g)
1210         vshr.u64        d24,d21,#28
1211         veor            d26,d25                 @ Sigma1(e)
1212         vadd.i64        d27,d29,d20
1213         vshr.u64        d25,d21,#34
1214         vsli.64         d24,d21,#36
1215         vadd.i64        d27,d26
1216         vshr.u64        d26,d21,#39
1217         vadd.i64        d28,d3
1218         vsli.64         d25,d21,#30
1219         veor            d30,d21,d22
1220         vsli.64         d26,d21,#25
1221         veor            d20,d24,d25
1222         vadd.i64        d27,d28
1223         vbsl            d30,d23,d22             @ Maj(a,b,c)
1224         veor            d20,d26                 @ Sigma0(a)
1225         vadd.i64        d16,d27
1226         vadd.i64        d30,d27
1227         @ vadd.i64      d20,d30
1228         vshr.u64        q12,q1,#19
1229         vshr.u64        q13,q1,#61
1230          vadd.i64       d20,d30                 @ h+=Maj from the past
1231         vshr.u64        q15,q1,#6
1232         vsli.64         q12,q1,#45
1233         vext.8          q14,q2,q3,#8    @ X[i+1]
1234         vsli.64         q13,q1,#3
1235         veor            q15,q12
1236         vshr.u64        q12,q14,#1
1237         veor            q15,q13                         @ sigma1(X[i+14])
1238         vshr.u64        q13,q14,#8
1239         vadd.i64        q2,q15
1240         vshr.u64        q15,q14,#7
1241         vsli.64         q12,q14,#63
1242         vsli.64         q13,q14,#56
1243         vext.8          q14,q6,q7,#8    @ X[i+9]
1244         veor            q15,q12
1245         vshr.u64        d24,d16,#14             @ from NEON_00_15
1246         vadd.i64        q2,q14
1247         vshr.u64        d25,d16,#18             @ from NEON_00_15
1248         veor            q15,q13                         @ sigma0(X[i+1])
1249         vshr.u64        d26,d16,#41             @ from NEON_00_15
1250         vadd.i64        q2,q15
1251         vld1.64         {d28},[r3,:64]! @ K[i++]
1252         vsli.64         d24,d16,#50
1253         vsli.64         d25,d16,#46
1254         vmov            d29,d16
1255         vsli.64         d26,d16,#23
1256 #if 20<16 && defined(__ARMEL__)
1257         vrev64.8        ,
1258 #endif
1259         veor            d25,d24
1260         vbsl            d29,d17,d18             @ Ch(e,f,g)
1261         vshr.u64        d24,d20,#28
1262         veor            d26,d25                 @ Sigma1(e)
1263         vadd.i64        d27,d29,d19
1264         vshr.u64        d25,d20,#34
1265         vsli.64         d24,d20,#36
1266         vadd.i64        d27,d26
1267         vshr.u64        d26,d20,#39
1268         vadd.i64        d28,d4
1269         vsli.64         d25,d20,#30
1270         veor            d30,d20,d21
1271         vsli.64         d26,d20,#25
1272         veor            d19,d24,d25
1273         vadd.i64        d27,d28
1274         vbsl            d30,d22,d21             @ Maj(a,b,c)
1275         veor            d19,d26                 @ Sigma0(a)
1276         vadd.i64        d23,d27
1277         vadd.i64        d30,d27
1278         @ vadd.i64      d19,d30
1279         vshr.u64        d24,d23,#14     @ 21
1280 #if 21<16
1281         vld1.64         {d5},[r1]!      @ handles unaligned
1282 #endif
1283         vshr.u64        d25,d23,#18
1284 #if 21>0
1285          vadd.i64       d19,d30                 @ h+=Maj from the past
1286 #endif
1287         vshr.u64        d26,d23,#41
1288         vld1.64         {d28},[r3,:64]! @ K[i++]
1289         vsli.64         d24,d23,#50
1290         vsli.64         d25,d23,#46
1291         vmov            d29,d23
1292         vsli.64         d26,d23,#23
1293 #if 21<16 && defined(__ARMEL__)
1294         vrev64.8        ,
1295 #endif
1296         veor            d25,d24
1297         vbsl            d29,d16,d17             @ Ch(e,f,g)
1298         vshr.u64        d24,d19,#28
1299         veor            d26,d25                 @ Sigma1(e)
1300         vadd.i64        d27,d29,d18
1301         vshr.u64        d25,d19,#34
1302         vsli.64         d24,d19,#36
1303         vadd.i64        d27,d26
1304         vshr.u64        d26,d19,#39
1305         vadd.i64        d28,d5
1306         vsli.64         d25,d19,#30
1307         veor            d30,d19,d20
1308         vsli.64         d26,d19,#25
1309         veor            d18,d24,d25
1310         vadd.i64        d27,d28
1311         vbsl            d30,d21,d20             @ Maj(a,b,c)
1312         veor            d18,d26                 @ Sigma0(a)
1313         vadd.i64        d22,d27
1314         vadd.i64        d30,d27
1315         @ vadd.i64      d18,d30
1316         vshr.u64        q12,q2,#19
1317         vshr.u64        q13,q2,#61
1318          vadd.i64       d18,d30                 @ h+=Maj from the past
1319         vshr.u64        q15,q2,#6
1320         vsli.64         q12,q2,#45
1321         vext.8          q14,q3,q4,#8    @ X[i+1]
1322         vsli.64         q13,q2,#3
1323         veor            q15,q12
1324         vshr.u64        q12,q14,#1
1325         veor            q15,q13                         @ sigma1(X[i+14])
1326         vshr.u64        q13,q14,#8
1327         vadd.i64        q3,q15
1328         vshr.u64        q15,q14,#7
1329         vsli.64         q12,q14,#63
1330         vsli.64         q13,q14,#56
1331         vext.8          q14,q7,q0,#8    @ X[i+9]
1332         veor            q15,q12
1333         vshr.u64        d24,d22,#14             @ from NEON_00_15
1334         vadd.i64        q3,q14
1335         vshr.u64        d25,d22,#18             @ from NEON_00_15
1336         veor            q15,q13                         @ sigma0(X[i+1])
1337         vshr.u64        d26,d22,#41             @ from NEON_00_15
1338         vadd.i64        q3,q15
1339         vld1.64         {d28},[r3,:64]! @ K[i++]
1340         vsli.64         d24,d22,#50
1341         vsli.64         d25,d22,#46
1342         vmov            d29,d22
1343         vsli.64         d26,d22,#23
1344 #if 22<16 && defined(__ARMEL__)
1345         vrev64.8        ,
1346 #endif
1347         veor            d25,d24
1348         vbsl            d29,d23,d16             @ Ch(e,f,g)
1349         vshr.u64        d24,d18,#28
1350         veor            d26,d25                 @ Sigma1(e)
1351         vadd.i64        d27,d29,d17
1352         vshr.u64        d25,d18,#34
1353         vsli.64         d24,d18,#36
1354         vadd.i64        d27,d26
1355         vshr.u64        d26,d18,#39
1356         vadd.i64        d28,d6
1357         vsli.64         d25,d18,#30
1358         veor            d30,d18,d19
1359         vsli.64         d26,d18,#25
1360         veor            d17,d24,d25
1361         vadd.i64        d27,d28
1362         vbsl            d30,d20,d19             @ Maj(a,b,c)
1363         veor            d17,d26                 @ Sigma0(a)
1364         vadd.i64        d21,d27
1365         vadd.i64        d30,d27
1366         @ vadd.i64      d17,d30
1367         vshr.u64        d24,d21,#14     @ 23
1368 #if 23<16
1369         vld1.64         {d7},[r1]!      @ handles unaligned
1370 #endif
1371         vshr.u64        d25,d21,#18
1372 #if 23>0
1373          vadd.i64       d17,d30                 @ h+=Maj from the past
1374 #endif
1375         vshr.u64        d26,d21,#41
1376         vld1.64         {d28},[r3,:64]! @ K[i++]
1377         vsli.64         d24,d21,#50
1378         vsli.64         d25,d21,#46
1379         vmov            d29,d21
1380         vsli.64         d26,d21,#23
1381 #if 23<16 && defined(__ARMEL__)
1382         vrev64.8        ,
1383 #endif
1384         veor            d25,d24
1385         vbsl            d29,d22,d23             @ Ch(e,f,g)
1386         vshr.u64        d24,d17,#28
1387         veor            d26,d25                 @ Sigma1(e)
1388         vadd.i64        d27,d29,d16
1389         vshr.u64        d25,d17,#34
1390         vsli.64         d24,d17,#36
1391         vadd.i64        d27,d26
1392         vshr.u64        d26,d17,#39
1393         vadd.i64        d28,d7
1394         vsli.64         d25,d17,#30
1395         veor            d30,d17,d18
1396         vsli.64         d26,d17,#25
1397         veor            d16,d24,d25
1398         vadd.i64        d27,d28
1399         vbsl            d30,d19,d18             @ Maj(a,b,c)
1400         veor            d16,d26                 @ Sigma0(a)
1401         vadd.i64        d20,d27
1402         vadd.i64        d30,d27
1403         @ vadd.i64      d16,d30
1404         vshr.u64        q12,q3,#19
1405         vshr.u64        q13,q3,#61
1406          vadd.i64       d16,d30                 @ h+=Maj from the past
1407         vshr.u64        q15,q3,#6
1408         vsli.64         q12,q3,#45
1409         vext.8          q14,q4,q5,#8    @ X[i+1]
1410         vsli.64         q13,q3,#3
1411         veor            q15,q12
1412         vshr.u64        q12,q14,#1
1413         veor            q15,q13                         @ sigma1(X[i+14])
1414         vshr.u64        q13,q14,#8
1415         vadd.i64        q4,q15
1416         vshr.u64        q15,q14,#7
1417         vsli.64         q12,q14,#63
1418         vsli.64         q13,q14,#56
1419         vext.8          q14,q0,q1,#8    @ X[i+9]
1420         veor            q15,q12
1421         vshr.u64        d24,d20,#14             @ from NEON_00_15
1422         vadd.i64        q4,q14
1423         vshr.u64        d25,d20,#18             @ from NEON_00_15
1424         veor            q15,q13                         @ sigma0(X[i+1])
1425         vshr.u64        d26,d20,#41             @ from NEON_00_15
1426         vadd.i64        q4,q15
1427         vld1.64         {d28},[r3,:64]! @ K[i++]
1428         vsli.64         d24,d20,#50
1429         vsli.64         d25,d20,#46
1430         vmov            d29,d20
1431         vsli.64         d26,d20,#23
1432 #if 24<16 && defined(__ARMEL__)
1433         vrev64.8        ,
1434 #endif
1435         veor            d25,d24
1436         vbsl            d29,d21,d22             @ Ch(e,f,g)
1437         vshr.u64        d24,d16,#28
1438         veor            d26,d25                 @ Sigma1(e)
1439         vadd.i64        d27,d29,d23
1440         vshr.u64        d25,d16,#34
1441         vsli.64         d24,d16,#36
1442         vadd.i64        d27,d26
1443         vshr.u64        d26,d16,#39
1444         vadd.i64        d28,d8
1445         vsli.64         d25,d16,#30
1446         veor            d30,d16,d17
1447         vsli.64         d26,d16,#25
1448         veor            d23,d24,d25
1449         vadd.i64        d27,d28
1450         vbsl            d30,d18,d17             @ Maj(a,b,c)
1451         veor            d23,d26                 @ Sigma0(a)
1452         vadd.i64        d19,d27
1453         vadd.i64        d30,d27
1454         @ vadd.i64      d23,d30
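        @ each 64-bit rotate below is built from a vshr.u64/vsli.64 pair, e.g.
        @ ror(x,14) = (x>>14)|(x<<50); Sigma1(e) xors ror 14/18/41 and
        @ Sigma0(a) xors ror 28/34/39.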
1455         vshr.u64        d24,d19,#14     @ 25
1456 #if 25<16
1457         vld1.64         {d9},[r1]!      @ handles unaligned
1458 #endif
1459         vshr.u64        d25,d19,#18
1460 #if 25>0
1461          vadd.i64       d23,d30                 @ h+=Maj from the past
1462 #endif
1463         vshr.u64        d26,d19,#41
1464         vld1.64         {d28},[r3,:64]! @ K[i++]
1465         vsli.64         d24,d19,#50
1466         vsli.64         d25,d19,#46
1467         vmov            d29,d19
1468         vsli.64         d26,d19,#23
1469 #if 25<16 && defined(__ARMEL__)
1470         vrev64.8        ,
1471 #endif
1472         veor            d25,d24
1473         vbsl            d29,d20,d21             @ Ch(e,f,g)
1474         vshr.u64        d24,d23,#28
1475         veor            d26,d25                 @ Sigma1(e)
1476         vadd.i64        d27,d29,d22
1477         vshr.u64        d25,d23,#34
1478         vsli.64         d24,d23,#36
1479         vadd.i64        d27,d26
1480         vshr.u64        d26,d23,#39
1481         vadd.i64        d28,d9
1482         vsli.64         d25,d23,#30
1483         veor            d30,d23,d16
1484         vsli.64         d26,d23,#25
1485         veor            d22,d24,d25
1486         vadd.i64        d27,d28
1487         vbsl            d30,d17,d16             @ Maj(a,b,c)
1488         veor            d22,d26                 @ Sigma0(a)
1489         vadd.i64        d18,d27
1490         vadd.i64        d30,d27
1491         @ vadd.i64      d22,d30
1492         vshr.u64        q12,q4,#19
1493         vshr.u64        q13,q4,#61
1494          vadd.i64       d22,d30                 @ h+=Maj from the past
1495         vshr.u64        q15,q4,#6
1496         vsli.64         q12,q4,#45
1497         vext.8          q14,q5,q6,#8    @ X[i+1]
1498         vsli.64         q13,q4,#3
1499         veor            q15,q12
1500         vshr.u64        q12,q14,#1
1501         veor            q15,q13                         @ sigma1(X[i+14])
1502         vshr.u64        q13,q14,#8
1503         vadd.i64        q5,q15
1504         vshr.u64        q15,q14,#7
1505         vsli.64         q12,q14,#63
1506         vsli.64         q13,q14,#56
1507         vext.8          q14,q1,q2,#8    @ X[i+9]
1508         veor            q15,q12
1509         vshr.u64        d24,d18,#14             @ from NEON_00_15
1510         vadd.i64        q5,q14
1511         vshr.u64        d25,d18,#18             @ from NEON_00_15
1512         veor            q15,q13                         @ sigma0(X[i+1])
1513         vshr.u64        d26,d18,#41             @ from NEON_00_15
1514         vadd.i64        q5,q15
1515         vld1.64         {d28},[r3,:64]! @ K[i++]
1516         vsli.64         d24,d18,#50
1517         vsli.64         d25,d18,#46
1518         vmov            d29,d18
1519         vsli.64         d26,d18,#23
1520 #if 26<16 && defined(__ARMEL__)
1521         vrev64.8        ,
1522 #endif
1523         veor            d25,d24
1524         vbsl            d29,d19,d20             @ Ch(e,f,g)
1525         vshr.u64        d24,d22,#28
1526         veor            d26,d25                 @ Sigma1(e)
1527         vadd.i64        d27,d29,d21
1528         vshr.u64        d25,d22,#34
1529         vsli.64         d24,d22,#36
1530         vadd.i64        d27,d26
1531         vshr.u64        d26,d22,#39
1532         vadd.i64        d28,d10
1533         vsli.64         d25,d22,#30
1534         veor            d30,d22,d23
1535         vsli.64         d26,d22,#25
1536         veor            d21,d24,d25
1537         vadd.i64        d27,d28
1538         vbsl            d30,d16,d23             @ Maj(a,b,c)
1539         veor            d21,d26                 @ Sigma0(a)
1540         vadd.i64        d17,d27
1541         vadd.i64        d30,d27
1542         @ vadd.i64      d21,d30
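        @ the working variables a..h stay in d16-d23 for the whole block; only
        @ their role assignment rotates by one register each round, so no state
        @ is copied between rounds.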
1543         vshr.u64        d24,d17,#14     @ 27
1544 #if 27<16
1545         vld1.64         {d11},[r1]!     @ handles unaligned
1546 #endif
1547         vshr.u64        d25,d17,#18
1548 #if 27>0
1549          vadd.i64       d21,d30                 @ h+=Maj from the past
1550 #endif
1551         vshr.u64        d26,d17,#41
1552         vld1.64         {d28},[r3,:64]! @ K[i++]
1553         vsli.64         d24,d17,#50
1554         vsli.64         d25,d17,#46
1555         vmov            d29,d17
1556         vsli.64         d26,d17,#23
1557 #if 27<16 && defined(__ARMEL__)
1558         vrev64.8        ,
1559 #endif
1560         veor            d25,d24
1561         vbsl            d29,d18,d19             @ Ch(e,f,g)
1562         vshr.u64        d24,d21,#28
1563         veor            d26,d25                 @ Sigma1(e)
1564         vadd.i64        d27,d29,d20
1565         vshr.u64        d25,d21,#34
1566         vsli.64         d24,d21,#36
1567         vadd.i64        d27,d26
1568         vshr.u64        d26,d21,#39
1569         vadd.i64        d28,d11
1570         vsli.64         d25,d21,#30
1571         veor            d30,d21,d22
1572         vsli.64         d26,d21,#25
1573         veor            d20,d24,d25
1574         vadd.i64        d27,d28
1575         vbsl            d30,d23,d22             @ Maj(a,b,c)
1576         veor            d20,d26                 @ Sigma0(a)
1577         vadd.i64        d16,d27
1578         vadd.i64        d30,d27
1579         @ vadd.i64      d20,d30
1580         vshr.u64        q12,q5,#19
1581         vshr.u64        q13,q5,#61
1582          vadd.i64       d20,d30                 @ h+=Maj from the past
1583         vshr.u64        q15,q5,#6
1584         vsli.64         q12,q5,#45
1585         vext.8          q14,q6,q7,#8    @ X[i+1]
1586         vsli.64         q13,q5,#3
1587         veor            q15,q12
1588         vshr.u64        q12,q14,#1
1589         veor            q15,q13                         @ sigma1(X[i+14])
1590         vshr.u64        q13,q14,#8
1591         vadd.i64        q6,q15
1592         vshr.u64        q15,q14,#7
1593         vsli.64         q12,q14,#63
1594         vsli.64         q13,q14,#56
1595         vext.8          q14,q2,q3,#8    @ X[i+9]
1596         veor            q15,q12
1597         vshr.u64        d24,d16,#14             @ from NEON_00_15
1598         vadd.i64        q6,q14
1599         vshr.u64        d25,d16,#18             @ from NEON_00_15
1600         veor            q15,q13                         @ sigma0(X[i+1])
1601         vshr.u64        d26,d16,#41             @ from NEON_00_15
1602         vadd.i64        q6,q15
1603         vld1.64         {d28},[r3,:64]! @ K[i++]
1604         vsli.64         d24,d16,#50
1605         vsli.64         d25,d16,#46
1606         vmov            d29,d16
1607         vsli.64         d26,d16,#23
1608 #if 28<16 && defined(__ARMEL__)
1609         vrev64.8        ,
1610 #endif
1611         veor            d25,d24
1612         vbsl            d29,d17,d18             @ Ch(e,f,g)
1613         vshr.u64        d24,d20,#28
1614         veor            d26,d25                 @ Sigma1(e)
1615         vadd.i64        d27,d29,d19
1616         vshr.u64        d25,d20,#34
1617         vsli.64         d24,d20,#36
1618         vadd.i64        d27,d26
1619         vshr.u64        d26,d20,#39
1620         vadd.i64        d28,d12
1621         vsli.64         d25,d20,#30
1622         veor            d30,d20,d21
1623         vsli.64         d26,d20,#25
1624         veor            d19,d24,d25
1625         vadd.i64        d27,d28
1626         vbsl            d30,d22,d21             @ Maj(a,b,c)
1627         veor            d19,d26                 @ Sigma0(a)
1628         vadd.i64        d23,d27
1629         vadd.i64        d30,d27
1630         @ vadd.i64      d19,d30
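        @ the commented-out vadd above is performed lazily: d30 (Maj(a,b,c)+T1)
        @ is added to Sigma0(a) at the start of the following round, shown as
        @ "h+=Maj from the past", or just before the context update after the
        @ final round.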
1631         vshr.u64        d24,d23,#14     @ 29
1632 #if 29<16
1633         vld1.64         {d13},[r1]!     @ handles unaligned
1634 #endif
1635         vshr.u64        d25,d23,#18
1636 #if 29>0
1637          vadd.i64       d19,d30                 @ h+=Maj from the past
1638 #endif
1639         vshr.u64        d26,d23,#41
1640         vld1.64         {d28},[r3,:64]! @ K[i++]
1641         vsli.64         d24,d23,#50
1642         vsli.64         d25,d23,#46
1643         vmov            d29,d23
1644         vsli.64         d26,d23,#23
1645 #if 29<16 && defined(__ARMEL__)
1646         vrev64.8        ,
1647 #endif
1648         veor            d25,d24
1649         vbsl            d29,d16,d17             @ Ch(e,f,g)
1650         vshr.u64        d24,d19,#28
1651         veor            d26,d25                 @ Sigma1(e)
1652         vadd.i64        d27,d29,d18
1653         vshr.u64        d25,d19,#34
1654         vsli.64         d24,d19,#36
1655         vadd.i64        d27,d26
1656         vshr.u64        d26,d19,#39
1657         vadd.i64        d28,d13
1658         vsli.64         d25,d19,#30
1659         veor            d30,d19,d20
1660         vsli.64         d26,d19,#25
1661         veor            d18,d24,d25
1662         vadd.i64        d27,d28
1663         vbsl            d30,d21,d20             @ Maj(a,b,c)
1664         veor            d18,d26                 @ Sigma0(a)
1665         vadd.i64        d22,d27
1666         vadd.i64        d30,d27
1667         @ vadd.i64      d18,d30
1668         vshr.u64        q12,q6,#19
1669         vshr.u64        q13,q6,#61
1670          vadd.i64       d18,d30                 @ h+=Maj from the past
1671         vshr.u64        q15,q6,#6
1672         vsli.64         q12,q6,#45
1673         vext.8          q14,q7,q0,#8    @ X[i+1]
1674         vsli.64         q13,q6,#3
1675         veor            q15,q12
1676         vshr.u64        q12,q14,#1
1677         veor            q15,q13                         @ sigma1(X[i+14])
1678         vshr.u64        q13,q14,#8
1679         vadd.i64        q7,q15
1680         vshr.u64        q15,q14,#7
1681         vsli.64         q12,q14,#63
1682         vsli.64         q13,q14,#56
1683         vext.8          q14,q3,q4,#8    @ X[i+9]
1684         veor            q15,q12
1685         vshr.u64        d24,d22,#14             @ from NEON_00_15
1686         vadd.i64        q7,q14
1687         vshr.u64        d25,d22,#18             @ from NEON_00_15
1688         veor            q15,q13                         @ sigma0(X[i+1])
1689         vshr.u64        d26,d22,#41             @ from NEON_00_15
1690         vadd.i64        q7,q15
1691         vld1.64         {d28},[r3,:64]! @ K[i++]
1692         vsli.64         d24,d22,#50
1693         vsli.64         d25,d22,#46
1694         vmov            d29,d22
1695         vsli.64         d26,d22,#23
1696 #if 30<16 && defined(__ARMEL__)
1697         vrev64.8        ,
1698 #endif
1699         veor            d25,d24
1700         vbsl            d29,d23,d16             @ Ch(e,f,g)
1701         vshr.u64        d24,d18,#28
1702         veor            d26,d25                 @ Sigma1(e)
1703         vadd.i64        d27,d29,d17
1704         vshr.u64        d25,d18,#34
1705         vsli.64         d24,d18,#36
1706         vadd.i64        d27,d26
1707         vshr.u64        d26,d18,#39
1708         vadd.i64        d28,d14
1709         vsli.64         d25,d18,#30
1710         veor            d30,d18,d19
1711         vsli.64         d26,d18,#25
1712         veor            d17,d24,d25
1713         vadd.i64        d27,d28
1714         vbsl            d30,d20,d19             @ Maj(a,b,c)
1715         veor            d17,d26                 @ Sigma0(a)
1716         vadd.i64        d21,d27
1717         vadd.i64        d30,d27
1718         @ vadd.i64      d17,d30
1719         vshr.u64        d24,d21,#14     @ 31
1720 #if 31<16
1721         vld1.64         {d15},[r1]!     @ handles unaligned
1722 #endif
1723         vshr.u64        d25,d21,#18
1724 #if 31>0
1725          vadd.i64       d17,d30                 @ h+=Maj from the past
1726 #endif
1727         vshr.u64        d26,d21,#41
1728         vld1.64         {d28},[r3,:64]! @ K[i++]
1729         vsli.64         d24,d21,#50
1730         vsli.64         d25,d21,#46
1731         vmov            d29,d21
1732         vsli.64         d26,d21,#23
1733 #if 31<16 && defined(__ARMEL__)
1734         vrev64.8        ,
1735 #endif
1736         veor            d25,d24
1737         vbsl            d29,d22,d23             @ Ch(e,f,g)
1738         vshr.u64        d24,d17,#28
1739         veor            d26,d25                 @ Sigma1(e)
1740         vadd.i64        d27,d29,d16
1741         vshr.u64        d25,d17,#34
1742         vsli.64         d24,d17,#36
1743         vadd.i64        d27,d26
1744         vshr.u64        d26,d17,#39
1745         vadd.i64        d28,d15
1746         vsli.64         d25,d17,#30
1747         veor            d30,d17,d18
1748         vsli.64         d26,d17,#25
1749         veor            d16,d24,d25
1750         vadd.i64        d27,d28
1751         vbsl            d30,d19,d18             @ Maj(a,b,c)
1752         veor            d16,d26                 @ Sigma0(a)
1753         vadd.i64        d20,d27
1754         vadd.i64        d30,d27
1755         @ vadd.i64      d16,d30
1756         bne             .L16_79_neon
1757
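        @ one 128-byte block done: fold in the last deferred Maj, load the hash
        @ context from [r0], accumulate the working variables d16-d23 into it,
        @ store it back, rewind r3 to K512 and loop while input remains
        @ (r1 != r2).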
1758          vadd.i64       d16,d30         @ h+=Maj from the past
1759         vldmia          r0,{d24-d31}    @ load context to temp
1760         vadd.i64        q8,q12          @ vectorized accumulate
1761         vadd.i64        q9,q13
1762         vadd.i64        q10,q14
1763         vadd.i64        q11,q15
1764         vstmia          r0,{d16-d23}    @ save context
1765         teq             r1,r2
1766         sub             r3,#640 @ rewind K512
1767         bne             .Loop_neon
1768
1769         vldmia  sp!,{d8-d15}            @ epilogue
1770         bx      lr                              @ .word 0xe12fff1e
1771 #endif
1772 .size   sha512_block_data_order,.-sha512_block_data_order
1773 .asciz  "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
1774 .align  2
1775 #if __ARM_MAX_ARCH__>=7
1776 .comm   OPENSSL_armcap_P,4,4
1777 #endif