2 /* Do not modify. This file is auto-generated from sha512-armv8.pl. */
3 // Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
5 // Licensed under the OpenSSL license (the "License"). You may not use
6 // this file except in compliance with the License. You can obtain a copy
7 // in the file LICENSE in the source distribution or at
8 // https://www.openssl.org/source/license.html
10 // ====================================================================
11 // Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 // project. The module is, however, dual licensed under OpenSSL and
13 // CRYPTOGAMS licenses depending on where you obtain it. For further
14 // details see http://www.openssl.org/~appro/cryptogams/.
16 // Permission to use under GPLv2 terms is granted.
17 // ====================================================================
19 // SHA256/512 for ARMv8.
21 // Performance in cycles per processed byte and improvement coefficient
22 // over code generated with "default" compiler:
24 //                 SHA256-hw   SHA256(*)       SHA512
25 // Apple A7        1.97        10.5 (+33%)     6.73 (-1%(**))
26 // Cortex-A53      2.38        15.5 (+115%)    10.0 (+150%(***))
27 // Cortex-A57      2.31        11.6 (+86%)     7.51 (+260%(***))
28 // Denver          2.01        10.5 (+26%)     6.70 (+8%)
29 // X-Gene                      20.0 (+100%)    12.8 (+300%(***))
30 // Mongoose        2.36        13.0 (+50%)     8.36 (+33%)
31 // Kryo            1.92        17.4 (+30%)     11.2 (+8%)
33 // (*) Software SHA256 results are of lesser relevance, presented
34 // mostly for informational purposes.
35 // (**) The result is a trade-off: it's possible to improve it by
36 // 10% (or by 1 cycle per round), but at the cost of 20% loss
37 // on Cortex-A53 (or by 4 cycles per round).
38 // (***) Super-impressive coefficients over gcc-generated code are
39 // indication of some compiler "pathology", most notably code
40 // generated with -mgeneral-regs-only is significantly faster
41 // and the gap is only 40-90%.
45 // Originally it was reckoned that it makes no sense to implement a NEON
46 // version of SHA256 for 64-bit processors. This is because the performance
47 // improvement on the most widespread Cortex-A5x processors was observed
48 // to be marginal: the same on Cortex-A53 and ~10% on A57. But then it was
49 // observed that 32-bit NEON SHA256 performs significantly better than the
50 // 64-bit scalar version on *some* of the more recent processors. As a
51 // result, a 64-bit NEON version of SHA256 was added to provide the best
52 // all-round performance. For example, it executes ~30% faster on X-Gene
53 // and Mongoose. [For reference, a NEON version of SHA512 is bound to
54 // deliver much less improvement, likely *negative* on Cortex-A5x,
55 // which is why NEON support is limited to SHA256.]
58 # include "arm_arch.h"
64 .hidden OPENSSL_armcap_P
// ---------------------------------------------------------------------
// sha512_block_data_order: SHA-512 block transform using general-purpose
// registers only.
//
// NOTE(review): this listing appears to omit lines (labels, preprocessor
// else/endif lines and part of every round). The comments added here
// describe only what the visible instructions and their original
// comments establish.
//
// Register roles, as shown by the visible code:
//   x0        context pointer: state words are loaded from and stored
//             back to [x0]
//   x1        input pointer, advanced as message words are consumed
//   x2        on entry combined as x1 + x2*128 to form the end-of-input
//             address (presumably a count of 128-byte blocks - confirm
//             against the C prototype)
//   x30       walks the round-constant table, one 8-byte word per round
//   x20..x27  working variables a..h in the first round; the roles rotate
//             by one register every round instead of moving the data
//   x19, x28  alternate between holding K[i] and the a^b / b^c term that
//             feeds Maj(a,b,c)
//   x16, x17  scratch for Sigma1(e), Ch(e,f,g) and Sigma0(a)
//   x3..x15 and x0..x2
//             message-schedule words X[i]. x0..x2 are reused for this, so
//             their pointer values must be preserved elsewhere; the
//             save/restore is not visible in this listing.
// ---------------------------------------------------------------------
65 .globl sha512_block_data_order
66 .type sha512_block_data_order,%function
68 sha512_block_data_order:
// Three ways of referring to the OPENSSL_armcap_P offset word.
// NOTE(review): presumably alternatives selected by preprocessor
// conditionals that are not visible here - confirm in the generator.
71 ldrsw x16,.LOPENSSL_armcap_P
73 ldr x16,.LOPENSSL_armcap_P
75 adr x17,.LOPENSSL_armcap_P
81 .inst 0xd503233f // paciasp
// Push frame pointer and link register and reserve a 128-byte frame.
// The callee-saved registers x19..x28 are reloaded from this frame in
// the epilogue below.
82 stp x29,x30,[sp,#-128]!
92 ldp x20,x21,[x0] // load context
95 add x2,x1,x2,lsl#7 // end of input
102 ldr x19,[x30],#8 // *K++
// x28 = b^c, pre-computed so the first round can form Maj(a,b,c) with
// the same and/eor sequence that every later round uses.
103 eor x28,x21,x22 // magic seed
105 #ifndef __AARCH64EB__
109 add x27,x27,x19 // h+=K[i]
110 eor x6,x24,x24,ror#23
113 add x27,x27,x3 // h+=X[i]
114 orr x17,x17,x19 // Ch(e,f,g)
115 eor x19,x20,x21 // a^b, b^c in next round
116 eor x16,x16,x6,ror#18 // Sigma1(e)
118 add x27,x27,x17 // h+=Ch(e,f,g)
119 eor x17,x20,x20,ror#5
120 add x27,x27,x16 // h+=Sigma1(e)
121 and x28,x28,x19 // (b^c)&=(a^b)
122 add x23,x23,x27 // d+=h
123 eor x28,x28,x21 // Maj(a,b,c)
124 eor x17,x6,x17,ror#34 // Sigma0(a)
125 add x27,x27,x28 // h+=Maj(a,b,c)
126 ldr x28,[x30],#8 // *K++, x19 in next round
127 //add x27,x27,x17 // h+=Sigma0(a)
128 #ifndef __AARCH64EB__
132 add x27,x27,x17 // h+=Sigma0(a)
134 add x26,x26,x28 // h+=K[i]
135 eor x7,x23,x23,ror#23
138 add x26,x26,x4 // h+=X[i]
139 orr x17,x17,x28 // Ch(e,f,g)
140 eor x28,x27,x20 // a^b, b^c in next round
141 eor x16,x16,x7,ror#18 // Sigma1(e)
143 add x26,x26,x17 // h+=Ch(e,f,g)
144 eor x17,x27,x27,ror#5
145 add x26,x26,x16 // h+=Sigma1(e)
146 and x19,x19,x28 // (b^c)&=(a^b)
147 add x22,x22,x26 // d+=h
148 eor x19,x19,x20 // Maj(a,b,c)
149 eor x17,x7,x17,ror#34 // Sigma0(a)
150 add x26,x26,x19 // h+=Maj(a,b,c)
151 ldr x19,[x30],#8 // *K++, x28 in next round
152 //add x26,x26,x17 // h+=Sigma0(a)
153 #ifndef __AARCH64EB__
156 add x26,x26,x17 // h+=Sigma0(a)
158 add x25,x25,x19 // h+=K[i]
159 eor x8,x22,x22,ror#23
162 add x25,x25,x5 // h+=X[i]
163 orr x17,x17,x19 // Ch(e,f,g)
164 eor x19,x26,x27 // a^b, b^c in next round
165 eor x16,x16,x8,ror#18 // Sigma1(e)
167 add x25,x25,x17 // h+=Ch(e,f,g)
168 eor x17,x26,x26,ror#5
169 add x25,x25,x16 // h+=Sigma1(e)
170 and x28,x28,x19 // (b^c)&=(a^b)
171 add x21,x21,x25 // d+=h
172 eor x28,x28,x27 // Maj(a,b,c)
173 eor x17,x8,x17,ror#34 // Sigma0(a)
174 add x25,x25,x28 // h+=Maj(a,b,c)
175 ldr x28,[x30],#8 // *K++, x19 in next round
176 //add x25,x25,x17 // h+=Sigma0(a)
177 #ifndef __AARCH64EB__
181 add x25,x25,x17 // h+=Sigma0(a)
183 add x24,x24,x28 // h+=K[i]
184 eor x9,x21,x21,ror#23
187 add x24,x24,x6 // h+=X[i]
188 orr x17,x17,x28 // Ch(e,f,g)
189 eor x28,x25,x26 // a^b, b^c in next round
190 eor x16,x16,x9,ror#18 // Sigma1(e)
192 add x24,x24,x17 // h+=Ch(e,f,g)
193 eor x17,x25,x25,ror#5
194 add x24,x24,x16 // h+=Sigma1(e)
195 and x19,x19,x28 // (b^c)&=(a^b)
196 add x20,x20,x24 // d+=h
197 eor x19,x19,x26 // Maj(a,b,c)
198 eor x17,x9,x17,ror#34 // Sigma0(a)
199 add x24,x24,x19 // h+=Maj(a,b,c)
200 ldr x19,[x30],#8 // *K++, x28 in next round
201 //add x24,x24,x17 // h+=Sigma0(a)
202 #ifndef __AARCH64EB__
205 add x24,x24,x17 // h+=Sigma0(a)
207 add x23,x23,x19 // h+=K[i]
208 eor x10,x20,x20,ror#23
211 add x23,x23,x7 // h+=X[i]
212 orr x17,x17,x19 // Ch(e,f,g)
213 eor x19,x24,x25 // a^b, b^c in next round
214 eor x16,x16,x10,ror#18 // Sigma1(e)
216 add x23,x23,x17 // h+=Ch(e,f,g)
217 eor x17,x24,x24,ror#5
218 add x23,x23,x16 // h+=Sigma1(e)
219 and x28,x28,x19 // (b^c)&=(a^b)
220 add x27,x27,x23 // d+=h
221 eor x28,x28,x25 // Maj(a,b,c)
222 eor x17,x10,x17,ror#34 // Sigma0(a)
223 add x23,x23,x28 // h+=Maj(a,b,c)
224 ldr x28,[x30],#8 // *K++, x19 in next round
225 //add x23,x23,x17 // h+=Sigma0(a)
226 #ifndef __AARCH64EB__
230 add x23,x23,x17 // h+=Sigma0(a)
232 add x22,x22,x28 // h+=K[i]
233 eor x11,x27,x27,ror#23
236 add x22,x22,x8 // h+=X[i]
237 orr x17,x17,x28 // Ch(e,f,g)
238 eor x28,x23,x24 // a^b, b^c in next round
239 eor x16,x16,x11,ror#18 // Sigma1(e)
241 add x22,x22,x17 // h+=Ch(e,f,g)
242 eor x17,x23,x23,ror#5
243 add x22,x22,x16 // h+=Sigma1(e)
244 and x19,x19,x28 // (b^c)&=(a^b)
245 add x26,x26,x22 // d+=h
246 eor x19,x19,x24 // Maj(a,b,c)
247 eor x17,x11,x17,ror#34 // Sigma0(a)
248 add x22,x22,x19 // h+=Maj(a,b,c)
249 ldr x19,[x30],#8 // *K++, x28 in next round
250 //add x22,x22,x17 // h+=Sigma0(a)
251 #ifndef __AARCH64EB__
254 add x22,x22,x17 // h+=Sigma0(a)
256 add x21,x21,x19 // h+=K[i]
257 eor x12,x26,x26,ror#23
260 add x21,x21,x9 // h+=X[i]
261 orr x17,x17,x19 // Ch(e,f,g)
262 eor x19,x22,x23 // a^b, b^c in next round
263 eor x16,x16,x12,ror#18 // Sigma1(e)
265 add x21,x21,x17 // h+=Ch(e,f,g)
266 eor x17,x22,x22,ror#5
267 add x21,x21,x16 // h+=Sigma1(e)
268 and x28,x28,x19 // (b^c)&=(a^b)
269 add x25,x25,x21 // d+=h
270 eor x28,x28,x23 // Maj(a,b,c)
271 eor x17,x12,x17,ror#34 // Sigma0(a)
272 add x21,x21,x28 // h+=Maj(a,b,c)
273 ldr x28,[x30],#8 // *K++, x19 in next round
274 //add x21,x21,x17 // h+=Sigma0(a)
275 #ifndef __AARCH64EB__
// Fetch the next two message words from the input (post-increment by 16).
278 ldp x11,x12,[x1],#2*8
279 add x21,x21,x17 // h+=Sigma0(a)
281 add x20,x20,x28 // h+=K[i]
282 eor x13,x25,x25,ror#23
285 add x20,x20,x10 // h+=X[i]
286 orr x17,x17,x28 // Ch(e,f,g)
287 eor x28,x21,x22 // a^b, b^c in next round
288 eor x16,x16,x13,ror#18 // Sigma1(e)
290 add x20,x20,x17 // h+=Ch(e,f,g)
291 eor x17,x21,x21,ror#5
292 add x20,x20,x16 // h+=Sigma1(e)
293 and x19,x19,x28 // (b^c)&=(a^b)
294 add x24,x24,x20 // d+=h
295 eor x19,x19,x22 // Maj(a,b,c)
296 eor x17,x13,x17,ror#34 // Sigma0(a)
297 add x20,x20,x19 // h+=Maj(a,b,c)
298 ldr x19,[x30],#8 // *K++, x28 in next round
299 //add x20,x20,x17 // h+=Sigma0(a)
300 #ifndef __AARCH64EB__
303 add x20,x20,x17 // h+=Sigma0(a)
305 add x27,x27,x19 // h+=K[i]
306 eor x14,x24,x24,ror#23
309 add x27,x27,x11 // h+=X[i]
310 orr x17,x17,x19 // Ch(e,f,g)
311 eor x19,x20,x21 // a^b, b^c in next round
312 eor x16,x16,x14,ror#18 // Sigma1(e)
314 add x27,x27,x17 // h+=Ch(e,f,g)
315 eor x17,x20,x20,ror#5
316 add x27,x27,x16 // h+=Sigma1(e)
317 and x28,x28,x19 // (b^c)&=(a^b)
318 add x23,x23,x27 // d+=h
319 eor x28,x28,x21 // Maj(a,b,c)
320 eor x17,x14,x17,ror#34 // Sigma0(a)
321 add x27,x27,x28 // h+=Maj(a,b,c)
322 ldr x28,[x30],#8 // *K++, x19 in next round
323 //add x27,x27,x17 // h+=Sigma0(a)
324 #ifndef __AARCH64EB__
// Fetch the next two message words from the input (post-increment by 16).
327 ldp x13,x14,[x1],#2*8
328 add x27,x27,x17 // h+=Sigma0(a)
330 add x26,x26,x28 // h+=K[i]
331 eor x15,x23,x23,ror#23
334 add x26,x26,x12 // h+=X[i]
335 orr x17,x17,x28 // Ch(e,f,g)
336 eor x28,x27,x20 // a^b, b^c in next round
337 eor x16,x16,x15,ror#18 // Sigma1(e)
339 add x26,x26,x17 // h+=Ch(e,f,g)
340 eor x17,x27,x27,ror#5
341 add x26,x26,x16 // h+=Sigma1(e)
342 and x19,x19,x28 // (b^c)&=(a^b)
343 add x22,x22,x26 // d+=h
344 eor x19,x19,x20 // Maj(a,b,c)
345 eor x17,x15,x17,ror#34 // Sigma0(a)
346 add x26,x26,x19 // h+=Maj(a,b,c)
347 ldr x19,[x30],#8 // *K++, x28 in next round
348 //add x26,x26,x17 // h+=Sigma0(a)
349 #ifndef __AARCH64EB__
352 add x26,x26,x17 // h+=Sigma0(a)
354 add x25,x25,x19 // h+=K[i]
355 eor x0,x22,x22,ror#23
358 add x25,x25,x13 // h+=X[i]
359 orr x17,x17,x19 // Ch(e,f,g)
360 eor x19,x26,x27 // a^b, b^c in next round
361 eor x16,x16,x0,ror#18 // Sigma1(e)
363 add x25,x25,x17 // h+=Ch(e,f,g)
364 eor x17,x26,x26,ror#5
365 add x25,x25,x16 // h+=Sigma1(e)
366 and x28,x28,x19 // (b^c)&=(a^b)
367 add x21,x21,x25 // d+=h
368 eor x28,x28,x27 // Maj(a,b,c)
369 eor x17,x0,x17,ror#34 // Sigma0(a)
370 add x25,x25,x28 // h+=Maj(a,b,c)
371 ldr x28,[x30],#8 // *K++, x19 in next round
372 //add x25,x25,x17 // h+=Sigma0(a)
373 #ifndef __AARCH64EB__
377 add x25,x25,x17 // h+=Sigma0(a)
380 add x24,x24,x28 // h+=K[i]
381 eor x6,x21,x21,ror#23
384 add x24,x24,x14 // h+=X[i]
385 orr x17,x17,x28 // Ch(e,f,g)
386 eor x28,x25,x26 // a^b, b^c in next round
387 eor x16,x16,x6,ror#18 // Sigma1(e)
389 add x24,x24,x17 // h+=Ch(e,f,g)
390 eor x17,x25,x25,ror#5
391 add x24,x24,x16 // h+=Sigma1(e)
392 and x19,x19,x28 // (b^c)&=(a^b)
393 add x20,x20,x24 // d+=h
394 eor x19,x19,x26 // Maj(a,b,c)
395 eor x17,x6,x17,ror#34 // Sigma0(a)
396 add x24,x24,x19 // h+=Maj(a,b,c)
397 ldr x19,[x30],#8 // *K++, x28 in next round
398 //add x24,x24,x17 // h+=Sigma0(a)
399 #ifndef __AARCH64EB__
402 add x24,x24,x17 // h+=Sigma0(a)
405 add x23,x23,x19 // h+=K[i]
406 eor x7,x20,x20,ror#23
409 add x23,x23,x15 // h+=X[i]
410 orr x17,x17,x19 // Ch(e,f,g)
411 eor x19,x24,x25 // a^b, b^c in next round
412 eor x16,x16,x7,ror#18 // Sigma1(e)
414 add x23,x23,x17 // h+=Ch(e,f,g)
415 eor x17,x24,x24,ror#5
416 add x23,x23,x16 // h+=Sigma1(e)
417 and x28,x28,x19 // (b^c)&=(a^b)
418 add x27,x27,x23 // d+=h
419 eor x28,x28,x25 // Maj(a,b,c)
420 eor x17,x7,x17,ror#34 // Sigma0(a)
421 add x23,x23,x28 // h+=Maj(a,b,c)
422 ldr x28,[x30],#8 // *K++, x19 in next round
423 //add x23,x23,x17 // h+=Sigma0(a)
424 #ifndef __AARCH64EB__
428 add x23,x23,x17 // h+=Sigma0(a)
431 add x22,x22,x28 // h+=K[i]
432 eor x8,x27,x27,ror#23
435 add x22,x22,x0 // h+=X[i]
436 orr x17,x17,x28 // Ch(e,f,g)
437 eor x28,x23,x24 // a^b, b^c in next round
438 eor x16,x16,x8,ror#18 // Sigma1(e)
440 add x22,x22,x17 // h+=Ch(e,f,g)
441 eor x17,x23,x23,ror#5
442 add x22,x22,x16 // h+=Sigma1(e)
443 and x19,x19,x28 // (b^c)&=(a^b)
444 add x26,x26,x22 // d+=h
445 eor x19,x19,x24 // Maj(a,b,c)
446 eor x17,x8,x17,ror#34 // Sigma0(a)
447 add x22,x22,x19 // h+=Maj(a,b,c)
448 ldr x19,[x30],#8 // *K++, x28 in next round
449 //add x22,x22,x17 // h+=Sigma0(a)
450 #ifndef __AARCH64EB__
454 add x22,x22,x17 // h+=Sigma0(a)
457 add x21,x21,x19 // h+=K[i]
458 eor x9,x26,x26,ror#23
461 add x21,x21,x1 // h+=X[i]
462 orr x17,x17,x19 // Ch(e,f,g)
463 eor x19,x22,x23 // a^b, b^c in next round
464 eor x16,x16,x9,ror#18 // Sigma1(e)
466 add x21,x21,x17 // h+=Ch(e,f,g)
467 eor x17,x22,x22,ror#5
468 add x21,x21,x16 // h+=Sigma1(e)
469 and x28,x28,x19 // (b^c)&=(a^b)
470 add x25,x25,x21 // d+=h
471 eor x28,x28,x23 // Maj(a,b,c)
472 eor x17,x9,x17,ror#34 // Sigma0(a)
473 add x21,x21,x28 // h+=Maj(a,b,c)
474 ldr x28,[x30],#8 // *K++, x19 in next round
475 //add x21,x21,x17 // h+=Sigma0(a)
476 #ifndef __AARCH64EB__
480 add x21,x21,x17 // h+=Sigma0(a)
// From this round on, each round also folds sigma0 / sigma1 terms of
// neighbouring X words into the message schedule, interleaved with the
// compression step.
483 add x20,x20,x28 // h+=K[i]
489 add x20,x20,x2 // h+=X[i]
490 eor x16,x16,x25,ror#18
492 orr x17,x17,x28 // Ch(e,f,g)
493 eor x28,x21,x22 // a^b, b^c in next round
494 eor x16,x16,x25,ror#41 // Sigma1(e)
495 eor x10,x10,x21,ror#34
496 add x20,x20,x17 // h+=Ch(e,f,g)
497 and x19,x19,x28 // (b^c)&=(a^b)
499 eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
500 add x20,x20,x16 // h+=Sigma1(e)
501 eor x19,x19,x22 // Maj(a,b,c)
502 eor x17,x10,x21,ror#39 // Sigma0(a)
503 eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
505 add x24,x24,x20 // d+=h
506 add x20,x20,x19 // h+=Maj(a,b,c)
507 ldr x19,[x30],#8 // *K++, x28 in next round
509 add x20,x20,x17 // h+=Sigma0(a)
515 add x27,x27,x19 // h+=K[i]
521 add x27,x27,x3 // h+=X[i]
522 eor x16,x16,x24,ror#18
524 orr x17,x17,x19 // Ch(e,f,g)
525 eor x19,x20,x21 // a^b, b^c in next round
526 eor x16,x16,x24,ror#41 // Sigma1(e)
527 eor x11,x11,x20,ror#34
528 add x27,x27,x17 // h+=Ch(e,f,g)
529 and x28,x28,x19 // (b^c)&=(a^b)
531 eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
532 add x27,x27,x16 // h+=Sigma1(e)
533 eor x28,x28,x21 // Maj(a,b,c)
534 eor x17,x11,x20,ror#39 // Sigma0(a)
535 eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
537 add x23,x23,x27 // d+=h
538 add x27,x27,x28 // h+=Maj(a,b,c)
539 ldr x28,[x30],#8 // *K++, x19 in next round
541 add x27,x27,x17 // h+=Sigma0(a)
546 add x26,x26,x28 // h+=K[i]
552 add x26,x26,x4 // h+=X[i]
553 eor x16,x16,x23,ror#18
555 orr x17,x17,x28 // Ch(e,f,g)
556 eor x28,x27,x20 // a^b, b^c in next round
557 eor x16,x16,x23,ror#41 // Sigma1(e)
558 eor x12,x12,x27,ror#34
559 add x26,x26,x17 // h+=Ch(e,f,g)
560 and x19,x19,x28 // (b^c)&=(a^b)
561 eor x10,x10,x3,ror#61
562 eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
563 add x26,x26,x16 // h+=Sigma1(e)
564 eor x19,x19,x20 // Maj(a,b,c)
565 eor x17,x12,x27,ror#39 // Sigma0(a)
566 eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
568 add x22,x22,x26 // d+=h
569 add x26,x26,x19 // h+=Maj(a,b,c)
570 ldr x19,[x30],#8 // *K++, x28 in next round
572 add x26,x26,x17 // h+=Sigma0(a)
577 add x25,x25,x19 // h+=K[i]
583 add x25,x25,x5 // h+=X[i]
584 eor x16,x16,x22,ror#18
586 orr x17,x17,x19 // Ch(e,f,g)
587 eor x19,x26,x27 // a^b, b^c in next round
588 eor x16,x16,x22,ror#41 // Sigma1(e)
589 eor x13,x13,x26,ror#34
590 add x25,x25,x17 // h+=Ch(e,f,g)
591 and x28,x28,x19 // (b^c)&=(a^b)
592 eor x11,x11,x4,ror#61
593 eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
594 add x25,x25,x16 // h+=Sigma1(e)
595 eor x28,x28,x27 // Maj(a,b,c)
596 eor x17,x13,x26,ror#39 // Sigma0(a)
597 eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
599 add x21,x21,x25 // d+=h
600 add x25,x25,x28 // h+=Maj(a,b,c)
601 ldr x28,[x30],#8 // *K++, x19 in next round
603 add x25,x25,x17 // h+=Sigma0(a)
608 add x24,x24,x28 // h+=K[i]
614 add x24,x24,x6 // h+=X[i]
615 eor x16,x16,x21,ror#18
617 orr x17,x17,x28 // Ch(e,f,g)
618 eor x28,x25,x26 // a^b, b^c in next round
619 eor x16,x16,x21,ror#41 // Sigma1(e)
620 eor x14,x14,x25,ror#34
621 add x24,x24,x17 // h+=Ch(e,f,g)
622 and x19,x19,x28 // (b^c)&=(a^b)
623 eor x12,x12,x5,ror#61
624 eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
625 add x24,x24,x16 // h+=Sigma1(e)
626 eor x19,x19,x26 // Maj(a,b,c)
627 eor x17,x14,x25,ror#39 // Sigma0(a)
628 eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
630 add x20,x20,x24 // d+=h
631 add x24,x24,x19 // h+=Maj(a,b,c)
632 ldr x19,[x30],#8 // *K++, x28 in next round
634 add x24,x24,x17 // h+=Sigma0(a)
639 add x23,x23,x19 // h+=K[i]
645 add x23,x23,x7 // h+=X[i]
646 eor x16,x16,x20,ror#18
648 orr x17,x17,x19 // Ch(e,f,g)
649 eor x19,x24,x25 // a^b, b^c in next round
650 eor x16,x16,x20,ror#41 // Sigma1(e)
651 eor x15,x15,x24,ror#34
652 add x23,x23,x17 // h+=Ch(e,f,g)
653 and x28,x28,x19 // (b^c)&=(a^b)
654 eor x13,x13,x6,ror#61
655 eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
656 add x23,x23,x16 // h+=Sigma1(e)
657 eor x28,x28,x25 // Maj(a,b,c)
658 eor x17,x15,x24,ror#39 // Sigma0(a)
659 eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
661 add x27,x27,x23 // d+=h
662 add x23,x23,x28 // h+=Maj(a,b,c)
663 ldr x28,[x30],#8 // *K++, x19 in next round
665 add x23,x23,x17 // h+=Sigma0(a)
670 add x22,x22,x28 // h+=K[i]
676 add x22,x22,x8 // h+=X[i]
677 eor x16,x16,x27,ror#18
678 eor x15,x15,x10,ror#8
679 orr x17,x17,x28 // Ch(e,f,g)
680 eor x28,x23,x24 // a^b, b^c in next round
681 eor x16,x16,x27,ror#41 // Sigma1(e)
683 add x22,x22,x17 // h+=Ch(e,f,g)
684 and x19,x19,x28 // (b^c)&=(a^b)
685 eor x14,x14,x7,ror#61
686 eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
687 add x22,x22,x16 // h+=Sigma1(e)
688 eor x19,x19,x24 // Maj(a,b,c)
689 eor x17,x0,x23,ror#39 // Sigma0(a)
690 eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
692 add x26,x26,x22 // d+=h
693 add x22,x22,x19 // h+=Maj(a,b,c)
694 ldr x19,[x30],#8 // *K++, x28 in next round
696 add x22,x22,x17 // h+=Sigma0(a)
701 add x21,x21,x19 // h+=K[i]
707 add x21,x21,x9 // h+=X[i]
708 eor x16,x16,x26,ror#18
710 orr x17,x17,x19 // Ch(e,f,g)
711 eor x19,x22,x23 // a^b, b^c in next round
712 eor x16,x16,x26,ror#41 // Sigma1(e)
714 add x21,x21,x17 // h+=Ch(e,f,g)
715 and x28,x28,x19 // (b^c)&=(a^b)
716 eor x15,x15,x8,ror#61
717 eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
718 add x21,x21,x16 // h+=Sigma1(e)
719 eor x28,x28,x23 // Maj(a,b,c)
720 eor x17,x1,x22,ror#39 // Sigma0(a)
721 eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
723 add x25,x25,x21 // d+=h
724 add x21,x21,x28 // h+=Maj(a,b,c)
725 ldr x28,[x30],#8 // *K++, x19 in next round
727 add x21,x21,x17 // h+=Sigma0(a)
732 add x20,x20,x28 // h+=K[i]
738 add x20,x20,x10 // h+=X[i]
739 eor x16,x16,x25,ror#18
741 orr x17,x17,x28 // Ch(e,f,g)
742 eor x28,x21,x22 // a^b, b^c in next round
743 eor x16,x16,x25,ror#41 // Sigma1(e)
745 add x20,x20,x17 // h+=Ch(e,f,g)
746 and x19,x19,x28 // (b^c)&=(a^b)
748 eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
749 add x20,x20,x16 // h+=Sigma1(e)
750 eor x19,x19,x22 // Maj(a,b,c)
751 eor x17,x2,x21,ror#39 // Sigma0(a)
752 eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
754 add x24,x24,x20 // d+=h
755 add x20,x20,x19 // h+=Maj(a,b,c)
756 ldr x19,[x30],#8 // *K++, x28 in next round
758 add x20,x20,x17 // h+=Sigma0(a)
763 add x27,x27,x19 // h+=K[i]
769 add x27,x27,x11 // h+=X[i]
770 eor x16,x16,x24,ror#18
772 orr x17,x17,x19 // Ch(e,f,g)
773 eor x19,x20,x21 // a^b, b^c in next round
774 eor x16,x16,x24,ror#41 // Sigma1(e)
776 add x27,x27,x17 // h+=Ch(e,f,g)
777 and x28,x28,x19 // (b^c)&=(a^b)
779 eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
780 add x27,x27,x16 // h+=Sigma1(e)
781 eor x28,x28,x21 // Maj(a,b,c)
782 eor x17,x3,x20,ror#39 // Sigma0(a)
783 eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
785 add x23,x23,x27 // d+=h
786 add x27,x27,x28 // h+=Maj(a,b,c)
787 ldr x28,[x30],#8 // *K++, x19 in next round
789 add x27,x27,x17 // h+=Sigma0(a)
794 add x26,x26,x28 // h+=K[i]
800 add x26,x26,x12 // h+=X[i]
801 eor x16,x16,x23,ror#18
803 orr x17,x17,x28 // Ch(e,f,g)
804 eor x28,x27,x20 // a^b, b^c in next round
805 eor x16,x16,x23,ror#41 // Sigma1(e)
807 add x26,x26,x17 // h+=Ch(e,f,g)
808 and x19,x19,x28 // (b^c)&=(a^b)
810 eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
811 add x26,x26,x16 // h+=Sigma1(e)
812 eor x19,x19,x20 // Maj(a,b,c)
813 eor x17,x4,x27,ror#39 // Sigma0(a)
814 eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
816 add x22,x22,x26 // d+=h
817 add x26,x26,x19 // h+=Maj(a,b,c)
818 ldr x19,[x30],#8 // *K++, x28 in next round
820 add x26,x26,x17 // h+=Sigma0(a)
825 add x25,x25,x19 // h+=K[i]
831 add x25,x25,x13 // h+=X[i]
832 eor x16,x16,x22,ror#18
834 orr x17,x17,x19 // Ch(e,f,g)
835 eor x19,x26,x27 // a^b, b^c in next round
836 eor x16,x16,x22,ror#41 // Sigma1(e)
838 add x25,x25,x17 // h+=Ch(e,f,g)
839 and x28,x28,x19 // (b^c)&=(a^b)
841 eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
842 add x25,x25,x16 // h+=Sigma1(e)
843 eor x28,x28,x27 // Maj(a,b,c)
844 eor x17,x5,x26,ror#39 // Sigma0(a)
845 eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
847 add x21,x21,x25 // d+=h
848 add x25,x25,x28 // h+=Maj(a,b,c)
849 ldr x28,[x30],#8 // *K++, x19 in next round
851 add x25,x25,x17 // h+=Sigma0(a)
856 add x24,x24,x28 // h+=K[i]
862 add x24,x24,x14 // h+=X[i]
863 eor x16,x16,x21,ror#18
865 orr x17,x17,x28 // Ch(e,f,g)
866 eor x28,x25,x26 // a^b, b^c in next round
867 eor x16,x16,x21,ror#41 // Sigma1(e)
869 add x24,x24,x17 // h+=Ch(e,f,g)
870 and x19,x19,x28 // (b^c)&=(a^b)
872 eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
873 add x24,x24,x16 // h+=Sigma1(e)
874 eor x19,x19,x26 // Maj(a,b,c)
875 eor x17,x6,x25,ror#39 // Sigma0(a)
876 eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
878 add x20,x20,x24 // d+=h
879 add x24,x24,x19 // h+=Maj(a,b,c)
880 ldr x19,[x30],#8 // *K++, x28 in next round
882 add x24,x24,x17 // h+=Sigma0(a)
887 add x23,x23,x19 // h+=K[i]
893 add x23,x23,x15 // h+=X[i]
894 eor x16,x16,x20,ror#18
896 orr x17,x17,x19 // Ch(e,f,g)
897 eor x19,x24,x25 // a^b, b^c in next round
898 eor x16,x16,x20,ror#41 // Sigma1(e)
900 add x23,x23,x17 // h+=Ch(e,f,g)
901 and x28,x28,x19 // (b^c)&=(a^b)
903 eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
904 add x23,x23,x16 // h+=Sigma1(e)
905 eor x28,x28,x25 // Maj(a,b,c)
906 eor x17,x7,x24,ror#39 // Sigma0(a)
907 eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
909 add x27,x27,x23 // d+=h
910 add x23,x23,x28 // h+=Maj(a,b,c)
911 ldr x28,[x30],#8 // *K++, x19 in next round
913 add x23,x23,x17 // h+=Sigma0(a)
918 add x22,x22,x28 // h+=K[i]
924 add x22,x22,x0 // h+=X[i]
925 eor x16,x16,x27,ror#18
927 orr x17,x17,x28 // Ch(e,f,g)
928 eor x28,x23,x24 // a^b, b^c in next round
929 eor x16,x16,x27,ror#41 // Sigma1(e)
931 add x22,x22,x17 // h+=Ch(e,f,g)
932 and x19,x19,x28 // (b^c)&=(a^b)
934 eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
935 add x22,x22,x16 // h+=Sigma1(e)
936 eor x19,x19,x24 // Maj(a,b,c)
937 eor x17,x8,x23,ror#39 // Sigma0(a)
938 eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
940 add x26,x26,x22 // d+=h
941 add x22,x22,x19 // h+=Maj(a,b,c)
942 ldr x19,[x30],#8 // *K++, x28 in next round
944 add x22,x22,x17 // h+=Sigma0(a)
949 add x21,x21,x19 // h+=K[i]
955 add x21,x21,x1 // h+=X[i]
956 eor x16,x16,x26,ror#18
958 orr x17,x17,x19 // Ch(e,f,g)
959 eor x19,x22,x23 // a^b, b^c in next round
960 eor x16,x16,x26,ror#41 // Sigma1(e)
962 add x21,x21,x17 // h+=Ch(e,f,g)
963 and x28,x28,x19 // (b^c)&=(a^b)
965 eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
966 add x21,x21,x16 // h+=Sigma1(e)
967 eor x28,x28,x23 // Maj(a,b,c)
968 eor x17,x9,x22,ror#39 // Sigma0(a)
969 eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
971 add x25,x25,x21 // d+=h
972 add x21,x21,x28 // h+=Maj(a,b,c)
973 ldr x28,[x30],#8 // *K++, x19 in next round
975 add x21,x21,x17 // h+=Sigma0(a)
980 add x20,x20,x28 // h+=K[i]
986 add x20,x20,x2 // h+=X[i]
987 eor x16,x16,x25,ror#18
989 orr x17,x17,x28 // Ch(e,f,g)
990 eor x28,x21,x22 // a^b, b^c in next round
991 eor x16,x16,x25,ror#41 // Sigma1(e)
992 eor x10,x10,x21,ror#34
993 add x20,x20,x17 // h+=Ch(e,f,g)
994 and x19,x19,x28 // (b^c)&=(a^b)
996 eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
997 add x20,x20,x16 // h+=Sigma1(e)
998 eor x19,x19,x22 // Maj(a,b,c)
999 eor x17,x10,x21,ror#39 // Sigma0(a)
1000 eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
1002 add x24,x24,x20 // d+=h
1003 add x20,x20,x19 // h+=Maj(a,b,c)
1004 ldr x19,[x30],#8 // *K++, x28 in next round
1006 add x20,x20,x17 // h+=Sigma0(a)
// x19 now holds the word just fetched from the constant table. The table
// ends with a zero word, so a zero here means all rounds are done.
// NOTE(review): the .Loop_16_xx label itself is not visible in this
// listing.
1008 cbnz x19,.Loop_16_xx
// 648 = 81*8: step x30 back over the 80 round constants plus the zero
// terminator so that it points at the start of the table again.
1012 sub x30,x30,#648 // rewind
1016 add x1,x1,#14*8 // advance input pointer
// Reload state words 6 and 7 from the context (presumably to be added to
// the working variables - the additions are not visible here), then
// write the updated state words 2..7 back to the context.
1019 ldp x9,x10,[x0,#6*8]
1026 stp x22,x23,[x0,#2*8]
1030 stp x24,x25,[x0,#4*8]
1031 stp x26,x27,[x0,#6*8]
// Epilogue: reload callee-saved x19..x28 from the frame addressed by x29
// (presumably x29 was set to sp in a prologue line not visible here),
// then pop the 128-byte frame together with x29 and x30.
1034 ldp x19,x20,[x29,#16]
1036 ldp x21,x22,[x29,#32]
1037 ldp x23,x24,[x29,#48]
1038 ldp x25,x26,[x29,#64]
1039 ldp x27,x28,[x29,#80]
1040 ldp x29,x30,[sp],#128
1041 .inst 0xd50323bf // autiasp
1043 .size sha512_block_data_order,.-sha512_block_data_order
// .LK512: table of SHA-512 round constants K[0..79], stored as 80
// consecutive 64-bit words (40 lines of two), followed by one zero word.
// The scalar round loop in sha512_block_data_order fetches one word per
// round through x30 and leaves the loop when it reads the zero word, so
// the terminator must stay directly after the last constant. The
// function then rewinds x30 by 648 bytes, which is 81 words, so the
// total length of this table must not change either.
1046 .type .LK512,%object
1048 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
1049 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1050 .quad 0x3956c25bf348b538,0x59f111f1b605d019
1051 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1052 .quad 0xd807aa98a3030242,0x12835b0145706fbe
1053 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1054 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1055 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
1056 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1057 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1058 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1059 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1060 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
1061 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
1062 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
1063 .quad 0x06ca6351e003826f,0x142929670a0e6e70
1064 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
1065 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1066 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
1067 .quad 0x81c2c92e47edaee6,0x92722c851482353b
1068 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
1069 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
1070 .quad 0xd192e819d6ef5218,0xd69906245565a910
1071 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
1072 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1073 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1074 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1075 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1076 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
1077 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
1078 .quad 0x90befffa23631e28,0xa4506cebde82bde9
1079 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
1080 .quad 0xca273eceea26619c,0xd186b8c721c0c207
1081 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1082 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
1083 .quad 0x113f9804bef90dae,0x1b710b35131c471b
1084 .quad 0x28db77f523047d84,0x32caab7b40c72493
1085 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1086 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1087 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1088 .quad 0 // terminator
1089 .size .LK512,.-.LK512
// Distance from this location to OPENSSL_armcap_P, emitted once as a
// 32-bit word and once as a 64-bit word.
// NOTE(review): presumably only one of the two is assembled, chosen by a
// preprocessor conditional that is not visible here; compare the
// ldrsw / ldr pair at the entry of sha512_block_data_order.
1094 .long OPENSSL_armcap_P-.
1096 .quad OPENSSL_armcap_P-.
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1099 .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1103 .type sha512_block_armv8,%function
1107 stp x29,x30,[sp,#-16]!
1110 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
1111 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
1113 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
1116 rev64 v16.16b,v16.16b
1117 rev64 v17.16b,v17.16b
1118 rev64 v18.16b,v18.16b
1119 rev64 v19.16b,v19.16b
1120 rev64 v20.16b,v20.16b
1121 rev64 v21.16b,v21.16b
1122 rev64 v22.16b,v22.16b
1123 rev64 v23.16b,v23.16b
1128 ld1 {v24.2d},[x3],#16
1131 orr v26.16b,v0.16b,v0.16b // offload
1132 orr v27.16b,v1.16b,v1.16b
1133 orr v28.16b,v2.16b,v2.16b
1134 orr v29.16b,v3.16b,v3.16b
1135 csel x1,x1,x4,ne // conditional rewind
1136 add v24.2d,v24.2d,v16.2d
1137 ld1 {v25.2d},[x3],#16
1138 ext v24.16b,v24.16b,v24.16b,#8
1139 ext v5.16b,v2.16b,v3.16b,#8
1140 ext v6.16b,v1.16b,v2.16b,#8
1141 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1142 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1143 ext v7.16b,v20.16b,v21.16b,#8
1144 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1145 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1146 add v4.2d,v1.2d,v3.2d // "D + T1"
1147 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1148 add v25.2d,v25.2d,v17.2d
1149 ld1 {v24.2d},[x3],#16
1150 ext v25.16b,v25.16b,v25.16b,#8
1151 ext v5.16b,v4.16b,v2.16b,#8
1152 ext v6.16b,v0.16b,v4.16b,#8
1153 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1154 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1155 ext v7.16b,v21.16b,v22.16b,#8
1156 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1157 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1158 add v1.2d,v0.2d,v2.2d // "D + T1"
1159 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1160 add v24.2d,v24.2d,v18.2d
1161 ld1 {v25.2d},[x3],#16
1162 ext v24.16b,v24.16b,v24.16b,#8
1163 ext v5.16b,v1.16b,v4.16b,#8
1164 ext v6.16b,v3.16b,v1.16b,#8
1165 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1166 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1167 ext v7.16b,v22.16b,v23.16b,#8
1168 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1169 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1170 add v0.2d,v3.2d,v4.2d // "D + T1"
1171 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1172 add v25.2d,v25.2d,v19.2d
1173 ld1 {v24.2d},[x3],#16
1174 ext v25.16b,v25.16b,v25.16b,#8
1175 ext v5.16b,v0.16b,v1.16b,#8
1176 ext v6.16b,v2.16b,v0.16b,#8
1177 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1178 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1179 ext v7.16b,v23.16b,v16.16b,#8
1180 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1181 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1182 add v3.2d,v2.2d,v1.2d // "D + T1"
1183 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1184 add v24.2d,v24.2d,v20.2d
1185 ld1 {v25.2d},[x3],#16
1186 ext v24.16b,v24.16b,v24.16b,#8
1187 ext v5.16b,v3.16b,v0.16b,#8
1188 ext v6.16b,v4.16b,v3.16b,#8
1189 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1190 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1191 ext v7.16b,v16.16b,v17.16b,#8
1192 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1193 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1194 add v2.2d,v4.2d,v0.2d // "D + T1"
1195 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1196 add v25.2d,v25.2d,v21.2d
1197 ld1 {v24.2d},[x3],#16
1198 ext v25.16b,v25.16b,v25.16b,#8
1199 ext v5.16b,v2.16b,v3.16b,#8
1200 ext v6.16b,v1.16b,v2.16b,#8
1201 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1202 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1203 ext v7.16b,v17.16b,v18.16b,#8
1204 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1205 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1206 add v4.2d,v1.2d,v3.2d // "D + T1"
1207 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1208 add v24.2d,v24.2d,v22.2d
1209 ld1 {v25.2d},[x3],#16
1210 ext v24.16b,v24.16b,v24.16b,#8
1211 ext v5.16b,v4.16b,v2.16b,#8
1212 ext v6.16b,v0.16b,v4.16b,#8
1213 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1214 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1215 ext v7.16b,v18.16b,v19.16b,#8
1216 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1217 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1218 add v1.2d,v0.2d,v2.2d // "D + T1"
1219 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1220 add v25.2d,v25.2d,v23.2d
1221 ld1 {v24.2d},[x3],#16
1222 ext v25.16b,v25.16b,v25.16b,#8
1223 ext v5.16b,v1.16b,v4.16b,#8
1224 ext v6.16b,v3.16b,v1.16b,#8
1225 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1226 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1227 ext v7.16b,v19.16b,v20.16b,#8
1228 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1229 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1230 add v0.2d,v3.2d,v4.2d // "D + T1"
1231 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1232 add v24.2d,v24.2d,v16.2d
1233 ld1 {v25.2d},[x3],#16
1234 ext v24.16b,v24.16b,v24.16b,#8
1235 ext v5.16b,v0.16b,v1.16b,#8
1236 ext v6.16b,v2.16b,v0.16b,#8
1237 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1238 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1239 ext v7.16b,v20.16b,v21.16b,#8
1240 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1241 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1242 add v3.2d,v2.2d,v1.2d // "D + T1"
1243 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1244 add v25.2d,v25.2d,v17.2d
1245 ld1 {v24.2d},[x3],#16
1246 ext v25.16b,v25.16b,v25.16b,#8
1247 ext v5.16b,v3.16b,v0.16b,#8
1248 ext v6.16b,v4.16b,v3.16b,#8
1249 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1250 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1251 ext v7.16b,v21.16b,v22.16b,#8
1252 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1253 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1254 add v2.2d,v4.2d,v0.2d // "D + T1"
1255 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1256 add v24.2d,v24.2d,v18.2d
1257 ld1 {v25.2d},[x3],#16
1258 ext v24.16b,v24.16b,v24.16b,#8
1259 ext v5.16b,v2.16b,v3.16b,#8
1260 ext v6.16b,v1.16b,v2.16b,#8
1261 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1262 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1263 ext v7.16b,v22.16b,v23.16b,#8
1264 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1265 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1266 add v4.2d,v1.2d,v3.2d // "D + T1"
1267 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1268 add v25.2d,v25.2d,v19.2d
1269 ld1 {v24.2d},[x3],#16
1270 ext v25.16b,v25.16b,v25.16b,#8
1271 ext v5.16b,v4.16b,v2.16b,#8
1272 ext v6.16b,v0.16b,v4.16b,#8
1273 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1274 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1275 ext v7.16b,v23.16b,v16.16b,#8
1276 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1277 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1278 add v1.2d,v0.2d,v2.2d // "D + T1"
1279 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1280 add v24.2d,v24.2d,v20.2d
1281 ld1 {v25.2d},[x3],#16
1282 ext v24.16b,v24.16b,v24.16b,#8
1283 ext v5.16b,v1.16b,v4.16b,#8
1284 ext v6.16b,v3.16b,v1.16b,#8
1285 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1286 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1287 ext v7.16b,v16.16b,v17.16b,#8
1288 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1289 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1290 add v0.2d,v3.2d,v4.2d // "D + T1"
1291 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1292 add v25.2d,v25.2d,v21.2d
1293 ld1 {v24.2d},[x3],#16
1294 ext v25.16b,v25.16b,v25.16b,#8
1295 ext v5.16b,v0.16b,v1.16b,#8
1296 ext v6.16b,v2.16b,v0.16b,#8
1297 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1298 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1299 ext v7.16b,v17.16b,v18.16b,#8
1300 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1301 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1302 add v3.2d,v2.2d,v1.2d // "D + T1"
1303 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1304 add v24.2d,v24.2d,v22.2d
1305 ld1 {v25.2d},[x3],#16
1306 ext v24.16b,v24.16b,v24.16b,#8
1307 ext v5.16b,v3.16b,v0.16b,#8
1308 ext v6.16b,v4.16b,v3.16b,#8
1309 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1310 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1311 ext v7.16b,v18.16b,v19.16b,#8
1312 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1313 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1314 add v2.2d,v4.2d,v0.2d // "D + T1"
1315 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1316 add v25.2d,v25.2d,v23.2d
1317 ld1 {v24.2d},[x3],#16
1318 ext v25.16b,v25.16b,v25.16b,#8
1319 ext v5.16b,v2.16b,v3.16b,#8
1320 ext v6.16b,v1.16b,v2.16b,#8
1321 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1322 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1323 ext v7.16b,v19.16b,v20.16b,#8
1324 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1325 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1326 add v4.2d,v1.2d,v3.2d // "D + T1"
1327 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1328 add v24.2d,v24.2d,v16.2d
1329 ld1 {v25.2d},[x3],#16
1330 ext v24.16b,v24.16b,v24.16b,#8
1331 ext v5.16b,v4.16b,v2.16b,#8
1332 ext v6.16b,v0.16b,v4.16b,#8
1333 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1334 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1335 ext v7.16b,v20.16b,v21.16b,#8
1336 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1337 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1338 add v1.2d,v0.2d,v2.2d // "D + T1"
1339 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1340 add v25.2d,v25.2d,v17.2d
1341 ld1 {v24.2d},[x3],#16
1342 ext v25.16b,v25.16b,v25.16b,#8
1343 ext v5.16b,v1.16b,v4.16b,#8
1344 ext v6.16b,v3.16b,v1.16b,#8
1345 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1346 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1347 ext v7.16b,v21.16b,v22.16b,#8
1348 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1349 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1350 add v0.2d,v3.2d,v4.2d // "D + T1"
1351 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1352 add v24.2d,v24.2d,v18.2d
1353 ld1 {v25.2d},[x3],#16
1354 ext v24.16b,v24.16b,v24.16b,#8
1355 ext v5.16b,v0.16b,v1.16b,#8
1356 ext v6.16b,v2.16b,v0.16b,#8
1357 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1358 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1359 ext v7.16b,v22.16b,v23.16b,#8
1360 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1361 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1362 add v3.2d,v2.2d,v1.2d // "D + T1"
1363 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1364 add v25.2d,v25.2d,v19.2d
1365 ld1 {v24.2d},[x3],#16
1366 ext v25.16b,v25.16b,v25.16b,#8
1367 ext v5.16b,v3.16b,v0.16b,#8
1368 ext v6.16b,v4.16b,v3.16b,#8
1369 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1370 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1371 ext v7.16b,v23.16b,v16.16b,#8
1372 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1373 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1374 add v2.2d,v4.2d,v0.2d // "D + T1"
1375 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1376 add v24.2d,v24.2d,v20.2d
1377 ld1 {v25.2d},[x3],#16
1378 ext v24.16b,v24.16b,v24.16b,#8
1379 ext v5.16b,v2.16b,v3.16b,#8
1380 ext v6.16b,v1.16b,v2.16b,#8
1381 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1382 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1383 ext v7.16b,v16.16b,v17.16b,#8
1384 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1385 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1386 add v4.2d,v1.2d,v3.2d // "D + T1"
1387 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1388 add v25.2d,v25.2d,v21.2d
1389 ld1 {v24.2d},[x3],#16
1390 ext v25.16b,v25.16b,v25.16b,#8
1391 ext v5.16b,v4.16b,v2.16b,#8
1392 ext v6.16b,v0.16b,v4.16b,#8
1393 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1394 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1395 ext v7.16b,v17.16b,v18.16b,#8
1396 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1397 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1398 add v1.2d,v0.2d,v2.2d // "D + T1"
1399 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1400 add v24.2d,v24.2d,v22.2d
1401 ld1 {v25.2d},[x3],#16
1402 ext v24.16b,v24.16b,v24.16b,#8
1403 ext v5.16b,v1.16b,v4.16b,#8
1404 ext v6.16b,v3.16b,v1.16b,#8
1405 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1406 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1407 ext v7.16b,v18.16b,v19.16b,#8
1408 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1409 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1410 add v0.2d,v3.2d,v4.2d // "D + T1"
1411 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1412 add v25.2d,v25.2d,v23.2d
1413 ld1 {v24.2d},[x3],#16
1414 ext v25.16b,v25.16b,v25.16b,#8
1415 ext v5.16b,v0.16b,v1.16b,#8
1416 ext v6.16b,v2.16b,v0.16b,#8
1417 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1418 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1419 ext v7.16b,v19.16b,v20.16b,#8
1420 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1421 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1422 add v3.2d,v2.2d,v1.2d // "D + T1"
1423 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1424 add v24.2d,v24.2d,v16.2d
1425 ld1 {v25.2d},[x3],#16
1426 ext v24.16b,v24.16b,v24.16b,#8
1427 ext v5.16b,v3.16b,v0.16b,#8
1428 ext v6.16b,v4.16b,v3.16b,#8
1429 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1430 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1431 ext v7.16b,v20.16b,v21.16b,#8
1432 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1433 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1434 add v2.2d,v4.2d,v0.2d // "D + T1"
1435 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1436 add v25.2d,v25.2d,v17.2d
1437 ld1 {v24.2d},[x3],#16
1438 ext v25.16b,v25.16b,v25.16b,#8
1439 ext v5.16b,v2.16b,v3.16b,#8
1440 ext v6.16b,v1.16b,v2.16b,#8
1441 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1442 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1443 ext v7.16b,v21.16b,v22.16b,#8
1444 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1445 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1446 add v4.2d,v1.2d,v3.2d // "D + T1"
1447 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1448 add v24.2d,v24.2d,v18.2d
1449 ld1 {v25.2d},[x3],#16
1450 ext v24.16b,v24.16b,v24.16b,#8
1451 ext v5.16b,v4.16b,v2.16b,#8
1452 ext v6.16b,v0.16b,v4.16b,#8
1453 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1454 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1455 ext v7.16b,v22.16b,v23.16b,#8
1456 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1457 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1458 add v1.2d,v0.2d,v2.2d // "D + T1"
1459 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1460 add v25.2d,v25.2d,v19.2d
1461 ld1 {v24.2d},[x3],#16
1462 ext v25.16b,v25.16b,v25.16b,#8
1463 ext v5.16b,v1.16b,v4.16b,#8
1464 ext v6.16b,v3.16b,v1.16b,#8
1465 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1466 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1467 ext v7.16b,v23.16b,v16.16b,#8
1468 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1469 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1470 add v0.2d,v3.2d,v4.2d // "D + T1"
1471 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1472 add v24.2d,v24.2d,v20.2d
1473 ld1 {v25.2d},[x3],#16
1474 ext v24.16b,v24.16b,v24.16b,#8
1475 ext v5.16b,v0.16b,v1.16b,#8
1476 ext v6.16b,v2.16b,v0.16b,#8
1477 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1478 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1479 ext v7.16b,v16.16b,v17.16b,#8
1480 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1481 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1482 add v3.2d,v2.2d,v1.2d // "D + T1"
1483 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1484 add v25.2d,v25.2d,v21.2d
1485 ld1 {v24.2d},[x3],#16
1486 ext v25.16b,v25.16b,v25.16b,#8
1487 ext v5.16b,v3.16b,v0.16b,#8
1488 ext v6.16b,v4.16b,v3.16b,#8
1489 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1490 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1491 ext v7.16b,v17.16b,v18.16b,#8
1492 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1493 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1494 add v2.2d,v4.2d,v0.2d // "D + T1"
1495 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1496 add v24.2d,v24.2d,v22.2d
1497 ld1 {v25.2d},[x3],#16
1498 ext v24.16b,v24.16b,v24.16b,#8
1499 ext v5.16b,v2.16b,v3.16b,#8
1500 ext v6.16b,v1.16b,v2.16b,#8
1501 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
1502 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1503 ext v7.16b,v18.16b,v19.16b,#8
1504 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1505 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1506 add v4.2d,v1.2d,v3.2d // "D + T1"
1507 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1508 add v25.2d,v25.2d,v23.2d
1509 ld1 {v24.2d},[x3],#16
1510 ext v25.16b,v25.16b,v25.16b,#8
1511 ext v5.16b,v4.16b,v2.16b,#8
1512 ext v6.16b,v0.16b,v4.16b,#8
1513 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
1514 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1515 ext v7.16b,v19.16b,v20.16b,#8
1516 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1517 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1518 add v1.2d,v0.2d,v2.2d // "D + T1"
1519 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1520 ld1 {v25.2d},[x3],#16
1521 add v24.2d,v24.2d,v16.2d
1522 ld1 {v16.16b},[x1],#16 // load next input
1523 ext v24.16b,v24.16b,v24.16b,#8
1524 ext v5.16b,v1.16b,v4.16b,#8
1525 ext v6.16b,v3.16b,v1.16b,#8
1526 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
1527 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1528 rev64 v16.16b,v16.16b
1529 add v0.2d,v3.2d,v4.2d // "D + T1"
1530 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1531 ld1 {v24.2d},[x3],#16
1532 add v25.2d,v25.2d,v17.2d
1533 ld1 {v17.16b},[x1],#16 // load next input
1534 ext v25.16b,v25.16b,v25.16b,#8
1535 ext v5.16b,v0.16b,v1.16b,#8
1536 ext v6.16b,v2.16b,v0.16b,#8
1537 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
1538 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1539 rev64 v17.16b,v17.16b
1540 add v3.2d,v2.2d,v1.2d // "D + T1"
1541 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1542 ld1 {v25.2d},[x3],#16
1543 add v24.2d,v24.2d,v18.2d
1544 ld1 {v18.16b},[x1],#16 // load next input
1545 ext v24.16b,v24.16b,v24.16b,#8
1546 ext v5.16b,v3.16b,v0.16b,#8
1547 ext v6.16b,v4.16b,v3.16b,#8
1548 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
1549 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1550 rev64 v18.16b,v18.16b
1551 add v2.2d,v4.2d,v0.2d // "D + T1"
1552 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1553 ld1 {v24.2d},[x3],#16
1554 add v25.2d,v25.2d,v19.2d
1555 ld1 {v19.16b},[x1],#16 // load next input
1556 ext v25.16b,v25.16b,v25.16b,#8
1557 ext v5.16b,v2.16b,v3.16b,#8
1558 ext v6.16b,v1.16b,v2.16b,#8
1559 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
1560 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1561 rev64 v19.16b,v19.16b
1562 add v4.2d,v1.2d,v3.2d // "D + T1"
1563 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1564 ld1 {v25.2d},[x3],#16
1565 add v24.2d,v24.2d,v20.2d
1566 ld1 {v20.16b},[x1],#16 // load next input
1567 ext v24.16b,v24.16b,v24.16b,#8
1568 ext v5.16b,v4.16b,v2.16b,#8
1569 ext v6.16b,v0.16b,v4.16b,#8
1570 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
1571 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1572 rev64 v20.16b,v20.16b
1573 add v1.2d,v0.2d,v2.2d // "D + T1"
1574 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1575 ld1 {v24.2d},[x3],#16
1576 add v25.2d,v25.2d,v21.2d
1577 ld1 {v21.16b},[x1],#16 // load next input
1578 ext v25.16b,v25.16b,v25.16b,#8
1579 ext v5.16b,v1.16b,v4.16b,#8
1580 ext v6.16b,v3.16b,v1.16b,#8
1581 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
1582 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1583 rev64 v21.16b,v21.16b
1584 add v0.2d,v3.2d,v4.2d // "D + T1"
1585 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1586 ld1 {v25.2d},[x3],#16
1587 add v24.2d,v24.2d,v22.2d
1588 ld1 {v22.16b},[x1],#16 // load next input
1589 ext v24.16b,v24.16b,v24.16b,#8
1590 ext v5.16b,v0.16b,v1.16b,#8
1591 ext v6.16b,v2.16b,v0.16b,#8
1592 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
1593 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1594 rev64 v22.16b,v22.16b
1595 add v3.2d,v2.2d,v1.2d // "D + T1"
1596 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1597 sub x3,x3,#80*8 // rewind
1598 add v25.2d,v25.2d,v23.2d
1599 ld1 {v23.16b},[x1],#16 // load next input
1600 ext v25.16b,v25.16b,v25.16b,#8
1601 ext v5.16b,v3.16b,v0.16b,#8
1602 ext v6.16b,v4.16b,v3.16b,#8
1603 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
1604 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1605 rev64 v23.16b,v23.16b
1606 add v2.2d,v4.2d,v0.2d // "D + T1"
1607 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1608 add v0.2d,v0.2d,v26.2d // accumulate
1609 add v1.2d,v1.2d,v27.2d
1610 add v2.2d,v2.2d,v28.2d
1611 add v3.2d,v3.2d,v29.2d
1615 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context
1619 .size sha512_block_armv8,.-sha512_block_armv8