2 /* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
# ----------------------------------------------------------------------
# bn_mul_mont_gather5 -- Montgomery multiplication fused with a
# constant-time gather of the multiplier from a 32-entry power table.
# Auto-generated from a perlasm script; do not hand-edit.
# NOTE(review): this listing is a sampled excerpt -- the integer at the
# start of each line is the original source line number fused in by
# extraction, and many intervening instructions are not shown.  Treat
# every structural comment below as grounded only in the visible lines.
# ----------------------------------------------------------------------
7 .globl bn_mul_mont_gather5
8 .type bn_mul_mont_gather5,@function
# Capability word from OPENSSL_ia32cap_P selects an alternate code path
# (presumably the 4x / mulx variants below) -- TODO confirm bit tests
# against the un-sampled source.
15 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Frame allocation: size scales with the word count in %r9.  The
# .Lmul_page_walk / .Lmul_page_walk_done sequence (partially elided)
# touches each 4K page so guard pages are committed before use.
30 leaq -280(%rsp,%r9,8),%r10
42 leaq (%r10,%r11,1),%rsp
46 jmp .Lmul_page_walk_done
56 movq %rax,8(%rsp,%r9,8)
# Build sixteen 16-byte selection masks at 112..352(%r10); these are
# compare-generated masks used by the constant-time gather (the pand
# lines further down AND table slices against them so exactly one
# table entry survives, with a data-independent access pattern).
62 leaq 24-112(%rsp,%r9,8),%r10
74 movdqa %xmm0,112(%r10)
79 movdqa %xmm1,128(%r10)
84 movdqa %xmm2,144(%r10)
89 movdqa %xmm3,160(%r10)
93 movdqa %xmm0,176(%r10)
98 movdqa %xmm1,192(%r10)
103 movdqa %xmm2,208(%r10)
108 movdqa %xmm3,224(%r10)
112 movdqa %xmm0,240(%r10)
117 movdqa %xmm1,256(%r10)
122 movdqa %xmm2,272(%r10)
127 movdqa %xmm3,288(%r10)
131 movdqa %xmm0,304(%r10)
136 movdqa %xmm1,320(%r10)
139 movdqa %xmm2,336(%r10)
144 movdqa %xmm3,352(%r10)
# First gather: read the whole 256-byte table window at %r12 (masking
# lines elided in this excerpt) so the selected entry ends up in xmm0.
148 movdqa -128(%r12),%xmm4
149 movdqa -112(%r12),%xmm5
150 movdqa -96(%r12),%xmm2
152 movdqa -80(%r12),%xmm3
160 movdqa -64(%r12),%xmm4
161 movdqa -48(%r12),%xmm5
162 movdqa -32(%r12),%xmm2
164 movdqa -16(%r12),%xmm3
173 movdqa 16(%r12),%xmm5
174 movdqa 32(%r12),%xmm2
176 movdqa 48(%r12),%xmm3
# Fold the two 64-bit halves of the selected entry together, then move
# the result to a GPR.  The .byte sequence 102,72,15,126,195 is a
# hand-encoded SSE2 "movq %xmm0,%rbx" (66 48 0F 7E C3), emitted as
# raw bytes by the generator -- do not re-encode.
185 pshufd $0x4e,%xmm0,%xmm1
188 .byte 102,72,15,126,195
# Main multiply loop body (heavily elided): rax walks ap[] and np[]
# words indexed by %r15, partial sums are spilled to the stack frame.
216 movq (%rsi,%r15,8),%rax
221 movq %r13,-16(%rsp,%r15,8)
227 movq (%rcx,%r15,8),%rax
241 movq %r13,-16(%rsp,%r9,8)
248 movq %r13,-8(%rsp,%r9,8)
249 movq %rdx,(%rsp,%r9,8)
# Subsequent iterations: gather the next multiplier word.  Here the
# masking is visible -- each table slice is ANDed with its mask slot
# relative to %rdx (which points into the mask table built above).
255 leaq 24+128(%rsp,%r9,8),%rdx
259 movdqa -128(%r12),%xmm0
260 movdqa -112(%r12),%xmm1
261 movdqa -96(%r12),%xmm2
262 movdqa -80(%r12),%xmm3
263 pand -128(%rdx),%xmm0
264 pand -112(%rdx),%xmm1
271 movdqa -64(%r12),%xmm0
272 movdqa -48(%r12),%xmm1
273 movdqa -32(%r12),%xmm2
274 movdqa -16(%r12),%xmm3
284 movdqa 16(%r12),%xmm1
285 movdqa 32(%r12),%xmm2
286 movdqa 48(%r12),%xmm3
295 movdqa 64(%r12),%xmm0
296 movdqa 80(%r12),%xmm1
297 movdqa 96(%r12),%xmm2
298 movdqa 112(%r12),%xmm3
# Same half-fold + movq-to-%rbx as above for the gathered word.
308 pshufd $0x4e,%xmm4,%xmm0
313 .byte 102,72,15,126,195
# Inner multiply-and-reduce for the remaining outer iterations; the
# previous partial result is read back from the stack (%rsp,%r15,8).
340 movq (%rsi,%r15,8),%rax
343 movq (%rsp,%r15,8),%r10
345 movq %r13,-16(%rsp,%r15,8)
351 movq (%rcx,%r15,8),%rax
365 movq (%rsp,%r9,8),%r10
367 movq %r13,-16(%rsp,%r9,8)
375 movq %r13,-8(%rsp,%r9,8)
376 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction: subtract the modulus with borrow
# (.Lsub loop), then copy the (possibly adjusted) result out to rp
# (%rdi) while clearing the scratch frame -- standard Montgomery tail.
388 .Lsub: sbbq (%rcx,%r14,8),%rax
389 movq %rax,(%rdi,%r14,8)
390 movq 8(%rsi,%r14,8),%rax
405 movq (%rsi,%r14,8),%rax
406 movq %r14,(%rsp,%r14,8)
407 movq %rax,(%rdi,%r14,8)
# Restore the saved stack/frame pointer stashed at 8(%rsp,%r9,8) above.
412 movq 8(%rsp,%r9,8),%rsi
424 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# ----------------------------------------------------------------------
# bn_mul4x_mont_gather5 -- 4-way unrolled variant of the routine above.
# This visible portion is only the frame setup: it computes a frame
# size from the word count (%r9), reserves it, and page-walks the new
# stack region before falling into the (elided) body.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers, and the bulk of the function is not shown here.
# ----------------------------------------------------------------------
425 .type bn_mul4x_mont_gather5,@function
427 bn_mul4x_mont_gather5:
# %r10 = 3*num; used in the frame-size computation below.
444 leaq (%r9,%r9,2),%r10
456 leaq -320(%rsp,%r9,2),%r11
463 leaq -320(%rbp,%r9,2),%rbp
468 leaq 4096-320(,%r9,2),%r10
469 leaq -320(%rbp,%r9,2),%rbp
# Commit the final stack pointer, then walk it page by page (the
# -4096 step) so guard pages fault in before the body stores to them.
479 leaq (%r11,%rbp,1),%rsp
483 jmp .Lmul4x_page_walk_done
486 leaq -4096(%rsp),%rsp
490 .Lmul4x_page_walk_done:
511 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# ----------------------------------------------------------------------
# mul4x_internal -- body of the 4-way unrolled Montgomery multiply.
# Same structure as bn_mul_mont_gather5: build selection masks, gather
# the multiplier word in constant time, run the unrolled multiply /
# reduce loops, finish with a conditional subtraction (tail-jumps to
# the shared .Lsqr4x_sub_entry).
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers and many instructions between them are missing.
# ----------------------------------------------------------------------
513 .type mul4x_internal,@function
# .Linc holds the {0,1}/{2,2} increment constants used to generate the
# sixteen selection masks stored below.
518 leaq .Linc(%rip),%rax
519 leaq 128(%rdx,%r9,1),%r13
522 movdqa 16(%rax),%xmm1
523 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast the 5-bit index to all lanes for the mask comparisons.
526 pshufd $0,%xmm5,%xmm5
# Mask table: sixteen 16-byte slots at 112..352(%r10), as in
# bn_mul_mont_gather5 above.
536 movdqa %xmm0,112(%r10)
541 movdqa %xmm1,128(%r10)
546 movdqa %xmm2,144(%r10)
551 movdqa %xmm3,160(%r10)
555 movdqa %xmm0,176(%r10)
560 movdqa %xmm1,192(%r10)
565 movdqa %xmm2,208(%r10)
570 movdqa %xmm3,224(%r10)
574 movdqa %xmm0,240(%r10)
579 movdqa %xmm1,256(%r10)
584 movdqa %xmm2,272(%r10)
589 movdqa %xmm3,288(%r10)
593 movdqa %xmm0,304(%r10)
598 movdqa %xmm1,320(%r10)
601 movdqa %xmm2,336(%r10)
606 movdqa %xmm3,352(%r10)
# Constant-time gather of the first multiplier word from the table at
# %r12 (mask ANDs elided in this excerpt).
610 movdqa -128(%r12),%xmm4
611 movdqa -112(%r12),%xmm5
612 movdqa -96(%r12),%xmm2
614 movdqa -80(%r12),%xmm3
622 movdqa -64(%r12),%xmm4
623 movdqa -48(%r12),%xmm5
624 movdqa -32(%r12),%xmm2
626 movdqa -16(%r12),%xmm3
635 movdqa 16(%r12),%xmm5
636 movdqa 32(%r12),%xmm2
638 movdqa 48(%r12),%xmm3
# Fold halves; .byte 102,72,15,126,195 is hand-encoded
# "movq %xmm0,%rbx" (66 48 0F 7E C3) -- do not re-encode.
647 pshufd $0x4e,%xmm0,%xmm1
650 .byte 102,72,15,126,195
# Point %rsi past the operand so the loop can index backwards from the
# end via negative %r9/%r15 offsets (note scale 1: %r9 here is a byte
# count, unlike the ,8-scaled indexing in the 1x routine).
657 leaq (%rsi,%r9,1),%rsi
671 movq 8(%rsi,%r9,1),%rax
683 movq 16(%rsi,%r9,1),%rax
# Unrolled inner loop: four ap[] words per iteration at offsets
# -8/0/8/16 from (%rsi,%r15).
704 movq -8(%rsi,%r15,1),%rax
719 movq (%rsi,%r15,1),%rax
734 movq 8(%rsi,%r15,1),%rax
749 movq 16(%rsi,%r15,1),%rax
784 movq (%rsi,%r9,1),%rax
791 leaq (%rcx,%r9,1),%rcx
# Next-word gather for the outer loop: mask table is now addressed
# relative to %rdx/%r14; visible pand lines do the selection.
802 leaq 16+128(%r14),%rdx
805 movdqa -128(%r12),%xmm0
806 movdqa -112(%r12),%xmm1
807 movdqa -96(%r12),%xmm2
808 movdqa -80(%r12),%xmm3
809 pand -128(%rdx),%xmm0
810 pand -112(%rdx),%xmm1
817 movdqa -64(%r12),%xmm0
818 movdqa -48(%r12),%xmm1
819 movdqa -32(%r12),%xmm2
820 movdqa -16(%r12),%xmm3
830 movdqa 16(%r12),%xmm1
831 movdqa 32(%r12),%xmm2
832 movdqa 48(%r12),%xmm3
841 movdqa 64(%r12),%xmm0
842 movdqa 80(%r12),%xmm1
843 movdqa 96(%r12),%xmm2
844 movdqa 112(%r12),%xmm3
854 pshufd $0x4e,%xmm4,%xmm0
857 .byte 102,72,15,126,195
# Outer-iteration multiply/reduce: accumulators live in the %r14-based
# scratch area; previous partials are read back and folded in.
859 movq (%r14,%r9,1),%r10
870 leaq (%r14,%r9,1),%r14
874 movq 8(%rsi,%r9,1),%rax
888 movq 16(%rsi,%r9,1),%rax
910 movq -8(%rsi,%r15,1),%rax
927 movq (%rsi,%r15,1),%rax
944 movq 8(%rsi,%r15,1),%rax
961 movq 16(%rsi,%r15,1),%rax
1001 movq (%rsi,%r9,1),%rax
1009 leaq (%rcx,%r9,1),%rcx
# Loop-termination check against the table-end pointer saved at
# 16+8(%rsp).
1018 cmpq 16+8(%rsp),%r12
# Epilogue: recover the caller-supplied result pointer from 56+8(%rsp)
# and tail-jump into the shared conditional-subtraction code.
1025 leaq (%r14,%r9,1),%rbx
1030 movq 56+8(%rsp),%rdi
1036 jmp .Lsqr4x_sub_entry
1037 .size mul4x_internal,.-mul4x_internal
# ----------------------------------------------------------------------
# bn_power5 -- five consecutive Montgomery squarings followed by one
# multiplication (the multiply tail is elided in this excerpt); used to
# compute a^(2^5)*b-style steps for fixed-window exponentiation.
# The five __bn_sqr8x_internal/__bn_post4x_internal call pairs below
# are the visible ground for this.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
1039 .type bn_power5,@function
# Capability probe, as in bn_mul_mont_gather5.
1043 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Frame sizing (3*num) and page-walked stack allocation, same pattern
# as bn_mul4x_mont_gather5.
1056 leal (%r9,%r9,2),%r10d
1067 leaq -320(%rsp,%r9,2),%r11
1074 leaq -320(%rbp,%r9,2),%rbp
1079 leaq 4096-320(,%r9,2),%r10
1080 leaq -320(%rbp,%r9,2),%rbp
1090 leaq (%r11,%rbp,1),%rsp
1094 jmp .Lpwr_page_walk_done
1097 leaq -4096(%rsp),%rsp
1101 .Lpwr_page_walk_done:
# Stash pointer arguments in xmm registers so they survive the internal
# calls (internal routines clobber the GPRs).  These .byte sequences
# are hand-encoded "movq r64,xmm" forms (66 [REX.W] 0F 6E /r), e.g.
# 102,72,15,110,207 = movq %rdi,%xmm1 -- emitted raw by the generator.
1118 .byte 102,72,15,110,207
1119 .byte 102,72,15,110,209
1120 .byte 102,73,15,110,218
1121 .byte 102,72,15,110,226
# Five squarings: each __bn_sqr8x_internal is followed by
# __bn_post4x_internal (the conditional-subtraction / copy-back tail).
1123 call __bn_sqr8x_internal
1124 call __bn_post4x_internal
1125 call __bn_sqr8x_internal
1126 call __bn_post4x_internal
1127 call __bn_sqr8x_internal
1128 call __bn_post4x_internal
1129 call __bn_sqr8x_internal
1130 call __bn_post4x_internal
1131 call __bn_sqr8x_internal
1132 call __bn_post4x_internal
# Recover the stashed arguments (movq xmm,r64: 66 REX.W 0F 7E /r).
1134 .byte 102,72,15,126,209
1135 .byte 102,72,15,126,226
1153 .size bn_power5,.-bn_power5
# ----------------------------------------------------------------------
# bn_sqr8x_internal / __bn_sqr8x_internal -- 8-way unrolled modular
# squaring.  Visible structure: (1) compute the off-diagonal products
# a[i]*a[j], i<j, into the scratch area; (2) double them and add the
# diagonal squares via the .Lsqr4x_shift_n_add loop (the *2 is done
# with "leaq (reg,reg,2)"-style shifts through carry); (3) run the
# Montgomery reduction in __bn_sqr8x_reduction / .L8x_reduction_loop.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers and most of each loop body is elided.
# ----------------------------------------------------------------------
1155 .globl bn_sqr8x_internal
1156 .hidden bn_sqr8x_internal
1157 .type bn_sqr8x_internal,@function
1160 __bn_sqr8x_internal:
# Index operands from their end via negative %rbp/%rcx offsets.
1235 leaq (%rsi,%r9,1),%rsi
# Phase 1: first pass of off-diagonal products.  %rdi points into the
# 48+8(%rsp) scratch frame sized 2*num.
1240 movq -32(%rsi,%rbp,1),%r14
1241 leaq 48+8(%rsp,%r9,2),%rdi
1242 movq -24(%rsi,%rbp,1),%rax
1243 leaq -32(%rdi,%rbp,1),%rdi
1244 movq -16(%rsi,%rbp,1),%rbx
1251 movq %r10,-24(%rdi,%rbp,1)
1257 movq %r11,-16(%rdi,%rbp,1)
1261 movq -8(%rsi,%rbp,1),%rbx
1275 movq %r10,-8(%rdi,%rcx,1)
# Inner product loop over a[j] at (%rsi,%rcx): four words per pass.
1280 movq (%rsi,%rcx,1),%rbx
1290 movq 8(%rsi,%rcx,1),%rbx
1300 movq %r11,(%rdi,%rcx,1)
1307 movq 16(%rsi,%rcx,1),%rbx
1316 movq %r10,8(%rdi,%rcx,1)
1323 movq 24(%rsi,%rcx,1),%rbx
1333 movq %r11,16(%rdi,%rcx,1)
1345 movq %r10,-8(%rdi,%rcx,1)
# Phase 1, subsequent outer iterations: like above but accumulating
# into existing scratch contents (the addq ...(%rdi...) lines).
1364 movq -32(%rsi,%rbp,1),%r14
1365 leaq 48+8(%rsp,%r9,2),%rdi
1366 movq -24(%rsi,%rbp,1),%rax
1367 leaq -32(%rdi,%rbp,1),%rdi
1368 movq -16(%rsi,%rbp,1),%rbx
1372 movq -24(%rdi,%rbp,1),%r10
1376 movq %r10,-24(%rdi,%rbp,1)
1383 addq -16(%rdi,%rbp,1),%r11
1386 movq %r11,-16(%rdi,%rbp,1)
1390 movq -8(%rsi,%rbp,1),%rbx
1395 addq -8(%rdi,%rbp,1),%r12
1406 movq %r10,-8(%rdi,%rbp,1)
1413 movq (%rsi,%rcx,1),%rbx
1419 addq (%rdi,%rcx,1),%r13
1426 movq 8(%rsi,%rcx,1),%rbx
1434 movq %r11,(%rdi,%rcx,1)
1438 addq 8(%rdi,%rcx,1),%r12
1449 movq %r10,-8(%rdi,%rcx,1)
1470 leaq 48+8(%rsp,%r9,2),%rdi
1472 leaq -32(%rdi,%rbp,1),%rdi
# Phase 2: double the off-diagonal sum and add the squares a[i]^2.
# "leaq (%r14,%r10,2)" computes 2*r10 + carry-in (r14/rcx hold the
# shifted-out carry bits), i.e. the doubling is done with lea instead
# of add/adc chains.
1531 movq -16(%rsi,%rbp,1),%rax
1532 leaq 48+8(%rsp),%rdi
1536 leaq (%r14,%r10,2),%r12
1538 leaq (%rcx,%r11,2),%r13
1547 movq -8(%rsi,%rbp,1),%rax
1551 leaq (%r14,%r10,2),%rbx
1555 leaq (%rcx,%r11,2),%r8
1564 movq 0(%rsi,%rbp,1),%rax
1571 jmp .Lsqr4x_shift_n_add
1574 .Lsqr4x_shift_n_add:
1575 leaq (%r14,%r10,2),%r12
1577 leaq (%rcx,%r11,2),%r13
1586 movq -8(%rsi,%rbp,1),%rax
1590 leaq (%r14,%r10,2),%rbx
1594 leaq (%rcx,%r11,2),%r8
1603 movq 0(%rsi,%rbp,1),%rax
1607 leaq (%r14,%r10,2),%r12
1611 leaq (%rcx,%r11,2),%r13
1620 movq 8(%rsi,%rbp,1),%rax
1624 leaq (%r14,%r10,2),%rbx
1628 leaq (%rcx,%r11,2),%r8
1637 movq 16(%rsi,%rbp,1),%rax
1644 jnz .Lsqr4x_shift_n_add
# Loop tail: final doubling group after the counter hits zero.
1646 leaq (%r14,%r10,2),%r12
1649 leaq (%rcx,%r11,2),%r13
1662 leaq (%r14,%r10,2),%rbx
1666 leaq (%rcx,%r11,2),%r8
# Hand-encoded "movq %xmm5,%rbp" (66 48 0F 7E /r family): recover the
# modulus pointer stashed in an xmm register by the caller.
1675 .byte 102,72,15,126,213
# Phase 3: Montgomery reduction of the 2*num-word square.
1676 __bn_sqr8x_reduction:
1678 leaq (%r9,%rbp,1),%rcx
1679 leaq 48+8(%rsp,%r9,2),%rdx
1681 leaq 48+8(%rsp,%r9,1),%rdi
1684 jmp .L8x_reduction_loop
1687 .L8x_reduction_loop:
1688 leaq (%rdi,%r9,1),%rdi
# m = t[0] * n0 (n0 was stored at 32+8(%rsp) by the caller's prologue).
1703 imulq 32+8(%rsp),%rbx
1721 movq %rbx,48-8+8(%rsp,%rcx,8)
1730 movq 32+8(%rsp),%rsi
1796 movq 48+56+8(%rsp),%rbx
1860 movq 48-16+8(%rsp,%rcx,8),%rbx
1876 movq 48+56+8(%rsp),%rbx
# Recover stashed pointers from xmm (hand-encoded movq xmm->r64) and
# loop until the whole 2*num area has been reduced.
1919 .byte 102,72,15,126,213
1923 .byte 102,73,15,126,217
1933 jb .L8x_reduction_loop
1935 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# ----------------------------------------------------------------------
# __bn_post4x_internal -- post-squaring tail: recovers the result and
# modulus pointers stashed in xmm registers (hand-encoded movq xmm,r64)
# and tail-jumps into the shared conditional-subtraction loop at
# .Lsqr4x_sub_entry, which subtracts the modulus if the result
# overflowed it.  NOTE(review): sampled excerpt; leading integers are
# fused original line numbers.
# ----------------------------------------------------------------------
1936 .type __bn_post4x_internal,@function
1938 __bn_post4x_internal:
1940 leaq (%rdi,%r9,1),%rbx
1942 .byte 102,72,15,126,207
1944 .byte 102,72,15,126,206
1951 jmp .Lsqrx4x_sub_entry
1989 .size __bn_post4x_internal,.-__bn_post4x_internal
# ----------------------------------------------------------------------
# bn_from_montgomery -- public entry for conversion out of Montgomery
# form.  Only the symbol directives are visible in this excerpt; the
# body (elided) presumably dispatches to bn_from_mont8x below -- TODO
# confirm against the un-sampled source.
# ----------------------------------------------------------------------
1990 .globl bn_from_montgomery
1991 .type bn_from_montgomery,@function
1998 .size bn_from_montgomery,.-bn_from_montgomery
# ----------------------------------------------------------------------
# bn_from_mont8x -- converts a value out of Montgomery form using one
# 8x reduction pass.  Visible structure: page-walked frame setup, copy
# of the input into the scratch frame (with the upper half zeroed via
# the movdqa %xmm0 stores), one reduction (sqrx8x or sqr8x path chosen
# by capability bits), then zeroing of the scratch (.Lfrom_mont_zero).
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
2000 .type bn_from_mont8x,@function
# Frame sizing (3*num) and page-walked stack allocation.
2014 leaq (%r9,%r9,2),%r10
2025 leaq -320(%rsp,%r9,2),%r11
2032 leaq -320(%rbp,%r9,2),%rbp
2037 leaq 4096-320(,%r9,2),%r10
2038 leaq -320(%rbp,%r9,2),%rbp
2048 leaq (%r11,%rbp,1),%rsp
2052 jmp .Lfrom_page_walk_done
2055 leaq -4096(%rsp),%rsp
2059 .Lfrom_page_walk_done:
# Copy input (movdqu loads from %rsi) into the low half of the scratch
# area while zeroing the high half ((%rax,%r9) stores of %xmm0).
2084 movdqu 16(%rsi),%xmm2
2085 movdqu 32(%rsi),%xmm3
2086 movdqa %xmm0,(%rax,%r9,1)
2087 movdqu 48(%rsi),%xmm4
2088 movdqa %xmm0,16(%rax,%r9,1)
# Hand-encoded "leaq 64(%rsi),%rsi" (48 8D B6 40 00 00 00), emitted as
# raw bytes by the generator.
2089 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2091 movdqa %xmm0,32(%rax,%r9,1)
2092 movdqa %xmm2,16(%rax)
2093 movdqa %xmm0,48(%rax,%r9,1)
2094 movdqa %xmm3,32(%rax)
2095 movdqa %xmm4,48(%rax)
# Stash pointers in xmm registers across the reduction calls
# (hand-encoded movq r64,xmm).
2100 .byte 102,72,15,110,207
2101 .byte 102,72,15,110,209
2104 .byte 102,73,15,110,218
# Capability check picks the MULX/ADX reduction when available...
2105 movl OPENSSL_ia32cap_P+8(%rip),%r11d
2110 leaq (%rax,%r9,1),%rdi
2111 call __bn_sqrx8x_reduction
2112 call __bn_postx4x_internal
2117 jmp .Lfrom_mont_zero
# ...otherwise the plain SSE2/mul path.
2121 call __bn_sqr8x_reduction
2122 call __bn_post4x_internal
2127 jmp .Lfrom_mont_zero
# Scrub the scratch frame (security hygiene: don't leave intermediate
# values on the stack) before returning.
2131 movdqa %xmm0,0(%rax)
2132 movdqa %xmm0,16(%rax)
2133 movdqa %xmm0,32(%rax)
2134 movdqa %xmm0,48(%rax)
2137 jnz .Lfrom_mont_zero
2149 .size bn_from_mont8x,.-bn_from_mont8x
# ----------------------------------------------------------------------
# bn_mulx4x_mont_gather5 -- MULX/ADX (BMI2+ADX) variant of the 4x
# Montgomery multiply.  Visible portion is the page-walked frame setup
# and the call into mulx4x_internal, which holds the actual arithmetic.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
2150 .type bn_mulx4x_mont_gather5,@function
2152 bn_mulx4x_mont_gather5:
# Frame sizing (3*num) and page-walked allocation, same pattern as the
# other entry points.
2164 leaq (%r9,%r9,2),%r10
2177 leaq -320(%rsp,%r9,2),%r11
2184 leaq -320(%rbp,%r9,2),%rbp
2188 leaq 4096-320(,%r9,2),%r10
2189 leaq -320(%rbp,%r9,2),%rbp
2199 leaq (%r11,%rbp,1),%rsp
2202 ja .Lmulx4x_page_walk
2203 jmp .Lmulx4x_page_walk_done
2206 leaq -4096(%rsp),%rsp
2209 ja .Lmulx4x_page_walk
2210 .Lmulx4x_page_walk_done:
2227 call mulx4x_internal
2241 .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
# ----------------------------------------------------------------------
# mulx4x_internal -- MULX/ADX body of the 4x Montgomery multiply.
# Uses mulxq (flag-free widening multiply, BMI2) with adox/adcx
# dual-carry chains (one visible at "adoxq -24(%rbx)") so two carry
# streams run in parallel.  Multiplier words are gathered from the
# power table in constant time via the same mask-table + pand scheme
# as the SSE2 paths.  NOTE(review): sampled excerpt; leading integers
# are fused original line numbers and most adox/adcx glue is elided.
# ----------------------------------------------------------------------
2243 .type mulx4x_internal,@function
# Save table-end and result pointers in the scratch frame.
2251 leaq 128(%rdx,%r9,1),%r13
2255 leaq .Linc(%rip),%rax
2256 movq %r13,16+8(%rsp)
2258 movq %rdi,56+8(%rsp)
2259 movdqa 0(%rax),%xmm0
2260 movdqa 16(%rax),%xmm1
2261 leaq 88-112(%rsp,%r10,1),%r10
# Broadcast the 5-bit index for the mask comparisons.
2264 pshufd $0,%xmm5,%xmm5
# Build the sixteen 16-byte selection masks at 112..352(%r10).
2274 movdqa %xmm0,112(%r10)
2279 movdqa %xmm1,128(%r10)
2284 movdqa %xmm2,144(%r10)
2289 movdqa %xmm3,160(%r10)
2293 movdqa %xmm0,176(%r10)
2298 movdqa %xmm1,192(%r10)
2303 movdqa %xmm2,208(%r10)
2308 movdqa %xmm3,224(%r10)
2312 movdqa %xmm0,240(%r10)
2317 movdqa %xmm1,256(%r10)
2322 movdqa %xmm2,272(%r10)
2327 movdqa %xmm3,288(%r10)
2332 movdqa %xmm0,304(%r10)
2336 movdqa %xmm1,320(%r10)
2339 movdqa %xmm2,336(%r10)
2344 movdqa %xmm3,352(%r10)
# First constant-time gather: AND each 16-byte table slice (here the
# table lives at %rdi) with its mask slot so only the selected entry
# survives the OR-accumulation (accumulating por lines elided).
2345 pand 112(%rdi),%xmm3
2348 movdqa -128(%rdi),%xmm4
2349 movdqa -112(%rdi),%xmm5
2350 movdqa -96(%rdi),%xmm2
2351 pand 112(%r10),%xmm4
2352 movdqa -80(%rdi),%xmm3
2353 pand 128(%r10),%xmm5
2355 pand 144(%r10),%xmm2
2357 pand 160(%r10),%xmm3
2360 movdqa -64(%rdi),%xmm4
2361 movdqa -48(%rdi),%xmm5
2362 movdqa -32(%rdi),%xmm2
2363 pand 176(%r10),%xmm4
2364 movdqa -16(%rdi),%xmm3
2365 pand 192(%r10),%xmm5
2367 pand 208(%r10),%xmm2
2369 pand 224(%r10),%xmm3
2372 movdqa 0(%rdi),%xmm4
2373 movdqa 16(%rdi),%xmm5
2374 movdqa 32(%rdi),%xmm2
2375 pand 240(%r10),%xmm4
2376 movdqa 48(%rdi),%xmm3
2377 pand 256(%r10),%xmm5
2379 pand 272(%r10),%xmm2
2381 pand 288(%r10),%xmm3
# Fold halves; .byte 102,72,15,126,194 is hand-encoded
# "movq %xmm0,%rdx" -- mulxq takes its implicit multiplier from %rdx.
2385 pshufd $0x4e,%xmm0,%xmm1
2388 .byte 102,72,15,126,194
2389 leaq 64+32+8(%rsp),%rbx
# First multiply pass: b[0] * ap[0..3] via mulx (no flag clobber, so
# the add chains between these lines -- elided -- can use adcx/adox).
2392 mulxq 0(%rsi),%r8,%rax
2393 mulxq 8(%rsi),%r11,%r12
2395 mulxq 16(%rsi),%rax,%r13
2398 mulxq 24(%rsi),%rax,%r14
# m = t[0]*n0; n0 was saved at 32+8(%rsp).
2401 imulq 32+8(%rsp),%r8
# Reduction step: m * np[0..3].
2411 mulxq 0(%rcx),%rax,%r10
2414 mulxq 8(%rcx),%rax,%r11
2417 mulxq 16(%rcx),%rax,%r12
2418 movq 24+8(%rsp),%rdi
2422 mulxq 24(%rcx),%rax,%r15
# Inner loop: next four ap[] words, then four np[] words.
2434 mulxq 0(%rsi),%r10,%rax
2436 mulxq 8(%rsi),%r11,%r14
2438 mulxq 16(%rsi),%r12,%rax
2440 mulxq 24(%rsi),%r13,%r14
2449 mulxq 0(%rcx),%rax,%r15
2452 mulxq 8(%rcx),%rax,%r15
2455 mulxq 16(%rcx),%rax,%r15
2460 mulxq 24(%rcx),%rax,%r15
2473 leaq (%rsi,%rax,1),%rsi
# Gather of the next multiplier word for the outer loop; masks are now
# addressed relative to %r10 = %rbx-derived cursor.
2482 leaq 16-256(%rbx),%r10
2486 movdqa -128(%rdi),%xmm0
2487 movdqa -112(%rdi),%xmm1
2488 movdqa -96(%rdi),%xmm2
2489 pand 256(%r10),%xmm0
2490 movdqa -80(%rdi),%xmm3
2491 pand 272(%r10),%xmm1
2493 pand 288(%r10),%xmm2
2495 pand 304(%r10),%xmm3
2498 movdqa -64(%rdi),%xmm0
2499 movdqa -48(%rdi),%xmm1
2500 movdqa -32(%rdi),%xmm2
2501 pand 320(%r10),%xmm0
2502 movdqa -16(%rdi),%xmm3
2503 pand 336(%r10),%xmm1
2505 pand 352(%r10),%xmm2
2507 pand 368(%r10),%xmm3
2510 movdqa 0(%rdi),%xmm0
2511 movdqa 16(%rdi),%xmm1
2512 movdqa 32(%rdi),%xmm2
2513 pand 384(%r10),%xmm0
2514 movdqa 48(%rdi),%xmm3
2515 pand 400(%r10),%xmm1
2517 pand 416(%r10),%xmm2
2519 pand 432(%r10),%xmm3
2522 movdqa 64(%rdi),%xmm0
2523 movdqa 80(%rdi),%xmm1
2524 movdqa 96(%rdi),%xmm2
2525 pand 448(%r10),%xmm0
2526 movdqa 112(%rdi),%xmm3
2527 pand 464(%r10),%xmm1
2529 pand 480(%r10),%xmm2
2531 pand 496(%r10),%xmm3
# Fold + hand-encoded "movq %xmm0,%rdx" for the next mulx multiplier.
2535 pshufd $0x4e,%xmm4,%xmm0
2538 .byte 102,72,15,126,194
# Outer-iteration multiply/reduce; adoxq folds in previous partials
# read back from the %rbx scratch cursor.
2541 leaq 32(%rbx,%rax,1),%rbx
2542 mulxq 0(%rsi),%r8,%r11
2545 mulxq 8(%rsi),%r14,%r12
2548 mulxq 16(%rsi),%r15,%r13
2549 adoxq -24(%rbx),%r11
2551 mulxq 24(%rsi),%rdx,%r14
2552 adoxq -16(%rbx),%r12
2554 leaq (%rcx,%rax,1),%rcx
2561 imulq 32+8(%rsp),%r8
2567 mulxq 0(%rcx),%rax,%r10
2570 mulxq 8(%rcx),%rax,%r11
2573 mulxq 16(%rcx),%rax,%r12
2576 mulxq 24(%rcx),%rax,%r15
2578 movq 24+8(%rsp),%rdi
2589 mulxq 0(%rsi),%r10,%rax
2592 mulxq 8(%rsi),%r11,%r14
2595 mulxq 16(%rsi),%r12,%rax
2598 mulxq 24(%rsi),%r13,%r14
2609 mulxq 0(%rcx),%rax,%r15
2612 mulxq 8(%rcx),%rax,%r15
2615 mulxq 16(%rcx),%rax,%r15
2620 mulxq 24(%rcx),%rax,%r15
# Epilogue: reload table-end pointer, rewind operand pointers, recover
# the result pointer from 56+8(%rsp), and tail-jump into the shared
# conditional-subtraction code.
2635 movq 16+8(%rsp),%r10
2637 leaq (%rsi,%rax,1),%rsi
2646 movq (%rcx,%rax,1),%r12
2647 leaq (%rcx,%rax,1),%rbp
2649 leaq (%rbx,%rax,1),%rdi
2657 movq 56+8(%rsp),%rdx
2663 jmp .Lsqrx4x_sub_entry
2664 .size mulx4x_internal,.-mulx4x_internal
# ----------------------------------------------------------------------
# bn_powerx5 -- MULX/ADX counterpart of bn_power5: five consecutive
# squarings (__bn_sqrx8x_internal + __bn_postx4x_internal pairs)
# followed by a multiply via mulx4x_internal.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
2665 .type bn_powerx5,@function
# Frame sizing (3*num) and page-walked stack allocation.
2679 leaq (%r9,%r9,2),%r10
2690 leaq -320(%rsp,%r9,2),%r11
2697 leaq -320(%rbp,%r9,2),%rbp
2702 leaq 4096-320(,%r9,2),%r10
2703 leaq -320(%rbp,%r9,2),%rbp
2713 leaq (%r11,%rbp,1),%rsp
2717 jmp .Lpwrx_page_walk_done
2720 leaq -4096(%rsp),%rsp
2724 .Lpwrx_page_walk_done:
# Stash pointer args in xmm regs across the internal calls
# (hand-encoded "movq r64,xmm": 66 [REX] 0F 6E /r).
2741 .byte 102,72,15,110,207
2742 .byte 102,72,15,110,209
2743 .byte 102,73,15,110,218
2744 .byte 102,72,15,110,226
# Five squarings, each followed by the conditional-subtraction tail.
2749 call __bn_sqrx8x_internal
2750 call __bn_postx4x_internal
2751 call __bn_sqrx8x_internal
2752 call __bn_postx4x_internal
2753 call __bn_sqrx8x_internal
2754 call __bn_postx4x_internal
2755 call __bn_sqrx8x_internal
2756 call __bn_postx4x_internal
2757 call __bn_sqrx8x_internal
2758 call __bn_postx4x_internal
# Recover stashed args (movq xmm,r64), then the final multiply.
2762 .byte 102,72,15,126,209
2763 .byte 102,72,15,126,226
2766 call mulx4x_internal
2780 .size bn_powerx5,.-bn_powerx5
# ----------------------------------------------------------------------
# bn_sqrx8x_internal / __bn_sqrx8x_internal -- MULX/ADX 8-way modular
# squaring.  Visible structure mirrors bn_sqr8x_internal: zero the
# scratch frame, compute off-diagonal products with mulxq in an 8-wide
# outer loop (.Lsqrx8x_outer_loop), double-and-add the diagonal via
# .Lsqrx4x_shift_n_add, then reduce in __bn_sqrx8x_reduction /
# .Lsqrx8x_reduction_loop.  NOTE(review): sampled excerpt; leading
# integers are fused original line numbers and most carry-chain glue
# (adcx/adox) is elided.
# ----------------------------------------------------------------------
2782 .globl bn_sqrx8x_internal
2783 .hidden bn_sqrx8x_internal
2784 .type bn_sqrx8x_internal,@function
2787 __bn_sqrx8x_internal:
2828 leaq 48+8(%rsp),%rdi
2829 leaq (%rsi,%r9,1),%rbp
2832 jmp .Lsqr8x_zero_start
# Multi-byte nop used as loop alignment padding by the generator.
2835 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
# Zero 128 bytes of scratch per pass.
2838 movdqa %xmm0,0(%rdi)
2839 movdqa %xmm0,16(%rdi)
2840 movdqa %xmm0,32(%rdi)
2841 movdqa %xmm0,48(%rdi)
2843 movdqa %xmm0,64(%rdi)
2844 movdqa %xmm0,80(%rdi)
2845 movdqa %xmm0,96(%rdi)
2846 movdqa %xmm0,112(%rdi)
2859 leaq 48+8(%rsp),%rdi
2861 jmp .Lsqrx8x_outer_loop
# Off-diagonal products a[0]*a[1..7]; several mulxq forms are emitted
# as raw .byte sequences by the generator (same instruction, explicit
# encoding) -- do not re-encode.
2864 .Lsqrx8x_outer_loop:
2865 mulxq 8(%rsi),%r8,%rax
2868 mulxq 16(%rsi),%r9,%rax
2871 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
2874 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
2877 mulxq 40(%rsi),%r12,%rax
2880 mulxq 48(%rsi),%r13,%rax
2883 mulxq 56(%rsi),%r14,%r15
# a[1]*a[2..7]
2894 mulxq 16(%rsi),%r8,%rbx
2895 mulxq 24(%rsi),%r9,%rax
2898 mulxq 32(%rsi),%r10,%rbx
2901 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
2904 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
2907 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
# a[2]*a[3..7]
2918 mulxq 24(%rsi),%r8,%rbx
2919 mulxq 32(%rsi),%r9,%rax
2922 mulxq 40(%rsi),%r10,%rbx
2925 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
2928 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
# a[3]*a[4..7]
2936 mulxq 32(%rsi),%r8,%rax
2940 mulxq 40(%rsi),%r9,%rbx
2943 mulxq 48(%rsi),%r10,%rax
2946 mulxq 56(%rsi),%r11,%r12
# Cross products against the saved word in %r15.
2962 mulxq %r15,%r10,%rbx
2972 mulxq %r15,%r14,%rbx
2987 je .Lsqrx8x_outer_break
# Outer-loop bookkeeping: counters and destination cursor kept in the
# scratch frame at 16+8 / 24+8(%rsp).
3005 movq %rax,16+8(%rsp)
3006 movq %rdi,24+8(%rsp)
# Tail products against the second operand window at %rbp.
3015 mulxq 0(%rbp),%rax,%r8
3019 mulxq 8(%rbp),%rax,%r9
3023 mulxq 16(%rbp),%rax,%r10
3027 mulxq 24(%rbp),%rax,%r11
3031 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3035 mulxq 40(%rbp),%rax,%r13
3039 mulxq 48(%rbp),%rax,%r14
3040 movq %rbx,(%rdi,%rcx,8)
3045 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
3046 movq 8(%rsi,%rcx,8),%rdx
3060 subq 16+8(%rsp),%rbx
3075 movq %rax,16+8(%rsp)
3081 subq 16+8(%rsp),%rbx
3083 movq 24+8(%rsp),%rcx
3094 je .Lsqrx8x_outer_loop
3111 jmp .Lsqrx8x_outer_loop
3114 .Lsqrx8x_outer_break:
# Hand-encoded "movq %xmm3,%rcx"-family transfer of a stashed value.
3116 .byte 102,72,15,126,217
# Phase 2: square each a[i] with mulx %rdx,%rdx-style self-products
# and merge into the doubled off-diagonal sum (shift-n-add loop).
3122 leaq 48+8(%rsp),%rdi
3123 movq (%rsi,%rcx,1),%rdx
3134 .Lsqrx4x_shift_n_add:
3135 mulxq %rdx,%rax,%rbx
# Raw-encoded loads of the next scratch words (mov r64,m64 forms).
3138 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
3139 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3146 mulxq %rdx,%rax,%rbx
3149 movq 16(%rsi,%rcx,1),%rdx
3157 mulxq %rdx,%rax,%rbx
3160 movq 24(%rsi,%rcx,1),%rdx
3169 mulxq %rdx,%rax,%rbx
# jrcxz: loop exit when the byte counter in %rcx reaches zero.
3172 jrcxz .Lsqrx4x_shift_n_add_break
3173 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3182 jmp .Lsqrx4x_shift_n_add
3185 .Lsqrx4x_shift_n_add_break:
# Recover the modulus pointer stashed in xmm (movq xmm,r64) and fall
# into the reduction.
3190 .byte 102,72,15,126,213
3191 __bn_sqrx8x_reduction:
# n0 at 32+8(%rsp), cursor at 48+8(%rsp); reduce the 2*num-word square
# eight words per pass.
3193 movq 32+8(%rsp),%rbx
3194 movq 48+8(%rsp),%rdx
3195 leaq -64(%rbp,%r9,1),%rcx
3200 leaq 48+8(%rsp),%rdi
3201 jmp .Lsqrx8x_reduction_loop
3204 .Lsqrx8x_reduction_loop:
3214 movq %rax,24+8(%rsp)
# m * np[0..7] with mulx; one VEX form raw-encoded.
3224 mulxq 0(%rbp),%rax,%r8
3228 mulxq 8(%rbp),%rbx,%r9
3232 mulxq 16(%rbp),%rbx,%r10
3236 mulxq 24(%rbp),%rbx,%r11
3240 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
# Next m = t * n0 computed via mulx against the saved n0.
3246 mulxq 32+8(%rsp),%rbx,%rdx
3248 movq %rax,64+48+8(%rsp,%rcx,8)
3250 mulxq 40(%rbp),%rax,%r13
3254 mulxq 48(%rbp),%rax,%r14
3258 mulxq 56(%rbp),%rax,%r15
3264 .byte 0x67,0x67,0x67
3270 jae .Lsqrx8x_no_tail
# Tail pass for the final, partial group.
3272 movq 48+8(%rsp),%rdx
3287 movq %rax,16+8(%rsp)
3293 mulxq 0(%rbp),%rax,%r8
3297 mulxq 8(%rbp),%rax,%r9
3301 mulxq 16(%rbp),%rax,%r10
3305 mulxq 24(%rbp),%rax,%r11
3309 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3313 mulxq 40(%rbp),%rax,%r13
3317 mulxq 48(%rbp),%rax,%r14
3321 mulxq 56(%rbp),%rax,%r15
3322 movq 72+48+8(%rsp,%rcx,8),%rdx
3325 movq %rbx,(%rdi,%rcx,8)
3333 jae .Lsqrx8x_tail_done
3335 subq 16+8(%rsp),%rsi
3336 movq 48+8(%rsp),%rdx
3351 movq %rax,16+8(%rsp)
3367 subq 16+8(%rsp),%rsi
# Recover stashed pointers (movq xmm,r64) and iterate until the whole
# area is reduced.
3370 .byte 102,72,15,126,217
3373 .byte 102,72,15,126,213
3382 movq 32+8(%rsp),%rbx
3383 movq 64(%rdi,%rcx,1),%rdx
3395 leaq 64(%rdi,%rcx,1),%rdi
3397 jb .Lsqrx8x_reduction_loop
3399 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal
# ----------------------------------------------------------------------
# __bn_postx4x_internal -- MULX-path post-squaring tail.  Recovers the
# pointers stashed in xmm registers (hand-encoded movq xmm,r64), jumps
# into the shared subtraction loop, and uses andnq (BMI1) to select
# between result and result-minus-modulus without a data-dependent
# branch (constant-time final subtraction).
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
3401 __bn_postx4x_internal:
3408 .byte 102,72,15,126,202
3409 .byte 102,72,15,126,206
3415 jmp .Lsqrx4x_sub_entry
3424 andnq %rax,%r12,%r12
3426 andnq %rax,%r13,%r13
3427 andnq %rax,%r14,%r14
3428 andnq %rax,%r15,%r15
3449 .size __bn_postx4x_internal,.-__bn_postx4x_internal
# ----------------------------------------------------------------------
# bn_get_bits5 -- extract a 5-bit window from a big-number bit string.
# Only the 16-bit load is visible here: a word-granular read at
# (%r10,%rsi,2) from which the 5 bits are shifted/masked out (shift
# and mask elided in this excerpt).
# ----------------------------------------------------------------------
3451 .type bn_get_bits5,@function
3463 movzwl (%r10,%rsi,2),%eax
3467 .size bn_get_bits5,.-bn_get_bits5
# ----------------------------------------------------------------------
# bn_scatter5 -- store a value into slot %rcx of the 32-entry power
# table at %rdx (stride 8 bytes per visible addressing); zero-length
# input short-circuits to the epilogue.
# NOTE(review): sampled excerpt; the store loop itself is elided.
# ----------------------------------------------------------------------
3470 .type bn_scatter5,@function
3474 jz .Lscatter_epilogue
3475 leaq (%rdx,%rcx,8),%rdx
3485 .size bn_scatter5,.-bn_scatter5
# ----------------------------------------------------------------------
# bn_gather5 -- standalone constant-time gather of one of 32 table
# entries.  Builds the sixteen comparison masks on a local stack
# buffer, then ANDs every table slice (at %r11) against its mask so
# the selected entry is reconstructed with a data-independent memory
# access pattern (cache-timing defence).  The .LSEH_* labels bound the
# region described by Win64 structured-exception-handling metadata.
# NOTE(review): sampled excerpt; leading integers are fused original
# line numbers.
# ----------------------------------------------------------------------
3488 .type bn_gather5,@function
3491 .LSEH_begin_bn_gather5:
# Hand-encoded prologue, emitted as raw bytes so the SEH byte offsets
# stay fixed: 4C 8D 14 24 = "leaq (%rsp),%r10";
# 48 81 EC 08 01 00 00 = "subq $0x108,%rsp".
3493 .byte 0x4c,0x8d,0x14,0x24
3494 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
3495 leaq .Linc(%rip),%rax
3499 movdqa 0(%rax),%xmm0
3500 movdqa 16(%rax),%xmm1
# Broadcast the 5-bit index for the mask comparisons.
3504 pshufd $0,%xmm5,%xmm5
# Sixteen 16-byte selection masks stored at -128..112(%rax).
3513 movdqa %xmm0,-128(%rax)
3518 movdqa %xmm1,-112(%rax)
3523 movdqa %xmm2,-96(%rax)
3527 movdqa %xmm3,-80(%rax)
3532 movdqa %xmm0,-64(%rax)
3537 movdqa %xmm1,-48(%rax)
3542 movdqa %xmm2,-32(%rax)
3546 movdqa %xmm3,-16(%rax)
3551 movdqa %xmm0,0(%rax)
3556 movdqa %xmm1,16(%rax)
3561 movdqa %xmm2,32(%rax)
3565 movdqa %xmm3,48(%rax)
3570 movdqa %xmm0,64(%rax)
3575 movdqa %xmm1,80(%rax)
3580 movdqa %xmm2,96(%rax)
3582 movdqa %xmm3,112(%rax)
# Gather proper: read ALL 16 table slices at %r11 and AND each with
# its mask so exactly one survives (OR-accumulation elided).
3589 movdqa -128(%r11),%xmm0
3590 movdqa -112(%r11),%xmm1
3591 movdqa -96(%r11),%xmm2
3592 pand -128(%rax),%xmm0
3593 movdqa -80(%r11),%xmm3
3594 pand -112(%rax),%xmm1
3596 pand -96(%rax),%xmm2
3598 pand -80(%rax),%xmm3
3601 movdqa -64(%r11),%xmm0
3602 movdqa -48(%r11),%xmm1
3603 movdqa -32(%r11),%xmm2
3604 pand -64(%rax),%xmm0
3605 movdqa -16(%r11),%xmm3
3606 pand -48(%rax),%xmm1
3608 pand -32(%rax),%xmm2
3610 pand -16(%rax),%xmm3
3613 movdqa 0(%r11),%xmm0
3614 movdqa 16(%r11),%xmm1
3615 movdqa 32(%r11),%xmm2
3617 movdqa 48(%r11),%xmm3
3625 movdqa 64(%r11),%xmm0
3626 movdqa 80(%r11),%xmm1
3627 movdqa 96(%r11),%xmm2
3629 movdqa 112(%r11),%xmm3
3634 pand 112(%rax),%xmm3
# Fold the two 64-bit halves of the selected entry together.
3639 pshufd $0x4e,%xmm4,%xmm0
3648 .LSEH_end_bn_gather5:
3649 .size bn_gather5,.-bn_gather5
3654 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0