2 /* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
# -----------------------------------------------------------------------------
# bn_mul_mont_gather5 — Montgomery multiplication fused with a cache-timing-
# safe "gather" of one of 32 precomputed powers (selected via SSE2 masking,
# never via a data-dependent load address).
# NOTE(review): this view is a sparse sample of the generated file; many
# instructions between the numbered lines are elided. Comments below annotate
# only what the visible lines establish.
7 .globl bn_mul_mont_gather5
8 .type bn_mul_mont_gather5,@function
# CPU feature word (3rd dword of OPENSSL_ia32cap_P) — presumably used to
# dispatch to the 4x/MULX paths; dispatch branch itself is elided here.
15 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Carve a stack frame sized by the modulus length in %r9 (qwords).
30 leaq -280(%rsp,%r9,8),%r10
42 leaq (%r10,%r11,1),%rsp
46 jmp .Lmul_page_walk_done
56 movq %rax,8(%rsp,%r9,8)
# Build the 16-entry select-mask table at 112..352(%r10); each movdqa below
# stores one 16-byte comparison-mask row used for constant-time selection.
62 leaq 24-112(%rsp,%r9,8),%r10
74 movdqa %xmm0,112(%r10)
79 movdqa %xmm1,128(%r10)
84 movdqa %xmm2,144(%r10)
89 movdqa %xmm3,160(%r10)
93 movdqa %xmm0,176(%r10)
98 movdqa %xmm1,192(%r10)
103 movdqa %xmm2,208(%r10)
108 movdqa %xmm3,224(%r10)
112 movdqa %xmm0,240(%r10)
117 movdqa %xmm1,256(%r10)
122 movdqa %xmm2,272(%r10)
127 movdqa %xmm3,288(%r10)
131 movdqa %xmm0,304(%r10)
136 movdqa %xmm1,320(%r10)
139 movdqa %xmm2,336(%r10)
144 movdqa %xmm3,352(%r10)
# Read ALL rows of the power table at %r12 (masking/pand steps elided) so the
# memory access pattern is independent of the secret index.
148 movdqa -128(%r12),%xmm4
149 movdqa -112(%r12),%xmm5
150 movdqa -96(%r12),%xmm2
152 movdqa -80(%r12),%xmm3
160 movdqa -64(%r12),%xmm4
161 movdqa -48(%r12),%xmm5
162 movdqa -32(%r12),%xmm2
164 movdqa -16(%r12),%xmm3
173 movdqa 16(%r12),%xmm5
174 movdqa 32(%r12),%xmm2
176 movdqa 48(%r12),%xmm3
185 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded SSE2 "movq %xmm0,%rbx" (66 REX.W 0F 7E C3), kept as .byte for
# old assemblers that lack the GPR form of movq.
188 .byte 102,72,15,126,195
# Scalar Montgomery inner loop: a[i] * b_selected, reduction mod n (%rcx).
216 movq (%rsi,%r15,8),%rax
221 movq %r13,-16(%rsp,%r15,8)
227 movq (%rcx,%r15,8),%rax
241 movq %r13,-16(%rsp,%r9,8)
248 movq %r13,-8(%rsp,%r9,8)
249 movq %rdx,(%rsp,%r9,8)
# Gather the next power of b for the following outer iteration.
255 leaq 24+128(%rsp,%r9,8),%rdx
259 movdqa -128(%r12),%xmm0
260 movdqa -112(%r12),%xmm1
261 movdqa -96(%r12),%xmm2
262 movdqa -80(%r12),%xmm3
263 pand -128(%rdx),%xmm0
264 pand -112(%rdx),%xmm1
271 movdqa -64(%r12),%xmm0
272 movdqa -48(%r12),%xmm1
273 movdqa -32(%r12),%xmm2
274 movdqa -16(%r12),%xmm3
284 movdqa 16(%r12),%xmm1
285 movdqa 32(%r12),%xmm2
286 movdqa 48(%r12),%xmm3
295 movdqa 64(%r12),%xmm0
296 movdqa 80(%r12),%xmm1
297 movdqa 96(%r12),%xmm2
298 movdqa 112(%r12),%xmm3
308 pshufd $0x4e,%xmm4,%xmm0
# Hand-encoded movq %xmm0,%rbx again (see note above).
313 .byte 102,72,15,126,195
340 movq (%rsi,%r15,8),%rax
343 movq (%rsp,%r15,8),%r10
345 movq %r13,-16(%rsp,%r15,8)
351 movq (%rcx,%r15,8),%rax
365 movq (%rsp,%r9,8),%r10
367 movq %r13,-16(%rsp,%r9,8)
375 movq %r13,-8(%rsp,%r9,8)
376 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction: tmp - n with borrow chain, result to rdi.
388 .Lsub: sbbq (%rcx,%r14,8),%rax
389 movq %rax,(%rdi,%r14,8)
390 movq 8(%rsi,%r14,8),%rax
# Copy-back / zap loop: write result and clear the secret scratch area.
405 movq (%rsi,%r14,8),%rax
406 movq %r14,(%rsp,%r14,8)
407 movq %rax,(%rdi,%r14,8)
# Restore saved stack pointer from the frame (epilogue elided).
412 movq 8(%rsp,%r9,8),%rsi
424 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# -----------------------------------------------------------------------------
# bn_mul4x_mont_gather5 — 4-way unrolled front end; allocates the frame with
# guard-page probing ("page walk") before tail-calling mul4x_internal (call
# elided in this sampled view).
425 .type bn_mul4x_mont_gather5,@function
427 bn_mul4x_mont_gather5:
# %r10 = 3*%r9 — frame sizing uses num*3 qwords (tp + two gather areas).
444 leaq (%r9,%r9,2),%r10
456 leaq -320(%rsp,%r9,2),%r11
463 leaq -320(%rbp,%r9,2),%rbp
468 leaq 4096-320(,%r9,2),%r10
469 leaq -320(%rbp,%r9,2),%rbp
479 leaq (%r11,%rbp,1),%rsp
483 jmp .Lmul4x_page_walk_done
# Touch each 4 KiB page while lowering %rsp so the OS guard page is grown
# one page at a time (stack-probe idiom).
486 leaq -4096(%rsp),%rsp
490 .Lmul4x_page_walk_done:
511 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# -----------------------------------------------------------------------------
# mul4x_internal — 4-way unrolled Montgomery multiply body shared by the mul4x
# and power5 entry points. Same constant-time gather scheme as above: build a
# mask table, pand-select one of 32 table rows.
# NOTE(review): sparse sample; most of the unrolled arithmetic is elided.
513 .type mul4x_internal,@function
518 leaq .Linc(%rip),%rax
# %r13 = &table[num]+128 — end-of-table sentinel kept at 16+8(%rsp) (store
# elided here; compared at line 1018 below).
519 leaq 128(%rdx,%r9,1),%r13
522 movdqa 16(%rax),%xmm1
523 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast the 5-bit index to all lanes for mask generation.
526 pshufd $0,%xmm5,%xmm5
# Select-mask table, rows 112..352(%r10) (same layout as bn_mul_mont_gather5).
536 movdqa %xmm0,112(%r10)
541 movdqa %xmm1,128(%r10)
546 movdqa %xmm2,144(%r10)
551 movdqa %xmm3,160(%r10)
555 movdqa %xmm0,176(%r10)
560 movdqa %xmm1,192(%r10)
565 movdqa %xmm2,208(%r10)
570 movdqa %xmm3,224(%r10)
574 movdqa %xmm0,240(%r10)
579 movdqa %xmm1,256(%r10)
584 movdqa %xmm2,272(%r10)
589 movdqa %xmm3,288(%r10)
593 movdqa %xmm0,304(%r10)
598 movdqa %xmm1,320(%r10)
601 movdqa %xmm2,336(%r10)
606 movdqa %xmm3,352(%r10)
# Constant-time gather of the first multiplier word from all table rows.
610 movdqa -128(%r12),%xmm4
611 movdqa -112(%r12),%xmm5
612 movdqa -96(%r12),%xmm2
614 movdqa -80(%r12),%xmm3
622 movdqa -64(%r12),%xmm4
623 movdqa -48(%r12),%xmm5
624 movdqa -32(%r12),%xmm2
626 movdqa -16(%r12),%xmm3
635 movdqa 16(%r12),%xmm5
636 movdqa 32(%r12),%xmm2
638 movdqa 48(%r12),%xmm3
647 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded movq %xmm0,%rbx (66 REX.W 0F 7E C3).
650 .byte 102,72,15,126,195
# %rsi is advanced past the number so negative offsets (%rsi,%r9/%r15) index
# it; classic perlasm negative-index loop shape.
657 leaq (%rsi,%r9,1),%rsi
671 movq 8(%rsi,%r9,1),%rax
683 movq 16(%rsi,%r9,1),%rax
704 movq -8(%rsi,%r15,1),%rax
719 movq (%rsi,%r15,1),%rax
734 movq 8(%rsi,%r15,1),%rax
749 movq 16(%rsi,%r15,1),%rax
784 movq (%rsi,%r9,1),%rax
791 leaq (%rcx,%r9,1),%rcx
# Outer loop: gather next power, then another 4-way inner pass.
802 leaq 16+128(%r14),%rdx
805 movdqa -128(%r12),%xmm0
806 movdqa -112(%r12),%xmm1
807 movdqa -96(%r12),%xmm2
808 movdqa -80(%r12),%xmm3
809 pand -128(%rdx),%xmm0
810 pand -112(%rdx),%xmm1
817 movdqa -64(%r12),%xmm0
818 movdqa -48(%r12),%xmm1
819 movdqa -32(%r12),%xmm2
820 movdqa -16(%r12),%xmm3
830 movdqa 16(%r12),%xmm1
831 movdqa 32(%r12),%xmm2
832 movdqa 48(%r12),%xmm3
841 movdqa 64(%r12),%xmm0
842 movdqa 80(%r12),%xmm1
843 movdqa 96(%r12),%xmm2
844 movdqa 112(%r12),%xmm3
854 pshufd $0x4e,%xmm4,%xmm0
857 .byte 102,72,15,126,195
859 movq (%r14,%r9,1),%r10
870 leaq (%r14,%r9,1),%r14
874 movq 8(%rsi,%r9,1),%rax
888 movq 16(%rsi,%r9,1),%rax
910 movq -8(%rsi,%r15,1),%rax
927 movq (%rsi,%r15,1),%rax
944 movq 8(%rsi,%r15,1),%rax
961 movq 16(%rsi,%r15,1),%rax
1001 movq (%rsi,%r9,1),%rax
1009 leaq (%rcx,%r9,1),%rcx
# Loop until the table pointer reaches the sentinel saved at 16+8(%rsp).
1018 cmpq 16+8(%rsp),%r12
1025 leaq (%r14,%r9,1),%rbx
1030 movq 56+8(%rsp),%rdi
# Tail-jump into the shared subtract/copy-back code of the sqr8x path.
1036 jmp .Lsqr4x_sub_entry
1037 .size mul4x_internal,.-mul4x_internal
# -----------------------------------------------------------------------------
# bn_power5 — computes x^(2^5) * b mod n by five squarings followed by one
# gather-multiply (the final mul4x_internal call is elided in this view).
1039 .type bn_power5,@function
1043 movl OPENSSL_ia32cap_P+8(%rip),%r11d
1056 leal (%r9,%r9,2),%r10d
# Frame setup with guard-page probing, same pattern as bn_mul4x_mont_gather5.
1067 leaq -320(%rsp,%r9,2),%r11
1074 leaq -320(%rbp,%r9,2),%rbp
1079 leaq 4096-320(,%r9,2),%r10
1080 leaq -320(%rbp,%r9,2),%rbp
1090 leaq (%r11,%rbp,1),%rsp
1094 jmp .Lpwr_page_walk_done
1097 leaq -4096(%rsp),%rsp
1101 .Lpwr_page_walk_done:
# Hand-encoded movq GPR->XMM transfers: park pointers/count in xmm regs so
# they survive the internal calls without stack traffic.
1118 .byte 102,72,15,110,207
1119 .byte 102,72,15,110,209
1120 .byte 102,73,15,110,218
1121 .byte 102,72,15,110,226
# Five square+reduce rounds = raise to the 32nd power.
1123 call __bn_sqr8x_internal
1124 call __bn_post4x_internal
1125 call __bn_sqr8x_internal
1126 call __bn_post4x_internal
1127 call __bn_sqr8x_internal
1128 call __bn_post4x_internal
1129 call __bn_sqr8x_internal
1130 call __bn_post4x_internal
1131 call __bn_sqr8x_internal
1132 call __bn_post4x_internal
# Recover the parked pointers from xmm (hand-encoded movq XMM->GPR).
1134 .byte 102,72,15,126,209
1135 .byte 102,72,15,126,226
1153 .size bn_power5,.-bn_power5
# -----------------------------------------------------------------------------
# bn_sqr8x_internal / __bn_sqr8x_internal — 8-way unrolled squaring:
# off-diagonal products, doubling via shift-and-add, diagonal terms, then
# __bn_sqr8x_reduction performs the Montgomery reduction of the double-width
# result. Heavily sampled view; comments mark the visible phases only.
1155 .globl bn_sqr8x_internal
1156 .hidden bn_sqr8x_internal
1157 .type bn_sqr8x_internal,@function
1160 __bn_sqr8x_internal:
# Phase 1: off-diagonal partial products a[i]*a[j], i<j, written to tp (rdi).
# %rsi advanced past the input so negative (%rsi,%rbp/%rcx) offsets index it.
1235 leaq (%rsi,%r9,1),%rsi
1240 movq -32(%rsi,%rbp,1),%r14
1241 leaq 48+8(%rsp,%r9,2),%rdi
1242 movq -24(%rsi,%rbp,1),%rax
1243 leaq -32(%rdi,%rbp,1),%rdi
1244 movq -16(%rsi,%rbp,1),%rbx
1251 movq %r10,-24(%rdi,%rbp,1)
1257 movq %r11,-16(%rdi,%rbp,1)
1261 movq -8(%rsi,%rbp,1),%rbx
1275 movq %r10,-8(%rdi,%rcx,1)
1280 movq (%rsi,%rcx,1),%rbx
1290 movq 8(%rsi,%rcx,1),%rbx
1300 movq %r11,(%rdi,%rcx,1)
1307 movq 16(%rsi,%rcx,1),%rbx
1316 movq %r10,8(%rdi,%rcx,1)
1323 movq 24(%rsi,%rcx,1),%rbx
1333 movq %r11,16(%rdi,%rcx,1)
1345 movq %r10,-8(%rdi,%rcx,1)
# Subsequent off-diagonal rows accumulate into existing tp words (note the
# addq of tp operands below, absent from the first row).
1364 movq -32(%rsi,%rbp,1),%r14
1365 leaq 48+8(%rsp,%r9,2),%rdi
1366 movq -24(%rsi,%rbp,1),%rax
1367 leaq -32(%rdi,%rbp,1),%rdi
1368 movq -16(%rsi,%rbp,1),%rbx
1372 movq -24(%rdi,%rbp,1),%r10
1376 movq %r10,-24(%rdi,%rbp,1)
1383 addq -16(%rdi,%rbp,1),%r11
1386 movq %r11,-16(%rdi,%rbp,1)
1390 movq -8(%rsi,%rbp,1),%rbx
1395 addq -8(%rdi,%rbp,1),%r12
1406 movq %r10,-8(%rdi,%rbp,1)
1413 movq (%rsi,%rcx,1),%rbx
1419 addq (%rdi,%rcx,1),%r13
1426 movq 8(%rsi,%rcx,1),%rbx
1434 movq %r11,(%rdi,%rcx,1)
1438 addq 8(%rdi,%rcx,1),%r12
1449 movq %r10,-8(%rdi,%rcx,1)
1470 leaq 48+8(%rsp,%r9,2),%rdi
1472 leaq -32(%rdi,%rbp,1),%rdi
# Phase 2: double the off-diagonal sum and add squares a[i]^2. Each
# "leaq (%r14,%r10,2)" computes 2*word+carry-in without touching flags.
1531 movq -16(%rsi,%rbp,1),%rax
1532 leaq 48+8(%rsp),%rdi
1536 leaq (%r14,%r10,2),%r12
1538 leaq (%rcx,%r11,2),%r13
1547 movq -8(%rsi,%rbp,1),%rax
1551 leaq (%r14,%r10,2),%rbx
1555 leaq (%rcx,%r11,2),%r8
1564 movq 0(%rsi,%rbp,1),%rax
1571 jmp .Lsqr4x_shift_n_add
1574 .Lsqr4x_shift_n_add:
1575 leaq (%r14,%r10,2),%r12
1577 leaq (%rcx,%r11,2),%r13
1586 movq -8(%rsi,%rbp,1),%rax
1590 leaq (%r14,%r10,2),%rbx
1594 leaq (%rcx,%r11,2),%r8
1603 movq 0(%rsi,%rbp,1),%rax
1607 leaq (%r14,%r10,2),%r12
1611 leaq (%rcx,%r11,2),%r13
1620 movq 8(%rsi,%rbp,1),%rax
1624 leaq (%r14,%r10,2),%rbx
1628 leaq (%rcx,%r11,2),%r8
1637 movq 16(%rsi,%rbp,1),%rax
1644 jnz .Lsqr4x_shift_n_add
1646 leaq (%r14,%r10,2),%r12
1649 leaq (%rcx,%r11,2),%r13
1662 leaq (%r14,%r10,2),%rbx
1666 leaq (%rcx,%r11,2),%r8
# Hand-encoded movq %xmm2,%rbp (restore n pointer parked in xmm).
1675 .byte 102,72,15,126,213
# Phase 3: word-by-word Montgomery reduction of the 2n-word result.
1676 __bn_sqr8x_reduction:
1678 leaq (%r9,%rbp,1),%rcx
1679 leaq 48+8(%rsp,%r9,2),%rdx
1681 leaq 48+8(%rsp,%r9,1),%rdi
1684 jmp .L8x_reduction_loop
1687 .L8x_reduction_loop:
1688 leaq (%rdi,%r9,1),%rdi
# m = tp[0] * n0 (n0 cached at 32+8(%rsp)).
1703 imulq 32+8(%rsp),%rbx
1721 movq %rbx,48-8+8(%rsp,%rcx,8)
1730 movq 32+8(%rsp),%rsi
1796 movq 48+56+8(%rsp),%rbx
1860 movq 48-16+8(%rsp,%rcx,8),%rbx
1876 movq 48+56+8(%rsp),%rbx
# Hand-encoded movq XMM->GPR restores (%rbp, %r9) before looping.
1920 .byte 102,72,15,126,213
1924 .byte 102,73,15,126,217
1934 jb .L8x_reduction_loop
1936 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# -----------------------------------------------------------------------------
# __bn_post4x_internal — post-processing after sqr8x reduction: conditional
# subtraction of the modulus and copy-back (shared .Lsqr4x_sub_entry tail).
1937 .type __bn_post4x_internal,@function
1939 __bn_post4x_internal:
1941 leaq (%rdi,%r9,1),%rbx
# Hand-encoded movq %xmm1,%rdi / %xmm1? -> GPR restores of parked pointers.
1943 .byte 102,72,15,126,207
1945 .byte 102,72,15,126,206
1952 jmp .Lsqr4x_sub_entry
1990 .size __bn_post4x_internal,.-__bn_post4x_internal
# -----------------------------------------------------------------------------
# bn_from_montgomery — public entry (body elided in this sampled view; the
# heavy lifting is in bn_from_mont8x below).
1991 .globl bn_from_montgomery
1992 .type bn_from_montgomery,@function
1999 .size bn_from_montgomery,.-bn_from_montgomery
# -----------------------------------------------------------------------------
# bn_from_mont8x — converts out of Montgomery form via one sqr8x-style
# reduction pass; dispatches to the MULX variant when available.
2001 .type bn_from_mont8x,@function
# Frame setup with guard-page probing (same pattern as the other entries).
2015 leaq (%r9,%r9,2),%r10
2026 leaq -320(%rsp,%r9,2),%r11
2033 leaq -320(%rbp,%r9,2),%rbp
2038 leaq 4096-320(,%r9,2),%r10
2039 leaq -320(%rbp,%r9,2),%rbp
2049 leaq (%r11,%rbp,1),%rsp
2053 jmp .Lfrom_page_walk_done
2056 leaq -4096(%rsp),%rsp
2060 .Lfrom_page_walk_done:
# Widen the n-word input to 2n words: copy the input low half, zero the
# high half (%xmm0 presumably zeroed in elided code — note the stores at
# (%rax,%r9) vs the data stores at (%rax)).
2085 movdqu 16(%rsi),%xmm2
2086 movdqu 32(%rsi),%xmm3
2087 movdqa %xmm0,(%rax,%r9,1)
2088 movdqu 48(%rsi),%xmm4
2089 movdqa %xmm0,16(%rax,%r9,1)
# Hand-encoded "leaq 64(%rsi),%rsi" with a 4-byte displacement (padding).
2090 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2092 movdqa %xmm0,32(%rax,%r9,1)
2093 movdqa %xmm2,16(%rax)
2094 movdqa %xmm0,48(%rax,%r9,1)
2095 movdqa %xmm3,32(%rax)
2096 movdqa %xmm4,48(%rax)
# Park pointers in xmm regs (hand-encoded movq GPR->XMM) across the calls.
2101 .byte 102,72,15,110,207
2102 .byte 102,72,15,110,209
2105 .byte 102,73,15,110,218
# Re-check CPU features to pick MULX/ADX reduction vs the plain one.
2106 movl OPENSSL_ia32cap_P+8(%rip),%r11d
2111 leaq (%rax,%r9,1),%rdi
2112 call __bn_sqrx8x_reduction
2113 call __bn_postx4x_internal
2118 jmp .Lfrom_mont_zero
# Fallback path without MULX.
2122 call __bn_sqr8x_reduction
2123 call __bn_post4x_internal
2128 jmp .Lfrom_mont_zero
# Zap the secret-bearing scratch area before returning.
2132 movdqa %xmm0,0(%rax)
2133 movdqa %xmm0,16(%rax)
2134 movdqa %xmm0,32(%rax)
2135 movdqa %xmm0,48(%rax)
2138 jnz .Lfrom_mont_zero
2150 .size bn_from_mont8x,.-bn_from_mont8x
# -----------------------------------------------------------------------------
# bn_mulx4x_mont_gather5 — BMI2/ADX (mulx/adcx/adox) front end; frame setup
# with guard-page probing, then calls mulx4x_internal.
2151 .type bn_mulx4x_mont_gather5,@function
2153 bn_mulx4x_mont_gather5:
2165 leaq (%r9,%r9,2),%r10
2178 leaq -320(%rsp,%r9,2),%r11
2185 leaq -320(%rbp,%r9,2),%rbp
2189 leaq 4096-320(,%r9,2),%r10
2190 leaq -320(%rbp,%r9,2),%rbp
2200 leaq (%r11,%rbp,1),%rsp
2203 ja .Lmulx4x_page_walk
2204 jmp .Lmulx4x_page_walk_done
# Probe one page per iteration while lowering %rsp.
2207 leaq -4096(%rsp),%rsp
2210 ja .Lmulx4x_page_walk
2211 .Lmulx4x_page_walk_done:
2228 call mulx4x_internal
2242 .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
# -----------------------------------------------------------------------------
# mulx4x_internal — MULX-based 4-way Montgomery multiply. mulx computes
# 64x64->128 without touching flags, letting adcx/adox run two carry chains
# in parallel (the adds are largely elided in this sampled view).
2244 .type mulx4x_internal,@function
2252 leaq 128(%rdx,%r9,1),%r13
2256 leaq .Linc(%rip),%rax
# Save end-of-table sentinel and output pointer in the frame.
2257 movq %r13,16+8(%rsp)
2259 movq %rdi,56+8(%rsp)
2260 movdqa 0(%rax),%xmm0
2261 movdqa 16(%rax),%xmm1
2262 leaq 88-112(%rsp,%r10,1),%r10
2265 pshufd $0,%xmm5,%xmm5
# Build the select-mask table (same layout as the non-MULX paths).
2275 movdqa %xmm0,112(%r10)
2280 movdqa %xmm1,128(%r10)
2285 movdqa %xmm2,144(%r10)
2290 movdqa %xmm3,160(%r10)
2294 movdqa %xmm0,176(%r10)
2299 movdqa %xmm1,192(%r10)
2304 movdqa %xmm2,208(%r10)
2309 movdqa %xmm3,224(%r10)
2313 movdqa %xmm0,240(%r10)
2318 movdqa %xmm1,256(%r10)
2323 movdqa %xmm2,272(%r10)
2328 movdqa %xmm3,288(%r10)
2333 movdqa %xmm0,304(%r10)
2337 movdqa %xmm1,320(%r10)
2340 movdqa %xmm2,336(%r10)
2345 movdqa %xmm3,352(%r10)
# Constant-time gather: here %rdi holds the power table, %r10 the masks.
2346 pand 112(%rdi),%xmm3
2349 movdqa -128(%rdi),%xmm4
2350 movdqa -112(%rdi),%xmm5
2351 movdqa -96(%rdi),%xmm2
2352 pand 112(%r10),%xmm4
2353 movdqa -80(%rdi),%xmm3
2354 pand 128(%r10),%xmm5
2356 pand 144(%r10),%xmm2
2358 pand 160(%r10),%xmm3
2361 movdqa -64(%rdi),%xmm4
2362 movdqa -48(%rdi),%xmm5
2363 movdqa -32(%rdi),%xmm2
2364 pand 176(%r10),%xmm4
2365 movdqa -16(%rdi),%xmm3
2366 pand 192(%r10),%xmm5
2368 pand 208(%r10),%xmm2
2370 pand 224(%r10),%xmm3
2373 movdqa 0(%rdi),%xmm4
2374 movdqa 16(%rdi),%xmm5
2375 movdqa 32(%rdi),%xmm2
2376 pand 240(%r10),%xmm4
2377 movdqa 48(%rdi),%xmm3
2378 pand 256(%r10),%xmm5
2380 pand 272(%r10),%xmm2
2382 pand 288(%r10),%xmm3
2386 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded movq %xmm0,%rdx — mulx takes its implicit operand in %rdx.
2389 .byte 102,72,15,126,194
2390 leaq 64+32+8(%rsp),%rbx
# First row: b[0] * a[0..3].
2393 mulxq 0(%rsi),%r8,%rax
2394 mulxq 8(%rsi),%r11,%r12
2396 mulxq 16(%rsi),%rax,%r13
2399 mulxq 24(%rsi),%rax,%r14
# m = t[0] * n0 (n0 cached at 32+8(%rsp)).
2402 imulq 32+8(%rsp),%r8
# Reduction row: m * n[0..3].
2412 mulxq 0(%rcx),%rax,%r10
2415 mulxq 8(%rcx),%rax,%r11
2418 mulxq 16(%rcx),%rax,%r12
2419 movq 24+8(%rsp),%rdi
2423 mulxq 24(%rcx),%rax,%r15
# Inner loop: next 4 words of a, then of n.
2435 mulxq 0(%rsi),%r10,%rax
2437 mulxq 8(%rsi),%r11,%r14
2439 mulxq 16(%rsi),%r12,%rax
2441 mulxq 24(%rsi),%r13,%r14
2450 mulxq 0(%rcx),%rax,%r15
2453 mulxq 8(%rcx),%rax,%r15
2456 mulxq 16(%rcx),%rax,%r15
2461 mulxq 24(%rcx),%rax,%r15
2474 leaq (%rsi,%rax,1),%rsi
# Outer loop: gather next power of b from all 16 rows.
2483 leaq 16-256(%rbx),%r10
2487 movdqa -128(%rdi),%xmm0
2488 movdqa -112(%rdi),%xmm1
2489 movdqa -96(%rdi),%xmm2
2490 pand 256(%r10),%xmm0
2491 movdqa -80(%rdi),%xmm3
2492 pand 272(%r10),%xmm1
2494 pand 288(%r10),%xmm2
2496 pand 304(%r10),%xmm3
2499 movdqa -64(%rdi),%xmm0
2500 movdqa -48(%rdi),%xmm1
2501 movdqa -32(%rdi),%xmm2
2502 pand 320(%r10),%xmm0
2503 movdqa -16(%rdi),%xmm3
2504 pand 336(%r10),%xmm1
2506 pand 352(%r10),%xmm2
2508 pand 368(%r10),%xmm3
2511 movdqa 0(%rdi),%xmm0
2512 movdqa 16(%rdi),%xmm1
2513 movdqa 32(%rdi),%xmm2
2514 pand 384(%r10),%xmm0
2515 movdqa 48(%rdi),%xmm3
2516 pand 400(%r10),%xmm1
2518 pand 416(%r10),%xmm2
2520 pand 432(%r10),%xmm3
2523 movdqa 64(%rdi),%xmm0
2524 movdqa 80(%rdi),%xmm1
2525 movdqa 96(%rdi),%xmm2
2526 pand 448(%r10),%xmm0
2527 movdqa 112(%rdi),%xmm3
2528 pand 464(%r10),%xmm1
2530 pand 480(%r10),%xmm2
2532 pand 496(%r10),%xmm3
2536 pshufd $0x4e,%xmm4,%xmm0
2539 .byte 102,72,15,126,194
2542 leaq 32(%rbx,%rax,1),%rbx
# Second and later outer rows: accumulate into prior tp words via adox.
2543 mulxq 0(%rsi),%r8,%r11
2546 mulxq 8(%rsi),%r14,%r12
2549 mulxq 16(%rsi),%r15,%r13
2550 adoxq -24(%rbx),%r11
2552 mulxq 24(%rsi),%rdx,%r14
2553 adoxq -16(%rbx),%r12
2555 leaq (%rcx,%rax,1),%rcx
2562 imulq 32+8(%rsp),%r8
2568 mulxq 0(%rcx),%rax,%r10
2571 mulxq 8(%rcx),%rax,%r11
2574 mulxq 16(%rcx),%rax,%r12
2577 mulxq 24(%rcx),%rax,%r15
2579 movq 24+8(%rsp),%rdi
2590 mulxq 0(%rsi),%r10,%rax
2593 mulxq 8(%rsi),%r11,%r14
2596 mulxq 16(%rsi),%r12,%rax
2599 mulxq 24(%rsi),%r13,%r14
2610 mulxq 0(%rcx),%rax,%r15
2613 mulxq 8(%rcx),%rax,%r15
2616 mulxq 16(%rcx),%rax,%r15
2621 mulxq 24(%rcx),%rax,%r15
2636 movq 16+8(%rsp),%r10
2638 leaq (%rsi,%rax,1),%rsi
# Epilogue: prepare pointers for conditional-subtract tail.
2647 movq (%rcx,%rax,1),%r12
2648 leaq (%rcx,%rax,1),%rbp
2650 leaq (%rbx,%rax,1),%rdi
2658 movq 56+8(%rsp),%rdx
2664 jmp .Lsqrx4x_sub_entry
2665 .size mulx4x_internal,.-mulx4x_internal
# -----------------------------------------------------------------------------
# bn_powerx5 — MULX/ADX variant of bn_power5: five sqrx8x squarings then one
# gather multiply via mulx4x_internal.
2666 .type bn_powerx5,@function
# Frame setup with guard-page probing (same pattern as the other entries).
2680 leaq (%r9,%r9,2),%r10
2691 leaq -320(%rsp,%r9,2),%r11
2698 leaq -320(%rbp,%r9,2),%rbp
2703 leaq 4096-320(,%r9,2),%r10
2704 leaq -320(%rbp,%r9,2),%rbp
2714 leaq (%r11,%rbp,1),%rsp
2718 jmp .Lpwrx_page_walk_done
2721 leaq -4096(%rsp),%rsp
2725 .Lpwrx_page_walk_done:
# Park pointers/count in xmm regs (hand-encoded movq GPR->XMM).
2742 .byte 102,72,15,110,207
2743 .byte 102,72,15,110,209
2744 .byte 102,73,15,110,218
2745 .byte 102,72,15,110,226
# Five square+reduce rounds = raise to the 32nd power.
2750 call __bn_sqrx8x_internal
2751 call __bn_postx4x_internal
2752 call __bn_sqrx8x_internal
2753 call __bn_postx4x_internal
2754 call __bn_sqrx8x_internal
2755 call __bn_postx4x_internal
2756 call __bn_sqrx8x_internal
2757 call __bn_postx4x_internal
2758 call __bn_sqrx8x_internal
2759 call __bn_postx4x_internal
# Restore parked registers, then the final gather-multiply.
2763 .byte 102,72,15,126,209
2764 .byte 102,72,15,126,226
2767 call mulx4x_internal
2781 .size bn_powerx5,.-bn_powerx5
# -----------------------------------------------------------------------------
# bn_sqrx8x_internal / __bn_sqrx8x_internal — MULX/ADX squaring: off-diagonal
# products with dual carry chains, shift-and-add doubling with diagonal terms
# (mulx %rdx gives a[i]^2), then __bn_sqrx8x_reduction Montgomery-reduces the
# double-width result. Sparse sample; comments mark visible phases only.
2783 .globl bn_sqrx8x_internal
2784 .hidden bn_sqrx8x_internal
2785 .type bn_sqrx8x_internal,@function
2788 __bn_sqrx8x_internal:
# Zero the 2n-word temporary area at 48+8(%rsp).
2829 leaq 48+8(%rsp),%rdi
2830 leaq (%rsi,%r9,1),%rbp
2833 jmp .Lsqr8x_zero_start
# Multi-byte NOP (alignment padding kept as raw bytes).
2836 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
2839 movdqa %xmm0,0(%rdi)
2840 movdqa %xmm0,16(%rdi)
2841 movdqa %xmm0,32(%rdi)
2842 movdqa %xmm0,48(%rdi)
2844 movdqa %xmm0,64(%rdi)
2845 movdqa %xmm0,80(%rdi)
2846 movdqa %xmm0,96(%rdi)
2847 movdqa %xmm0,112(%rdi)
2860 leaq 48+8(%rsp),%rdi
2862 jmp .Lsqrx8x_outer_loop
# Phase 1: off-diagonal rows a[0]*a[1..7], a[1]*a[2..7], ... (%rdx implicit
# multiplicand for mulx). .byte lines are hand-encoded mulx forms.
2865 .Lsqrx8x_outer_loop:
2866 mulxq 8(%rsi),%r8,%rax
2869 mulxq 16(%rsi),%r9,%rax
2872 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
2875 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
2878 mulxq 40(%rsi),%r12,%rax
2881 mulxq 48(%rsi),%r13,%rax
2884 mulxq 56(%rsi),%r14,%r15
2895 mulxq 16(%rsi),%r8,%rbx
2896 mulxq 24(%rsi),%r9,%rax
2899 mulxq 32(%rsi),%r10,%rbx
2902 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
2905 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
2908 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
2919 mulxq 24(%rsi),%r8,%rbx
2920 mulxq 32(%rsi),%r9,%rax
2923 mulxq 40(%rsi),%r10,%rbx
2926 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
2929 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
2937 mulxq 32(%rsi),%r8,%rax
2941 mulxq 40(%rsi),%r9,%rbx
2944 mulxq 48(%rsi),%r10,%rax
2947 mulxq 56(%rsi),%r11,%r12
2963 mulxq %r15,%r10,%rbx
2973 mulxq %r15,%r14,%rbx
2988 je .Lsqrx8x_outer_break
3006 movq %rax,16+8(%rsp)
3007 movq %rdi,24+8(%rsp)
# Cross-block products against the part above %rbp.
3016 mulxq 0(%rbp),%rax,%r8
3020 mulxq 8(%rbp),%rax,%r9
3024 mulxq 16(%rbp),%rax,%r10
3028 mulxq 24(%rbp),%rax,%r11
3032 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3036 mulxq 40(%rbp),%rax,%r13
3040 mulxq 48(%rbp),%rax,%r14
3041 movq %rbx,(%rdi,%rcx,8)
3046 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
3047 movq 8(%rsi,%rcx,8),%rdx
3061 subq 16+8(%rsp),%rbx
3076 movq %rax,16+8(%rsp)
3082 movq 24+8(%rsp),%rcx
3087 je .Lsqrx8x_outer_loop
3104 jmp .Lsqrx8x_outer_loop
3107 .Lsqrx8x_outer_break:
# Hand-encoded movq %xmm3,%rcx (recover counter parked in xmm).
3109 .byte 102,72,15,126,217
# Phase 2: double and add diagonals; mulx %rdx,%rax,%rbx computes a[i]^2.
3115 leaq 48+8(%rsp),%rdi
3116 movq (%rsi,%rcx,1),%rdx
3127 .Lsqrx4x_shift_n_add:
3128 mulxq %rdx,%rax,%rbx
# Hand-encoded loads kept as raw bytes by the generator (padding-sensitive).
3131 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
3132 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3139 mulxq %rdx,%rax,%rbx
3142 movq 16(%rsi,%rcx,1),%rdx
3150 mulxq %rdx,%rax,%rbx
3153 movq 24(%rsi,%rcx,1),%rdx
3162 mulxq %rdx,%rax,%rbx
# jrcxz: loop exits when the (negative, rising) counter %rcx hits zero.
3165 jrcxz .Lsqrx4x_shift_n_add_break
3166 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3175 jmp .Lsqrx4x_shift_n_add
3178 .Lsqrx4x_shift_n_add_break:
# Hand-encoded movq %xmm2,%rbp (restore modulus pointer).
3183 .byte 102,72,15,126,213
# Phase 3: MULX Montgomery reduction of the double-width square.
3184 __bn_sqrx8x_reduction:
3186 movq 32+8(%rsp),%rbx
3187 movq 48+8(%rsp),%rdx
3188 leaq -64(%rbp,%r9,1),%rcx
3193 leaq 48+8(%rsp),%rdi
3194 jmp .Lsqrx8x_reduction_loop
3197 .Lsqrx8x_reduction_loop:
3207 movq %rax,24+8(%rsp)
# m * n[0..7] row (m implicit in %rdx for mulx).
3217 mulxq 0(%rbp),%rax,%r8
3221 mulxq 8(%rbp),%rbx,%r9
3225 mulxq 16(%rbp),%rbx,%r10
3229 mulxq 24(%rbp),%rbx,%r11
3233 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
# Next m = t * n0 (n0 at 32+8(%rsp)), computed via mulx to spare flags.
3239 mulxq 32+8(%rsp),%rbx,%rdx
3241 movq %rax,64+48+8(%rsp,%rcx,8)
3243 mulxq 40(%rbp),%rax,%r13
3247 mulxq 48(%rbp),%rax,%r14
3251 mulxq 56(%rbp),%rax,%r15
# 0x67 address-size prefixes used as single-byte padding by the generator.
3257 .byte 0x67,0x67,0x67
3263 jae .Lsqrx8x_no_tail
3265 movq 48+8(%rsp),%rdx
3280 movq %rax,16+8(%rsp)
3286 mulxq 0(%rbp),%rax,%r8
3290 mulxq 8(%rbp),%rax,%r9
3294 mulxq 16(%rbp),%rax,%r10
3298 mulxq 24(%rbp),%rax,%r11
3302 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3306 mulxq 40(%rbp),%rax,%r13
3310 mulxq 48(%rbp),%rax,%r14
3314 mulxq 56(%rbp),%rax,%r15
3315 movq 72+48+8(%rsp,%rcx,8),%rdx
3318 movq %rbx,(%rdi,%rcx,8)
3326 jae .Lsqrx8x_tail_done
3328 subq 16+8(%rsp),%rsi
3329 movq 48+8(%rsp),%rdx
3344 movq %rax,16+8(%rsp)
3361 subq 16+8(%rsp),%rsi
# Hand-encoded movq XMM->GPR restores before the loop-back test.
3364 .byte 102,72,15,126,217
3367 .byte 102,72,15,126,213
3376 movq 32+8(%rsp),%rbx
3377 movq 64(%rdi,%rcx,1),%rdx
3389 leaq 64(%rdi,%rcx,1),%rdi
3391 jb .Lsqrx8x_reduction_loop
3393 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal
# -----------------------------------------------------------------------------
# __bn_postx4x_internal — ADX-era post-processing: conditional subtraction of
# the modulus using andn-based masking (branch-free select), then the shared
# .Lsqrx4x_sub_entry copy-back tail.
3395 __bn_postx4x_internal:
# Hand-encoded movq XMM->GPR restores of parked pointers.
3402 .byte 102,72,15,126,202
3403 .byte 102,72,15,126,206
3409 jmp .Lsqrx4x_sub_entry
# Mask the modulus words with the borrow-derived mask in %rax: constant-time
# "subtract n or subtract 0" selection.
3418 andnq %rax,%r12,%r12
3420 andnq %rax,%r13,%r13
3421 andnq %rax,%r14,%r14
3422 andnq %rax,%r15,%r15
3443 .size __bn_postx4x_internal,.-__bn_postx4x_internal
# -----------------------------------------------------------------------------
# bn_get_bits5 — extract a 5-bit window from a bignum; loads a 16-bit word
# straddling the bit offset (shift/mask steps elided in this sampled view).
3445 .type bn_get_bits5,@function
3457 movzwl (%r10,%rsi,2),%eax
3461 .size bn_get_bits5,.-bn_get_bits5
# -----------------------------------------------------------------------------
# bn_scatter5 — store a bignum into slot %rcx of the interleaved power table
# at %rdx (the store loop itself is elided in this sampled view).
3464 .type bn_scatter5,@function
# Zero-length input: nothing to scatter.
3468 jz .Lscatter_epilogue
3469 leaq (%rdx,%rcx,8),%rdx
3479 .size bn_scatter5,.-bn_scatter5
# -----------------------------------------------------------------------------
# bn_gather5 — standalone constant-time gather: read one of 32 table entries
# by masking ALL entries with pand so the address trace is index-independent.
3482 .type bn_gather5,@function
3485 .LSEH_begin_bn_gather5:
# Hand-encoded prologue (lea %rsp->%r10; sub $0x108,%rsp) kept as raw bytes
# so the Win64 SEH prologue length matches the generator's unwind data.
3487 .byte 0x4c,0x8d,0x14,0x24
3488 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
3489 leaq .Linc(%rip),%rax
3493 movdqa 0(%rax),%xmm0
3494 movdqa 16(%rax),%xmm1
# Broadcast the 5-bit index; build select masks on the local frame.
3498 pshufd $0,%xmm5,%xmm5
3507 movdqa %xmm0,-128(%rax)
3512 movdqa %xmm1,-112(%rax)
3517 movdqa %xmm2,-96(%rax)
3521 movdqa %xmm3,-80(%rax)
3526 movdqa %xmm0,-64(%rax)
3531 movdqa %xmm1,-48(%rax)
3536 movdqa %xmm2,-32(%rax)
3540 movdqa %xmm3,-16(%rax)
3545 movdqa %xmm0,0(%rax)
3550 movdqa %xmm1,16(%rax)
3555 movdqa %xmm2,32(%rax)
3559 movdqa %xmm3,48(%rax)
3564 movdqa %xmm0,64(%rax)
3569 movdqa %xmm1,80(%rax)
3574 movdqa %xmm2,96(%rax)
3576 movdqa %xmm3,112(%rax)
# Touch every table row (%r11), AND with the masks (%rax): only the selected
# row survives; accumulation (por) elided in this sampled view.
3583 movdqa -128(%r11),%xmm0
3584 movdqa -112(%r11),%xmm1
3585 movdqa -96(%r11),%xmm2
3586 pand -128(%rax),%xmm0
3587 movdqa -80(%r11),%xmm3
3588 pand -112(%rax),%xmm1
3590 pand -96(%rax),%xmm2
3592 pand -80(%rax),%xmm3
3595 movdqa -64(%r11),%xmm0
3596 movdqa -48(%r11),%xmm1
3597 movdqa -32(%r11),%xmm2
3598 pand -64(%rax),%xmm0
3599 movdqa -16(%r11),%xmm3
3600 pand -48(%rax),%xmm1
3602 pand -32(%rax),%xmm2
3604 pand -16(%rax),%xmm3
3607 movdqa 0(%r11),%xmm0
3608 movdqa 16(%r11),%xmm1
3609 movdqa 32(%r11),%xmm2
3611 movdqa 48(%r11),%xmm3
3619 movdqa 64(%r11),%xmm0
3620 movdqa 80(%r11),%xmm1
3621 movdqa 96(%r11),%xmm2
3623 movdqa 112(%r11),%xmm3
3628 pand 112(%rax),%xmm3
3633 pshufd $0x4e,%xmm4,%xmm0
3642 .LSEH_end_bn_gather5:
3643 .size bn_gather5,.-bn_gather5
# ASCII banner: "Montgomery Multiplication with scatter/gather for x86_64,
# CRYPTOGAMS by <appro@openssl.org>" (NUL-terminated identification string).
3648 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0