2 /* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
# bn_mul_mont_gather5 -- Montgomery multiplication where one multiplicand
# is fetched from a precomputed power table using a cache-timing-safe
# "gather": the whole table is scanned and the wanted entry selected with
# SIMD masks, so the memory access pattern is independent of the secret
# index.
# NOTE(review): this listing is a partial excerpt of the generated file --
# the embedded numbers are original line numbers and show large gaps, and
# they would also prevent the file from assembling as-is.  Confirm every
# observation below against the pristine x86_64-mont5.pl output.
7 .globl bn_mul_mont_gather5
8 .type bn_mul_mont_gather5,@function
# Read the OPENSSL_ia32cap_P capability word; presumably used to dispatch
# to the MULX/AD*X code path -- elided lines would confirm.
15 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Carve a scratch frame below %rsp sized by the word count in %r9.
30 leaq -280(%rsp,%r9,8),%r10
42 leaq (%r10,%r11,1),%rsp
46 jmp .Lmul_page_walk_done
56 movq %rax,8(%rsp,%r9,8)
# Build the 16-entry x 16-byte selection-mask table on the stack; each
# movdqa below fills one 16-byte slot.  The masks are later ANDed against
# table entries to pick one power without a data-dependent address.
62 leaq 24-112(%rsp,%r9,8),%r10
74 movdqa %xmm0,112(%r10)
79 movdqa %xmm1,128(%r10)
84 movdqa %xmm2,144(%r10)
89 movdqa %xmm3,160(%r10)
93 movdqa %xmm0,176(%r10)
98 movdqa %xmm1,192(%r10)
103 movdqa %xmm2,208(%r10)
108 movdqa %xmm3,224(%r10)
112 movdqa %xmm0,240(%r10)
117 movdqa %xmm1,256(%r10)
122 movdqa %xmm2,272(%r10)
127 movdqa %xmm3,288(%r10)
131 movdqa %xmm0,304(%r10)
136 movdqa %xmm1,320(%r10)
139 movdqa %xmm2,336(%r10)
144 movdqa %xmm3,352(%r10)
# Scan the full power table at (%r12).  The accompanying pand/por mask
# selection is in elided lines.
148 movdqa -128(%r12),%xmm4
149 movdqa -112(%r12),%xmm5
150 movdqa -96(%r12),%xmm2
152 movdqa -80(%r12),%xmm3
160 movdqa -64(%r12),%xmm4
161 movdqa -48(%r12),%xmm5
162 movdqa -32(%r12),%xmm2
164 movdqa -16(%r12),%xmm3
173 movdqa 16(%r12),%xmm5
174 movdqa 32(%r12),%xmm2
176 movdqa 48(%r12),%xmm3
# Fold the two selected 64-bit halves together ...
185 pshufd $0x4e,%xmm0,%xmm1
# ... then move the result to a GPR: raw encoding of movq %xmm0,%rbx
# (66 48 0F 7E C3), emitted as bytes for old-assembler compatibility.
188 .byte 102,72,15,126,195
# Schoolbook multiply-and-reduce inner loop over the %r15 index
# (most iterations elided here).
216 movq (%rsi,%r15,8),%rax
221 movq %r13,-16(%rsp,%r15,8)
227 movq (%rcx,%r15,8),%rax
241 movq %r13,-16(%rsp,%r9,8)
248 movq %r13,-8(%rsp,%r9,8)
249 movq %rdx,(%rsp,%r9,8)
# Gather the next power of the multiplicand for the outer-loop
# iteration; -128(%rdx)...-112(%rdx) hold the selection masks.
255 leaq 24+128(%rsp,%r9,8),%rdx
259 movdqa -128(%r12),%xmm0
260 movdqa -112(%r12),%xmm1
261 movdqa -96(%r12),%xmm2
262 movdqa -80(%r12),%xmm3
263 pand -128(%rdx),%xmm0
264 pand -112(%rdx),%xmm1
271 movdqa -64(%r12),%xmm0
272 movdqa -48(%r12),%xmm1
273 movdqa -32(%r12),%xmm2
274 movdqa -16(%r12),%xmm3
284 movdqa 16(%r12),%xmm1
285 movdqa 32(%r12),%xmm2
286 movdqa 48(%r12),%xmm3
295 movdqa 64(%r12),%xmm0
296 movdqa 80(%r12),%xmm1
297 movdqa 96(%r12),%xmm2
298 movdqa 112(%r12),%xmm3
308 pshufd $0x4e,%xmm4,%xmm0
# Raw-encoded movq %xmm0,%rbx again (same workaround as above).
313 .byte 102,72,15,126,195
340 movq (%rsi,%r15,8),%rax
343 movq (%rsp,%r15,8),%r10
345 movq %r13,-16(%rsp,%r15,8)
351 movq (%rcx,%r15,8),%rax
365 movq (%rsp,%r9,8),%r10
367 movq %r13,-16(%rsp,%r9,8)
375 movq %r13,-8(%rsp,%r9,8)
376 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction of the modulus (constant-time via
# sbb-propagated borrow), copying the result out to (%rdi).
388 .Lsub: sbbq (%rcx,%r14,8),%rax
389 movq %rax,(%rdi,%r14,8)
390 movq 8(%rsi,%r14,8),%rax
402 movq (%rdi,%r14,8),%rcx
403 movq (%rsp,%r14,8),%rdx
# Wipe the stack copy as it is consumed.
406 movq %r14,(%rsp,%r14,8)
408 movq %rdx,(%rdi,%r14,8)
# Epilogue: restore saved registers from above the frame.
413 movq 8(%rsp,%r9,8),%rsi
425 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5 -- 4-limbs-at-a-time variant of the gathered
# Montgomery multiplication; this excerpt shows only the frame setup.
# NOTE(review): partial excerpt of the generated file; most of the body
# is elided (see the gaps in the embedded original line numbers).
426 .type bn_mul4x_mont_gather5,@function
428 bn_mul4x_mont_gather5:
# Frame sizing: %r10 = 3*num (num in %r9), then reserve 320 bytes above
# 2*num-sized areas for the gather mask table and scratch.
445 leaq (%r9,%r9,2),%r10
457 leaq -320(%rsp,%r9,2),%r11
464 leaq -320(%rbp,%r9,2),%rbp
469 leaq 4096-320(,%r9,2),%r10
470 leaq -320(%rbp,%r9,2),%rbp
480 leaq (%r11,%rbp,1),%rsp
484 jmp .Lmul4x_page_walk_done
# Stack "page walk": drop %rsp one 4K page at a time (touching each
# page) so large frames do not skip the OS guard page.
487 leaq -4096(%rsp),%rsp
491 .Lmul4x_page_walk_done:
512 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# mul4x_internal -- inner worker for the 4x Montgomery multiply: builds
# the constant-time selection-mask table, gathers powers of the
# multiplicand from the table at (%r12), and runs the 4-way
# multiply/reduce loops.
# NOTE(review): partial excerpt; instruction gaps throughout -- verify
# against the pristine generated file.
514 .type mul4x_internal,@function
# .Linc is a constant pair {0,1 / 2,2} style increment table used to
# derive the per-slot masks -- contents not visible in this excerpt.
519 leaq .Linc(%rip),%rax
520 leaq 128(%rdx,%r9,1),%r13
523 movdqa 16(%rax),%xmm1
524 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast the (secret) table index to all lanes for mask comparison.
527 pshufd $0,%xmm5,%xmm5
# Fill the 16 x 16-byte mask table (same scheme as bn_mul_mont_gather5).
537 movdqa %xmm0,112(%r10)
542 movdqa %xmm1,128(%r10)
547 movdqa %xmm2,144(%r10)
552 movdqa %xmm3,160(%r10)
556 movdqa %xmm0,176(%r10)
561 movdqa %xmm1,192(%r10)
566 movdqa %xmm2,208(%r10)
571 movdqa %xmm3,224(%r10)
575 movdqa %xmm0,240(%r10)
580 movdqa %xmm1,256(%r10)
585 movdqa %xmm2,272(%r10)
590 movdqa %xmm3,288(%r10)
594 movdqa %xmm0,304(%r10)
599 movdqa %xmm1,320(%r10)
602 movdqa %xmm2,336(%r10)
607 movdqa %xmm3,352(%r10)
# First gather pass over the power table at (%r12); mask ANDs are in
# elided lines.
611 movdqa -128(%r12),%xmm4
612 movdqa -112(%r12),%xmm5
613 movdqa -96(%r12),%xmm2
615 movdqa -80(%r12),%xmm3
623 movdqa -64(%r12),%xmm4
624 movdqa -48(%r12),%xmm5
625 movdqa -32(%r12),%xmm2
627 movdqa -16(%r12),%xmm3
636 movdqa 16(%r12),%xmm5
637 movdqa 32(%r12),%xmm2
639 movdqa 48(%r12),%xmm3
648 pshufd $0x4e,%xmm0,%xmm1
# Raw-encoded movq %xmm0,%rbx (gathered scalar -> GPR).
651 .byte 102,72,15,126,195
# Point %rsi past the end of the input so the loop index (%r9/%r15,
# negative) can count up toward zero.
658 leaq (%rsi,%r9,1),%rsi
672 movq 8(%rsi,%r9,1),%rax
684 movq 16(%rsi,%r9,1),%rax
705 movq -8(%rsi,%r15,1),%rax
720 movq (%rsi,%r15,1),%rax
735 movq 8(%rsi,%r15,1),%rax
750 movq 16(%rsi,%r15,1),%rax
785 movq (%rsi,%r9,1),%rax
792 leaq (%rcx,%r9,1),%rcx
# Gather the next power for the outer loop (masks live at 16+128(%r14)).
803 leaq 16+128(%r14),%rdx
806 movdqa -128(%r12),%xmm0
807 movdqa -112(%r12),%xmm1
808 movdqa -96(%r12),%xmm2
809 movdqa -80(%r12),%xmm3
810 pand -128(%rdx),%xmm0
811 pand -112(%rdx),%xmm1
818 movdqa -64(%r12),%xmm0
819 movdqa -48(%r12),%xmm1
820 movdqa -32(%r12),%xmm2
821 movdqa -16(%r12),%xmm3
831 movdqa 16(%r12),%xmm1
832 movdqa 32(%r12),%xmm2
833 movdqa 48(%r12),%xmm3
842 movdqa 64(%r12),%xmm0
843 movdqa 80(%r12),%xmm1
844 movdqa 96(%r12),%xmm2
845 movdqa 112(%r12),%xmm3
855 pshufd $0x4e,%xmm4,%xmm0
# Raw-encoded movq %xmm0,%rbx.
858 .byte 102,72,15,126,195
860 movq (%r14,%r9,1),%r10
871 leaq (%r14,%r9,1),%r14
875 movq 8(%rsi,%r9,1),%rax
889 movq 16(%rsi,%r9,1),%rax
911 movq -8(%rsi,%r15,1),%rax
928 movq (%rsi,%r15,1),%rax
945 movq 8(%rsi,%r15,1),%rax
962 movq 16(%rsi,%r15,1),%rax
1002 movq (%rsi,%r9,1),%rax
1010 leaq (%rcx,%r9,1),%rcx
# 16+8(%rsp) caches the table end pointer (see the stores in the mulx
# path); compare to decide whether another outer iteration remains.
1019 cmpq 16+8(%rsp),%r12
1026 leaq (%r14,%r9,1),%rbx
1031 movq 56+8(%rsp),%rdi
# Tail: share the final conditional-subtract loop with the squaring code.
1037 jmp .Lsqr4x_sub_entry
1038 .size mul4x_internal,.-mul4x_internal
# bn_power5 -- computes a 5-bit-window power: five back-to-back modular
# squarings (a^32 mod n) followed by one gathered multiplication, used
# by fixed-window modular exponentiation.
# NOTE(review): partial excerpt; frame setup and the trailing multiply
# call are partly elided.
1040 .type bn_power5,@function
1044 movl OPENSSL_ia32cap_P+8(%rip),%r11d
1057 leal (%r9,%r9,2),%r10d
1068 leaq -320(%rsp,%r9,2),%r11
1075 leaq -320(%rbp,%r9,2),%rbp
1080 leaq 4096-320(,%r9,2),%r10
1081 leaq -320(%rbp,%r9,2),%rbp
1091 leaq (%r11,%rbp,1),%rsp
1095 jmp .Lpwr_page_walk_done
# Page-at-a-time stack probe (guard-page safety for big frames).
1098 leaq -4096(%rsp),%rsp
1102 .Lpwr_page_walk_done:
# Stash pointer arguments in XMM registers so they survive the calls
# below without stack traffic: raw-encoded movq GPR->XMM forms.
1119 .byte 102,72,15,110,207
1120 .byte 102,72,15,110,209
1121 .byte 102,73,15,110,218
1122 .byte 102,72,15,110,226
# Five squarings == raise to the 32nd power (2^5), one per window bit.
1124 call __bn_sqr8x_internal
1125 call __bn_post4x_internal
1126 call __bn_sqr8x_internal
1127 call __bn_post4x_internal
1128 call __bn_sqr8x_internal
1129 call __bn_post4x_internal
1130 call __bn_sqr8x_internal
1131 call __bn_post4x_internal
1132 call __bn_sqr8x_internal
1133 call __bn_post4x_internal
# Recover the stashed pointers from XMM (raw-encoded movq XMM->GPR).
1135 .byte 102,72,15,126,209
1136 .byte 102,72,15,126,226
1154 .size bn_power5,.-bn_power5
# bn_sqr8x_internal / __bn_sqr8x_internal -- 8-limbs-per-iteration
# modular squaring core: computes the off-diagonal products, doubles
# them with the shift-and-add loop, adds the diagonal squares, then
# falls into __bn_sqr8x_reduction for the Montgomery reduction.
# NOTE(review): heavily elided excerpt -- only scattered instructions of
# each phase are visible; do not treat this listing as complete.
1156 .globl bn_sqr8x_internal
1157 .hidden bn_sqr8x_internal
1158 .type bn_sqr8x_internal,@function
1161 __bn_sqr8x_internal:
# Phase 1: cross products a[i]*a[j], i<j, accumulated into the 2n-limb
# scratch area at 48+8(%rsp).  %rbp/%rcx are negative byte indices.
1236 leaq (%rsi,%r9,1),%rsi
1241 movq -32(%rsi,%rbp,1),%r14
1242 leaq 48+8(%rsp,%r9,2),%rdi
1243 movq -24(%rsi,%rbp,1),%rax
1244 leaq -32(%rdi,%rbp,1),%rdi
1245 movq -16(%rsi,%rbp,1),%rbx
1252 movq %r10,-24(%rdi,%rbp,1)
1258 movq %r11,-16(%rdi,%rbp,1)
1262 movq -8(%rsi,%rbp,1),%rbx
1276 movq %r10,-8(%rdi,%rcx,1)
1281 movq (%rsi,%rcx,1),%rbx
1291 movq 8(%rsi,%rcx,1),%rbx
1301 movq %r11,(%rdi,%rcx,1)
1308 movq 16(%rsi,%rcx,1),%rbx
1317 movq %r10,8(%rdi,%rcx,1)
1324 movq 24(%rsi,%rcx,1),%rbx
1334 movq %r11,16(%rdi,%rcx,1)
1346 movq %r10,-8(%rdi,%rcx,1)
# Second sweep: same indexing pattern but accumulating into the partial
# sums already in the scratch area (addq into (%rdi,...)).
1365 movq -32(%rsi,%rbp,1),%r14
1366 leaq 48+8(%rsp,%r9,2),%rdi
1367 movq -24(%rsi,%rbp,1),%rax
1368 leaq -32(%rdi,%rbp,1),%rdi
1369 movq -16(%rsi,%rbp,1),%rbx
1373 movq -24(%rdi,%rbp,1),%r10
1377 movq %r10,-24(%rdi,%rbp,1)
1384 addq -16(%rdi,%rbp,1),%r11
1387 movq %r11,-16(%rdi,%rbp,1)
1391 movq -8(%rsi,%rbp,1),%rbx
1396 addq -8(%rdi,%rbp,1),%r12
1407 movq %r10,-8(%rdi,%rbp,1)
1414 movq (%rsi,%rcx,1),%rbx
1420 addq (%rdi,%rcx,1),%r13
1427 movq 8(%rsi,%rcx,1),%rbx
1435 movq %r11,(%rdi,%rcx,1)
1439 addq 8(%rdi,%rcx,1),%r12
1450 movq %r10,-8(%rdi,%rcx,1)
1471 leaq 48+8(%rsp,%r9,2),%rdi
1473 leaq -32(%rdi,%rbp,1),%rdi
# Phase 2: double the cross products and add the squares a[i]^2.
# leaq (reg,reg2,2) computes 2*partial + carry without touching flags.
1532 movq -16(%rsi,%rbp,1),%rax
1533 leaq 48+8(%rsp),%rdi
1537 leaq (%r14,%r10,2),%r12
1539 leaq (%rcx,%r11,2),%r13
1548 movq -8(%rsi,%rbp,1),%rax
1552 leaq (%r14,%r10,2),%rbx
1556 leaq (%rcx,%r11,2),%r8
1565 movq 0(%rsi,%rbp,1),%rax
1572 jmp .Lsqr4x_shift_n_add
1575 .Lsqr4x_shift_n_add:
1576 leaq (%r14,%r10,2),%r12
1578 leaq (%rcx,%r11,2),%r13
1587 movq -8(%rsi,%rbp,1),%rax
1591 leaq (%r14,%r10,2),%rbx
1595 leaq (%rcx,%r11,2),%r8
1604 movq 0(%rsi,%rbp,1),%rax
1608 leaq (%r14,%r10,2),%r12
1612 leaq (%rcx,%r11,2),%r13
1621 movq 8(%rsi,%rbp,1),%rax
1625 leaq (%r14,%r10,2),%rbx
1629 leaq (%rcx,%r11,2),%r8
1638 movq 16(%rsi,%rbp,1),%rax
1645 jnz .Lsqr4x_shift_n_add
# Loop tail: last doubled limbs outside the counted loop.
1647 leaq (%r14,%r10,2),%r12
1650 leaq (%rcx,%r11,2),%r13
1663 leaq (%r14,%r10,2),%rbx
1667 leaq (%rcx,%r11,2),%r8
# Raw-encoded movq XMM->GPR: recover a stashed pointer before reduction.
1676 .byte 102,72,15,126,213
# Phase 3: Montgomery reduction of the 2n-limb square, 8 limbs per pass.
1677 __bn_sqr8x_reduction:
1679 leaq (%r9,%rbp,1),%rcx
1680 leaq 48+8(%rsp,%r9,2),%rdx
1682 leaq 48+8(%rsp,%r9,1),%rdi
1685 jmp .L8x_reduction_loop
1688 .L8x_reduction_loop:
1689 leaq (%rdi,%r9,1),%rdi
# m = t[0] * n0 (n0 cached at 32+8(%rsp)).
1704 imulq 32+8(%rsp),%rbx
1722 movq %rbx,48-8+8(%rsp,%rcx,8)
1731 movq 32+8(%rsp),%rsi
1797 movq 48+56+8(%rsp),%rbx
1861 movq 48-16+8(%rsp,%rcx,8),%rbx
1877 movq 48+56+8(%rsp),%rbx
# Raw-encoded movq XMM->GPR pair: restore stashed pointers.
1920 .byte 102,72,15,126,213
1924 .byte 102,73,15,126,217
1934 jb .L8x_reduction_loop
1936 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# __bn_post4x_internal -- post-reduction fix-up: conditionally subtracts
# the modulus from the reduced result (shared .Lsqr4x_sub loop) so the
# output is fully reduced.  Body largely elided in this excerpt.
1937 .type __bn_post4x_internal,@function
1939 __bn_post4x_internal:
1941 leaq (%rdi,%r9,1),%rbx
# Raw-encoded movq XMM->GPR: recover pointers stashed by the caller.
1943 .byte 102,72,15,126,207
1945 .byte 102,72,15,126,206
1952 jmp .Lsqr4x_sub_entry
1990 .size __bn_post4x_internal,.-__bn_post4x_internal
# bn_from_montgomery -- public wrapper converting out of Montgomery
# representation.  Its entire body falls in lines elided from this
# excerpt; presumably it dispatches to bn_from_mont8x below -- confirm
# against the pristine generated file.
1991 .globl bn_from_montgomery
1992 .type bn_from_montgomery,@function
1999 .size bn_from_montgomery,.-bn_from_montgomery
# bn_from_mont8x -- converts a value out of Montgomery form using the
# 8x squaring/reduction machinery: copies the input into the scratch
# frame (zero-padding the upper half), runs one reduction, then wipes
# the scratch.  Picks the MULX path when the CPU supports it.
# NOTE(review): partial excerpt; gaps throughout.
2001 .type bn_from_mont8x,@function
2015 leaq (%r9,%r9,2),%r10
2026 leaq -320(%rsp,%r9,2),%r11
2033 leaq -320(%rbp,%r9,2),%rbp
2038 leaq 4096-320(,%r9,2),%r10
2039 leaq -320(%rbp,%r9,2),%rbp
2049 leaq (%r11,%rbp,1),%rsp
2053 jmp .Lfrom_page_walk_done
# Page-at-a-time stack probe.
2056 leaq -4096(%rsp),%rsp
2060 .Lfrom_page_walk_done:
# Copy input (movdqu loads) while zeroing the upper n limbs of the
# 2n-limb scratch (%xmm0 stores at (%rax,%r9)).
2085 movdqu 16(%rsi),%xmm2
2086 movdqu 32(%rsi),%xmm3
2087 movdqa %xmm0,(%rax,%r9,1)
2088 movdqu 48(%rsi),%xmm4
2089 movdqa %xmm0,16(%rax,%r9,1)
# Raw encoding of leaq 64(%rsi),%rsi (48 8D B6 40 00 00 00).
2090 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2092 movdqa %xmm0,32(%rax,%r9,1)
2093 movdqa %xmm2,16(%rax)
2094 movdqa %xmm0,48(%rax,%r9,1)
2095 movdqa %xmm3,32(%rax)
2096 movdqa %xmm4,48(%rax)
# Stash pointers in XMM (raw-encoded movq GPR->XMM).
2101 .byte 102,72,15,110,207
2102 .byte 102,72,15,110,209
2105 .byte 102,73,15,110,218
# Capability check selects the MULX/AD*X reduction below.
2106 movl OPENSSL_ia32cap_P+8(%rip),%r11d
2111 leaq (%rax,%r9,1),%rdi
2112 call __bn_sqrx8x_reduction
2113 call __bn_postx4x_internal
2118 jmp .Lfrom_mont_zero
# Fallback: plain (non-MULX) reduction path.
2122 call __bn_sqr8x_reduction
2123 call __bn_post4x_internal
2128 jmp .Lfrom_mont_zero
# Scrub the scratch frame before returning (no secrets left on stack).
2132 movdqa %xmm0,0(%rax)
2133 movdqa %xmm0,16(%rax)
2134 movdqa %xmm0,32(%rax)
2135 movdqa %xmm0,48(%rax)
2138 jnz .Lfrom_mont_zero
2150 .size bn_from_mont8x,.-bn_from_mont8x
# bn_mulx4x_mont_gather5 -- MULX/AD*X (BMI2/ADX) variant of the 4x
# gathered Montgomery multiplication; this excerpt shows only the frame
# setup, stack page-walk, and the call into mulx4x_internal.
2151 .type bn_mulx4x_mont_gather5,@function
2153 bn_mulx4x_mont_gather5:
2165 leaq (%r9,%r9,2),%r10
2178 leaq -320(%rsp,%r9,2),%r11
2185 leaq -320(%rbp,%r9,2),%rbp
2189 leaq 4096-320(,%r9,2),%r10
2190 leaq -320(%rbp,%r9,2),%rbp
2200 leaq (%r11,%rbp,1),%rsp
2203 ja .Lmulx4x_page_walk
2204 jmp .Lmulx4x_page_walk_done
# Page-at-a-time stack probe.
2207 leaq -4096(%rsp),%rsp
2210 ja .Lmulx4x_page_walk
2211 .Lmulx4x_page_walk_done:
2228 call mulx4x_internal
2242 .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
# mulx4x_internal -- MULX/AD*X inner worker: mask-table build,
# constant-time gather of the multiplicand power, then 4-way
# multiply/reduce using flag-independent mulx plus adcx/adox carry
# chains (carry instructions largely elided in this excerpt).
# NOTE(review): partial excerpt; verify against pristine output.
2244 .type mulx4x_internal,@function
# Cache end-of-table and output pointers in the frame.
2252 leaq 128(%rdx,%r9,1),%r13
2256 leaq .Linc(%rip),%rax
2257 movq %r13,16+8(%rsp)
2259 movq %rdi,56+8(%rsp)
2260 movdqa 0(%rax),%xmm0
2261 movdqa 16(%rax),%xmm1
2262 leaq 88-112(%rsp,%r10,1),%r10
# Broadcast the secret index for mask generation.
2265 pshufd $0,%xmm5,%xmm5
# Build the 16 x 16-byte selection-mask table.
2275 movdqa %xmm0,112(%r10)
2280 movdqa %xmm1,128(%r10)
2285 movdqa %xmm2,144(%r10)
2290 movdqa %xmm3,160(%r10)
2294 movdqa %xmm0,176(%r10)
2299 movdqa %xmm1,192(%r10)
2304 movdqa %xmm2,208(%r10)
2309 movdqa %xmm3,224(%r10)
2313 movdqa %xmm0,240(%r10)
2318 movdqa %xmm1,256(%r10)
2323 movdqa %xmm2,272(%r10)
2328 movdqa %xmm3,288(%r10)
2333 movdqa %xmm0,304(%r10)
2337 movdqa %xmm1,320(%r10)
2340 movdqa %xmm2,336(%r10)
2345 movdqa %xmm3,352(%r10)
# Gather: AND every table entry at (%rdi) with its mask slot so only the
# selected power survives (address pattern independent of the index).
2346 pand 112(%rdi),%xmm3
2349 movdqa -128(%rdi),%xmm4
2350 movdqa -112(%rdi),%xmm5
2351 movdqa -96(%rdi),%xmm2
2352 pand 112(%r10),%xmm4
2353 movdqa -80(%rdi),%xmm3
2354 pand 128(%r10),%xmm5
2356 pand 144(%r10),%xmm2
2358 pand 160(%r10),%xmm3
2361 movdqa -64(%rdi),%xmm4
2362 movdqa -48(%rdi),%xmm5
2363 movdqa -32(%rdi),%xmm2
2364 pand 176(%r10),%xmm4
2365 movdqa -16(%rdi),%xmm3
2366 pand 192(%r10),%xmm5
2368 pand 208(%r10),%xmm2
2370 pand 224(%r10),%xmm3
2373 movdqa 0(%rdi),%xmm4
2374 movdqa 16(%rdi),%xmm5
2375 movdqa 32(%rdi),%xmm2
2376 pand 240(%r10),%xmm4
2377 movdqa 48(%rdi),%xmm3
2378 pand 256(%r10),%xmm5
2380 pand 272(%r10),%xmm2
2382 pand 288(%r10),%xmm3
2386 pshufd $0x4e,%xmm0,%xmm1
# Raw-encoded movq %xmm0,%rdx: mulx takes its implicit operand in %rdx.
2389 .byte 102,72,15,126,194
2390 leaq 64+32+8(%rsp),%rbx
# First 4-limb row: b[0] * a[0..3]; mulx leaves flags untouched.
2393 mulxq 0(%rsi),%r8,%rax
2394 mulxq 8(%rsi),%r11,%r12
2396 mulxq 16(%rsi),%rax,%r13
2399 mulxq 24(%rsi),%rax,%r14
# m = t[0] * n0 (n0 cached at 32+8(%rsp)).
2402 imulq 32+8(%rsp),%r8
# Reduction row: m * n[0..3].
2412 mulxq 0(%rcx),%rax,%r10
2415 mulxq 8(%rcx),%rax,%r11
2418 mulxq 16(%rcx),%rax,%r12
2419 movq 24+8(%rsp),%rdi
2423 mulxq 24(%rcx),%rax,%r15
# Subsequent rows: b[i] * a[0..3] then m * n[0..3], iterated.
2435 mulxq 0(%rsi),%r10,%rax
2437 mulxq 8(%rsi),%r11,%r14
2439 mulxq 16(%rsi),%r12,%rax
2441 mulxq 24(%rsi),%r13,%r14
2450 mulxq 0(%rcx),%rax,%r15
2453 mulxq 8(%rcx),%rax,%r15
2456 mulxq 16(%rcx),%rax,%r15
2461 mulxq 24(%rcx),%rax,%r15
2474 leaq (%rsi,%rax,1),%rsi
# Gather the next power for the outer loop (mask slots at 256..496(%r10)).
2483 leaq 16-256(%rbx),%r10
2487 movdqa -128(%rdi),%xmm0
2488 movdqa -112(%rdi),%xmm1
2489 movdqa -96(%rdi),%xmm2
2490 pand 256(%r10),%xmm0
2491 movdqa -80(%rdi),%xmm3
2492 pand 272(%r10),%xmm1
2494 pand 288(%r10),%xmm2
2496 pand 304(%r10),%xmm3
2499 movdqa -64(%rdi),%xmm0
2500 movdqa -48(%rdi),%xmm1
2501 movdqa -32(%rdi),%xmm2
2502 pand 320(%r10),%xmm0
2503 movdqa -16(%rdi),%xmm3
2504 pand 336(%r10),%xmm1
2506 pand 352(%r10),%xmm2
2508 pand 368(%r10),%xmm3
2511 movdqa 0(%rdi),%xmm0
2512 movdqa 16(%rdi),%xmm1
2513 movdqa 32(%rdi),%xmm2
2514 pand 384(%r10),%xmm0
2515 movdqa 48(%rdi),%xmm3
2516 pand 400(%r10),%xmm1
2518 pand 416(%r10),%xmm2
2520 pand 432(%r10),%xmm3
2523 movdqa 64(%rdi),%xmm0
2524 movdqa 80(%rdi),%xmm1
2525 movdqa 96(%rdi),%xmm2
2526 pand 448(%r10),%xmm0
2527 movdqa 112(%rdi),%xmm3
2528 pand 464(%r10),%xmm1
2530 pand 480(%r10),%xmm2
2532 pand 496(%r10),%xmm3
2536 pshufd $0x4e,%xmm4,%xmm0
# Raw-encoded movq %xmm0,%rdx (next multiplicand word into %rdx).
2539 .byte 102,72,15,126,194
2542 leaq 32(%rbx,%rax,1),%rbx
# Inner body with adox accumulation of previous-round partials.
2543 mulxq 0(%rsi),%r8,%r11
2546 mulxq 8(%rsi),%r14,%r12
2549 mulxq 16(%rsi),%r15,%r13
2550 adoxq -24(%rbx),%r11
2552 mulxq 24(%rsi),%rdx,%r14
2553 adoxq -16(%rbx),%r12
2555 leaq (%rcx,%rax,1),%rcx
2562 imulq 32+8(%rsp),%r8
2568 mulxq 0(%rcx),%rax,%r10
2571 mulxq 8(%rcx),%rax,%r11
2574 mulxq 16(%rcx),%rax,%r12
2577 mulxq 24(%rcx),%rax,%r15
2579 movq 24+8(%rsp),%rdi
2590 mulxq 0(%rsi),%r10,%rax
2593 mulxq 8(%rsi),%r11,%r14
2596 mulxq 16(%rsi),%r12,%rax
2599 mulxq 24(%rsi),%r13,%r14
2610 mulxq 0(%rcx),%rax,%r15
2613 mulxq 8(%rcx),%rax,%r15
2616 mulxq 16(%rcx),%rax,%r15
2621 mulxq 24(%rcx),%rax,%r15
2636 movq 16+8(%rsp),%r10
2638 leaq (%rsi,%rax,1),%rsi
2647 movq (%rcx,%rax,1),%r12
2648 leaq (%rcx,%rax,1),%rbp
2650 leaq (%rbx,%rax,1),%rdi
2658 movq 56+8(%rsp),%rdx
# Tail: share the mulx-path conditional-subtract loop.
2664 jmp .Lsqrx4x_sub_entry
2665 .size mulx4x_internal,.-mulx4x_internal
# bn_powerx5 -- MULX/AD*X counterpart of bn_power5: five modular
# squarings (a^32) followed by one gathered multiplication via
# mulx4x_internal.  NOTE(review): partial excerpt.
2666 .type bn_powerx5,@function
2680 leaq (%r9,%r9,2),%r10
2691 leaq -320(%rsp,%r9,2),%r11
2698 leaq -320(%rbp,%r9,2),%rbp
2703 leaq 4096-320(,%r9,2),%r10
2704 leaq -320(%rbp,%r9,2),%rbp
2714 leaq (%r11,%rbp,1),%rsp
2718 jmp .Lpwrx_page_walk_done
# Page-at-a-time stack probe.
2721 leaq -4096(%rsp),%rsp
2725 .Lpwrx_page_walk_done:
# Stash pointer args in XMM across the calls (raw-encoded movq GPR->XMM).
2742 .byte 102,72,15,110,207
2743 .byte 102,72,15,110,209
2744 .byte 102,73,15,110,218
2745 .byte 102,72,15,110,226
# 2^5 window: five squarings back to back.
2750 call __bn_sqrx8x_internal
2751 call __bn_postx4x_internal
2752 call __bn_sqrx8x_internal
2753 call __bn_postx4x_internal
2754 call __bn_sqrx8x_internal
2755 call __bn_postx4x_internal
2756 call __bn_sqrx8x_internal
2757 call __bn_postx4x_internal
2758 call __bn_sqrx8x_internal
2759 call __bn_postx4x_internal
# Recover stashed pointers, then multiply by the gathered power.
2763 .byte 102,72,15,126,209
2764 .byte 102,72,15,126,226
2767 call mulx4x_internal
2781 .size bn_powerx5,.-bn_powerx5
# bn_sqrx8x_internal / __bn_sqrx8x_internal -- MULX/AD*X squaring core:
# zeroes the 2n-limb scratch, computes cross products 8 limbs per outer
# pass, doubles them (shift-n-add) while adding diagonal squares, then
# falls into __bn_sqrx8x_reduction for Montgomery reduction.
# NOTE(review): heavily elided excerpt; many carry-chain instructions
# between the visible ones are missing from this listing.
2783 .globl bn_sqrx8x_internal
2784 .hidden bn_sqrx8x_internal
2785 .type bn_sqrx8x_internal,@function
2788 __bn_sqrx8x_internal:
2829 leaq 48+8(%rsp),%rdi
2830 leaq (%rsi,%r9,1),%rbp
2833 jmp .Lsqr8x_zero_start
# Multi-byte NOP (66 66 66 2E 0F 1F ...) used purely for alignment.
2836 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
# Zero the scratch area 128 bytes per iteration.
2839 movdqa %xmm0,0(%rdi)
2840 movdqa %xmm0,16(%rdi)
2841 movdqa %xmm0,32(%rdi)
2842 movdqa %xmm0,48(%rdi)
2844 movdqa %xmm0,64(%rdi)
2845 movdqa %xmm0,80(%rdi)
2846 movdqa %xmm0,96(%rdi)
2847 movdqa %xmm0,112(%rdi)
2860 leaq 48+8(%rsp),%rdi
2862 jmp .Lsqrx8x_outer_loop
2865 .Lsqrx8x_outer_loop:
# Cross products a[0]*a[1..7]; the raw .byte lines are mulx forms with
# 32-bit displacements, encoded to keep a fixed instruction length.
2866 mulxq 8(%rsi),%r8,%rax
2869 mulxq 16(%rsi),%r9,%rax
2872 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
2875 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
2878 mulxq 40(%rsi),%r12,%rax
2881 mulxq 48(%rsi),%r13,%rax
2884 mulxq 56(%rsi),%r14,%r15
# a[1]*a[2..7], a[2]*a[3..7], ... triangle of products.
2895 mulxq 16(%rsi),%r8,%rbx
2896 mulxq 24(%rsi),%r9,%rax
2899 mulxq 32(%rsi),%r10,%rbx
2902 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
2905 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
2908 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
2919 mulxq 24(%rsi),%r8,%rbx
2920 mulxq 32(%rsi),%r9,%rax
2923 mulxq 40(%rsi),%r10,%rbx
2926 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
2929 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
2937 mulxq 32(%rsi),%r8,%rax
2941 mulxq 40(%rsi),%r9,%rbx
2944 mulxq 48(%rsi),%r10,%rax
2947 mulxq 56(%rsi),%r11,%r12
2963 mulxq %r15,%r10,%rbx
2973 mulxq %r15,%r14,%rbx
2988 je .Lsqrx8x_outer_break
# Outer-loop continuation: cross terms against earlier limbs at (%rbp).
3006 movq %rax,16+8(%rsp)
3007 movq %rdi,24+8(%rsp)
3016 mulxq 0(%rbp),%rax,%r8
3020 mulxq 8(%rbp),%rax,%r9
3024 mulxq 16(%rbp),%rax,%r10
3028 mulxq 24(%rbp),%rax,%r11
3032 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3036 mulxq 40(%rbp),%rax,%r13
3040 mulxq 48(%rbp),%rax,%r14
3041 movq %rbx,(%rdi,%rcx,8)
3046 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
3047 movq 8(%rsi,%rcx,8),%rdx
3061 subq 16+8(%rsp),%rbx
3076 movq %rax,16+8(%rsp)
3082 subq 16+8(%rsp),%rbx
3084 movq 24+8(%rsp),%rcx
3095 je .Lsqrx8x_outer_loop
3112 jmp .Lsqrx8x_outer_loop
3115 .Lsqrx8x_outer_break:
# Raw-encoded movq XMM->GPR: recover a stashed pointer.
3117 .byte 102,72,15,126,217
# Doubling phase: 2*cross + diagonal a[i]^2 via mulx %rdx,%rdx-style
# self-products.
3123 leaq 48+8(%rsp),%rdi
3124 movq (%rsi,%rcx,1),%rdx
3135 .Lsqrx4x_shift_n_add:
3136 mulxq %rdx,%rax,%rbx
# Raw encodings of movq 8(%rsi,%rcx),%rdx and movq 32(%rdi),%r10
# (fixed-length forms).
3139 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
3140 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3147 mulxq %rdx,%rax,%rbx
3150 movq 16(%rsi,%rcx,1),%rdx
3158 mulxq %rdx,%rax,%rbx
3161 movq 24(%rsi,%rcx,1),%rdx
3170 mulxq %rdx,%rax,%rbx
# jrcxz: loop exit without disturbing the carry flags in use.
3173 jrcxz .Lsqrx4x_shift_n_add_break
3174 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3183 jmp .Lsqrx4x_shift_n_add
3186 .Lsqrx4x_shift_n_add_break:
# Raw-encoded movq XMM->GPR before the reduction phase.
3191 .byte 102,72,15,126,213
# Montgomery reduction of the 2n-limb square, mulx flavour.
3192 __bn_sqrx8x_reduction:
3194 movq 32+8(%rsp),%rbx
3195 movq 48+8(%rsp),%rdx
3196 leaq -64(%rbp,%r9,1),%rcx
3201 leaq 48+8(%rsp),%rdi
3202 jmp .Lsqrx8x_reduction_loop
3205 .Lsqrx8x_reduction_loop:
3215 movq %rax,24+8(%rsp)
# m * n[0..7] row (one .byte line is the 32-bit-displacement mulx form).
3225 mulxq 0(%rbp),%rax,%r8
3229 mulxq 8(%rbp),%rbx,%r9
3233 mulxq 16(%rbp),%rbx,%r10
3237 mulxq 24(%rbp),%rbx,%r11
3241 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
3247 mulxq 32+8(%rsp),%rbx,%rdx
3249 movq %rax,64+48+8(%rsp,%rcx,8)
3251 mulxq 40(%rbp),%rax,%r13
3255 mulxq 48(%rbp),%rax,%r14
3259 mulxq 56(%rbp),%rax,%r15
# 67 67 67: address-size prefixes used as alignment padding.
3265 .byte 0x67,0x67,0x67
3271 jae .Lsqrx8x_no_tail
3273 movq 48+8(%rsp),%rdx
3288 movq %rax,16+8(%rsp)
# Tail pass over the remaining limbs.
3294 mulxq 0(%rbp),%rax,%r8
3298 mulxq 8(%rbp),%rax,%r9
3302 mulxq 16(%rbp),%rax,%r10
3306 mulxq 24(%rbp),%rax,%r11
3310 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3314 mulxq 40(%rbp),%rax,%r13
3318 mulxq 48(%rbp),%rax,%r14
3322 mulxq 56(%rbp),%rax,%r15
3326 movq 72+48+8(%rsp,%rcx,8),%rdx
3326 movq %rbx,(%rdi,%rcx,8)
3334 jae .Lsqrx8x_tail_done
3336 subq 16+8(%rsp),%rsi
3337 movq 48+8(%rsp),%rdx
3352 movq %rax,16+8(%rsp)
3368 subq 16+8(%rsp),%rsi
# Raw-encoded movq XMM->GPR pair: restore stashed pointers.
3371 .byte 102,72,15,126,217
3374 .byte 102,72,15,126,213
3383 movq 32+8(%rsp),%rbx
3384 movq 64(%rdi,%rcx,1),%rdx
3396 leaq 64(%rdi,%rcx,1),%rdi
3398 jb .Lsqrx8x_reduction_loop
3400 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal
# __bn_postx4x_internal -- mulx-path post-reduction fix-up: conditional
# subtraction of the modulus using BMI andn to build branch-free masks
# (constant time).  Body largely elided in this excerpt.
3402 __bn_postx4x_internal:
# Raw-encoded movq XMM->GPR: recover pointers stashed by the caller.
3409 .byte 102,72,15,126,202
3410 .byte 102,72,15,126,206
3416 jmp .Lsqrx4x_sub_entry
# Branch-free select: andn masks the limbs with the borrow-derived %rax.
3425 andnq %rax,%r12,%r12
3427 andnq %rax,%r13,%r13
3428 andnq %rax,%r14,%r14
3429 andnq %rax,%r15,%r15
3450 .size __bn_postx4x_internal,.-__bn_postx4x_internal
# bn_get_bits5 -- extracts a 5-bit window from a big number; only the
# 16-bit straddling load is visible here (the shift/mask lines are
# elided from this excerpt).
3452 .type bn_get_bits5,@function
3464 movzwl (%r10,%rsi,2),%eax
3468 .size bn_get_bits5,.-bn_get_bits5
# bn_scatter5 -- stores a value into slot %rcx of the power table at
# (%rdx) (the "scatter" half of scatter/gather); zero-length input is a
# no-op via the early jump.  Store loop elided in this excerpt.
3471 .type bn_scatter5,@function
3475 jz .Lscatter_epilogue
3476 leaq (%rdx,%rcx,8),%rdx
3486 .size bn_scatter5,.-bn_scatter5
# bn_gather5 -- standalone cache-timing-safe table lookup: builds the
# selection-mask table on a freshly reserved stack area, then ANDs every
# table entry at (%r11) with its mask so only the wanted power survives.
# The .LSEH_* labels bound the region for Win64 structured exception
# handling metadata.
# NOTE(review): partial excerpt of the generated file.
3489 .type bn_gather5,@function
3492 .LSEH_begin_bn_gather5:
# Raw encodings: leaq (%rsp),%r10 then subq $0x108,%rsp -- fixed-length
# forms so the SEH prologue size is stable.
3494 .byte 0x4c,0x8d,0x14,0x24
3495 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
3496 leaq .Linc(%rip),%rax
3500 movdqa 0(%rax),%xmm0
3501 movdqa 16(%rax),%xmm1
# Broadcast the secret index to all lanes.
3505 pshufd $0,%xmm5,%xmm5
# Fill the 16 x 16-byte mask table around %rax.
3514 movdqa %xmm0,-128(%rax)
3519 movdqa %xmm1,-112(%rax)
3524 movdqa %xmm2,-96(%rax)
3528 movdqa %xmm3,-80(%rax)
3533 movdqa %xmm0,-64(%rax)
3538 movdqa %xmm1,-48(%rax)
3543 movdqa %xmm2,-32(%rax)
3547 movdqa %xmm3,-16(%rax)
3552 movdqa %xmm0,0(%rax)
3557 movdqa %xmm1,16(%rax)
3562 movdqa %xmm2,32(%rax)
3566 movdqa %xmm3,48(%rax)
3571 movdqa %xmm0,64(%rax)
3576 movdqa %xmm1,80(%rax)
3581 movdqa %xmm2,96(%rax)
3583 movdqa %xmm3,112(%rax)
# Gather: scan all 32 table entries, keep only the masked one (memory
# access pattern independent of the index).
3590 movdqa -128(%r11),%xmm0
3591 movdqa -112(%r11),%xmm1
3592 movdqa -96(%r11),%xmm2
3593 pand -128(%rax),%xmm0
3594 movdqa -80(%r11),%xmm3
3595 pand -112(%rax),%xmm1
3597 pand -96(%rax),%xmm2
3599 pand -80(%rax),%xmm3
3602 movdqa -64(%r11),%xmm0
3603 movdqa -48(%r11),%xmm1
3604 movdqa -32(%r11),%xmm2
3605 pand -64(%rax),%xmm0
3606 movdqa -16(%r11),%xmm3
3607 pand -48(%rax),%xmm1
3609 pand -32(%rax),%xmm2
3611 pand -16(%rax),%xmm3
3614 movdqa 0(%r11),%xmm0
3615 movdqa 16(%r11),%xmm1
3616 movdqa 32(%r11),%xmm2
3618 movdqa 48(%r11),%xmm3
3626 movdqa 64(%r11),%xmm0
3627 movdqa 80(%r11),%xmm1
3628 movdqa 96(%r11),%xmm2
3630 movdqa 112(%r11),%xmm3
3635 pand 112(%rax),%xmm3
# Fold the selected halves together for output.
3640 pshufd $0x4e,%xmm4,%xmm0
3649 .LSEH_end_bn_gather5:
3650 .size bn_gather5,.-bn_gather5
# ASCII identification string: "Montgomery Multiplication with
# scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" (NUL
# terminated), embedded as data for provenance.
3655 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0