2 /* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
# ----------------------------------------------------------------------
# bn_mul_mont_gather5 — Montgomery multiplication combined with a
# cache-timing-safe "gather" of a 5-bit-windowed power table (per the
# CRYPTOGAMS identification string at the end of this file).
# NOTE(review): this listing is non-contiguous (generated file, many
# lines elided); comments below are hedged where surrounding code is
# not visible.  AT&T syntax; SysV AMD64 ABI assumed — TODO confirm.
# ----------------------------------------------------------------------
7 .globl bn_mul_mont_gather5
8 .type bn_mul_mont_gather5,@function
14 .cfi_def_cfa_register %rax
# Reads OPENSSL_ia32cap_P capability word — presumably to dispatch to a
# CPU-feature-specific path (e.g. MULX/AD*X); dispatch target not visible.
17 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Stack frame sized by %r9 (likely the word count); the .Lmul_page_walk
# labels suggest a guard-page walk when growing the stack — confirm.
38 leaq -280(%rsp,%r9,8),%r10
52 leaq (%r10,%r11,1),%rsp
56 jmp .Lmul_page_walk_done
66 movq %rax,8(%rsp,%r9,8)
# DWARF expression locating the CFA relative to the saved frame.
67 .cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
73 leaq 24-112(%rsp,%r9,8),%r10
# The movdqa stores at 112..352(%r10) scatter selection masks (built from
# %xmm0-%xmm3) across the on-stack table used for constant-time gather.
85 movdqa %xmm0,112(%r10)
90 movdqa %xmm1,128(%r10)
95 movdqa %xmm2,144(%r10)
100 movdqa %xmm3,160(%r10)
104 movdqa %xmm0,176(%r10)
109 movdqa %xmm1,192(%r10)
114 movdqa %xmm2,208(%r10)
119 movdqa %xmm3,224(%r10)
123 movdqa %xmm0,240(%r10)
128 movdqa %xmm1,256(%r10)
133 movdqa %xmm2,272(%r10)
138 movdqa %xmm3,288(%r10)
142 movdqa %xmm0,304(%r10)
147 movdqa %xmm1,320(%r10)
150 movdqa %xmm2,336(%r10)
155 movdqa %xmm3,352(%r10)
# Load all candidate table entries around %r12; the (elided) pand/por
# sequence presumably selects one entry without a data-dependent branch.
159 movdqa -128(%r12),%xmm4
160 movdqa -112(%r12),%xmm5
161 movdqa -96(%r12),%xmm2
163 movdqa -80(%r12),%xmm3
171 movdqa -64(%r12),%xmm4
172 movdqa -48(%r12),%xmm5
173 movdqa -32(%r12),%xmm2
175 movdqa -16(%r12),%xmm3
184 movdqa 16(%r12),%xmm5
185 movdqa 32(%r12),%xmm2
187 movdqa 48(%r12),%xmm3
196 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded SSE2 move (bytes 66 48 0f 7e c3); appears to be
# movq %xmm0,%rbx — emitted as .byte, presumably for old assemblers.
199 .byte 102,72,15,126,195
# Main reduction/multiply loop fragments: %r15 indexes words of a(%rsi)
# and n(%rcx); partial results spill to the stack frame.
227 movq (%rsi,%r15,8),%rax
232 movq %r13,-16(%rsp,%r15,8)
238 movq (%rcx,%r15,8),%rax
252 movq %r13,-16(%rsp,%r9,8)
259 movq %r13,-8(%rsp,%r9,8)
260 movq %rdx,(%rsp,%r9,8)
266 leaq 24+128(%rsp,%r9,8),%rdx
# Second constant-time gather: AND each table entry with its mask.
270 movdqa -128(%r12),%xmm0
271 movdqa -112(%r12),%xmm1
272 movdqa -96(%r12),%xmm2
273 movdqa -80(%r12),%xmm3
274 pand -128(%rdx),%xmm0
275 pand -112(%rdx),%xmm1
282 movdqa -64(%r12),%xmm0
283 movdqa -48(%r12),%xmm1
284 movdqa -32(%r12),%xmm2
285 movdqa -16(%r12),%xmm3
295 movdqa 16(%r12),%xmm1
296 movdqa 32(%r12),%xmm2
297 movdqa 48(%r12),%xmm3
306 movdqa 64(%r12),%xmm0
307 movdqa 80(%r12),%xmm1
308 movdqa 96(%r12),%xmm2
309 movdqa 112(%r12),%xmm3
319 pshufd $0x4e,%xmm4,%xmm0
324 .byte 102,72,15,126,195
351 movq (%rsi,%r15,8),%rax
354 movq (%rsp,%r15,8),%r10
356 movq %r13,-16(%rsp,%r15,8)
362 movq (%rcx,%r15,8),%rax
376 movq (%rsp,%r9,8),%r10
378 movq %r13,-16(%rsp,%r9,8)
386 movq %r13,-8(%rsp,%r9,8)
387 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction of the modulus n (%rcx) from the result,
# writing to rp (%rdi); sbb chain propagates the borrow.
399 .Lsub: sbbq (%rcx,%r14,8),%rax
400 movq %rax,(%rdi,%r14,8)
401 movq 8(%rsi,%r14,8),%rax
413 movq (%rdi,%r14,8),%rcx
414 movq (%rsp,%r14,8),%rdx
417 movq %r14,(%rsp,%r14,8)
419 movq %rdx,(%rdi,%r14,8)
424 movq 8(%rsp,%r9,8),%rsi
441 .cfi_def_cfa_register %rsp
445 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# ----------------------------------------------------------------------
# bn_mul4x_mont_gather5 — 4-way unrolled variant of the Montgomery
# multiply-with-gather entry point; visible lines are only the frame
# setup / stack page-walk and CFI bookkeeping (body elided in this
# listing; it presumably tail-calls mul4x_internal — confirm).
# ----------------------------------------------------------------------
446 .type bn_mul4x_mont_gather5,@function
448 bn_mul4x_mont_gather5:
452 .cfi_def_cfa_register %rax
# %r10 = 3*%r9 — frame sizing based on the word count in %r9.
473 leaq (%r9,%r9,2),%r10
485 leaq -320(%rsp,%r9,2),%r11
492 leaq -320(%rbp,%r9,2),%rbp
497 leaq 4096-320(,%r9,2),%r10
498 leaq -320(%rbp,%r9,2),%rbp
508 leaq (%r11,%rbp,1),%rsp
512 jmp .Lmul4x_page_walk_done
# Page-walk: touch the stack one 4096-byte page at a time so guard
# pages are not skipped when the frame is large.
515 leaq -4096(%rsp),%rsp
519 .Lmul4x_page_walk_done:
524 .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
546 .cfi_def_cfa_register %rsp
550 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# ----------------------------------------------------------------------
# mul4x_internal — internal 4-way Montgomery multiplication worker.
# Builds the on-stack mask table from .Linc, performs constant-time
# gathers from the power table at %r12, and runs the unrolled
# multiply/reduce loops.  Listing is non-contiguous; comments hedged.
# ----------------------------------------------------------------------
552 .type mul4x_internal,@function
558 leaq .Linc(%rip),%rax
559 leaq 128(%rdx,%r9,1),%r13
562 movdqa 16(%rax),%xmm1
563 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast low dword of %xmm5 to all four lanes (selector replication).
566 pshufd $0,%xmm5,%xmm5
# Scatter the selection masks across the on-stack table (same pattern
# as in bn_mul_mont_gather5 above).
576 movdqa %xmm0,112(%r10)
581 movdqa %xmm1,128(%r10)
586 movdqa %xmm2,144(%r10)
591 movdqa %xmm3,160(%r10)
595 movdqa %xmm0,176(%r10)
600 movdqa %xmm1,192(%r10)
605 movdqa %xmm2,208(%r10)
610 movdqa %xmm3,224(%r10)
614 movdqa %xmm0,240(%r10)
619 movdqa %xmm1,256(%r10)
624 movdqa %xmm2,272(%r10)
629 movdqa %xmm3,288(%r10)
633 movdqa %xmm0,304(%r10)
638 movdqa %xmm1,320(%r10)
641 movdqa %xmm2,336(%r10)
646 movdqa %xmm3,352(%r10)
# Gather pass over the windowed power table at %r12.
650 movdqa -128(%r12),%xmm4
651 movdqa -112(%r12),%xmm5
652 movdqa -96(%r12),%xmm2
654 movdqa -80(%r12),%xmm3
662 movdqa -64(%r12),%xmm4
663 movdqa -48(%r12),%xmm5
664 movdqa -32(%r12),%xmm2
666 movdqa -16(%r12),%xmm3
675 movdqa 16(%r12),%xmm5
676 movdqa 32(%r12),%xmm2
678 movdqa 48(%r12),%xmm3
687 pshufd $0x4e,%xmm0,%xmm1
# .byte 66 48 0f 7e c3 — appears to encode movq %xmm0,%rbx.
690 .byte 102,72,15,126,195
# %rsi/%rcx advanced to the ends of a[] and n[]; negative index %r9/%r15
# then walks them upward (common perlasm idiom — confirm).
697 leaq (%rsi,%r9,1),%rsi
711 movq 8(%rsi,%r9,1),%rax
723 movq 16(%rsi,%r9,1),%rax
744 movq -8(%rsi,%r15,1),%rax
759 movq (%rsi,%r15,1),%rax
774 movq 8(%rsi,%r15,1),%rax
789 movq 16(%rsi,%r15,1),%rax
824 movq (%rsi,%r9,1),%rax
831 leaq (%rcx,%r9,1),%rcx
842 leaq 16+128(%r14),%rdx
# Masked gather of the next window's multiplicand.
845 movdqa -128(%r12),%xmm0
846 movdqa -112(%r12),%xmm1
847 movdqa -96(%r12),%xmm2
848 movdqa -80(%r12),%xmm3
849 pand -128(%rdx),%xmm0
850 pand -112(%rdx),%xmm1
857 movdqa -64(%r12),%xmm0
858 movdqa -48(%r12),%xmm1
859 movdqa -32(%r12),%xmm2
860 movdqa -16(%r12),%xmm3
870 movdqa 16(%r12),%xmm1
871 movdqa 32(%r12),%xmm2
872 movdqa 48(%r12),%xmm3
881 movdqa 64(%r12),%xmm0
882 movdqa 80(%r12),%xmm1
883 movdqa 96(%r12),%xmm2
884 movdqa 112(%r12),%xmm3
894 pshufd $0x4e,%xmm4,%xmm0
897 .byte 102,72,15,126,195
899 movq (%r14,%r9,1),%r10
910 leaq (%r14,%r9,1),%r14
914 movq 8(%rsi,%r9,1),%rax
928 movq 16(%rsi,%r9,1),%rax
950 movq -8(%rsi,%r15,1),%rax
967 movq (%rsi,%r15,1),%rax
984 movq 8(%rsi,%r15,1),%rax
1001 movq 16(%rsi,%r15,1),%rax
1041 movq (%rsi,%r9,1),%rax
1049 leaq (%rcx,%r9,1),%rcx
# Loop-termination compare against the saved table-end pointer at
# 16+8(%rsp) (stack slot meaning inferred from usage — confirm).
1058 cmpq 16+8(%rsp),%r12
1065 leaq (%r14,%r9,1),%rbx
1070 movq 56+8(%rsp),%rdi
# Tail-merges into the shared final-subtraction code in the sqr path.
1076 jmp .Lsqr4x_sub_entry
1078 .size mul4x_internal,.-mul4x_internal
# ----------------------------------------------------------------------
# bn_power5 — computes a 5-bit-window modular exponentiation step:
# five successive squarings (sqr8x + post4x each time) followed, in the
# elided tail, presumably by a multiply.  Frame setup mirrors the other
# entry points (page-walk, CFI).  Listing non-contiguous; hedged.
# ----------------------------------------------------------------------
1080 .type bn_power5,@function
1085 .cfi_def_cfa_register %rax
# CPU capability check, as in bn_mul_mont_gather5 (dispatch not visible).
1086 movl OPENSSL_ia32cap_P+8(%rip),%r11d
# Standard save of all callee-saved GPRs (pushes themselves elided).
1091 .cfi_offset %rbx,-16
1093 .cfi_offset %rbp,-24
1095 .cfi_offset %r12,-32
1097 .cfi_offset %r13,-40
1099 .cfi_offset %r14,-48
1101 .cfi_offset %r15,-56
1105 leal (%r9,%r9,2),%r10d
1116 leaq -320(%rsp,%r9,2),%r11
1123 leaq -320(%rbp,%r9,2),%rbp
1128 leaq 4096-320(,%r9,2),%r10
1129 leaq -320(%rbp,%r9,2),%rbp
1139 leaq (%r11,%rbp,1),%rsp
1143 jmp .Lpwr_page_walk_done
# Guard-page walk while growing the stack frame.
1146 leaq -4096(%rsp),%rsp
1150 .Lpwr_page_walk_done:
1166 .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
# Hand-encoded movq GPR->XMM moves: stash pointers/values in XMM regs
# so they survive the internal calls without stack traffic (bytes
# decode as movq %rdi,%xmm1 / %rcx,%xmm2 / %r10,%xmm3 / %rsp,%xmm4 —
# decoded by hand, verify).
1168 .byte 102,72,15,110,207
1169 .byte 102,72,15,110,209
1170 .byte 102,73,15,110,218
1171 .byte 102,72,15,110,226
# Five squarings with Montgomery post-processing after each.
1173 call __bn_sqr8x_internal
1174 call __bn_post4x_internal
1175 call __bn_sqr8x_internal
1176 call __bn_post4x_internal
1177 call __bn_sqr8x_internal
1178 call __bn_post4x_internal
1179 call __bn_sqr8x_internal
1180 call __bn_post4x_internal
1181 call __bn_sqr8x_internal
1182 call __bn_post4x_internal
# Restore the stashed XMM-held values back to GPRs.
1184 .byte 102,72,15,126,209
1185 .byte 102,72,15,126,226
1208 .cfi_def_cfa_register %rsp
1212 .size bn_power5,.-bn_power5
# ----------------------------------------------------------------------
# bn_sqr8x_internal / __bn_sqr8x_internal — 8-way unrolled big-number
# squaring kernel; computes the off-diagonal products, doubles them via
# the shift-n-add loop, then falls into __bn_sqr8x_reduction for the
# Montgomery reduction.  Listing non-contiguous; comments hedged.
# ----------------------------------------------------------------------
1214 .globl bn_sqr8x_internal
1215 .hidden bn_sqr8x_internal
1216 .type bn_sqr8x_internal,@function
1219 __bn_sqr8x_internal:
# Point %rsi past the input; negative offsets in %rbp/%rcx then index it.
1295 leaq (%rsi,%r9,1),%rsi
1300 movq -32(%rsi,%rbp,1),%r14
1301 leaq 48+8(%rsp,%r9,2),%rdi
1302 movq -24(%rsi,%rbp,1),%rax
1303 leaq -32(%rdi,%rbp,1),%rdi
1304 movq -16(%rsi,%rbp,1),%rbx
1311 movq %r10,-24(%rdi,%rbp,1)
1317 movq %r11,-16(%rdi,%rbp,1)
1321 movq -8(%rsi,%rbp,1),%rbx
1335 movq %r10,-8(%rdi,%rcx,1)
1340 movq (%rsi,%rcx,1),%rbx
1350 movq 8(%rsi,%rcx,1),%rbx
1360 movq %r11,(%rdi,%rcx,1)
1367 movq 16(%rsi,%rcx,1),%rbx
1376 movq %r10,8(%rdi,%rcx,1)
1383 movq 24(%rsi,%rcx,1),%rbx
1393 movq %r11,16(%rdi,%rcx,1)
1405 movq %r10,-8(%rdi,%rcx,1)
# Second pass: accumulate into the partial results already on the stack
# (addq against (%rdi,...) reads back earlier partials).
1424 movq -32(%rsi,%rbp,1),%r14
1425 leaq 48+8(%rsp,%r9,2),%rdi
1426 movq -24(%rsi,%rbp,1),%rax
1427 leaq -32(%rdi,%rbp,1),%rdi
1428 movq -16(%rsi,%rbp,1),%rbx
1432 movq -24(%rdi,%rbp,1),%r10
1436 movq %r10,-24(%rdi,%rbp,1)
1443 addq -16(%rdi,%rbp,1),%r11
1446 movq %r11,-16(%rdi,%rbp,1)
1450 movq -8(%rsi,%rbp,1),%rbx
1455 addq -8(%rdi,%rbp,1),%r12
1466 movq %r10,-8(%rdi,%rbp,1)
1473 movq (%rsi,%rcx,1),%rbx
1479 addq (%rdi,%rcx,1),%r13
1486 movq 8(%rsi,%rcx,1),%rbx
1494 movq %r11,(%rdi,%rcx,1)
1498 addq 8(%rdi,%rcx,1),%r12
1509 movq %r10,-8(%rdi,%rcx,1)
1530 leaq 48+8(%rsp,%r9,2),%rdi
1532 leaq -32(%rdi,%rbp,1),%rdi
# Doubling phase: leaq (reg,%r10,2) computes 2*x + carry-in, i.e. the
# shift-left-by-one with carry propagation of the off-diagonal sum.
1591 movq -16(%rsi,%rbp,1),%rax
1592 leaq 48+8(%rsp),%rdi
1596 leaq (%r14,%r10,2),%r12
1598 leaq (%rcx,%r11,2),%r13
1607 movq -8(%rsi,%rbp,1),%rax
1611 leaq (%r14,%r10,2),%rbx
1615 leaq (%rcx,%r11,2),%r8
1624 movq 0(%rsi,%rbp,1),%rax
1631 jmp .Lsqr4x_shift_n_add
1634 .Lsqr4x_shift_n_add:
1635 leaq (%r14,%r10,2),%r12
1637 leaq (%rcx,%r11,2),%r13
1646 movq -8(%rsi,%rbp,1),%rax
1650 leaq (%r14,%r10,2),%rbx
1654 leaq (%rcx,%r11,2),%r8
1663 movq 0(%rsi,%rbp,1),%rax
1667 leaq (%r14,%r10,2),%r12
1671 leaq (%rcx,%r11,2),%r13
1680 movq 8(%rsi,%rbp,1),%rax
1684 leaq (%r14,%r10,2),%rbx
1688 leaq (%rcx,%r11,2),%r8
1697 movq 16(%rsi,%rbp,1),%rax
1704 jnz .Lsqr4x_shift_n_add
# Loop tail: final doubled limbs.
1706 leaq (%r14,%r10,2),%r12
1709 leaq (%rcx,%r11,2),%r13
1722 leaq (%r14,%r10,2),%rbx
1726 leaq (%rcx,%r11,2),%r8
1735 .byte 102,72,15,126,213
# Montgomery reduction over the doubled square, 8 limbs at a time.
1736 __bn_sqr8x_reduction:
1738 leaq (%r9,%rbp,1),%rcx
1739 leaq 48+8(%rsp,%r9,2),%rdx
1741 leaq 48+8(%rsp,%r9,1),%rdi
1744 jmp .L8x_reduction_loop
1747 .L8x_reduction_loop:
1748 leaq (%rdi,%r9,1),%rdi
# n0 (stored at 32+8(%rsp) — inferred from usage) times the low limb
# gives the per-iteration reduction multiplier.
1763 imulq 32+8(%rsp),%rbx
1781 movq %rbx,48-8+8(%rsp,%rcx,8)
1790 movq 32+8(%rsp),%rsi
1856 movq 48+56+8(%rsp),%rbx
1920 movq 48-16+8(%rsp,%rcx,8),%rbx
1936 movq 48+56+8(%rsp),%rbx
1979 .byte 102,72,15,126,213
1983 .byte 102,73,15,126,217
1993 jb .L8x_reduction_loop
1996 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# ----------------------------------------------------------------------
# __bn_post4x_internal — post-processing after sqr8x reduction:
# restores pointers from XMM stash and jumps into the shared
# conditional-subtract code (.Lsqr4x_sub_entry).  Body largely elided.
# ----------------------------------------------------------------------
1997 .type __bn_post4x_internal,@function
1999 __bn_post4x_internal:
2002 leaq (%rdi,%r9,1),%rbx
# Hand-encoded movq XMM->GPR: recover pointers stashed by the caller.
2004 .byte 102,72,15,126,207
2006 .byte 102,72,15,126,206
2013 jmp .Lsqr4x_sub_entry
2052 .size __bn_post4x_internal,.-__bn_post4x_internal
# ----------------------------------------------------------------------
# bn_from_montgomery — public symbol; entire body elided in this
# listing (only type/size directives visible).  Presumably converts a
# value out of Montgomery representation — confirm against generator.
# ----------------------------------------------------------------------
2053 .globl bn_from_montgomery
2054 .type bn_from_montgomery,@function
2063 .size bn_from_montgomery,.-bn_from_montgomery
# ----------------------------------------------------------------------
# bn_from_mont8x — 8-way conversion out of Montgomery form: copies the
# input into the frame, zero-pads, runs one sqr8x/sqrx8x reduction
# (MULX path selected via OPENSSL_ia32cap_P), then zeroes the frame.
# Listing non-contiguous; comments hedged.
# ----------------------------------------------------------------------
2065 .type bn_from_mont8x,@function
2071 .cfi_def_cfa_register %rax
# Callee-saved GPR saves (pushes elided; offsets recorded for unwind).
2073 .cfi_offset %rbx,-16
2075 .cfi_offset %rbp,-24
2077 .cfi_offset %r12,-32
2079 .cfi_offset %r13,-40
2081 .cfi_offset %r14,-48
2083 .cfi_offset %r15,-56
2087 leaq (%r9,%r9,2),%r10
2098 leaq -320(%rsp,%r9,2),%r11
2105 leaq -320(%rbp,%r9,2),%rbp
2110 leaq 4096-320(,%r9,2),%r10
2111 leaq -320(%rbp,%r9,2),%rbp
2121 leaq (%r11,%rbp,1),%rsp
2125 jmp .Lfrom_page_walk_done
# Guard-page walk for large frames.
2128 leaq -4096(%rsp),%rsp
2132 .Lfrom_page_walk_done:
2148 .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
# Copy input (%rsi) into the low half of the buffer and store %xmm0
# (presumably zero) into the upper half at (%rax,%r9,...).
2158 movdqu 16(%rsi),%xmm2
2159 movdqu 32(%rsi),%xmm3
2160 movdqa %xmm0,(%rax,%r9,1)
2161 movdqu 48(%rsi),%xmm4
2162 movdqa %xmm0,16(%rax,%r9,1)
# .byte 48 8d b6 40 00 00 00 — encodes leaq 64(%rsi),%rsi (hand-decoded).
2163 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2165 movdqa %xmm0,32(%rax,%r9,1)
2166 movdqa %xmm2,16(%rax)
2167 movdqa %xmm0,48(%rax,%r9,1)
2168 movdqa %xmm3,32(%rax)
2169 movdqa %xmm4,48(%rax)
# Stash pointers in XMM registers across the internal calls.
2174 .byte 102,72,15,110,207
2175 .byte 102,72,15,110,209
2178 .byte 102,73,15,110,218
# Feature dispatch: MULX/AD*X path vs. classic path.
2179 movl OPENSSL_ia32cap_P+8(%rip),%r11d
2184 leaq (%rax,%r9,1),%rdi
2185 call __bn_sqrx8x_reduction
2186 call __bn_postx4x_internal
2190 jmp .Lfrom_mont_zero
2194 call __bn_sqr8x_reduction
2195 call __bn_post4x_internal
2199 jmp .Lfrom_mont_zero
# Scrub the stack frame (crypto hygiene) before returning.
2205 movdqa %xmm0,0(%rax)
2206 movdqa %xmm0,16(%rax)
2207 movdqa %xmm0,32(%rax)
2208 movdqa %xmm0,48(%rax)
2211 jnz .Lfrom_mont_zero
2227 .cfi_def_cfa_register %rsp
2231 .size bn_from_mont8x,.-bn_from_mont8x
# ----------------------------------------------------------------------
# bn_mulx4x_mont_gather5 — BMI2/MULX variant entry point; frame setup
# with guard-page walk, then calls mulx4x_internal.  Listing is
# non-contiguous; comments hedged.
# ----------------------------------------------------------------------
2232 .type bn_mulx4x_mont_gather5,@function
2234 bn_mulx4x_mont_gather5:
2237 .cfi_def_cfa_register %rax
2240 .cfi_offset %rbx,-16
2242 .cfi_offset %rbp,-24
2244 .cfi_offset %r12,-32
2246 .cfi_offset %r13,-40
2248 .cfi_offset %r14,-48
2250 .cfi_offset %r15,-56
2254 leaq (%r9,%r9,2),%r10
2267 leaq -320(%rsp,%r9,2),%r11
2274 leaq -320(%rbp,%r9,2),%rbp
2278 leaq 4096-320(,%r9,2),%r10
2279 leaq -320(%rbp,%r9,2),%rbp
2289 leaq (%r11,%rbp,1),%rsp
2292 ja .Lmulx4x_page_walk
2293 jmp .Lmulx4x_page_walk_done
# One page at a time so guard pages are touched in order.
2296 leaq -4096(%rsp),%rsp
2299 ja .Lmulx4x_page_walk
2300 .Lmulx4x_page_walk_done:
2316 .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
2318 call mulx4x_internal
2337 .cfi_def_cfa_register %rsp
2341 .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
# ----------------------------------------------------------------------
# mulx4x_internal — MULX-based 4-way Montgomery multiplication worker.
# Same structure as mul4x_internal (mask table, constant-time gather,
# multiply + reduce) but uses mulxq, which takes the implicit
# multiplicand in %rdx and does not touch flags.  Listing is
# non-contiguous; comments hedged.
# ----------------------------------------------------------------------
2343 .type mulx4x_internal,@function
2352 leaq 128(%rdx,%r9,1),%r13
2356 leaq .Linc(%rip),%rax
# Save table-end and rp pointers in fixed stack slots for later reloads.
2357 movq %r13,16+8(%rsp)
2359 movq %rdi,56+8(%rsp)
2360 movdqa 0(%rax),%xmm0
2361 movdqa 16(%rax),%xmm1
2362 leaq 88-112(%rsp,%r10,1),%r10
2365 pshufd $0,%xmm5,%xmm5
# Scatter selection masks across the on-stack table.
2375 movdqa %xmm0,112(%r10)
2380 movdqa %xmm1,128(%r10)
2385 movdqa %xmm2,144(%r10)
2390 movdqa %xmm3,160(%r10)
2394 movdqa %xmm0,176(%r10)
2399 movdqa %xmm1,192(%r10)
2404 movdqa %xmm2,208(%r10)
2409 movdqa %xmm3,224(%r10)
2413 movdqa %xmm0,240(%r10)
2418 movdqa %xmm1,256(%r10)
2423 movdqa %xmm2,272(%r10)
2428 movdqa %xmm3,288(%r10)
2433 movdqa %xmm0,304(%r10)
2437 movdqa %xmm1,320(%r10)
2440 movdqa %xmm2,336(%r10)
2445 movdqa %xmm3,352(%r10)
# Constant-time gather: AND every table entry (here addressed via %rdi)
# with its mask so exactly one entry survives the (elided) por chain.
2446 pand 112(%rdi),%xmm3
2449 movdqa -128(%rdi),%xmm4
2450 movdqa -112(%rdi),%xmm5
2451 movdqa -96(%rdi),%xmm2
2452 pand 112(%r10),%xmm4
2453 movdqa -80(%rdi),%xmm3
2454 pand 128(%r10),%xmm5
2456 pand 144(%r10),%xmm2
2458 pand 160(%r10),%xmm3
2461 movdqa -64(%rdi),%xmm4
2462 movdqa -48(%rdi),%xmm5
2463 movdqa -32(%rdi),%xmm2
2464 pand 176(%r10),%xmm4
2465 movdqa -16(%rdi),%xmm3
2466 pand 192(%r10),%xmm5
2468 pand 208(%r10),%xmm2
2470 pand 224(%r10),%xmm3
2473 movdqa 0(%rdi),%xmm4
2474 movdqa 16(%rdi),%xmm5
2475 movdqa 32(%rdi),%xmm2
2476 pand 240(%r10),%xmm4
2477 movdqa 48(%rdi),%xmm3
2478 pand 256(%r10),%xmm5
2480 pand 272(%r10),%xmm2
2482 pand 288(%r10),%xmm3
2486 pshufd $0x4e,%xmm0,%xmm1
# .byte 66 48 0f 7e c2 — appears to encode movq %xmm0,%rdx (the gathered
# b-word becomes the implicit mulx multiplicand).
2489 .byte 102,72,15,126,194
2490 leaq 64+32+8(%rsp),%rbx
# First 4-limb row: t = a[0..3] * b[i].
2493 mulxq 0(%rsi),%r8,%rax
2494 mulxq 8(%rsi),%r11,%r12
2496 mulxq 16(%rsi),%rax,%r13
2499 mulxq 24(%rsi),%rax,%r14
# m = t[0] * n0 (n0 kept at 32+8(%rsp) — inferred from usage).
2502 imulq 32+8(%rsp),%r8
# Reduction row: m * n[0..3].
2512 mulxq 0(%rcx),%rax,%r10
2515 mulxq 8(%rcx),%rax,%r11
2518 mulxq 16(%rcx),%rax,%r12
2519 movq 24+8(%rsp),%rdi
2523 mulxq 24(%rcx),%rax,%r15
2535 mulxq 0(%rsi),%r10,%rax
2537 mulxq 8(%rsi),%r11,%r14
2539 mulxq 16(%rsi),%r12,%rax
2541 mulxq 24(%rsi),%r13,%r14
2550 mulxq 0(%rcx),%rax,%r15
2553 mulxq 8(%rcx),%rax,%r15
2556 mulxq 16(%rcx),%rax,%r15
2561 mulxq 24(%rcx),%rax,%r15
2574 leaq (%rsi,%rax,1),%rsi
2583 leaq 16-256(%rbx),%r10
# Next window's gather (table via %rdi, masks via %r10).
2587 movdqa -128(%rdi),%xmm0
2588 movdqa -112(%rdi),%xmm1
2589 movdqa -96(%rdi),%xmm2
2590 pand 256(%r10),%xmm0
2591 movdqa -80(%rdi),%xmm3
2592 pand 272(%r10),%xmm1
2594 pand 288(%r10),%xmm2
2596 pand 304(%r10),%xmm3
2599 movdqa -64(%rdi),%xmm0
2600 movdqa -48(%rdi),%xmm1
2601 movdqa -32(%rdi),%xmm2
2602 pand 320(%r10),%xmm0
2603 movdqa -16(%rdi),%xmm3
2604 pand 336(%r10),%xmm1
2606 pand 352(%r10),%xmm2
2608 pand 368(%r10),%xmm3
2611 movdqa 0(%rdi),%xmm0
2612 movdqa 16(%rdi),%xmm1
2613 movdqa 32(%rdi),%xmm2
2614 pand 384(%r10),%xmm0
2615 movdqa 48(%rdi),%xmm3
2616 pand 400(%r10),%xmm1
2618 pand 416(%r10),%xmm2
2620 pand 432(%r10),%xmm3
2623 movdqa 64(%rdi),%xmm0
2624 movdqa 80(%rdi),%xmm1
2625 movdqa 96(%rdi),%xmm2
2626 pand 448(%r10),%xmm0
2627 movdqa 112(%rdi),%xmm3
2628 pand 464(%r10),%xmm1
2630 pand 480(%r10),%xmm2
2632 pand 496(%r10),%xmm3
2636 pshufd $0x4e,%xmm4,%xmm0
2639 .byte 102,72,15,126,194
2642 leaq 32(%rbx,%rax,1),%rbx
# Inner rows use ADX-style accumulation (adoxq visible) interleaved
# with mulx so two carry chains run in parallel.
2643 mulxq 0(%rsi),%r8,%r11
2646 mulxq 8(%rsi),%r14,%r12
2649 mulxq 16(%rsi),%r15,%r13
2650 adoxq -24(%rbx),%r11
2652 mulxq 24(%rsi),%rdx,%r14
2653 adoxq -16(%rbx),%r12
2655 leaq (%rcx,%rax,1),%rcx
2662 imulq 32+8(%rsp),%r8
2668 mulxq 0(%rcx),%rax,%r10
2671 mulxq 8(%rcx),%rax,%r11
2674 mulxq 16(%rcx),%rax,%r12
2677 mulxq 24(%rcx),%rax,%r15
2679 movq 24+8(%rsp),%rdi
2690 mulxq 0(%rsi),%r10,%rax
2693 mulxq 8(%rsi),%r11,%r14
2696 mulxq 16(%rsi),%r12,%rax
2699 mulxq 24(%rsi),%r13,%r14
2710 mulxq 0(%rcx),%rax,%r15
2713 mulxq 8(%rcx),%rax,%r15
2716 mulxq 16(%rcx),%rax,%r15
2721 mulxq 24(%rcx),%rax,%r15
2736 movq 16+8(%rsp),%r10
2738 leaq (%rsi,%rax,1),%rsi
2747 movq (%rcx,%rax,1),%r12
2748 leaq (%rcx,%rax,1),%rbp
2750 leaq (%rbx,%rax,1),%rdi
2758 movq 56+8(%rsp),%rdx
# Tail-merges into the shared MULX-path conditional subtraction.
2764 jmp .Lsqrx4x_sub_entry
2766 .size mulx4x_internal,.-mulx4x_internal
# ----------------------------------------------------------------------
# bn_powerx5 — MULX/AD*X counterpart of bn_power5: five squarings via
# __bn_sqrx8x_internal/__bn_postx4x_internal, then mulx4x_internal.
# Listing non-contiguous; comments hedged.
# ----------------------------------------------------------------------
2767 .type bn_powerx5,@function
2772 .cfi_def_cfa_register %rax
2775 .cfi_offset %rbx,-16
2777 .cfi_offset %rbp,-24
2779 .cfi_offset %r12,-32
2781 .cfi_offset %r13,-40
2783 .cfi_offset %r14,-48
2785 .cfi_offset %r15,-56
2789 leaq (%r9,%r9,2),%r10
2800 leaq -320(%rsp,%r9,2),%r11
2807 leaq -320(%rbp,%r9,2),%rbp
2812 leaq 4096-320(,%r9,2),%r10
2813 leaq -320(%rbp,%r9,2),%rbp
2823 leaq (%r11,%rbp,1),%rsp
2827 jmp .Lpwrx_page_walk_done
# Guard-page walk for the large frame.
2830 leaq -4096(%rsp),%rsp
2834 .Lpwrx_page_walk_done:
# Stash pointers/values in XMM registers across the internal calls.
2851 .byte 102,72,15,110,207
2852 .byte 102,72,15,110,209
2853 .byte 102,73,15,110,218
2854 .byte 102,72,15,110,226
2857 .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
# Five squarings with post-processing, MULX path.
2860 call __bn_sqrx8x_internal
2861 call __bn_postx4x_internal
2862 call __bn_sqrx8x_internal
2863 call __bn_postx4x_internal
2864 call __bn_sqrx8x_internal
2865 call __bn_postx4x_internal
2866 call __bn_sqrx8x_internal
2867 call __bn_postx4x_internal
2868 call __bn_sqrx8x_internal
2869 call __bn_postx4x_internal
# Recover stashed values, then the final multiply.
2873 .byte 102,72,15,126,209
2874 .byte 102,72,15,126,226
2877 call mulx4x_internal
2896 .cfi_def_cfa_register %rsp
2900 .size bn_powerx5,.-bn_powerx5
# ----------------------------------------------------------------------
# bn_sqrx8x_internal / __bn_sqrx8x_internal — MULX-based 8-way squaring
# kernel: zeroes the result area, computes off-diagonal products with
# mulxq, doubles them in .Lsqrx4x_shift_n_add, then falls into
# __bn_sqrx8x_reduction for the Montgomery reduction.  Listing is
# non-contiguous; comments hedged.
# ----------------------------------------------------------------------
2902 .globl bn_sqrx8x_internal
2903 .hidden bn_sqrx8x_internal
2904 .type bn_sqrx8x_internal,@function
2907 __bn_sqrx8x_internal:
2949 leaq 48+8(%rsp),%rdi
2950 leaq (%rsi,%r9,1),%rbp
2953 jmp .Lsqr8x_zero_start
# Long-NOP padding bytes (66 66 66 2e 0f 1f ...) to align the loop.
2956 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
# Zero the 128-byte-per-iteration result window.
2959 movdqa %xmm0,0(%rdi)
2960 movdqa %xmm0,16(%rdi)
2961 movdqa %xmm0,32(%rdi)
2962 movdqa %xmm0,48(%rdi)
2964 movdqa %xmm0,64(%rdi)
2965 movdqa %xmm0,80(%rdi)
2966 movdqa %xmm0,96(%rdi)
2967 movdqa %xmm0,112(%rdi)
2980 leaq 48+8(%rsp),%rdi
2982 jmp .Lsqrx8x_outer_loop
# Off-diagonal products a[i]*a[j], i<j, for one 8-limb block; %rdx
# holds the common multiplicand for each mulx row.
2985 .Lsqrx8x_outer_loop:
2986 mulxq 8(%rsi),%r8,%rax
2989 mulxq 16(%rsi),%r9,%rax
# .byte runs here are hand-assembled mulx forms with 32-bit
# displacements (e.g. mulxq 0x18(%rsi),... — hand-decoded, verify).
2992 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
2995 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
2998 mulxq 40(%rsi),%r12,%rax
3001 mulxq 48(%rsi),%r13,%rax
3004 mulxq 56(%rsi),%r14,%r15
3015 mulxq 16(%rsi),%r8,%rbx
3016 mulxq 24(%rsi),%r9,%rax
3019 mulxq 32(%rsi),%r10,%rbx
3022 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
3025 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
3028 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
3039 mulxq 24(%rsi),%r8,%rbx
3040 mulxq 32(%rsi),%r9,%rax
3043 mulxq 40(%rsi),%r10,%rbx
3046 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
3049 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
3057 mulxq 32(%rsi),%r8,%rax
3061 mulxq 40(%rsi),%r9,%rbx
3064 mulxq 48(%rsi),%r10,%rax
3067 mulxq 56(%rsi),%r11,%r12
3083 mulxq %r15,%r10,%rbx
3093 mulxq %r15,%r14,%rbx
3108 je .Lsqrx8x_outer_break
3126 movq %rax,16+8(%rsp)
3127 movq %rdi,24+8(%rsp)
# Cross-block products against the second operand pointer %rbp.
3136 mulxq 0(%rbp),%rax,%r8
3140 mulxq 8(%rbp),%rax,%r9
3144 mulxq 16(%rbp),%rax,%r10
3148 mulxq 24(%rbp),%rax,%r11
3152 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3156 mulxq 40(%rbp),%rax,%r13
3160 mulxq 48(%rbp),%rax,%r14
3161 movq %rbx,(%rdi,%rcx,8)
3166 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
3167 movq 8(%rsi,%rcx,8),%rdx
3181 subq 16+8(%rsp),%rbx
3196 movq %rax,16+8(%rsp)
3202 subq 16+8(%rsp),%rbx
3204 movq 24+8(%rsp),%rcx
3215 je .Lsqrx8x_outer_loop
3232 jmp .Lsqrx8x_outer_loop
3235 .Lsqrx8x_outer_break:
3237 .byte 102,72,15,126,217
3243 leaq 48+8(%rsp),%rdi
3244 movq (%rsi,%rcx,1),%rdx
# Doubling + diagonal phase: mulxq %rdx,... squares the current limb
# (a[i]^2) while the elided adcx/shift code doubles the off-diagonals.
3255 .Lsqrx4x_shift_n_add:
3256 mulxq %rdx,%rax,%rbx
3259 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
3260 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3267 mulxq %rdx,%rax,%rbx
3270 movq 16(%rsi,%rcx,1),%rdx
3278 mulxq %rdx,%rax,%rbx
3281 movq 24(%rsi,%rcx,1),%rdx
3290 mulxq %rdx,%rax,%rbx
# jrcxz: loop until the negative index %rcx reaches zero.
3293 jrcxz .Lsqrx4x_shift_n_add_break
3294 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3303 jmp .Lsqrx4x_shift_n_add
3306 .Lsqrx4x_shift_n_add_break:
3311 .byte 102,72,15,126,213
# Montgomery reduction, MULX path; n0 at 32+8(%rsp) (inferred).
3312 __bn_sqrx8x_reduction:
3314 movq 32+8(%rsp),%rbx
3315 movq 48+8(%rsp),%rdx
3316 leaq -64(%rbp,%r9,1),%rcx
3321 leaq 48+8(%rsp),%rdi
3322 jmp .Lsqrx8x_reduction_loop
3325 .Lsqrx8x_reduction_loop:
3335 movq %rax,24+8(%rsp)
# Row m*n[0..7] against the modulus at %rbp.
3345 mulxq 0(%rbp),%rax,%r8
3349 mulxq 8(%rbp),%rbx,%r9
3353 mulxq 16(%rbp),%rbx,%r10
3357 mulxq 24(%rbp),%rbx,%r11
3361 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
3367 mulxq 32+8(%rsp),%rbx,%rdx
3369 movq %rax,64+48+8(%rsp,%rcx,8)
3371 mulxq 40(%rbp),%rax,%r13
3375 mulxq 48(%rbp),%rax,%r14
3379 mulxq 56(%rbp),%rax,%r15
# 67 67 67 — address-size-prefix padding bytes (alignment/tuning).
3385 .byte 0x67,0x67,0x67
3391 jae .Lsqrx8x_no_tail
3393 movq 48+8(%rsp),%rdx
3408 movq %rax,16+8(%rsp)
3414 mulxq 0(%rbp),%rax,%r8
3418 mulxq 8(%rbp),%rax,%r9
3422 mulxq 16(%rbp),%rax,%r10
3426 mulxq 24(%rbp),%rax,%r11
3430 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3434 mulxq 40(%rbp),%rax,%r13
3438 mulxq 48(%rbp),%rax,%r14
3442 mulxq 56(%rbp),%rax,%r15
3443 movq 72+48+8(%rsp,%rcx,8),%rdx
3446 movq %rbx,(%rdi,%rcx,8)
3454 jae .Lsqrx8x_tail_done
3456 subq 16+8(%rsp),%rsi
3457 movq 48+8(%rsp),%rdx
3472 movq %rax,16+8(%rsp)
3488 subq 16+8(%rsp),%rsi
3491 .byte 102,72,15,126,217
3494 .byte 102,72,15,126,213
3503 movq 32+8(%rsp),%rbx
3504 movq 64(%rdi,%rcx,1),%rdx
3516 leaq 64(%rdi,%rcx,1),%rdi
3518 jb .Lsqrx8x_reduction_loop
3521 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal
# ----------------------------------------------------------------------
# __bn_postx4x_internal — MULX-path post-processing: restores pointers
# from XMM stash, jumps to the shared conditional-subtract code, and
# (visible fragment) uses andnq for a branch-free masked select during
# the constant-time final subtraction.  Body largely elided.
# ----------------------------------------------------------------------
3523 __bn_postx4x_internal:
3531 .byte 102,72,15,126,202
3532 .byte 102,72,15,126,206
3538 jmp .Lsqrx4x_sub_entry
# andnq (BMI1): reg = ~reg & %rax — masks limbs without branching.
3547 andnq %rax,%r12,%r12
3549 andnq %rax,%r13,%r13
3550 andnq %rax,%r14,%r14
3551 andnq %rax,%r15,%r15
3573 .size __bn_postx4x_internal,.-__bn_postx4x_internal
# ----------------------------------------------------------------------
# bn_get_bits5 — extracts a 5-bit window from a bit array; only the
# 16-bit load at the computed word offset is visible here (the shifting
# and masking lines are elided from this listing).
# ----------------------------------------------------------------------
3575 .type bn_get_bits5,@function
3588 movzwl (%r10,%rsi,2),%eax
3593 .size bn_get_bits5,.-bn_get_bits5
# ----------------------------------------------------------------------
# bn_scatter5 — stores a value into the 5-bit-windowed power table at
# slot index %rcx (stride 8 bytes); zero-length input skips to the
# epilogue.  Body largely elided from this listing.
# ----------------------------------------------------------------------
3596 .type bn_scatter5,@function
3601 jz .Lscatter_epilogue
3602 leaq (%rdx,%rcx,8),%rdx
3613 .size bn_scatter5,.-bn_scatter5
# ----------------------------------------------------------------------
# bn_gather5 — constant-time read of one entry from the power table:
# builds masks from .Linc on a scratch frame, then ANDs every table
# entry with its mask so the selected entry is recovered without any
# secret-dependent memory access pattern.  SEH_begin/end markers bound
# the region for Win64-style unwind tooling.  Listing non-contiguous.
# ----------------------------------------------------------------------
3616 .type bn_gather5,@function
3619 .LSEH_begin_bn_gather5:
# Hand-encoded prologue: 4c 8d 14 24 = leaq (%rsp),%r10;
# 48 81 ec 08 01 00 00 = subq $0x108,%rsp (hand-decoded, verify).
3622 .byte 0x4c,0x8d,0x14,0x24
3623 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
3624 leaq .Linc(%rip),%rax
3628 movdqa 0(%rax),%xmm0
3629 movdqa 16(%rax),%xmm1
3633 pshufd $0,%xmm5,%xmm5
# Scatter the 16 selection masks into the scratch frame around %rax.
3642 movdqa %xmm0,-128(%rax)
3647 movdqa %xmm1,-112(%rax)
3652 movdqa %xmm2,-96(%rax)
3656 movdqa %xmm3,-80(%rax)
3661 movdqa %xmm0,-64(%rax)
3666 movdqa %xmm1,-48(%rax)
3671 movdqa %xmm2,-32(%rax)
3675 movdqa %xmm3,-16(%rax)
3680 movdqa %xmm0,0(%rax)
3685 movdqa %xmm1,16(%rax)
3690 movdqa %xmm2,32(%rax)
3694 movdqa %xmm3,48(%rax)
3699 movdqa %xmm0,64(%rax)
3704 movdqa %xmm1,80(%rax)
3709 movdqa %xmm2,96(%rax)
3711 movdqa %xmm3,112(%rax)
# Gather: load every table entry (at %r11) and AND with its mask; the
# elided por chain ORs the survivors into the result.
3718 movdqa -128(%r11),%xmm0
3719 movdqa -112(%r11),%xmm1
3720 movdqa -96(%r11),%xmm2
3721 pand -128(%rax),%xmm0
3722 movdqa -80(%r11),%xmm3
3723 pand -112(%rax),%xmm1
3725 pand -96(%rax),%xmm2
3727 pand -80(%rax),%xmm3
3730 movdqa -64(%r11),%xmm0
3731 movdqa -48(%r11),%xmm1
3732 movdqa -32(%r11),%xmm2
3733 pand -64(%rax),%xmm0
3734 movdqa -16(%r11),%xmm3
3735 pand -48(%rax),%xmm1
3737 pand -32(%rax),%xmm2
3739 pand -16(%rax),%xmm3
3742 movdqa 0(%r11),%xmm0
3743 movdqa 16(%r11),%xmm1
3744 movdqa 32(%r11),%xmm2
3746 movdqa 48(%r11),%xmm3
3754 movdqa 64(%r11),%xmm0
3755 movdqa 80(%r11),%xmm1
3756 movdqa 96(%r11),%xmm2
3758 movdqa 112(%r11),%xmm3
3763 pand 112(%rax),%xmm3
3768 pshufd $0x4e,%xmm4,%xmm0
3777 .LSEH_end_bn_gather5:
3779 .size bn_gather5,.-bn_gather5
# ASCII identification string; the bytes decode to:
# "Montgomery Multiplication with scatter/gather for x86_64,
#  CRYPTOGAMS by <appro@openssl.org>" followed by a NUL terminator.
3784 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0