/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
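/*
 * Montgomery multiplication with 5-bit windowed scatter/gather table access:
 * bn_mul_mont_gather5, bn_mul4x_mont_gather5, bn_power5, bn_sqr8x_internal,
 * bn_from_mont8x, their MULX/ADX counterparts (bn_mulx4x_mont_gather5,
 * bn_powerx5, bn_sqrx8x_internal), and the bn_get_bits5 / bn_scatter5 /
 * bn_gather5 helpers.
 */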
.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,@function
.cfi_def_cfa_register %rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
leaq -280(%rsp,%r9,8),%r10
leaq (%r10,%r11,1),%rsp
jmp .Lmul_page_walk_done
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
leaq 24-112(%rsp,%r9,8),%r10
movdqa %xmm0,112(%r10)
movdqa %xmm1,128(%r10)
movdqa %xmm2,144(%r10)
movdqa %xmm3,160(%r10)
movdqa %xmm0,176(%r10)
movdqa %xmm1,192(%r10)
movdqa %xmm2,208(%r10)
movdqa %xmm3,224(%r10)
movdqa %xmm0,240(%r10)
movdqa %xmm1,256(%r10)
movdqa %xmm2,272(%r10)
movdqa %xmm3,288(%r10)
movdqa %xmm0,304(%r10)
movdqa %xmm1,320(%r10)
movdqa %xmm2,336(%r10)
movdqa %xmm3,352(%r10)
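/*
 * Constant-time gather of the multiplier: the selection masks written above
 * are AND-ed against the precomputed table entries below, so one of the 32
 * entries is picked by masking rather than by a secret-dependent load
 * address.
 */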
movdqa -128(%r12),%xmm4
movdqa -112(%r12),%xmm5
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
movdqa -64(%r12),%xmm4
movdqa -48(%r12),%xmm5
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
movdqa 16(%r12),%xmm5
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
pshufd $0x4e,%xmm0,%xmm1
.byte 102,72,15,126,195
movq (%rsi,%r15,8),%rax
movq %r13,-16(%rsp,%r15,8)
movq (%rcx,%r15,8),%rax
movq %r13,-16(%rsp,%r9,8)
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 24+128(%rsp,%r9,8),%rdx
movdqa -128(%r12),%xmm0
movdqa -112(%r12),%xmm1
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
pand -128(%rdx),%xmm0
pand -112(%rdx),%xmm1
movdqa -64(%r12),%xmm0
movdqa -48(%r12),%xmm1
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
movdqa 16(%r12),%xmm1
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
movdqa 64(%r12),%xmm0
movdqa 80(%r12),%xmm1
movdqa 96(%r12),%xmm2
movdqa 112(%r12),%xmm3
pshufd $0x4e,%xmm4,%xmm0
.byte 102,72,15,126,195
movq (%rsi,%r15,8),%rax
movq (%rsp,%r15,8),%r10
movq %r13,-16(%rsp,%r15,8)
movq (%rcx,%r15,8),%rax
movq (%rsp,%r9,8),%r10
movq %r13,-16(%rsp,%r9,8)
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
movq (%rdi,%r14,8),%rcx
movq (%rsp,%r14,8),%rdx
movq %r14,(%rsp,%r14,8)
movq %rdx,(%rdi,%r14,8)
movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa_register %rsp
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
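/*
 * bn_mul4x_mont_gather5: 4-way unrolled variant of bn_mul_mont_gather5.
 * This entry point sets up the frame and page-walks the new stack area; the
 * multiplication body lives in mul4x_internal below.
 */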
.type bn_mul4x_mont_gather5,@function
bn_mul4x_mont_gather5:
.cfi_def_cfa_register %rax
leaq (%r9,%r9,2),%r10
leaq -320(%rsp,%r9,2),%r11
leaq -320(%rbp,%r9,2),%rbp
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
leaq (%r11,%rbp,1),%rsp
jmp .Lmul4x_page_walk_done
leaq -4096(%rsp),%rsp
.Lmul4x_page_walk_done:
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.cfi_def_cfa_register %rsp
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
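/*
 * mul4x_internal: inner 4-way Montgomery multiplication loop.  It gathers
 * each multiplier word with the same masking scheme as above and exits
 * through the common .Lsqr4x_sub_entry subtraction path.
 */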
.type mul4x_internal,@function
leaq .Linc(%rip),%rax
leaq 128(%rdx,%r9,1),%r13
movdqa 16(%rax),%xmm1
leaq 88-112(%rsp,%r9,1),%r10
pshufd $0,%xmm5,%xmm5
movdqa %xmm0,112(%r10)
movdqa %xmm1,128(%r10)
movdqa %xmm2,144(%r10)
movdqa %xmm3,160(%r10)
movdqa %xmm0,176(%r10)
movdqa %xmm1,192(%r10)
movdqa %xmm2,208(%r10)
movdqa %xmm3,224(%r10)
movdqa %xmm0,240(%r10)
movdqa %xmm1,256(%r10)
movdqa %xmm2,272(%r10)
movdqa %xmm3,288(%r10)
movdqa %xmm0,304(%r10)
movdqa %xmm1,320(%r10)
movdqa %xmm2,336(%r10)
movdqa %xmm3,352(%r10)
movdqa -128(%r12),%xmm4
movdqa -112(%r12),%xmm5
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
movdqa -64(%r12),%xmm4
movdqa -48(%r12),%xmm5
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
movdqa 16(%r12),%xmm5
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
pshufd $0x4e,%xmm0,%xmm1
.byte 102,72,15,126,195
leaq (%rsi,%r9,1),%rsi
movq 8(%rsi,%r9,1),%rax
movq 16(%rsi,%r9,1),%rax
movq -8(%rsi,%r15,1),%rax
movq (%rsi,%r15,1),%rax
movq 8(%rsi,%r15,1),%rax
movq 16(%rsi,%r15,1),%rax
movq (%rsi,%r9,1),%rax
leaq (%rcx,%r9,1),%rcx
leaq 16+128(%r14),%rdx
movdqa -128(%r12),%xmm0
movdqa -112(%r12),%xmm1
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
pand -128(%rdx),%xmm0
pand -112(%rdx),%xmm1
movdqa -64(%r12),%xmm0
movdqa -48(%r12),%xmm1
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
movdqa 16(%r12),%xmm1
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
movdqa 64(%r12),%xmm0
movdqa 80(%r12),%xmm1
movdqa 96(%r12),%xmm2
movdqa 112(%r12),%xmm3
pshufd $0x4e,%xmm4,%xmm0
.byte 102,72,15,126,195
movq (%r14,%r9,1),%r10
leaq (%r14,%r9,1),%r14
movq 8(%rsi,%r9,1),%rax
movq 16(%rsi,%r9,1),%rax
movq -8(%rsi,%r15,1),%rax
movq (%rsi,%r15,1),%rax
movq 8(%rsi,%r15,1),%rax
movq 16(%rsi,%r15,1),%rax
movq (%rsi,%r9,1),%rax
leaq (%rcx,%r9,1),%rcx
cmpq 16+8(%rsp),%r12
leaq (%r14,%r9,1),%rbx
movq 56+8(%rsp),%rdi
jmp .Lsqr4x_sub_entry
.size mul4x_internal,.-mul4x_internal
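/*
 * bn_power5: one 5-bit-window step of a constant-time modular
 * exponentiation.  The value is squared five times in Montgomery form and
 * then multiplied by a table entry gathered in constant time.
 */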
.type bn_power5,@function
.cfi_def_cfa_register %rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
.cfi_offset %rbx,-16
.cfi_offset %rbp,-24
.cfi_offset %r12,-32
.cfi_offset %r13,-40
.cfi_offset %r14,-48
.cfi_offset %r15,-56
leal (%r9,%r9,2),%r10d
leaq -320(%rsp,%r9,2),%r11
leaq -320(%rbp,%r9,2),%rbp
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
leaq (%r11,%rbp,1),%rsp
jmp .Lpwr_page_walk_done
leaq -4096(%rsp),%rsp
.Lpwr_page_walk_done:
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 102,73,15,110,218
.byte 102,72,15,110,226
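/* five consecutive Montgomery squaring passes */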
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
.byte 102,72,15,126,209
.byte 102,72,15,126,226
.cfi_def_cfa_register %rsp
.size bn_power5,.-bn_power5
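/*
 * bn_sqr8x_internal / __bn_sqr8x_reduction: 8x unrolled Montgomery squaring.
 * The cross-product pass is followed by the shift-and-add doubling loop
 * (.Lsqr4x_shift_n_add) and an in-place Montgomery reduction
 * (.L8x_reduction_loop).
 */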
.globl bn_sqr8x_internal
.hidden bn_sqr8x_internal
.type bn_sqr8x_internal,@function
__bn_sqr8x_internal:
leaq (%rsi,%r9,1),%rsi
movq -32(%rsi,%rbp,1),%r14
leaq 48+8(%rsp,%r9,2),%rdi
movq -24(%rsi,%rbp,1),%rax
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi,%rbp,1),%rbx
movq %r10,-24(%rdi,%rbp,1)
movq %r11,-16(%rdi,%rbp,1)
movq -8(%rsi,%rbp,1),%rbx
movq %r10,-8(%rdi,%rcx,1)
movq (%rsi,%rcx,1),%rbx
movq 8(%rsi,%rcx,1),%rbx
movq %r11,(%rdi,%rcx,1)
movq 16(%rsi,%rcx,1),%rbx
movq %r10,8(%rdi,%rcx,1)
movq 24(%rsi,%rcx,1),%rbx
movq %r11,16(%rdi,%rcx,1)
movq %r10,-8(%rdi,%rcx,1)
movq -32(%rsi,%rbp,1),%r14
leaq 48+8(%rsp,%r9,2),%rdi
movq -24(%rsi,%rbp,1),%rax
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi,%rbp,1),%rbx
movq -24(%rdi,%rbp,1),%r10
movq %r10,-24(%rdi,%rbp,1)
addq -16(%rdi,%rbp,1),%r11
movq %r11,-16(%rdi,%rbp,1)
movq -8(%rsi,%rbp,1),%rbx
addq -8(%rdi,%rbp,1),%r12
movq %r10,-8(%rdi,%rbp,1)
movq (%rsi,%rcx,1),%rbx
addq (%rdi,%rcx,1),%r13
movq 8(%rsi,%rcx,1),%rbx
movq %r11,(%rdi,%rcx,1)
addq 8(%rdi,%rcx,1),%r12
movq %r10,-8(%rdi,%rcx,1)
leaq 48+8(%rsp,%r9,2),%rdi
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi,%rbp,1),%rax
leaq 48+8(%rsp),%rdi
leaq (%r14,%r10,2),%r12
leaq (%rcx,%r11,2),%r13
movq -8(%rsi,%rbp,1),%rax
leaq (%r14,%r10,2),%rbx
leaq (%rcx,%r11,2),%r8
movq 0(%rsi,%rbp,1),%rax
jmp .Lsqr4x_shift_n_add
.Lsqr4x_shift_n_add:
leaq (%r14,%r10,2),%r12
leaq (%rcx,%r11,2),%r13
movq -8(%rsi,%rbp,1),%rax
leaq (%r14,%r10,2),%rbx
leaq (%rcx,%r11,2),%r8
movq 0(%rsi,%rbp,1),%rax
leaq (%r14,%r10,2),%r12
leaq (%rcx,%r11,2),%r13
movq 8(%rsi,%rbp,1),%rax
leaq (%r14,%r10,2),%rbx
leaq (%rcx,%r11,2),%r8
movq 16(%rsi,%rbp,1),%rax
jnz .Lsqr4x_shift_n_add
leaq (%r14,%r10,2),%r12
leaq (%rcx,%r11,2),%r13
leaq (%r14,%r10,2),%rbx
leaq (%rcx,%r11,2),%r8
.byte 102,72,15,126,213
__bn_sqr8x_reduction:
leaq (%r9,%rbp,1),%rcx
leaq 48+8(%rsp,%r9,2),%rdx
leaq 48+8(%rsp,%r9,1),%rdi
jmp .L8x_reduction_loop
.L8x_reduction_loop:
leaq (%rdi,%r9,1),%rdi
imulq 32+8(%rsp),%rbx
movq %rbx,48-8+8(%rsp,%rcx,8)
movq 32+8(%rsp),%rsi
movq 48+56+8(%rsp),%rbx
movq 48-16+8(%rsp,%rcx,8),%rbx
movq 48+56+8(%rsp),%rbx
.byte 102,72,15,126,213
.byte 102,73,15,126,217
jb .L8x_reduction_loop
.size bn_sqr8x_internal,.-bn_sqr8x_internal
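/*
 * __bn_post4x_internal: final conditional subtraction of the modulus,
 * finishing through the shared .Lsqr4x_sub_entry path.
 */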
.type __bn_post4x_internal,@function
__bn_post4x_internal:
leaq (%rdi,%r9,1),%rbx
.byte 102,72,15,126,207
.byte 102,72,15,126,206
jmp .Lsqr4x_sub_entry
.size __bn_post4x_internal,.-__bn_post4x_internal
.globl bn_from_montgomery
.type bn_from_montgomery,@function
.size bn_from_montgomery,.-bn_from_montgomery
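/*
 * bn_from_mont8x: converts a value out of Montgomery form.  The input is
 * copied and zero-extended to twice its length, then a single 8x Montgomery
 * reduction (MULX or generic path, selected at run time) produces the
 * result.
 */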
.type bn_from_mont8x,@function
.cfi_def_cfa_register %rax
.cfi_offset %rbx,-16
.cfi_offset %rbp,-24
.cfi_offset %r12,-32
.cfi_offset %r13,-40
.cfi_offset %r14,-48
.cfi_offset %r15,-56
leaq (%r9,%r9,2),%r10
leaq -320(%rsp,%r9,2),%r11
leaq -320(%rbp,%r9,2),%rbp
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
leaq (%r11,%rbp,1),%rsp
jmp .Lfrom_page_walk_done
leaq -4096(%rsp),%rsp
.Lfrom_page_walk_done:
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
movdqu 16(%rsi),%xmm2
movdqu 32(%rsi),%xmm3
movdqa %xmm0,(%rax,%r9,1)
movdqu 48(%rsi),%xmm4
movdqa %xmm0,16(%rax,%r9,1)
.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
movdqa %xmm0,32(%rax,%r9,1)
movdqa %xmm2,16(%rax)
movdqa %xmm0,48(%rax,%r9,1)
movdqa %xmm3,32(%rax)
movdqa %xmm4,48(%rax)
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 102,73,15,110,218
movl OPENSSL_ia32cap_P+8(%rip),%r11d
leaq (%rax,%r9,1),%rdi
call __bn_sqrx8x_reduction
call __bn_postx4x_internal
jmp .Lfrom_mont_zero
call __bn_sqr8x_reduction
call __bn_post4x_internal
jmp .Lfrom_mont_zero
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
movdqa %xmm0,48(%rax)
jnz .Lfrom_mont_zero
.cfi_def_cfa_register %rsp
.size bn_from_mont8x,.-bn_from_mont8x
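/*
 * bn_mulx4x_mont_gather5 / mulx4x_internal: BMI2/ADX code path.  Same
 * algorithm as bn_mul4x_mont_gather5, but the inner products use MULX with
 * separate ADOX/ADCX carry chains.
 */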
.type bn_mulx4x_mont_gather5,@function
bn_mulx4x_mont_gather5:
.cfi_def_cfa_register %rax
.cfi_offset %rbx,-16
.cfi_offset %rbp,-24
.cfi_offset %r12,-32
.cfi_offset %r13,-40
.cfi_offset %r14,-48
.cfi_offset %r15,-56
leaq (%r9,%r9,2),%r10
leaq -320(%rsp,%r9,2),%r11
leaq -320(%rbp,%r9,2),%rbp
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
leaq (%r11,%rbp,1),%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
leaq -4096(%rsp),%rsp
ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
call mulx4x_internal
.cfi_def_cfa_register %rsp
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,@function
leaq 128(%rdx,%r9,1),%r13
leaq .Linc(%rip),%rax
movq %r13,16+8(%rsp)
movq %rdi,56+8(%rsp)
movdqa 0(%rax),%xmm0
movdqa 16(%rax),%xmm1
leaq 88-112(%rsp,%r10,1),%r10
pshufd $0,%xmm5,%xmm5
movdqa %xmm0,112(%r10)
movdqa %xmm1,128(%r10)
movdqa %xmm2,144(%r10)
movdqa %xmm3,160(%r10)
movdqa %xmm0,176(%r10)
movdqa %xmm1,192(%r10)
movdqa %xmm2,208(%r10)
movdqa %xmm3,224(%r10)
movdqa %xmm0,240(%r10)
movdqa %xmm1,256(%r10)
movdqa %xmm2,272(%r10)
movdqa %xmm3,288(%r10)
movdqa %xmm0,304(%r10)
movdqa %xmm1,320(%r10)
movdqa %xmm2,336(%r10)
movdqa %xmm3,352(%r10)
pand 112(%rdi),%xmm3
movdqa -128(%rdi),%xmm4
movdqa -112(%rdi),%xmm5
movdqa -96(%rdi),%xmm2
pand 112(%r10),%xmm4
movdqa -80(%rdi),%xmm3
pand 128(%r10),%xmm5
pand 144(%r10),%xmm2
pand 160(%r10),%xmm3
movdqa -64(%rdi),%xmm4
movdqa -48(%rdi),%xmm5
movdqa -32(%rdi),%xmm2
pand 176(%r10),%xmm4
movdqa -16(%rdi),%xmm3
pand 192(%r10),%xmm5
pand 208(%r10),%xmm2
pand 224(%r10),%xmm3
movdqa 0(%rdi),%xmm4
movdqa 16(%rdi),%xmm5
movdqa 32(%rdi),%xmm2
pand 240(%r10),%xmm4
movdqa 48(%rdi),%xmm3
pand 256(%r10),%xmm5
pand 272(%r10),%xmm2
pand 288(%r10),%xmm3
pshufd $0x4e,%xmm0,%xmm1
.byte 102,72,15,126,194
leaq 64+32+8(%rsp),%rbx
mulxq 0(%rsi),%r8,%rax
mulxq 8(%rsi),%r11,%r12
mulxq 16(%rsi),%rax,%r13
mulxq 24(%rsi),%rax,%r14
imulq 32+8(%rsp),%r8
mulxq 0(%rcx),%rax,%r10
mulxq 8(%rcx),%rax,%r11
mulxq 16(%rcx),%rax,%r12
movq 24+8(%rsp),%rdi
mulxq 24(%rcx),%rax,%r15
mulxq 0(%rsi),%r10,%rax
mulxq 8(%rsi),%r11,%r14
mulxq 16(%rsi),%r12,%rax
mulxq 24(%rsi),%r13,%r14
mulxq 0(%rcx),%rax,%r15
mulxq 8(%rcx),%rax,%r15
mulxq 16(%rcx),%rax,%r15
mulxq 24(%rcx),%rax,%r15
leaq (%rsi,%rax,1),%rsi
leaq 16-256(%rbx),%r10
movdqa -128(%rdi),%xmm0
movdqa -112(%rdi),%xmm1
movdqa -96(%rdi),%xmm2
pand 256(%r10),%xmm0
movdqa -80(%rdi),%xmm3
pand 272(%r10),%xmm1
pand 288(%r10),%xmm2
pand 304(%r10),%xmm3
movdqa -64(%rdi),%xmm0
movdqa -48(%rdi),%xmm1
movdqa -32(%rdi),%xmm2
pand 320(%r10),%xmm0
movdqa -16(%rdi),%xmm3
pand 336(%r10),%xmm1
pand 352(%r10),%xmm2
pand 368(%r10),%xmm3
movdqa 0(%rdi),%xmm0
movdqa 16(%rdi),%xmm1
movdqa 32(%rdi),%xmm2
pand 384(%r10),%xmm0
movdqa 48(%rdi),%xmm3
pand 400(%r10),%xmm1
pand 416(%r10),%xmm2
pand 432(%r10),%xmm3
movdqa 64(%rdi),%xmm0
movdqa 80(%rdi),%xmm1
movdqa 96(%rdi),%xmm2
pand 448(%r10),%xmm0
movdqa 112(%rdi),%xmm3
pand 464(%r10),%xmm1
pand 480(%r10),%xmm2
pand 496(%r10),%xmm3
pshufd $0x4e,%xmm4,%xmm0
.byte 102,72,15,126,194
leaq 32(%rbx,%rax,1),%rbx
mulxq 0(%rsi),%r8,%r11
mulxq 8(%rsi),%r14,%r12
mulxq 16(%rsi),%r15,%r13
adoxq -24(%rbx),%r11
mulxq 24(%rsi),%rdx,%r14
adoxq -16(%rbx),%r12
leaq (%rcx,%rax,1),%rcx
imulq 32+8(%rsp),%r8
mulxq 0(%rcx),%rax,%r10
mulxq 8(%rcx),%rax,%r11
mulxq 16(%rcx),%rax,%r12
mulxq 24(%rcx),%rax,%r15
movq 24+8(%rsp),%rdi
mulxq 0(%rsi),%r10,%rax
mulxq 8(%rsi),%r11,%r14
mulxq 16(%rsi),%r12,%rax
mulxq 24(%rsi),%r13,%r14
mulxq 0(%rcx),%rax,%r15
mulxq 8(%rcx),%rax,%r15
mulxq 16(%rcx),%rax,%r15
mulxq 24(%rcx),%rax,%r15
movq 16+8(%rsp),%r10
leaq (%rsi,%rax,1),%rsi
movq (%rcx,%rax,1),%r12
leaq (%rcx,%rax,1),%rbp
leaq (%rbx,%rax,1),%rdi
movq 56+8(%rsp),%rdx
jmp .Lsqrx4x_sub_entry
.size mulx4x_internal,.-mulx4x_internal
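/*
 * bn_powerx5: MULX/ADX counterpart of bn_power5; five Montgomery squarings
 * followed by one gathered multiplication.
 */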
.type bn_powerx5,@function
.cfi_def_cfa_register %rax
.cfi_offset %rbx,-16
.cfi_offset %rbp,-24
.cfi_offset %r12,-32
.cfi_offset %r13,-40
.cfi_offset %r14,-48
.cfi_offset %r15,-56
leaq (%r9,%r9,2),%r10
leaq -320(%rsp,%r9,2),%r11
leaq -320(%rbp,%r9,2),%rbp
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
leaq (%r11,%rbp,1),%rsp
jmp .Lpwrx_page_walk_done
leaq -4096(%rsp),%rsp
.Lpwrx_page_walk_done:
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 102,73,15,110,218
.byte 102,72,15,110,226
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
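/* five consecutive Montgomery squaring passes (MULX path) */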
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
.byte 102,72,15,126,209
.byte 102,72,15,126,226
call mulx4x_internal
.cfi_def_cfa_register %rsp
.size bn_powerx5,.-bn_powerx5
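/*
 * bn_sqrx8x_internal / __bn_sqrx8x_reduction: MULX-based 8x squaring and
 * Montgomery reduction, mirroring bn_sqr8x_internal above.
 */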
.globl bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
.type bn_sqrx8x_internal,@function
__bn_sqrx8x_internal:
leaq 48+8(%rsp),%rdi
leaq (%rsi,%r9,1),%rbp
jmp .Lsqr8x_zero_start
.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
movdqa %xmm0,0(%rdi)
movdqa %xmm0,16(%rdi)
movdqa %xmm0,32(%rdi)
movdqa %xmm0,48(%rdi)
movdqa %xmm0,64(%rdi)
movdqa %xmm0,80(%rdi)
movdqa %xmm0,96(%rdi)
movdqa %xmm0,112(%rdi)
leaq 48+8(%rsp),%rdi
jmp .Lsqrx8x_outer_loop
.Lsqrx8x_outer_loop:
mulxq 8(%rsi),%r8,%rax
mulxq 16(%rsi),%r9,%rax
.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
mulxq 40(%rsi),%r12,%rax
mulxq 48(%rsi),%r13,%rax
mulxq 56(%rsi),%r14,%r15
mulxq 16(%rsi),%r8,%rbx
mulxq 24(%rsi),%r9,%rax
mulxq 32(%rsi),%r10,%rbx
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
mulxq 24(%rsi),%r8,%rbx
mulxq 32(%rsi),%r9,%rax
mulxq 40(%rsi),%r10,%rbx
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
mulxq 32(%rsi),%r8,%rax
mulxq 40(%rsi),%r9,%rbx
mulxq 48(%rsi),%r10,%rax
mulxq 56(%rsi),%r11,%r12
mulxq %r15,%r10,%rbx
mulxq %r15,%r14,%rbx
je .Lsqrx8x_outer_break
movq %rax,16+8(%rsp)
movq %rdi,24+8(%rsp)
mulxq 0(%rbp),%rax,%r8
mulxq 8(%rbp),%rax,%r9
mulxq 16(%rbp),%rax,%r10
mulxq 24(%rbp),%rax,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
mulxq 40(%rbp),%rax,%r13
mulxq 48(%rbp),%rax,%r14
movq %rbx,(%rdi,%rcx,8)
.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
movq 8(%rsi,%rcx,8),%rdx
subq 16+8(%rsp),%rbx
movq %rax,16+8(%rsp)
subq 16+8(%rsp),%rbx
movq 24+8(%rsp),%rcx
je .Lsqrx8x_outer_loop
jmp .Lsqrx8x_outer_loop
.Lsqrx8x_outer_break:
.byte 102,72,15,126,217
leaq 48+8(%rsp),%rdi
movq (%rsi,%rcx,1),%rdx
.Lsqrx4x_shift_n_add:
mulxq %rdx,%rax,%rbx
.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
mulxq %rdx,%rax,%rbx
movq 16(%rsi,%rcx,1),%rdx
mulxq %rdx,%rax,%rbx
movq 24(%rsi,%rcx,1),%rdx
mulxq %rdx,%rax,%rbx
jrcxz .Lsqrx4x_shift_n_add_break
.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
jmp .Lsqrx4x_shift_n_add
.Lsqrx4x_shift_n_add_break:
.byte 102,72,15,126,213
__bn_sqrx8x_reduction:
movq 32+8(%rsp),%rbx
movq 48+8(%rsp),%rdx
leaq -64(%rbp,%r9,1),%rcx
leaq 48+8(%rsp),%rdi
jmp .Lsqrx8x_reduction_loop
.Lsqrx8x_reduction_loop:
movq %rax,24+8(%rsp)
mulxq 0(%rbp),%rax,%r8
mulxq 8(%rbp),%rbx,%r9
mulxq 16(%rbp),%rbx,%r10
mulxq 24(%rbp),%rbx,%r11
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
mulxq 32+8(%rsp),%rbx,%rdx
movq %rax,64+48+8(%rsp,%rcx,8)
mulxq 40(%rbp),%rax,%r13
mulxq 48(%rbp),%rax,%r14
mulxq 56(%rbp),%rax,%r15
.byte 0x67,0x67,0x67
jae .Lsqrx8x_no_tail
movq 48+8(%rsp),%rdx
movq %rax,16+8(%rsp)
mulxq 0(%rbp),%rax,%r8
mulxq 8(%rbp),%rax,%r9
mulxq 16(%rbp),%rax,%r10
mulxq 24(%rbp),%rax,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
mulxq 40(%rbp),%rax,%r13
mulxq 48(%rbp),%rax,%r14
mulxq 56(%rbp),%rax,%r15
movq 72+48+8(%rsp,%rcx,8),%rdx
movq %rbx,(%rdi,%rcx,8)
jae .Lsqrx8x_tail_done
subq 16+8(%rsp),%rsi
movq 48+8(%rsp),%rdx
movq %rax,16+8(%rsp)
subq 16+8(%rsp),%rsi
.byte 102,72,15,126,217
.byte 102,72,15,126,213
movq 32+8(%rsp),%rbx
movq 64(%rdi,%rcx,1),%rdx
leaq 64(%rdi,%rcx,1),%rdi
jb .Lsqrx8x_reduction_loop
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
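/*
 * __bn_postx4x_internal: final conditional subtraction for the MULX path,
 * finishing through the shared .Lsqrx4x_sub_entry code.
 */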
__bn_postx4x_internal:
.byte 102,72,15,126,202
.byte 102,72,15,126,206
jmp .Lsqrx4x_sub_entry
andnq %rax,%r12,%r12
andnq %rax,%r13,%r13
andnq %rax,%r14,%r14
andnq %rax,%r15,%r15
.size __bn_postx4x_internal,.-__bn_postx4x_internal
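/*
 * Window helpers: bn_get_bits5 extracts a 5-bit window from the exponent,
 * bn_scatter5 stores a value into the power table at a given index, and
 * bn_gather5 reads an entry back using the same constant-time masking as
 * the gather code above.
 */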
.type bn_get_bits5,@function
movzwl (%r10,%rsi,2),%eax
.size bn_get_bits5,.-bn_get_bits5
.type bn_scatter5,@function
jz .Lscatter_epilogue
leaq (%rdx,%rcx,8),%rdx
.size bn_scatter5,.-bn_scatter5
.type bn_gather5,@function
.LSEH_begin_bn_gather5:
.byte 0x4c,0x8d,0x14,0x24
.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
leaq .Linc(%rip),%rax
movdqa 0(%rax),%xmm0
movdqa 16(%rax),%xmm1
pshufd $0,%xmm5,%xmm5
movdqa %xmm0,-128(%rax)
movdqa %xmm1,-112(%rax)
movdqa %xmm2,-96(%rax)
movdqa %xmm3,-80(%rax)
movdqa %xmm0,-64(%rax)
movdqa %xmm1,-48(%rax)
movdqa %xmm2,-32(%rax)
movdqa %xmm3,-16(%rax)
movdqa %xmm0,0(%rax)
movdqa %xmm1,16(%rax)
movdqa %xmm2,32(%rax)
movdqa %xmm3,48(%rax)
movdqa %xmm0,64(%rax)
movdqa %xmm1,80(%rax)
movdqa %xmm2,96(%rax)
movdqa %xmm3,112(%rax)
movdqa -128(%r11),%xmm0
movdqa -112(%r11),%xmm1
movdqa -96(%r11),%xmm2
pand -128(%rax),%xmm0
movdqa -80(%r11),%xmm3
pand -112(%rax),%xmm1
pand -96(%rax),%xmm2
pand -80(%rax),%xmm3
movdqa -64(%r11),%xmm0
movdqa -48(%r11),%xmm1
movdqa -32(%r11),%xmm2
pand -64(%rax),%xmm0
movdqa -16(%r11),%xmm3
pand -48(%rax),%xmm1
pand -32(%rax),%xmm2
pand -16(%rax),%xmm3
movdqa 0(%r11),%xmm0
movdqa 16(%r11),%xmm1
movdqa 32(%r11),%xmm2
movdqa 48(%r11),%xmm3
movdqa 64(%r11),%xmm0
movdqa 80(%r11),%xmm1
movdqa 96(%r11),%xmm2
movdqa 112(%r11),%xmm3
pand 112(%rax),%xmm3
pshufd $0x4e,%xmm4,%xmm0
.LSEH_end_bn_gather5:
.size bn_gather5,.-bn_gather5
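/* The .byte string below encodes the ASCII banner:
   "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" */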
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0