# bn_mul_mont_gather5 — Montgomery multiplication where the multiplicand b is
# fetched from a scattered power table with a constant-time gather (masked
# SSE2 loads), so the table index does not leak through the data cache.
# NOTE(review): this is a fragmentary listing; the leading number on each line
# is an embedded original line number and many instructions are elided.
# Presumably the usual OpenSSL signature (rp, ap, bp, np, n0, num, idx) in
# %rdi,%rsi,%rdx,%rcx,%r8,%r9 plus the gather index — TODO confirm.
6 .globl bn_mul_mont_gather5
7 .type bn_mul_mont_gather5,@function
# Drop %rsp to carve out the temporary vector t[] (%r11 is a negative word
# count here, presumably — confirm against the elided setup).
27 leaq (%rsp,%r11,8),%rsp
# Save the original stack pointer (in %rax) above t[num] so the epilogue can
# restore it — reloaded at the matching "movq 8(%rsp,%r9,8),%rsi" below.
30 movq %rax,8(%rsp,%r9,8)
# Load four 64-bit select masks from .Lmagic_masks, indexed by the gather
# index; ANDed against table lanes to pick one entry in constant time.
37 leaq .Lmagic_masks(%rip),%rax
# Bias the power-table pointer (+96) so the masked lanes straddle the target.
39 leaq 96(%r12,%r11,8),%r12
40 movq 0(%rax,%r10,8),%xmm4
41 movq 8(%rax,%r10,8),%xmm5
42 movq 16(%rax,%r10,8),%xmm6
43 movq 24(%rax,%r10,8),%xmm7
# Hand-assembled SSE2 move: 66 48 0F 7E C3 = movq %xmm0,%rbx
# (gathered b[i] into the integer multiplier register).
58 .byte 102,72,15,126,195
# --- first multiply pass: ap[j]*b[0], reduction word via n0 (elided) ---
101 movq (%rsi,%r15,8),%rax
106 movq %r13,-16(%rsp,%r15,8)
112 movq (%rcx,%r15,8),%rax
# movq %xmm0,%rbx again — next gathered word of b.
121 .byte 102,72,15,126,195
128 movq %r13,-16(%rsp,%r15,8)
# Store top two words of the running result t[num-1], t[num] (carry word).
135 movq %r13,-8(%rsp,%r9,8)
136 movq %rdx,(%rsp,%r9,8)
# --- outer-loop body: accumulate ap[j]*b[i] + t[j] (elided in part) ---
182 movq (%rsi,%r15,8),%rax
185 movq (%rsp,%r15,8),%r10
187 movq %r13,-16(%rsp,%r15,8)
193 movq (%rcx,%r15,8),%rax
204 .byte 102,72,15,126,195
210 movq (%rsp,%r15,8),%r10
212 movq %r13,-16(%rsp,%r15,8)
220 movq %r13,-8(%rsp,%r9,8)
221 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction: compute t - n with borrow propagation; the
# elided tail selects t or t-n so the result is fully reduced.
233 .Lsub: sbbq (%rcx,%r14,8),%rax
234 movq %rax,(%rdi,%r14,8)
235 movq 8(%rsi,%r14,8),%rax
# Copy-out loop: write result word to rp while scrubbing the stack temporary
# (storing the loop index over t[] destroys the secret intermediate).
250 movq (%rsi,%r14,8),%rax
251 movq %r14,(%rsp,%r14,8)
252 movq %rax,(%rdi,%r14,8)
# Reload the saved original stack pointer stored in the prologue.
257 movq 8(%rsp,%r9,8),%rsi
268 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5 — 4-way unrolled variant of bn_mul_mont_gather5;
# only the stack-frame setup is visible in this fragmentary listing (the
# arithmetic is presumably delegated to mul4x_internal — confirm).
269 .type bn_mul4x_mont_gather5,@function
271 bn_mul4x_mont_gather5:
# Candidate frame: 64 bytes of fixed scratch plus 2*num words below %rsp.
294 leaq -64(%rsp,%r9,2),%r11
300 leaq -64(%rsp,%r9,2),%rsp
# Alternate path sized 4096-64(+2*num): keeps the allocation from skipping a
# whole guard page in one step (stack-probe style) — elided test decides.
305 leaq 4096-64(,%r9,2),%r10
306 leaq -64(%rsp,%r9,2),%rsp
331 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# mul4x_internal — inner worker for the 4x-unrolled Montgomery multiply.
# Fragmentary listing: leading numbers are embedded original line numbers and
# most of the 4-way multiply/reduce bodies are elided.
333 .type mul4x_internal,@function
# %r13 = end of the scattered power table (256 bytes past bp+num presumably —
# confirm against the generator script).
338 leaq 256(%rdx,%r9,1),%r13
# Constant-time gather setup: select masks from .Lmagic_masks into xmm4-7,
# table pointer biased by +96, exactly as in bn_mul_mont_gather5.
344 leaq .Lmagic_masks(%rip),%rax
346 leaq 96(%rdx,%r11,8),%r12
347 movq 0(%rax,%r10,8),%xmm4
348 movq 8(%rax,%r10,8),%xmm5
350 movq 16(%rax,%r10,8),%xmm6
351 movq 24(%rax,%r10,8),%xmm7
# 66 48 0F 7E C3 = movq %xmm0,%rbx (gathered multiplier word).
376 .byte 102,72,15,126,195
# Point %rsi at the end of ap; %r9 holds a byte count here (scale 1), so the
# loop below walks with a negative index in %r15.
383 leaq (%rsi,%r9,1),%rsi
403 leaq 64+8(%rsp,%r11,8),%r14
413 movq 8(%rsi,%r9,1),%rax
425 movq 16(%rsi,%r9,1),%rax
# 4-way unrolled inner loop: loads at -8/0/+8/+16 relative to the moving
# index %r15 feed four interleaved mulq chains (arithmetic elided).
446 movq -8(%rsi,%r15,1),%rax
461 movq (%rsi,%r15,1),%rax
476 movq 8(%rsi,%r15,1),%rax
491 movq 16(%rsi,%r15,1),%rax
526 movq (%rsi,%r9,1),%rax
# movq %xmm0,%rbx — next gathered word of b for the outer loop.
533 .byte 102,72,15,126,195
534 leaq (%rcx,%r9,2),%rcx
545 movq (%r14,%r9,1),%r10
568 leaq (%r14,%r9,1),%r14
574 movq 8(%rsi,%r9,1),%rax
588 movq 16(%rsi,%r9,1),%rax
# Same 4-way pattern in the outer-loop body (accumulating into t[]).
610 movq -8(%rsi,%r15,1),%rax
627 movq (%rsi,%r15,1),%rax
644 movq 8(%rsi,%r15,1),%rax
661 movq 16(%rsi,%r15,1),%rax
701 movq (%rsi,%r9,1),%rax
708 .byte 102,72,15,126,195
710 leaq (%rcx,%r9,2),%rcx
# Hand off pointers for the (elided) final subtraction/copy-out.
725 leaq (%r14,%r9,1),%rbx
726 leaq (%rcx,%rdi,8),%rbp
731 .size mul4x_internal,.-mul4x_internal
# bn_power5 — computes a^(2^5) (five successive Montgomery squarings) before
# multiplying by a gathered table entry (multiply elided in this fragmentary
# listing); used for fixed-window exponentiation.
733 .type bn_power5,@function
# Frame setup identical to bn_mul4x_mont_gather5: normal path vs. a
# 4096-byte-capped step, presumably to probe guard pages — confirm.
755 leaq -64(%rsp,%r9,2),%r11
761 leaq -64(%rsp,%r9,2),%rsp
766 leaq 4096-64(,%r9,2),%r10
767 leaq -64(%rsp,%r9,2),%rsp
# Stash pointer arguments in xmm registers so they survive the calls below
# without touching callee-saved GPRs (hand-assembled movq r64->xmm):
# 66 48 0F 6E CF = movq %rdi,%xmm1
789 .byte 102,72,15,110,207
# 66 48 0F 6E D1 = movq %rcx,%xmm2
790 .byte 102,72,15,110,209
# 66 49 0F 6E DA = movq %r10,%xmm3
791 .byte 102,73,15,110,218
# 66 48 0F 6E E2 = movq %rdx,%xmm4
792 .byte 102,72,15,110,226
# Five back-to-back squarings: result = a^(2^5) in Montgomery form.
794 call __bn_sqr8x_internal
795 call __bn_sqr8x_internal
796 call __bn_sqr8x_internal
797 call __bn_sqr8x_internal
798 call __bn_sqr8x_internal
# Recover the stashed pointers: movq %xmm2,%rcx / movq %xmm4,%rdx.
800 .byte 102,72,15,126,209
801 .byte 102,72,15,126,226
819 .size bn_power5,.-bn_power5
# bn_sqr8x_internal — 8x-unrolled big-number squaring plus Montgomery
# reduction. Squaring strategy (standard for this code family, inferred from
# the visible shift-and-add doubling below — confirm against generator):
# compute the half-triangle of cross products a[i]*a[j] (i<j), double them
# via the 2x leaq chains, then add the squares a[i]^2 on the diagonal.
# Fragmentary listing: leading numbers are embedded original line numbers.
821 .globl bn_sqr8x_internal
822 .hidden bn_sqr8x_internal
823 .type bn_sqr8x_internal,@function
# Point %rsi at end of a[]; walk with negative offsets in %rbp/%rcx.
901 leaq (%rsi,%r9,1),%rsi
906 movq -32(%rsi,%rbp,1),%r14
# Result area starts at 48+8(%rsp) and spans 2*num words.
907 leaq 48+8(%rsp,%r9,2),%rdi
908 movq -24(%rsi,%rbp,1),%rax
909 leaq -32(%rdi,%rbp,1),%rdi
910 movq -16(%rsi,%rbp,1),%rbx
917 movq %r10,-24(%rdi,%rbp,1)
923 movq %r11,-16(%rdi,%rbp,1)
927 movq -8(%rsi,%rbp,1),%rbx
941 movq %r10,-8(%rdi,%rcx,1)
# 4-way unrolled cross-product loop: a[j], a[j+1], a[j+2], a[j+3] against the
# current a[i] (mulq chains elided), results streamed to (%rdi,%rcx).
946 movq (%rsi,%rcx,1),%rbx
956 movq 8(%rsi,%rcx,1),%rbx
966 movq %r11,(%rdi,%rcx,1)
973 movq 16(%rsi,%rcx,1),%rbx
982 movq %r10,8(%rdi,%rcx,1)
989 movq 24(%rsi,%rcx,1),%rbx
999 movq %r11,16(%rdi,%rcx,1)
1011 movq %r10,-8(%rdi,%rcx,1)
# Second triangle pass: same shape but accumulating into the partial result
# already in memory (note the addq from (%rdi,...) before each store).
1030 movq -32(%rsi,%rbp,1),%r14
1031 leaq 48+8(%rsp,%r9,2),%rdi
1032 movq -24(%rsi,%rbp,1),%rax
1033 leaq -32(%rdi,%rbp,1),%rdi
1034 movq -16(%rsi,%rbp,1),%rbx
1038 movq -24(%rdi,%rbp,1),%r10
1042 movq %r10,-24(%rdi,%rbp,1)
1049 addq -16(%rdi,%rbp,1),%r11
1052 movq %r11,-16(%rdi,%rbp,1)
1056 movq -8(%rsi,%rbp,1),%rbx
1061 addq -8(%rdi,%rbp,1),%r12
1072 movq %r10,-8(%rdi,%rbp,1)
1079 movq (%rsi,%rcx,1),%rbx
1085 addq (%rdi,%rcx,1),%r13
1092 movq 8(%rsi,%rcx,1),%rbx
1100 movq %r11,(%rdi,%rcx,1)
1104 addq 8(%rdi,%rcx,1),%r12
1115 movq %r10,-8(%rdi,%rcx,1)
1136 leaq 48+8(%rsp,%r9,2),%rdi
1138 leaq -32(%rdi,%rbp,1),%rdi
1197 movq -16(%rsi,%rbp,1),%rax
1198 leaq 48+8(%rsp),%rdi
# Doubling with carry-in: leaq (%r14,%r10,2) = 2*%r10 + carry bit held in
# %r14/%rcx — shifts the cross-product sum left by one while the (elided)
# mulq %rax adds the diagonal square a[i]^2.
1202 leaq (%r14,%r10,2),%r12
1204 leaq (%rcx,%r11,2),%r13
1213 movq -8(%rsi,%rbp,1),%rax
1217 leaq (%r14,%r10,2),%rbx
1221 leaq (%rcx,%r11,2),%r8
1230 movq 0(%rsi,%rbp,1),%rax
1237 jmp .Lsqr4x_shift_n_add
# Main shift-and-add-squares loop, 4 limbs per iteration.
1240 .Lsqr4x_shift_n_add:
1241 leaq (%r14,%r10,2),%r12
1243 leaq (%rcx,%r11,2),%r13
1252 movq -8(%rsi,%rbp,1),%rax
1256 leaq (%r14,%r10,2),%rbx
1260 leaq (%rcx,%r11,2),%r8
1269 movq 0(%rsi,%rbp,1),%rax
1273 leaq (%r14,%r10,2),%r12
1277 leaq (%rcx,%r11,2),%r13
1286 movq 8(%rsi,%rbp,1),%rax
1290 leaq (%r14,%r10,2),%rbx
1294 leaq (%rcx,%r11,2),%r8
1303 movq 16(%rsi,%rbp,1),%rax
1310 jnz .Lsqr4x_shift_n_add
# Loop tail: last doubled pair outside the loop.
1312 leaq (%r14,%r10,2),%r12
1315 leaq (%rcx,%r11,2),%r13
1328 leaq (%r14,%r10,2),%rbx
1332 leaq (%rcx,%r11,2),%r8
# 66 48 0F 7E D5 = movq %xmm2,%rbp (recover stashed pointer, see bn_power5).
1341 .byte 102,72,15,126,213
# --- Montgomery reduction over the 2*num-word square, 8 words at a time ---
1344 leaq (%rbp,%r9,2),%rcx
1345 leaq 48+8(%rsp,%r9,2),%rdx
1347 leaq 48+8(%rsp,%r9,1),%rdi
1350 jmp .L8x_reduction_loop
1353 .L8x_reduction_loop:
1354 leaq (%rdi,%r9,1),%rdi
# m = t[0] * n0; 32+8(%rsp) presumably caches n0 — TODO confirm.
1369 imulq 32+8(%rsp),%rbx
1387 movq %rbx,48-8+8(%rsp,%rcx,8)
1396 movq 32+8(%rsp),%rsi
1462 movq 48+56+8(%rsp),%rbx
1526 movq 48-16+8(%rsp,%rcx,8),%rbx
1542 movq 48+56+8(%rsp),%rbx
# movq %xmm2,%rbp / 66 4D... 102,73,15,126,217 = movq %xmm3,%r9 (restore).
1586 .byte 102,72,15,126,213
1590 .byte 102,73,15,126,217
1600 jb .L8x_reduction_loop
# Post-reduction: recover rp (%rdi) and np (%rsi) from xmm1/xmm... stash:
# 66 48 0F 7E CF = movq %xmm1,%rdi ; 66 48 0F 7E CE = movq %xmm1,%rsi?
# NOTE(review): 206 encodes rm=%rsi — decodes to movq %xmm1,%rsi; confirm
# which pointer xmm1 held at this point (set by elided code).
1603 leaq (%rdi,%r9,1),%rbx
1607 .byte 102,72,15,126,207
1609 .byte 102,72,15,126,206
1610 leaq (%rbp,%rax,8),%rbp
1638 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# bn_from_montgomery — converts out of Montgomery representation; the entire
# body is elided in this fragmentary listing (only the symbol directives are
# visible). Presumably a thin dispatcher to bn_from_mont8x — TODO confirm.
1639 .globl bn_from_montgomery
1640 .type bn_from_montgomery,@function
1647 .size bn_from_montgomery,.-bn_from_montgomery
# bn_from_mont8x — 8x-unrolled conversion from Montgomery form: stages the
# input into the work area (zero-extended to 2*num words) and runs one
# sqr8x_reduction pass. Fragmentary listing; leading numbers are embedded
# original line numbers.
1649 .type bn_from_mont8x,@function
# Stack-frame setup, same probe-capped pattern as bn_mul4x_mont_gather5.
1673 leaq -64(%rsp,%r9,2),%r11
1679 leaq -64(%rsp,%r9,2),%rsp
1684 leaq 4096-64(,%r9,2),%r10
1685 leaq -64(%rsp,%r9,2),%rsp
# Copy 64 bytes of input per iteration (unaligned loads from %rsi) while
# zeroing the mirrored upper half at (%rax,%r9) with %xmm0 (presumably
# zeroed by elided code — confirm).
1715 movdqu 16(%rsi),%xmm2
1716 movdqu 32(%rsi),%xmm3
1717 movdqa %xmm0,(%rax,%r9,1)
1718 movdqu 48(%rsi),%xmm4
1719 movdqa %xmm0,16(%rax,%r9,1)
# 48 8D B6 40 00 00 00 = leaq 64(%rsi),%rsi hand-encoded with a 32-bit
# displacement (fixed-length encoding for alignment of the loop body).
1720 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
1722 movdqa %xmm0,32(%rax,%r9,1)
1723 movdqa %xmm2,16(%rax)
1724 movdqa %xmm0,48(%rax,%r9,1)
1725 movdqa %xmm3,32(%rax)
1726 movdqa %xmm4,48(%rax)
# Stash rp/np/n0-area pointers in xmm regs across the call (hand-encoded
# movq r64->xmm: %rdi->%xmm1, %rcx->%xmm2, %r10->%xmm3).
1731 .byte 102,72,15,110,207
1732 .byte 102,72,15,110,209
1735 .byte 102,73,15,110,218
# One reduction pass over the staged value = divide by R mod n.
1736 call sqr8x_reduction
1741 jmp .Lfrom_mont_zero
# Scrub the secret work area 64 bytes per iteration before returning.
1745 movdqa %xmm0,0(%rax)
1746 movdqa %xmm0,16(%rax)
1747 movdqa %xmm0,32(%rax)
1748 movdqa %xmm0,48(%rax)
1751 jnz .Lfrom_mont_zero
1763 .size bn_from_mont8x,.-bn_from_mont8x
# bn_get_bits5 — extracts a 5-bit exponent window: loads the 16-bit chunk
# containing the window (the elided tail presumably shifts and masks to 5
# bits — TODO confirm). Fragmentary listing.
1765 .type bn_get_bits5,@function
1777 movzwl (%r10,%rsi,2),%eax
1781 .size bn_get_bits5,.-bn_get_bits5
# bn_scatter5 — writes a value into slot %rcx of the scattered power table at
# %rdx (stride 8 bytes); the copy loop itself is elided. Early-outs when the
# length is zero. Fragmentary listing.
1784 .type bn_scatter5,@function
1788 jz .Lscatter_epilogue
# Table slot address: tbl + idx*8.
1789 leaq (%rdx,%rcx,8),%rdx
1799 .size bn_scatter5,.-bn_scatter5
# bn_gather5 — constant-time gather of one entry from the scattered power
# table: loads four select masks keyed by the index, then reads fixed lanes
# at -128/-64/... around the biased table pointer and ANDs them with the
# masks (combining elided), so memory access pattern is index-independent.
# Fragmentary listing.
1802 .type bn_gather5,@function
1809 leaq .Lmagic_masks(%rip),%rax
# Table pointer biased by +128 so the fixed-offset lane loads below cover
# the candidate entries.
1811 leaq 128(%rdx,%r11,8),%rdx
1812 movq 0(%rax,%rcx,8),%xmm4
1813 movq 8(%rax,%rcx,8),%xmm5
1814 movq 16(%rax,%rcx,8),%xmm6
1815 movq 24(%rax,%rcx,8),%xmm7
1819 movq -128(%rdx),%xmm0
1820 movq -64(%rdx),%xmm1
1838 .LSEH_end_bn_gather5:
1839 .size bn_gather5,.-bn_gather5
# Gather select masks (presumably the .Lmagic_masks table referenced above —
# its label line is elided): all-ones in exactly one 64-bit lane position so
# that ANDing a table lane with a mask keeps one candidate and zeroes the
# rest, giving a constant-time selection.
1842 .long 0,0, 0,0, 0,0, -1,-1
1843 .long 0,0, 0,0, 0,0, 0,0
# ASCII banner: "Montgomery Multiplication with scatter/gather for x86_64,
# CRYPTOGAMS by <appro@openssl.org>" (NUL-terminated).
1844 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0