#------------------------------------------------------------------------------
# bn_mul_mont_gather5 — Montgomery multiplication fused with a constant-time
# "gather" of one multiplicand from a 32-entry (5-bit window) power table.
# AT&T/GAS syntax, System V AMD64 ABI (callee-saved %rbx/%rbp/%r12-%r15 are
# presumably saved in lines not shown — TODO confirm).
#
# NOTE(review): this listing is a sampled excerpt — the leading number on each
# line is the original file's line number and the numbering has gaps, so most
# intervening instructions are NOT visible.  Comments below describe only what
# the visible lines establish.
#------------------------------------------------------------------------------
6 .globl bn_mul_mont_gather5
7 .type bn_mul_mont_gather5,@function
# Carve out the stack frame: n words of temporary product plus extra room
# (264 = 8 + 256 bytes, i.e. space for sixteen 16-byte selection masks).
# NOTE(review): %r11 presumably derived from the word count — confirm upstream.
29 leaq -264(%rsp,%r11,8),%rsp
32 movq %rax,8(%rsp,%r9,8)
# Probe the newly extended stack one page at a time ("page walk") so guard
# pages are touched in order — presumably part of a .Lmul_page_walk loop.
43 movq (%rsp,%rax,1),%r11
# %r10 = base of the in-frame mask table (the -112 bias makes the stores
# below, at offsets 112..352, land just past the 24-byte frame header).
51 leaq 24-112(%rsp,%r9,8),%r10
# Build sixteen 16-byte masks in the frame.  Each movdqa presumably stores a
# comparison result (index == requested power), four masks per group of
# %xmm0-%xmm3; the generating pcmpeqd/paddd instructions are not shown.
63 movdqa %xmm0,112(%r10)
68 movdqa %xmm1,128(%r10)
73 movdqa %xmm2,144(%r10)
78 movdqa %xmm3,160(%r10)
82 movdqa %xmm0,176(%r10)
87 movdqa %xmm1,192(%r10)
92 movdqa %xmm2,208(%r10)
97 movdqa %xmm3,224(%r10)
101 movdqa %xmm0,240(%r10)
106 movdqa %xmm1,256(%r10)
111 movdqa %xmm2,272(%r10)
116 movdqa %xmm3,288(%r10)
120 movdqa %xmm0,304(%r10)
125 movdqa %xmm1,320(%r10)
128 movdqa %xmm2,336(%r10)
133 movdqa %xmm3,352(%r10)
# Constant-time gather pass 1: read ALL 16-byte slots of the power table at
# %r12 (offsets -128..112 span 256 bytes).  The intervening pand/por lines
# that mask-and-merge the slots are mostly not visible here.
137 movdqa -128(%r12),%xmm4
138 movdqa -112(%r12),%xmm5
139 movdqa -96(%r12),%xmm2
141 movdqa -80(%r12),%xmm3
149 movdqa -64(%r12),%xmm4
150 movdqa -48(%r12),%xmm5
151 movdqa -32(%r12),%xmm2
153 movdqa -16(%r12),%xmm3
162 movdqa 16(%r12),%xmm5
163 movdqa 32(%r12),%xmm2
165 movdqa 48(%r12),%xmm3
# Merge the two 64-bit halves of the selected entry into the low quadword.
174 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded: 66 48 0F 7E C3 = movq %xmm0,%rbx (SSE2 reg->GPR move,
# emitted as .byte — presumably to keep old assemblers happy).
177 .byte 102,72,15,126,195
# Inner multiply/reduce loop fragments: ap[i] (%rsi), np[i] (%rcx), with the
# running product kept in the stack frame.
205 movq (%rsi,%r15,8),%rax
210 movq %r13,-16(%rsp,%r15,8)
216 movq (%rcx,%r15,8),%rax
230 movq %r13,-16(%rsp,%r9,8)
237 movq %r13,-8(%rsp,%r9,8)
# Store the carry word one slot past the n-word product.
238 movq %rdx,(%rsp,%r9,8)
# Gather the next b[] word: %rdx points into the in-frame mask table so that
# pand -128(%rdx)... selects exactly one table slot per 16-byte lane.
244 leaq 24+128(%rsp,%r9,8),%rdx
248 movdqa -128(%r12),%xmm0
249 movdqa -112(%r12),%xmm1
250 movdqa -96(%r12),%xmm2
251 movdqa -80(%r12),%xmm3
252 pand -128(%rdx),%xmm0
253 pand -112(%rdx),%xmm1
260 movdqa -64(%r12),%xmm0
261 movdqa -48(%r12),%xmm1
262 movdqa -32(%r12),%xmm2
263 movdqa -16(%r12),%xmm3
273 movdqa 16(%r12),%xmm1
274 movdqa 32(%r12),%xmm2
275 movdqa 48(%r12),%xmm3
284 movdqa 64(%r12),%xmm0
285 movdqa 80(%r12),%xmm1
286 movdqa 96(%r12),%xmm2
287 movdqa 112(%r12),%xmm3
297 pshufd $0x4e,%xmm4,%xmm0
# Hand-encoded: movq %xmm0,%rbx (next gathered multiplier word).
302 .byte 102,72,15,126,195
329 movq (%rsi,%r15,8),%rax
332 movq (%rsp,%r15,8),%r10
334 movq %r13,-16(%rsp,%r15,8)
340 movq (%rcx,%r15,8),%rax
354 movq (%rsp,%r9,8),%r10
356 movq %r13,-16(%rsp,%r9,8)
364 movq %r13,-8(%rsp,%r9,8)
365 movq %rdx,(%rsp,%r9,8)
# Final conditional subtraction: tmp - n with borrow chain (sbbq), result
# written to rp (%rdi).  Constant-time select of tmp vs tmp-n follows.
377 .Lsub: sbbq (%rcx,%r14,8),%rax
378 movq %rax,(%rdi,%r14,8)
379 movq 8(%rsi,%r14,8),%rax
# Copy-out / wipe loop: store result word, clobber the stack temporary.
394 movq (%rsi,%r14,8),%rax
395 movq %r14,(%rsp,%r14,8)
396 movq %rax,(%rdi,%r14,8)
# Epilogue: reload saved frame pointer from above the temporary area.
401 movq 8(%rsp,%r9,8),%rsi
413 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
#------------------------------------------------------------------------------
# bn_mul4x_mont_gather5 — 4-way unrolled variant; sets up the stack frame and
# (in lines not shown) presumably tail-calls mul4x_internal.
# NOTE(review): sampled excerpt; gaps between visible lines.
#------------------------------------------------------------------------------
414 .type bn_mul4x_mont_gather5,@function
416 bn_mul4x_mont_gather5:
# %r10 = 3*n — used for frame sizing (product + gather table).
429 leaq (%r9,%r9,2),%r10
# Frame of 320 bytes below 2*n words; computed twice (once into %r11 for a
# bounds check against a 4096-byte page, once into %rsp) — the branchy
# page-crossing logic between them is not visible.
441 leaq -320(%rsp,%r9,2),%r11
447 leaq -320(%rsp,%r9,2),%rsp
452 leaq 4096-320(,%r9,2),%r10
453 leaq -320(%rsp,%r9,2),%rsp
# Stack page walk: probe each page of the new frame in order.
464 movq (%rsp,%r11,1),%r10
467 jnc .Lmul4x_page_walk
488 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
#------------------------------------------------------------------------------
# mul4x_internal — core of the 4-way unrolled Montgomery multiply with 5-bit
# windowed constant-time gather.  Same mask-table technique as
# bn_mul_mont_gather5 above.
# NOTE(review): sampled excerpt; most instructions between the visible lines
# are missing.  Comments describe only what the visible lines establish.
#------------------------------------------------------------------------------
490 .type mul4x_internal,@function
# .Linc = {1,1} increment constant used to step the index being compared
# against the requested power (pcmpeqd-based mask generation, not shown).
495 leaq .Linc(%rip),%rax
# %r13 = end of the power table region (table base %rdx + n bytes + 128 bias).
496 leaq 128(%rdx,%r9,1),%r13
499 movdqa 16(%rax),%xmm1
# In-frame mask-table base; 88-112 bias mirrors the 24-112 bias in the 1x path.
500 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast the requested window index to all four dwords of %xmm5.
503 pshufd $0,%xmm5,%xmm5
# Store sixteen 16-byte selection masks (four rounds of %xmm0-%xmm3).
513 movdqa %xmm0,112(%r10)
518 movdqa %xmm1,128(%r10)
523 movdqa %xmm2,144(%r10)
528 movdqa %xmm3,160(%r10)
532 movdqa %xmm0,176(%r10)
537 movdqa %xmm1,192(%r10)
542 movdqa %xmm2,208(%r10)
547 movdqa %xmm3,224(%r10)
551 movdqa %xmm0,240(%r10)
556 movdqa %xmm1,256(%r10)
561 movdqa %xmm2,272(%r10)
566 movdqa %xmm3,288(%r10)
570 movdqa %xmm0,304(%r10)
575 movdqa %xmm1,320(%r10)
578 movdqa %xmm2,336(%r10)
583 movdqa %xmm3,352(%r10)
# Constant-time gather: touch all 256 bytes of the power table at %r12.
587 movdqa -128(%r12),%xmm4
588 movdqa -112(%r12),%xmm5
589 movdqa -96(%r12),%xmm2
591 movdqa -80(%r12),%xmm3
599 movdqa -64(%r12),%xmm4
600 movdqa -48(%r12),%xmm5
601 movdqa -32(%r12),%xmm2
603 movdqa -16(%r12),%xmm3
612 movdqa 16(%r12),%xmm5
613 movdqa 32(%r12),%xmm2
615 movdqa 48(%r12),%xmm3
# Fold the two 64-bit halves of the selected entry together.
624 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded: 66 48 0F 7E C3 = movq %xmm0,%rbx (gathered b-word to GPR).
627 .byte 102,72,15,126,195
# Negative-index idiom: advance %rsi to ap's end so the loop counter %r15
# (presumably running from -n up to 0) indexes relative to the end.
634 leaq (%rsi,%r9,1),%rsi
648 movq 8(%rsi,%r9,1),%rax
660 movq 16(%rsi,%r9,1),%rax
# 4-way unrolled multiply body: loads at -8/0/+8/+16 relative to %r15.
681 movq -8(%rsi,%r15,1),%rax
696 movq (%rsi,%r15,1),%rax
711 movq 8(%rsi,%r15,1),%rax
726 movq 16(%rsi,%r15,1),%rax
761 movq (%rsi,%r9,1),%rax
# Same end-pointer idiom for the modulus np (%rcx).
768 leaq (%rcx,%r9,1),%rcx
# Next outer iteration: gather another b-word via the in-frame mask table.
779 leaq 16+128(%r14),%rdx
782 movdqa -128(%r12),%xmm0
783 movdqa -112(%r12),%xmm1
784 movdqa -96(%r12),%xmm2
785 movdqa -80(%r12),%xmm3
786 pand -128(%rdx),%xmm0
787 pand -112(%rdx),%xmm1
794 movdqa -64(%r12),%xmm0
795 movdqa -48(%r12),%xmm1
796 movdqa -32(%r12),%xmm2
797 movdqa -16(%r12),%xmm3
807 movdqa 16(%r12),%xmm1
808 movdqa 32(%r12),%xmm2
809 movdqa 48(%r12),%xmm3
818 movdqa 64(%r12),%xmm0
819 movdqa 80(%r12),%xmm1
820 movdqa 96(%r12),%xmm2
821 movdqa 112(%r12),%xmm3
831 pshufd $0x4e,%xmm4,%xmm0
# Hand-encoded: movq %xmm0,%rbx.
834 .byte 102,72,15,126,195
836 movq (%r14,%r9,1),%r10
847 leaq (%r14,%r9,1),%r14
851 movq 8(%rsi,%r9,1),%rax
865 movq 16(%rsi,%r9,1),%rax
887 movq -8(%rsi,%r15,1),%rax
904 movq (%rsi,%r15,1),%rax
921 movq 8(%rsi,%r15,1),%rax
938 movq 16(%rsi,%r15,1),%rax
978 movq (%rsi,%r9,1),%rax
986 leaq (%rcx,%r9,1),%rcx
# Tail: hand off to the shared final-subtraction code (.Lsqr4x_sub_entry),
# with %rbx = end of the temporary product and %rdi = rp reloaded from the
# 56+8(%rsp) save slot.
1002 leaq (%r14,%r9,1),%rbx
1007 movq 56+8(%rsp),%rdi
1013 jmp .Lsqr4x_sub_entry
1014 .size mul4x_internal,.-mul4x_internal
#------------------------------------------------------------------------------
# bn_power5 — modular exponentiation helper: performs five successive modular
# squarings (a^(2^5) = a^32, visible as the five __bn_sqr8x_internal +
# __bn_post4x_internal call pairs) — presumably followed by one multiply by a
# gathered table entry in lines not shown.
# NOTE(review): sampled excerpt; gaps between visible lines.
#------------------------------------------------------------------------------
1016 .type bn_power5,@function
# %r10d = 3*n for frame sizing.
1028 leal (%r9,%r9,2),%r10d
# Frame setup mirrors bn_mul4x_mont_gather5: 320 bytes below 2*n, with a
# page-boundary check between the two leaq forms (logic not visible).
1039 leaq -320(%rsp,%r9,2),%r11
1045 leaq -320(%rsp,%r9,2),%rsp
1050 leaq 4096-320(,%r9,2),%r10
1051 leaq -320(%rsp,%r9,2),%rsp
# Stack page walk probe.
1062 movq (%rsp,%r11,1),%r10
# Hand-encoded GPR->XMM saves (SSE regs used as call-preserved scratch across
# the internal calls, which follow a private register-based convention):
# movq %rdi,%xmm1 ; movq %rcx,%xmm2 ; movq %r10,%xmm3 ; movq %rdx,%xmm4
1082 .byte 102,72,15,110,207
1083 .byte 102,72,15,110,209
1084 .byte 102,73,15,110,218
1085 .byte 102,72,15,110,226
# Five squarings with 4-way post-processing (final subtraction) after each.
1087 call __bn_sqr8x_internal
1088 call __bn_post4x_internal
1089 call __bn_sqr8x_internal
1090 call __bn_post4x_internal
1091 call __bn_sqr8x_internal
1092 call __bn_post4x_internal
1093 call __bn_sqr8x_internal
1094 call __bn_post4x_internal
1095 call __bn_sqr8x_internal
1096 call __bn_post4x_internal
# Restore saved GPRs from XMM: movq %xmm2,%rcx ; movq %xmm4,%rdx
1098 .byte 102,72,15,126,209
1099 .byte 102,72,15,126,226
1117 .size bn_power5,.-bn_power5
#------------------------------------------------------------------------------
# bn_sqr8x_internal / __bn_sqr8x_internal — 8-way unrolled modular squaring:
# a schoolbook half-square (cross products), a doubling pass (shift-and-add,
# folding in the diagonal a[i]^2 terms), then Montgomery reduction
# (__bn_sqr8x_reduction).  Private register-based calling convention.
# NOTE(review): sampled excerpt; most lines between those shown are missing.
#------------------------------------------------------------------------------
1119 .globl bn_sqr8x_internal
1120 .hidden bn_sqr8x_internal
1121 .type bn_sqr8x_internal,@function
1124 __bn_sqr8x_internal:
# Phase 1 — cross-product accumulation.  End-pointer idiom: %rsi -> ap end,
# %rbp presumably a negative byte offset counting toward 0.
1199 leaq (%rsi,%r9,1),%rsi
1204 movq -32(%rsi,%rbp,1),%r14
# %rdi -> output area inside the frame (48+8 bytes header, 2*n result words).
1205 leaq 48+8(%rsp,%r9,2),%rdi
1206 movq -24(%rsi,%rbp,1),%rax
1207 leaq -32(%rdi,%rbp,1),%rdi
1208 movq -16(%rsi,%rbp,1),%rbx
1215 movq %r10,-24(%rdi,%rbp,1)
1221 movq %r11,-16(%rdi,%rbp,1)
1225 movq -8(%rsi,%rbp,1),%rbx
# Inner loop over %rcx: 4 words per iteration (offsets -8,0,+8,+16,+24).
1239 movq %r10,-8(%rdi,%rcx,1)
1244 movq (%rsi,%rcx,1),%rbx
1254 movq 8(%rsi,%rcx,1),%rbx
1264 movq %r11,(%rdi,%rcx,1)
1271 movq 16(%rsi,%rcx,1),%rbx
1280 movq %r10,8(%rdi,%rcx,1)
1287 movq 24(%rsi,%rcx,1),%rbx
1297 movq %r11,16(%rdi,%rcx,1)
1309 movq %r10,-8(%rdi,%rcx,1)
# Outer-loop re-entry: same setup as above for the next source row, but now
# ACCUMULATING into the partial product already in the frame (addq to (%rdi)).
1328 movq -32(%rsi,%rbp,1),%r14
1329 leaq 48+8(%rsp,%r9,2),%rdi
1330 movq -24(%rsi,%rbp,1),%rax
1331 leaq -32(%rdi,%rbp,1),%rdi
1332 movq -16(%rsi,%rbp,1),%rbx
1336 movq -24(%rdi,%rbp,1),%r10
1340 movq %r10,-24(%rdi,%rbp,1)
1347 addq -16(%rdi,%rbp,1),%r11
1350 movq %r11,-16(%rdi,%rbp,1)
1354 movq -8(%rsi,%rbp,1),%rbx
1359 addq -8(%rdi,%rbp,1),%r12
1370 movq %r10,-8(%rdi,%rbp,1)
1377 movq (%rsi,%rcx,1),%rbx
1383 addq (%rdi,%rcx,1),%r13
1390 movq 8(%rsi,%rcx,1),%rbx
1398 movq %r11,(%rdi,%rcx,1)
1402 addq 8(%rdi,%rcx,1),%r12
1413 movq %r10,-8(%rdi,%rcx,1)
1434 leaq 48+8(%rsp,%r9,2),%rdi
1436 leaq -32(%rdi,%rbp,1),%rdi
# Phase 2 — double the cross products and add the squared diagonal terms.
# leaq (reg,%rX,2) computes 2*partial + carry-in without touching flags.
1495 movq -16(%rsi,%rbp,1),%rax
1496 leaq 48+8(%rsp),%rdi
1500 leaq (%r14,%r10,2),%r12
1502 leaq (%rcx,%r11,2),%r13
1511 movq -8(%rsi,%rbp,1),%rax
1515 leaq (%r14,%r10,2),%rbx
1519 leaq (%rcx,%r11,2),%r8
1528 movq 0(%rsi,%rbp,1),%rax
1535 jmp .Lsqr4x_shift_n_add
# Shift-and-add loop: each iteration handles four doublings interleaved with
# mulq-produced squares (mulq instructions not visible in this excerpt).
1538 .Lsqr4x_shift_n_add:
1539 leaq (%r14,%r10,2),%r12
1541 leaq (%rcx,%r11,2),%r13
1550 movq -8(%rsi,%rbp,1),%rax
1554 leaq (%r14,%r10,2),%rbx
1558 leaq (%rcx,%r11,2),%r8
1567 movq 0(%rsi,%rbp,1),%rax
1571 leaq (%r14,%r10,2),%r12
1575 leaq (%rcx,%r11,2),%r13
1584 movq 8(%rsi,%rbp,1),%rax
1588 leaq (%r14,%r10,2),%rbx
1592 leaq (%rcx,%r11,2),%r8
1601 movq 16(%rsi,%rbp,1),%rax
1608 jnz .Lsqr4x_shift_n_add
# Loop tail: final doubling outside the loop.
1610 leaq (%r14,%r10,2),%r12
1613 leaq (%rcx,%r11,2),%r13
1626 leaq (%r14,%r10,2),%rbx
1630 leaq (%rcx,%r11,2),%r8
# Hand-encoded: movq %xmm2,%rbp (restore a value stashed in XMM).
1639 .byte 102,72,15,126,213
# Phase 3 — Montgomery reduction of the 2n-word square, 8 words per pass.
1640 __bn_sqr8x_reduction:
1642 leaq (%r9,%rbp,1),%rcx
1643 leaq 48+8(%rsp,%r9,2),%rdx
1645 leaq 48+8(%rsp,%r9,1),%rdi
1648 jmp .L8x_reduction_loop
1651 .L8x_reduction_loop:
1652 leaq (%rdi,%r9,1),%rdi
# m = t[0] * n0 (n0 cached at 32+8(%rsp)).
1667 imulq 32+8(%rsp),%rbx
1685 movq %rbx,48-8+8(%rsp,%rcx,8)
1694 movq 32+8(%rsp),%rsi
1760 movq 48+56+8(%rsp),%rbx
1824 movq 48-16+8(%rsp,%rcx,8),%rbx
1840 movq 48+56+8(%rsp),%rbx
# Hand-encoded restores: movq %xmm2,%rbp ; movq %xmm3,%r9.
1884 .byte 102,72,15,126,213
1888 .byte 102,73,15,126,217
1898 jb .L8x_reduction_loop
1900 .size bn_sqr8x_internal,.-bn_sqr8x_internal
#------------------------------------------------------------------------------
# __bn_post4x_internal — post-reduction conditional subtraction (tmp - n),
# shared with the multiply path via .Lsqr4x_sub_entry.  Private register
# convention; operand pointers are recovered from XMM stash registers.
# NOTE(review): sampled excerpt.
#------------------------------------------------------------------------------
1901 .type __bn_post4x_internal,@function
1903 __bn_post4x_internal:
# %rbx = end of the n-word value being reduced.
1905 leaq (%rdi,%r9,1),%rbx
# Hand-encoded: movq %xmm1,%rdi ; movq %xmm1,%rsi — recover pointers saved
# earlier in XMM (exact sources per caller's stash — TODO confirm upstream).
1907 .byte 102,72,15,126,207
1909 .byte 102,72,15,126,206
1916 jmp .Lsqr4x_sub_entry
1954 .size __bn_post4x_internal,.-__bn_post4x_internal
#------------------------------------------------------------------------------
# bn_from_montgomery — public entry; body not visible in this excerpt
# (presumably a small stub that rejects unsupported sizes and/or forwards to
# bn_from_mont8x — TODO confirm against the full file).
#------------------------------------------------------------------------------
1955 .globl bn_from_montgomery
1956 .type bn_from_montgomery,@function
1963 .size bn_from_montgomery,.-bn_from_montgomery
#------------------------------------------------------------------------------
# bn_from_mont8x — convert out of Montgomery form: copy the input into the
# 2n-word frame with a zeroed upper half, run one __bn_sqr8x_reduction +
# __bn_post4x_internal pass, then wipe the frame.
# NOTE(review): sampled excerpt; gaps between visible lines.
#------------------------------------------------------------------------------
1965 .type bn_from_mont8x,@function
# Frame sizing, identical pattern to bn_power5 (3*n, 320-byte bias,
# page-boundary check between the two -320 leaq forms).
1978 leaq (%r9,%r9,2),%r10
1989 leaq -320(%rsp,%r9,2),%r11
1995 leaq -320(%rsp,%r9,2),%rsp
2000 leaq 4096-320(,%r9,2),%r10
2001 leaq -320(%rsp,%r9,2),%rsp
# Stack page walk probe.
2012 movq (%rsp,%r11,1),%r10
2015 jnc .Lfrom_page_walk
# Copy 64 bytes per iteration from ap (%rsi) into the low half of the frame
# (%rax) while zeroing (%xmm0) the mirrored slot n bytes higher.
2040 movdqu 16(%rsi),%xmm2
2041 movdqu 32(%rsi),%xmm3
2042 movdqa %xmm0,(%rax,%r9,1)
2043 movdqu 48(%rsi),%xmm4
2044 movdqa %xmm0,16(%rax,%r9,1)
# Hand-encoded: 48 8D B6 40 00 00 00 = leaq 0x40(%rsi),%rsi (advance source;
# long-form encoding presumably chosen for alignment/padding).
2045 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2047 movdqa %xmm0,32(%rax,%r9,1)
2048 movdqa %xmm2,16(%rax)
2049 movdqa %xmm0,48(%rax,%r9,1)
2050 movdqa %xmm3,32(%rax)
2051 movdqa %xmm4,48(%rax)
# Hand-encoded GPR->XMM stashes for the internal call convention:
# movq %rdi,%xmm1 ; movq %rcx,%xmm2 ; movq %r10,%xmm3.
2056 .byte 102,72,15,110,207
2057 .byte 102,72,15,110,209
2060 .byte 102,73,15,110,218
# One reduction pass converts the (zero-extended) value out of Montgomery form.
2061 call __bn_sqr8x_reduction
2062 call __bn_post4x_internal
2067 jmp .Lfrom_mont_zero
# Wipe the frame 64 bytes at a time (%xmm0 presumably zero here).
2071 movdqa %xmm0,0(%rax)
2072 movdqa %xmm0,16(%rax)
2073 movdqa %xmm0,32(%rax)
2074 movdqa %xmm0,48(%rax)
2077 jnz .Lfrom_mont_zero
2089 .size bn_from_mont8x,.-bn_from_mont8x
#------------------------------------------------------------------------------
# bn_get_bits5 — extract a 5-bit window from a big number.  The visible line
# loads 16 bits spanning the target bit position (%r10 = base, %rsi = word
# index scaled by 2); the shift/mask lines are not shown in this excerpt.
#------------------------------------------------------------------------------
2091 .type bn_get_bits5,@function
2103 movzwl (%r10,%rsi,2),%eax
2107 .size bn_get_bits5,.-bn_get_bits5
#------------------------------------------------------------------------------
# bn_scatter5 — store a big number into its slot of the interleaved power
# table.  Visible: early-out on zero length, and destination = table base
# (%rdx) + slot index (%rcx) * 8.  The copy loop itself is not shown.
#------------------------------------------------------------------------------
2110 .type bn_scatter5,@function
2114 jz .Lscatter_epilogue
2115 leaq (%rdx,%rcx,8),%rdx
2125 .size bn_scatter5,.-bn_scatter5
#------------------------------------------------------------------------------
# bn_gather5 — standalone constant-time gather: read one entry out of the
# 32-entry power table without a data-dependent memory access pattern
# (every slot is loaded; unwanted slots are masked off with pand).
# Has Win64 SEH markers, so the prologue is byte-pinned via .byte encodings.
# NOTE(review): sampled excerpt; gaps between visible lines.
#------------------------------------------------------------------------------
2128 .type bn_gather5,@function
2131 .LSEH_begin_bn_gather5:
# Hand-encoded prologue (kept as .byte so the SEH unwind data matches the
# exact opcodes): 4C 8D 14 24 = leaq (%rsp),%r10 ;
# 48 81 EC 08 01 00 00 = subq $0x108,%rsp (264-byte frame for the mask table).
2133 .byte 0x4c,0x8d,0x14,0x24
2134 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
# .Linc = {1,1,...} increment constants for mask generation.
2135 leaq .Linc(%rip),%rax
2139 movdqa 0(%rax),%xmm0
2140 movdqa 16(%rax),%xmm1
# Broadcast the requested index to all dword lanes.
2144 pshufd $0,%xmm5,%xmm5
# Build the sixteen 16-byte selection masks on the stack (%rax-relative,
# offsets -128..112); mask i is all-ones iff i == requested index — the
# pcmpeqd/paddd generators sit between these stores (not visible).
2153 movdqa %xmm0,-128(%rax)
2158 movdqa %xmm1,-112(%rax)
2163 movdqa %xmm2,-96(%rax)
2167 movdqa %xmm3,-80(%rax)
2172 movdqa %xmm0,-64(%rax)
2177 movdqa %xmm1,-48(%rax)
2182 movdqa %xmm2,-32(%rax)
2186 movdqa %xmm3,-16(%rax)
2191 movdqa %xmm0,0(%rax)
2196 movdqa %xmm1,16(%rax)
2201 movdqa %xmm2,32(%rax)
2205 movdqa %xmm3,48(%rax)
2210 movdqa %xmm0,64(%rax)
2215 movdqa %xmm1,80(%rax)
2220 movdqa %xmm2,96(%rax)
2222 movdqa %xmm3,112(%rax)
# Gather: load ALL table slots (%r11) and AND each with its mask (%rax);
# the por accumulation lines are mostly not visible in this excerpt.
2229 movdqa -128(%r11),%xmm0
2230 movdqa -112(%r11),%xmm1
2231 movdqa -96(%r11),%xmm2
2232 pand -128(%rax),%xmm0
2233 movdqa -80(%r11),%xmm3
2234 pand -112(%rax),%xmm1
2236 pand -96(%rax),%xmm2
2238 pand -80(%rax),%xmm3
2241 movdqa -64(%r11),%xmm0
2242 movdqa -48(%r11),%xmm1
2243 movdqa -32(%r11),%xmm2
2244 pand -64(%rax),%xmm0
2245 movdqa -16(%r11),%xmm3
2246 pand -48(%rax),%xmm1
2248 pand -32(%rax),%xmm2
2250 pand -16(%rax),%xmm3
2253 movdqa 0(%r11),%xmm0
2254 movdqa 16(%r11),%xmm1
2255 movdqa 32(%r11),%xmm2
2257 movdqa 48(%r11),%xmm3
2265 movdqa 64(%r11),%xmm0
2266 movdqa 80(%r11),%xmm1
2267 movdqa 96(%r11),%xmm2
2269 movdqa 112(%r11),%xmm3
2274 pand 112(%rax),%xmm3
# Merge the surviving entry's two halves into the low 64 bits.
2279 pshufd $0x4e,%xmm4,%xmm0
2288 .LSEH_end_bn_gather5:
2289 .size bn_gather5,.-bn_gather5
# ASCII banner (NUL-terminated): "Montgomery Multiplication with
# scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
2294 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0