2 /* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
/* rsaz_512_sqr. Only a subset of the routine's lines appears in this listing
   (the leading numbers skip), so the notes below describe just the
   instructions that are visible. */
8 .type rsaz_512_sqr,@function
/* AND %r11d with the 32-bit word at OPENSSL_ia32cap_P+8.
   NOTE(review): presumably a CPU-feature test selecting the mulx code path;
   confirm against rsaz-x86_64.pl. */
25 andl OPENSSL_ia32cap_P+8(%rip),%r11d
/* Each leaq below computes dst = base + 2*dst; leaq does not modify flags. */
139 leaq (%rcx,%r10,2),%r10
179 leaq (%rbx,%r12,2),%r12
197 leaq (%r10,%r13,2),%r13
227 leaq (%rcx,%r14,2),%r14
245 leaq (%r12,%r15,2),%r15
270 leaq (%rbx,%r8,2),%r8
285 leaq (%r12,%r9,2),%r9
309 leaq (%rcx,%r10,2),%r10
317 leaq (%r15,%r11,2),%r11
/* First visible path: calls the non-mulx reduction helper, then the subtract helper. */
370 call __rsaz_512_reduce
382 call __rsaz_512_subtract
/* Reload the 32-bit value kept at 128+8(%rsp).
   NOTE(review): looks like a counter held on the stack; confirm. */
386 movl 128+8(%rsp),%r8d
/* Store the 32-bit value in %r8d to the same stack slot. */
395 movl %r8d,128+8(%rsp)
/* Encoded instruction: movq %rdi,%xmm0 (copy %rdi into an XMM register). */
396 .byte 102,72,15,110,199
/* Encoded instruction: movq %rbp,%xmm1. */
397 .byte 102,72,15,110,205
/* mulxq src,lo,hi: unsigned 64x64-bit multiply of %rdx by src; the low half
   goes to lo and the high half to hi. mulx does not modify flags. */
401 mulxq 16(%rsi),%rcx,%r10
404 mulxq 24(%rsi),%rax,%r11
407 mulxq 32(%rsi),%rcx,%r12
410 mulxq 40(%rsi),%rax,%r13
/* Encoded instruction: mulxq 48(%rsi),%rcx,%r14. */
413 .byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rax,%r15. */
417 .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
435 mulxq 16(%rsi),%rax,%rbx
/* Encoded instruction: mulxq 24(%rsi),%rdi,%r8. */
439 .byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
443 mulxq 32(%rsi),%rax,%rbx
447 mulxq 40(%rsi),%rdi,%r8
/* Encoded instruction: mulxq 48(%rsi),%rax,%rbx. */
451 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rdi,%r8. */
455 .byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
/* Encoded instruction: movq %r10,24(%rsp) (long form with a 32-bit displacement). */
472 .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
/* Encoded instruction: mulxq 24(%rsi),%rdi,%r9. */
475 .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
479 mulxq 32(%rsi),%rax,%rcx
483 mulxq 40(%rsi),%rdi,%r9
/* Encoded instruction: mulxq 48(%rsi),%rax,%rcx. */
487 .byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rdi,%r9. */
491 .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
/* Encoded instruction: movq %r12,40(%rsp). */
508 .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
/* Encoded instruction: mulxq 32(%rsi),%rax,%rbx. */
511 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
515 mulxq 40(%rsi),%rdi,%r10
519 mulxq 48(%rsi),%rax,%rbx
523 mulxq 56(%rsi),%rdi,%r10
/* Encoded instruction: mulxq 40(%rsi),%rdi,%r11. */
544 .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
548 mulxq 48(%rsi),%rax,%rcx
552 mulxq 56(%rsi),%rdi,%r11
/* Encoded instruction: mulxq 48(%rsi),%rax,%rbx. */
572 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rdi,%r12. */
576 .byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rax,%r13. */
596 .byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
/* Encoded instruction: movq %r11,96(%rsp). */
612 .byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
/* Encoded instruction: movq %r12,104(%rsp). */
613 .byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
/* Encoded instruction: movq %xmm0,%rdi (reverse of the earlier copy into %xmm0). */
625 .byte 102,72,15,126,199
/* Encoded instruction: movq %xmm1,%rbp. */
626 .byte 102,72,15,126,205
/* Second visible path: calls the mulx-based reduction helper, then the subtract helper. */
638 call __rsaz_512_reducex
650 call __rsaz_512_subtract
/* Reload the 32-bit value kept at 128+8(%rsp). */
654 movl 128+8(%rsp),%r8d
/* rax = rsp + 200.
   NOTE(review): presumably the address used to restore saved registers in
   the epilogue; the epilogue itself is not visible here. */
662 leaq 128+24+48(%rsp),%rax
672 .size rsaz_512_sqr,.-rsaz_512_sqr
/* rsaz_512_mul. Only a subset of the routine's lines appears in this listing;
   the notes cover the visible instructions only. */
674 .type rsaz_512_mul,@function
/* Encoded instruction: movq %rdi,%xmm0. */
686 .byte 102,72,15,110,199
/* Encoded instruction: movq %rcx,%xmm1. */
687 .byte 102,72,15,110,201
/* AND %r11d with the 32-bit word at OPENSSL_ia32cap_P+8.
   NOTE(review): presumably a CPU-feature test selecting the mulx path; confirm. */
690 andl OPENSSL_ia32cap_P+8(%rip),%r11d
/* Encoded instructions: movq %xmm0,%rdi then movq %xmm1,%rbp. The value that
   was copied from %rcx into %xmm1 therefore ends up in %rbp. */
697 .byte 102,72,15,126,199
698 .byte 102,72,15,126,205
709 call __rsaz_512_reduce
/* Same pair again before the mulx-based reduction:
   movq %xmm0,%rdi then movq %xmm1,%rbp. */
718 .byte 102,72,15,126,199
719 .byte 102,72,15,126,205
731 call __rsaz_512_reducex
743 call __rsaz_512_subtract
/* rax = rsp + 200.
   NOTE(review): presumably the base used by the epilogue's register restores;
   the epilogue is not visible here. */
745 leaq 128+24+48(%rsp),%rax
755 .size rsaz_512_mul,.-rsaz_512_mul
/* rsaz_512_mul_gather4: exported function symbol. Only a subset of the
   routine's lines appears in this listing; the notes cover the visible
   instructions only. */
756 .globl rsaz_512_mul_gather4
757 .type rsaz_512_mul_gather4,@function
759 rsaz_512_mul_gather4:
/* Load two 16-byte constants from .Linc+16 and .Linc (table not visible here). */
770 movdqa .Linc+16(%rip),%xmm1
771 movdqa .Linc(%rip),%xmm0
/* Broadcast the low 32-bit lane of %xmm8 to all four lanes. */
773 pshufd $0,%xmm8,%xmm8
/* 16-byte loads from offsets 16..112 off %rdx. movdqa requires each address
   to be 16-byte aligned. */
797 movdqa 16(%rdx),%xmm9
798 movdqa 32(%rdx),%xmm10
799 movdqa 48(%rdx),%xmm11
801 movdqa 64(%rdx),%xmm12
803 movdqa 80(%rdx),%xmm13
805 movdqa 96(%rdx),%xmm14
807 movdqa 112(%rdx),%xmm15
/* %xmm9 = %xmm8 with its two 64-bit halves swapped (shuffle control 0x4e). */
821 pshufd $0x4e,%xmm8,%xmm9
/* AND %r11d with the 32-bit word at OPENSSL_ia32cap_P+8.
   NOTE(review): presumably a CPU-feature test selecting the mulx path; confirm. */
824 andl OPENSSL_ia32cap_P+8(%rip),%r11d
/* Encoded instruction: movq %xmm8,%rbx (low 64 bits of %xmm8 into %rbx). */
827 .byte 102,76,15,126,195
/* Save %rdi and %rcx in the stack slots at 128+8 and 128+16 (%rsp). */
830 movq %rdi,128+8(%rsp)
831 movq %rcx,128+16(%rsp)
/* 16-byte aligned loads from offsets 16..112 off %rbp. */
889 movdqa 16(%rbp),%xmm9
890 movdqa 32(%rbp),%xmm10
891 movdqa 48(%rbp),%xmm11
893 movdqa 64(%rbp),%xmm12
895 movdqa 80(%rbp),%xmm13
897 movdqa 96(%rbp),%xmm14
899 movdqa 112(%rbp),%xmm15
/* Swap the 64-bit halves of %xmm8 into %xmm9. */
913 pshufd $0x4e,%xmm8,%xmm9
/* Encoded instruction: movq %xmm8,%rbx. */
915 .byte 102,76,15,126,195
/* Reload the two saved slots: the first goes back to %rdi, the second (which
   was stored from %rcx) is loaded into %rbp. */
994 movq 128+8(%rsp),%rdi
995 movq 128+16(%rsp),%rbp
1006 call __rsaz_512_reduce
/* Skip over the mulx variant to the shared tail label. */
1007 jmp .Lmul_gather_tail
/* mulx variant. Encoded instruction: movq %xmm8,%rdx. %rdx is the implicit
   multiplicand of the mulxq instructions that follow. */
1011 .byte 102,76,15,126,194
/* Save %rdi and %rcx in the stack slots at 128+8 and 128+16 (%rsp). */
1014 movq %rdi,128+8(%rsp)
1015 movq %rcx,128+16(%rsp)
/* mulxq src,lo,hi: unsigned %rdx * src; low half to lo, high half to hi;
   flags unchanged. The eight 64-bit words at 0..56(%rsi) are multiplied in
   turn, with high halves going to %r8..%r15. */
1017 mulxq (%rsi),%rbx,%r8
1021 mulxq 8(%rsi),%rax,%r9
1023 mulxq 16(%rsi),%rbx,%r10
1026 mulxq 24(%rsi),%rax,%r11
1029 mulxq 32(%rsi),%rbx,%r12
1032 mulxq 40(%rsi),%rax,%r13
1035 mulxq 48(%rsi),%rbx,%r14
1038 mulxq 56(%rsi),%rax,%r15
1046 jmp .Loop_mulx_gather
/* Visible part of the loop body: 16-byte aligned loads from offsets 0..112 off %rbp. */
1050 movdqa 0(%rbp),%xmm8
1051 movdqa 16(%rbp),%xmm9
1052 movdqa 32(%rbp),%xmm10
1053 movdqa 48(%rbp),%xmm11
1055 movdqa 64(%rbp),%xmm12
1057 movdqa 80(%rbp),%xmm13
1059 movdqa 96(%rbp),%xmm14
1061 movdqa 112(%rbp),%xmm15
/* Swap the 64-bit halves of %xmm8 into %xmm9. */
1075 pshufd $0x4e,%xmm8,%xmm9
/* Encoded instruction: movq %xmm8,%rdx. */
1077 .byte 102,76,15,126,194
/* Encoded instruction: mulxq 0(%rsi),%rax,%r8. */
1079 .byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
1083 mulxq 8(%rsi),%rax,%r9
1087 mulxq 16(%rsi),%rax,%r10
/* Encoded instruction: mulxq 24(%rsi),%rax,%r11. */
1091 .byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
1095 mulxq 32(%rsi),%rax,%r12
1099 mulxq 40(%rsi),%rax,%r13
/* Encoded instruction: mulxq 48(%rsi),%rax,%r14. */
1103 .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1108 mulxq 56(%rsi),%rax,%r15
/* Store %rbx at rsp + 64 + 8*rcx (%rcx is the index register here). */
1109 movq %rbx,64(%rsp,%rcx,8)
/* Repeat while the zero flag is clear (the flag-setting instruction is not visible here). */
1116 jnz .Loop_mulx_gather
/* Store %r10..%r15 to the stack at rsp+80 .. rsp+120. */
1120 movq %r10,64+16(%rsp)
1121 movq %r11,64+24(%rsp)
1122 movq %r12,64+32(%rsp)
1123 movq %r13,64+40(%rsp)
1124 movq %r14,64+48(%rsp)
1125 movq %r15,64+56(%rsp)
/* Reload the saved slots: first into %rdi, second (stored from %rcx) into %rbp. */
1128 movq 128+8(%rsp),%rdi
1129 movq 128+16(%rsp),%rbp
1140 call __rsaz_512_reducex
1153 call __rsaz_512_subtract
/* rax = rsp + 200.
   NOTE(review): presumably the base used by the epilogue's register restores; confirm. */
1155 leaq 128+24+48(%rsp),%rax
1163 .Lmul_gather4_epilogue:
1165 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
/* rsaz_512_mul_scatter4: exported function symbol. Only a subset of the
   routine's lines appears in this listing; the notes cover the visible
   instructions only. */
1166 .globl rsaz_512_mul_scatter4
1167 .type rsaz_512_mul_scatter4,@function
1169 rsaz_512_mul_scatter4:
1179 .Lmul_scatter4_body:
/* r8 = r8 + 8*r9: advance the pointer in %r8 by %r9 64-bit entries. */
1180 leaq (%r8,%r9,8),%r8
/* Encoded instructions: movq %rdi,%xmm0; movq %rdx,%xmm1; movq %r8,%xmm2.
   Three general-purpose values are copied into XMM registers. */
1181 .byte 102,72,15,110,199
1182 .byte 102,72,15,110,202
1183 .byte 102,73,15,110,208
/* AND %r11d with the 32-bit word at OPENSSL_ia32cap_P+8.
   NOTE(review): presumably a CPU-feature test selecting the mulx path; confirm. */
1188 andl OPENSSL_ia32cap_P+8(%rip),%r11d
/* Encoded instructions: movq %xmm0,%rdi; movq %xmm1,%rbp. The value copied
   from %rdx into %xmm1 ends up in %rbp. */
1194 .byte 102,72,15,126,199
1195 .byte 102,72,15,126,205
1206 call __rsaz_512_reduce
/* Skip over the mulx variant to the shared tail label. */
1207 jmp .Lmul_scatter_tail
/* mulx variant: multiply helper, then the same two XMM-to-GPR copies, then
   the mulx-based reduction. */
1212 call __rsaz_512_mulx
/* Encoded instructions: movq %xmm0,%rdi; movq %xmm1,%rbp. */
1214 .byte 102,72,15,126,199
1215 .byte 102,72,15,126,205
1227 call __rsaz_512_reducex
/* Encoded instruction: movq %xmm2,%rsi (the pointer computed by the leaq at
   the body label, now in %rsi). */
1238 .byte 102,72,15,126,214
1241 call __rsaz_512_subtract
/* rax = rsp + 200.
   NOTE(review): presumably the base used by the epilogue's register restores; confirm. */
1252 leaq 128+24+48(%rsp),%rax
1260 .Lmul_scatter4_epilogue:
1262 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
/* rsaz_512_mul_by_one: exported function symbol. Only a subset of the
   routine's lines appears in this listing; the notes cover the visible
   instructions only. */
1263 .globl rsaz_512_mul_by_one
1264 .type rsaz_512_mul_by_one,@function
1266 rsaz_512_mul_by_one:
/* Load the 32-bit word at OPENSSL_ia32cap_P+8 into %eax.
   NOTE(review): presumably tested later to choose between the two reduction
   helpers called below; the test is not visible here. */
1276 movl OPENSSL_ia32cap_P+8(%rip),%eax
/* Fill rsp+16 .. rsp+111 with copies of %xmm0 using 16-byte aligned stores.
   NOTE(review): %xmm0 is presumably zero at this point; the instruction that
   sets it is not visible here. */
1291 movdqa %xmm0,16(%rsp)
1292 movdqa %xmm0,32(%rsp)
1293 movdqa %xmm0,48(%rsp)
1294 movdqa %xmm0,64(%rsp)
1295 movdqa %xmm0,80(%rsp)
1296 movdqa %xmm0,96(%rsp)
/* Two reduction helpers are called in this routine; the branch that selects
   between them is not visible in this listing. */
1300 call __rsaz_512_reduce
1305 call __rsaz_512_reducex
/* rax = rsp + 200.
   NOTE(review): presumably the base used by the epilogue's register restores; confirm. */
1316 leaq 128+24+48(%rsp),%rax
1324 .Lmul_by_one_epilogue:
1326 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
/* __rsaz_512_reduce: internal helper (no .globl visible in this listing).
   Only a few of its lines are visible; notes cover those only. */
1327 .type __rsaz_512_reduce,@function
/* rbx = low 64 bits of (rbx * the 64-bit value at 128+8(%rsp)). */
1331 imulq 128+8(%rsp),%rbx
1334 jmp .Lreduction_loop
/* Load the same 64-bit stack value into %rsi. */
1365 movq 128+8(%rsp),%rsi
/* Loop back while the zero flag is clear (the flag-setting instruction is not visible here). */
1406 jne .Lreduction_loop
1409 .size __rsaz_512_reduce,.-__rsaz_512_reduce
/* __rsaz_512_reducex: internal helper built on mulx (no .globl visible in
   this listing). Only part of its body is visible; notes cover that part. */
1410 .type __rsaz_512_reducex,@function
1417 jmp .Lreduction_loopx
/* mulxq src,lo,hi: unsigned %rdx * src; low half to lo, high half to hi;
   flags unchanged. The eight 64-bit words at 0..56(%rbp) are multiplied by
   %rdx, with high halves going to %r8..%r15. */
1422 mulxq 0(%rbp),%rax,%r8
1426 mulxq 8(%rbp),%rax,%r9
1430 mulxq 16(%rbp),%rbx,%r10
1434 mulxq 24(%rbp),%rbx,%r11
/* Encoded instruction: mulxq 32(%rbp),%rbx,%r12. */
1438 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
/* Multiply %rdx by the 64-bit value at 128+8(%rsp): low half to %rbx, high
   half written back to %rdx, replacing the multiplicand. */
1444 mulxq 128+8(%rsp),%rbx,%rdx
1447 mulxq 40(%rbp),%rax,%r13
/* Encoded instruction: mulxq 48(%rbp),%rax,%r14. */
1451 .byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
1455 mulxq 56(%rbp),%rax,%r15
/* Loop back while the zero flag is clear (the flag-setting instruction is not visible here). */
1462 jne .Lreduction_loopx
1465 .size __rsaz_512_reducex,.-__rsaz_512_reducex
/* __rsaz_512_subtract: internal helper called from the public rsaz_512_*
   routines in this file. Its body is not visible in this listing; only the
   symbol type, the entry label and the size directive appear. */
1466 .type __rsaz_512_subtract,@function
1468 __rsaz_512_subtract:
1522 .size __rsaz_512_subtract,.-__rsaz_512_subtract
/* __rsaz_512_mul: internal helper. Its body is not visible in this listing;
   only the symbol type and size directives appear. */
1523 .type __rsaz_512_mul,@function
1664 .size __rsaz_512_mul,.-__rsaz_512_mul
/* __rsaz_512_mulx: internal helper built on mulx (no .globl visible in this
   listing). Only part of its body is visible; notes cover that part. */
1665 .type __rsaz_512_mulx,@function
/* mulxq src,lo,hi: unsigned %rdx * src; low half to lo, high half to hi;
   flags unchanged. First visible pass: the eight 64-bit words at 0..56(%rsi)
   are multiplied by %rdx, with high halves going to %r8..%r15. */
1668 mulxq (%rsi),%rbx,%r8
1671 mulxq 8(%rsi),%rax,%r9
1674 mulxq 16(%rsi),%rbx,%r10
1677 mulxq 24(%rsi),%rax,%r11
1680 mulxq 32(%rsi),%rbx,%r12
1683 mulxq 40(%rsi),%rax,%r13
1686 mulxq 48(%rsi),%rbx,%r14
1689 mulxq 56(%rsi),%rax,%r15
/* Second visible pass over the same eight words; each low half goes to %rax. */
1701 mulxq (%rsi),%rax,%r8
1705 mulxq 8(%rsi),%rax,%r9
1709 mulxq 16(%rsi),%rax,%r10
1713 mulxq 24(%rsi),%rax,%r11
/* Encoded instruction: mulxq 32(%rsi),%rax,%r12, preceded by a 0x3e (DS
   segment-override) prefix byte.
   NOTE(review): the prefix presumably only pads the encoding; confirm. */
1717 .byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
1721 mulxq 40(%rsi),%rax,%r13
1725 mulxq 48(%rsi),%rax,%r14
1729 mulxq 56(%rsi),%rax,%r15
/* Load the 64-bit word at rbp + 64 + 8*rcx into %rdx, the implicit mulx multiplicand. */
1730 movq 64(%rbp,%rcx,8),%rdx
/* Store %rbx at rsp + 64 + 8*rcx (the displacement 8+64-8 evaluates to 64). */
1731 movq %rbx,8+64-8(%rsp,%rcx,8)
/* Third visible pass over the eight words at 0..56(%rsi). */
1740 mulxq (%rsi),%rax,%r8
/* Encoded instruction: mulxq 8(%rsi),%rax,%r9. */
1744 .byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
/* Encoded instruction: mulxq 16(%rsi),%rax,%r10. */
1748 .byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
1752 mulxq 24(%rsi),%rax,%r11
1756 mulxq 32(%rsi),%rax,%r12
1760 mulxq 40(%rsi),%rax,%r13
/* Encoded instruction: mulxq 48(%rsi),%rax,%r14. */
1764 .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
/* Encoded instruction: mulxq 56(%rsi),%rax,%r15. */
1768 .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
/* Store %rbx at rsp+64. */
1773 movq %rbx,8+64-8(%rsp)
/* Store %r9..%r15 at rsp+80 .. rsp+128. */
1775 movq %r9,8+64+8(%rsp)
1776 movq %r10,8+64+16(%rsp)
1777 movq %r11,8+64+24(%rsp)
1778 movq %r12,8+64+32(%rsp)
1779 movq %r13,8+64+40(%rsp)
1780 movq %r14,8+64+48(%rsp)
1781 movq %r15,8+64+56(%rsp)
1784 .size __rsaz_512_mulx,.-__rsaz_512_mulx
/* rsaz_512_scatter4: exported function symbol. Only one instruction of its
   body is visible in this listing. */
1785 .globl rsaz_512_scatter4
1786 .type rsaz_512_scatter4,@function
/* rdi = rdi + 8*rdx: advance the pointer in %rdi by %rdx 64-bit entries. */
1789 leaq (%rdi,%rdx,8),%rdi
1801 .size rsaz_512_scatter4,.-rsaz_512_scatter4
/* rsaz_512_gather4: exported function symbol. Only a subset of the routine's
   lines appears in this listing; the notes cover the visible instructions only. */
1803 .globl rsaz_512_gather4
1804 .type rsaz_512_gather4,@function
/* Load two 16-byte constants from .Linc+16 and .Linc (table not visible here). */
1808 movdqa .Linc+16(%rip),%xmm1
1809 movdqa .Linc(%rip),%xmm0
/* Broadcast the low 32-bit lane of %xmm8 to all four lanes. */
1811 pshufd $0,%xmm8,%xmm8
/* 16-byte loads from offsets 0..112 off %rsi. movdqa requires each address
   to be 16-byte aligned. */
1837 movdqa 0(%rsi),%xmm8
1838 movdqa 16(%rsi),%xmm9
1839 movdqa 32(%rsi),%xmm10
1840 movdqa 48(%rsi),%xmm11
1842 movdqa 64(%rsi),%xmm12
1844 movdqa 80(%rsi),%xmm13
1846 movdqa 96(%rsi),%xmm14
1848 movdqa 112(%rsi),%xmm15
/* %xmm9 = %xmm8 with its two 64-bit halves swapped (shuffle control 0x4e). */
1862 pshufd $0x4e,%xmm8,%xmm9
1869 .LSEH_end_rsaz_512_gather4:
1870 .size rsaz_512_gather4,.-rsaz_512_gather4