/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
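/*
 * RSAZ-512: 512-bit Montgomery multiplication and squaring primitives for
 * x86_64, plus the constant-time scatter4/gather4 table helpers.  Public
 * entry points: rsaz_512_sqr, rsaz_512_mul, rsaz_512_mul_gather4,
 * rsaz_512_mul_scatter4, rsaz_512_mul_by_one, rsaz_512_scatter4 and
 * rsaz_512_gather4.
 *
 * Best-effort reading of the hand-assembled sequences: ".byte 102,7x,15,110,..."
 * encodes a 64-bit movq from a general-purpose register into an XMM register,
 * ".byte 102,7x,15,126,..." the move back out, the ".byte 0xc4,..." sequences
 * are VEX-encoded mulx instructions spelled out with explicit 32-bit
 * displacements, and the ".byte 0x48/0x4c,..." sequences are ordinary movq
 * instructions likewise written with 32-bit displacements.
 */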
.type rsaz_512_sqr,@function
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 128+24
.byte 102,72,15,110,202
andl OPENSSL_ia32cap_P+8(%rip),%r11d
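# feature dispatch: the masked OPENSSL_ia32cap_P word tests for BMI2 and ADX,
# and the mulx/adcx/adox squaring loop below is used when both are present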
.byte 102,72,15,126,205
call __rsaz_512_reduce
call __rsaz_512_subtract
movl 128+8(%rsp),%r8d
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199
mulxq 16(%rsi),%rcx,%r10
mulxq 24(%rsi),%rax,%r11
.byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
mulxq 48(%rsi),%rcx,%r14
mulxq 56(%rsi),%rax,%r15
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
mulxq 24(%rsi),%rdi,%r8
mulxq 32(%rsi),%rax,%rbx
mulxq 40(%rsi),%rdi,%r8
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
.byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
mulxq 24(%rsi),%rdi,%r9
mulxq 32(%rsi),%rax,%rcx
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
mulxq 56(%rsi),%rdi,%r9
mulxq 32(%rsi),%rax,%rbx
mulxq 40(%rsi),%rdi,%r10
mulxq 48(%rsi),%rax,%rbx
mulxq 56(%rsi),%rdi,%r10
mulxq 40(%rsi),%rdi,%r11
mulxq 48(%rsi),%rax,%rcx
mulxq 56(%rsi),%rdi,%r11
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
.byte 102,72,15,126,199
.byte 102,72,15,126,205
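# the two .byte sequences above decode as movq %xmm0,%rdi and movq %xmm1,%rbp,
# recovering the output and modulus pointers stashed in XMM registers before
# the reduction is called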
call __rsaz_512_reducex
call __rsaz_512_subtract
movl 128+8(%rsp),%r8d
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa_register %rsp
.size rsaz_512_sqr,.-rsaz_512_sqr
.type rsaz_512_mul,@function
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 128+24
.byte 102,72,15,110,199
.byte 102,72,15,110,201
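# movq %rdi,%xmm0 / movq %rcx,%xmm1: stash the output and modulus pointers in
# XMM registers so they survive the multiplication and can be reloaded for the
# reduction below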
andl OPENSSL_ia32cap_P+8(%rip),%r11d
.byte 102,72,15,126,199
.byte 102,72,15,126,205
call __rsaz_512_reduce
.byte 102,72,15,126,199
.byte 102,72,15,126,205
call __rsaz_512_reducex
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa_register %rsp
.size rsaz_512_mul,.-rsaz_512_mul
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
rsaz_512_mul_gather4:
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 8
.cfi_adjust_cfa_offset 152
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0
pshufd $0,%xmm8,%xmm8
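# constant-time selection setup: broadcast the requested power across %xmm8
# and compare it against the lane indices built from .Linc to form the gather
# masks, so no table access below depends on a secret-derived address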
movdqa 16(%rdx),%xmm9
movdqa 32(%rdx),%xmm10
movdqa 48(%rdx),%xmm11
movdqa 64(%rdx),%xmm12
movdqa 80(%rdx),%xmm13
movdqa 96(%rdx),%xmm14
movdqa 112(%rdx),%xmm15
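# every 16-byte lane of the 128-byte table row is loaded unconditionally into
# %xmm8-%xmm15, then masked and OR-combined so only the selected power
# contributes to the gathered limb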
pshufd $0x4e,%xmm8,%xmm9
andl OPENSSL_ia32cap_P+8(%rip),%r11d
.byte 102,76,15,126,195
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
movdqa 64(%rbp),%xmm12
movdqa 80(%rbp),%xmm13
movdqa 96(%rbp),%xmm14
movdqa 112(%rbp),%xmm15
pshufd $0x4e,%xmm8,%xmm9
.byte 102,76,15,126,195
jnz .Loop_mul_gather
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
call __rsaz_512_reduce
jmp .Lmul_gather_tail
.byte 102,76,15,126,194
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
mulxq (%rsi),%rbx,%r8
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rbx,%r10
mulxq 24(%rsi),%rax,%r11
mulxq 32(%rsi),%rbx,%r12
mulxq 40(%rsi),%rax,%r13
mulxq 48(%rsi),%rbx,%r14
mulxq 56(%rsi),%rax,%r15
jmp .Loop_mulx_gather
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
movdqa 64(%rbp),%xmm12
movdqa 80(%rbp),%xmm13
movdqa 96(%rbp),%xmm14
movdqa 112(%rbp),%xmm15
pshufd $0x4e,%xmm8,%xmm9
.byte 102,76,15,126,194
.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rax,%r10
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
mulxq 32(%rsi),%rax,%r12
mulxq 40(%rsi),%rax,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
jnz .Loop_mulx_gather
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
call __rsaz_512_reducex
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
rsaz_512_mul_scatter4:
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
.byte 102,72,15,110,202
.byte 102,73,15,110,208
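# %r8 now addresses the scatter slot (8*power bytes into the table); the three
# .byte sequences above park %rdi, %rdx and that address in %xmm0-%xmm2 so
# they survive the multiplication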
andl OPENSSL_ia32cap_P+8(%rip),%r11d
.byte 102,72,15,126,199
.byte 102,72,15,126,205
call __rsaz_512_reduce
jmp .Lmul_scatter_tail
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
call __rsaz_512_reducex
.byte 102,72,15,126,214
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
rsaz_512_mul_by_one:
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.cfi_adjust_cfa_offset 128+24
movl OPENSSL_ia32cap_P+8(%rip),%eax
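# capability word for the BMI2/ADX test that selects __rsaz_512_reduce or
# __rsaz_512_reducex further down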
movdqa %xmm0,16(%rsp)
movdqa %xmm0,32(%rsp)
movdqa %xmm0,48(%rsp)
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
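# clear the 128-byte work area with %xmm0 (zeroed earlier in the function);
# Montgomery-reducing the zero-extended input yields in*R^-1 mod n, i.e. the
# conversion out of Montgomery form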
call __rsaz_512_reduce
call __rsaz_512_reducex
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
.type __rsaz_512_reduce,@function
imulq 128+8(%rsp),%rbx
jmp .Lreduction_loop
movq 128+8(%rsp),%rsi
jne .Lreduction_loop
.size __rsaz_512_reduce,.-__rsaz_512_reduce
.type __rsaz_512_reducex,@function
jmp .Lreduction_loopx
mulxq 0(%rbp),%rax,%r8
mulxq 8(%rbp),%rax,%r9
mulxq 16(%rbp),%rbx,%r10
mulxq 24(%rbp),%rbx,%r11
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
mulxq 128+8(%rsp),%rbx,%rdx
mulxq 40(%rbp),%rax,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
mulxq 56(%rbp),%rax,%r15
jne .Lreduction_loopx
.size __rsaz_512_reducex,.-__rsaz_512_reducex
.type __rsaz_512_subtract,@function
__rsaz_512_subtract:
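# stores the eight result limbs to the caller's buffer, then performs the
# final conditional subtraction of the modulus using a mask rather than a
# branch, so the choice does not leak through the control flow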
.size __rsaz_512_subtract,.-__rsaz_512_subtract
.type __rsaz_512_mul,@function
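# __rsaz_512_mul: baseline 512x512-bit schoolbook multiply built on mulq/adcq,
# used when MULX/ADX is not available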
.size __rsaz_512_mul,.-__rsaz_512_mul
.type __rsaz_512_mulx,@function
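# __rsaz_512_mulx: the BMI2/ADX variant of the 512x512-bit multiply, built on
# mulx with adcx/adox carry chains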
mulxq (%rsi),%rbx,%r8
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rbx,%r10
mulxq 24(%rsi),%rax,%r11
mulxq 32(%rsi),%rbx,%r12
mulxq 40(%rsi),%rax,%r13
mulxq 48(%rsi),%rbx,%r14
mulxq 56(%rsi),%rax,%r15
mulxq (%rsi),%rax,%r8
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rax,%r10
mulxq 24(%rsi),%rax,%r11
.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
mulxq 40(%rsi),%rax,%r13
mulxq 48(%rsi),%rax,%r14
mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
mulxq (%rsi),%rax,%r8
.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
mulxq 24(%rsi),%rax,%r11
mulxq 32(%rsi),%rax,%r12
mulxq 40(%rsi),%rax,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
movq %rbx,8+64-8(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)
.size __rsaz_512_mulx,.-__rsaz_512_mulx
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
leaq (%rdi,%rdx,8),%rdi
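# scatter: limb i of the value is stored at 8*power + 128*i bytes into the
# table, i.e. the sixteen table entries are interleaved eight bytes at a time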
.size rsaz_512_scatter4,.-rsaz_512_scatter4
.globl rsaz_512_gather4
.type rsaz_512_gather4,@function
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0
pshufd $0,%xmm8,%xmm8
movdqa 0(%rsi),%xmm8
movdqa 16(%rsi),%xmm9
movdqa 32(%rsi),%xmm10
movdqa 48(%rsi),%xmm11
movdqa 64(%rsi),%xmm12
movdqa 80(%rsi),%xmm13
movdqa 96(%rsi),%xmm14
movdqa 112(%rsi),%xmm15
pshufd $0x4e,%xmm8,%xmm9
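# pshufd $0x4e swaps the two qwords of the masked-and-merged row so that, once
# OR-ed back together, the selected 8-byte limb sits in the low half ready to
# be stored to the output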
.LSEH_end_rsaz_512_gather4:
.size rsaz_512_gather4,.-rsaz_512_gather4