2 /* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
# rsaz_512_sqr — 512-bit modular squaring (RSAZ, OpenSSL).
# NOTE(review): this view is a sampled excerpt of the auto-generated file;
# most instructions between the visible lines are elided. Comments describe
# only what the visible lines themselves establish.
8 .type rsaz_512_sqr,@function
# Prologue: six 8-byte CFA adjustments, one per pushed callee-saved register
# (rbx/rbp/r12-r15, per the .cfi_offset lines visible in sibling functions).
13 .cfi_adjust_cfa_offset 8
16 .cfi_adjust_cfa_offset 8
19 .cfi_adjust_cfa_offset 8
22 .cfi_adjust_cfa_offset 8
25 .cfi_adjust_cfa_offset 8
28 .cfi_adjust_cfa_offset 8
# 128+24 = 152-byte local stack frame on top of the six pushes.
32 .cfi_adjust_cfa_offset 128+24
# Probe the CPU capability word (dword 2 of OPENSSL_ia32cap_P); presumably
# selects between the classic-mul and MULX/ADX code paths — TODO confirm mask.
39 andl OPENSSL_ia32cap_P+8(%rip),%r11d
# Squaring: each leaq (a,b,2) computes b = a + 2*b, i.e. doubles a cross
# product while folding in another partial term, without touching flags.
153 leaq (%rcx,%r10,2),%r10
193 leaq (%rbx,%r12,2),%r12
211 leaq (%r10,%r13,2),%r13
241 leaq (%rcx,%r14,2),%r14
259 leaq (%r12,%r15,2),%r15
284 leaq (%rbx,%r8,2),%r8
299 leaq (%r12,%r9,2),%r9
323 leaq (%rcx,%r10,2),%r10
331 leaq (%r15,%r11,2),%r11
# Scalar (non-MULX) path: shared reduction then conditional subtract helper.
384 call __rsaz_512_reduce
396 call __rsaz_512_subtract
# Loop counter kept in the frame slot 128+8(%rsp).
400 movl 128+8(%rsp),%r8d
409 movl %r8d,128+8(%rsp)
# Hand-encoded SSE2 moves (66 REX.W 0F 6E /r), emitted as raw bytes for old
# assemblers; presumably movq %rdi,%xmm0 / movq %rbp,%xmm1 to stash the
# output and modulus pointers across the multiply — TODO confirm operands.
410 .byte 102,72,15,110,199
411 .byte 102,72,15,110,205
# MULX path: mulxq is a flag-free widening multiply; implicit source is rdx,
# destinations are low,high in that operand order.
415 mulxq 16(%rsi),%rcx,%r10
418 mulxq 24(%rsi),%rax,%r11
421 mulxq 32(%rsi),%rcx,%r12
424 mulxq 40(%rsi),%rax,%r13
# VEX-encoded mulx forms emitted as raw .byte sequences (0xc4 = 3-byte VEX)
# so the file assembles on toolchains that predate BMI2 mnemonics.
427 .byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
431 .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
449 mulxq 16(%rsi),%rax,%rbx
453 .byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
457 mulxq 32(%rsi),%rax,%rbx
461 mulxq 40(%rsi),%rdi,%r8
465 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
469 .byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
# Raw-encoded movq of an r8-r15 register to a stack slot (4C 89 /r, SIB=rsp).
486 .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
489 .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
493 mulxq 32(%rsi),%rax,%rcx
497 mulxq 40(%rsi),%rdi,%r9
501 .byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
505 .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
522 .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
525 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
529 mulxq 40(%rsi),%rdi,%r10
533 mulxq 48(%rsi),%rax,%rbx
537 mulxq 56(%rsi),%rdi,%r10
558 .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
562 mulxq 48(%rsi),%rax,%rcx
566 mulxq 56(%rsi),%rdi,%r11
586 .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
590 .byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
610 .byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
# Raw-encoded stores of r11/r12 into the 64+… scratch area (4C 89 /r).
626 .byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
627 .byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
# Recover stashed pointers (66 REX.W 0F 7E /r = movq xmm -> GPR); presumably
# movq %xmm0,%rdi / movq %xmm1,%rbp — TODO confirm operands.
639 .byte 102,72,15,126,199
640 .byte 102,72,15,126,205
# MULX path uses the reducex (MULX-based) reduction variant.
652 call __rsaz_512_reducex
664 call __rsaz_512_subtract
668 movl 128+8(%rsp),%r8d
# Epilogue: rax -> saved-register area (152-byte frame + 48 bytes of pushes).
676 leaq 128+24+48(%rsp),%rax
691 .cfi_def_cfa_register %rsp
695 .size rsaz_512_sqr,.-rsaz_512_sqr
# rsaz_512_mul — 512-bit modular multiplication (RSAZ, OpenSSL).
# NOTE(review): sampled excerpt — most instructions are elided from this view.
697 .type rsaz_512_mul,@function
# Prologue: six callee-saved pushes (8 bytes of CFA each)…
702 .cfi_adjust_cfa_offset 8
705 .cfi_adjust_cfa_offset 8
708 .cfi_adjust_cfa_offset 8
711 .cfi_adjust_cfa_offset 8
714 .cfi_adjust_cfa_offset 8
717 .cfi_adjust_cfa_offset 8
# …plus a 152-byte (128+24) local frame.
721 .cfi_adjust_cfa_offset 128+24
# Raw-encoded movq GPR->XMM (66 REX.W 0F 6E /r): stash pointers in xmm0/xmm1
# so they survive the multiply without touching the stack — presumably
# %rdi and %rcx; TODO confirm operands.
723 .byte 102,72,15,110,199
724 .byte 102,72,15,110,201
# CPU feature probe — presumably selects the MULX/ADX path; TODO confirm mask.
727 andl OPENSSL_ia32cap_P+8(%rip),%r11d
# Scalar path: recover stashed pointers (66 REX.W 0F 7E /r, xmm -> GPR),
# then reduce with the classic-mul helper.
734 .byte 102,72,15,126,199
735 .byte 102,72,15,126,205
746 call __rsaz_512_reduce
# MULX path: same pointer recovery, MULX-based reduction.
755 .byte 102,72,15,126,199
756 .byte 102,72,15,126,205
768 call __rsaz_512_reducex
# Both paths converge on the shared conditional-subtract helper.
780 call __rsaz_512_subtract
# Epilogue: rax -> saved-register area for unwinding/pops.
782 leaq 128+24+48(%rsp),%rax
797 .cfi_def_cfa_register %rsp
801 .size rsaz_512_mul,.-rsaz_512_mul
# rsaz_512_mul_gather4 — 512-bit modular multiply where the second operand is
# gathered from a table by index (constant-time selection, cache-timing safe).
# NOTE(review): sampled excerpt — most instructions are elided from this view.
802 .globl rsaz_512_mul_gather4
803 .type rsaz_512_mul_gather4,@function
805 rsaz_512_mul_gather4:
# Prologue: six callee-saved pushes…
808 .cfi_adjust_cfa_offset 8
811 .cfi_adjust_cfa_offset 8
814 .cfi_adjust_cfa_offset 8
817 .cfi_adjust_cfa_offset 8
820 .cfi_adjust_cfa_offset 8
823 .cfi_adjust_cfa_offset 8
# …plus a 152-byte local frame.
827 .cfi_adjust_cfa_offset 152
# Build selection masks from the .Linc increment constants; pshufd $0
# broadcasts the (presumed) gather index across all lanes of xmm8.
830 movdqa .Linc+16(%rip),%xmm1
831 movdqa .Linc(%rip),%xmm0
833 pshufd $0,%xmm8,%xmm8
# Load all eight 16-byte columns of the table row unconditionally (rdx =
# table base here) so the access pattern is independent of the index.
857 movdqa 16(%rdx),%xmm9
858 movdqa 32(%rdx),%xmm10
859 movdqa 48(%rdx),%xmm11
861 movdqa 64(%rdx),%xmm12
863 movdqa 80(%rdx),%xmm13
865 movdqa 96(%rdx),%xmm14
867 movdqa 112(%rdx),%xmm15
# pshufd $0x4e swaps the two 64-bit halves of xmm8 (combine selected qwords).
881 pshufd $0x4e,%xmm8,%xmm9
# CPU feature probe — presumably chooses scalar vs MULX loop; TODO confirm.
884 andl OPENSSL_ia32cap_P+8(%rip),%r11d
# Raw-encoded movq %xmm8,%rbx (66 REX.WR 0F 7E /r) — selected limb to a GPR.
887 .byte 102,76,15,126,195
# Save output pointer and modulus pointer in the frame for after the loop.
890 movq %rdi,128+8(%rsp)
891 movq %rcx,128+16(%rsp)
# Scalar gather loop body: same unconditional 8-column load, now from rbp.
949 movdqa 16(%rbp),%xmm9
950 movdqa 32(%rbp),%xmm10
951 movdqa 48(%rbp),%xmm11
953 movdqa 64(%rbp),%xmm12
955 movdqa 80(%rbp),%xmm13
957 movdqa 96(%rbp),%xmm14
959 movdqa 112(%rbp),%xmm15
973 pshufd $0x4e,%xmm8,%xmm9
975 .byte 102,76,15,126,195
1043 jnz .Loop_mul_gather
# Restore saved pointers, reduce, and join the common tail.
1054 movq 128+8(%rsp),%rdi
1055 movq 128+16(%rsp),%rbp
1066 call __rsaz_512_reduce
1067 jmp .Lmul_gather_tail
# MULX path: raw-encoded movq %xmm8,%rdx — the gathered limb becomes the
# implicit mulx multiplier.
1071 .byte 102,76,15,126,194
1074 movq %rdi,128+8(%rsp)
1075 movq %rcx,128+16(%rsp)
# First row of 64x64->128 partial products: mulxq src,lo,hi (flag-free).
1077 mulxq (%rsi),%rbx,%r8
1081 mulxq 8(%rsi),%rax,%r9
1083 mulxq 16(%rsi),%rbx,%r10
1086 mulxq 24(%rsi),%rax,%r11
1089 mulxq 32(%rsi),%rbx,%r12
1092 mulxq 40(%rsi),%rax,%r13
1095 mulxq 48(%rsi),%rbx,%r14
1098 mulxq 56(%rsi),%rax,%r15
1106 jmp .Loop_mulx_gather
# MULX gather-loop body: constant-time 8-column load and select, as above.
1110 movdqa 0(%rbp),%xmm8
1111 movdqa 16(%rbp),%xmm9
1112 movdqa 32(%rbp),%xmm10
1113 movdqa 48(%rbp),%xmm11
1115 movdqa 64(%rbp),%xmm12
1117 movdqa 80(%rbp),%xmm13
1119 movdqa 96(%rbp),%xmm14
1121 movdqa 112(%rbp),%xmm15
1135 pshufd $0x4e,%xmm8,%xmm9
1137 .byte 102,76,15,126,194
# VEX-encoded mulx (raw bytes for pre-BMI2 assemblers) mixed with mnemonics.
1139 .byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
1143 mulxq 8(%rsi),%rax,%r9
1147 mulxq 16(%rsi),%rax,%r10
1151 .byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
1155 mulxq 32(%rsi),%rax,%r12
1159 mulxq 40(%rsi),%rax,%r13
1163 .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1168 mulxq 56(%rsi),%rax,%r15
# Store the finished limb of this column into the 64+8*i scratch row.
1169 movq %rbx,64(%rsp,%rcx,8)
1176 jnz .Loop_mulx_gather
# Spill the 512-bit product tail into the scratch area.
1180 movq %r10,64+16(%rsp)
1181 movq %r11,64+24(%rsp)
1182 movq %r12,64+32(%rsp)
1183 movq %r13,64+40(%rsp)
1184 movq %r14,64+48(%rsp)
1185 movq %r15,64+56(%rsp)
1188 movq 128+8(%rsp),%rdi
1189 movq 128+16(%rsp),%rbp
1200 call __rsaz_512_reducex
1213 call __rsaz_512_subtract
# Epilogue: restore callee-saved registers from above the frame.
1215 leaq 128+24+48(%rsp),%rax
1230 .cfi_def_cfa_register %rsp
1231 .Lmul_gather4_epilogue:
1234 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
# rsaz_512_mul_scatter4 — 512-bit modular multiply that scatters the result
# into a table at a stride-8 index (counterpart of mul_gather4).
# NOTE(review): sampled excerpt — most instructions are elided from this view.
1235 .globl rsaz_512_mul_scatter4
1236 .type rsaz_512_mul_scatter4,@function
1238 rsaz_512_mul_scatter4:
# Prologue: push the six callee-saved registers, with matching CFI records.
1241 .cfi_adjust_cfa_offset 8
1242 .cfi_offset %rbx,-16
1244 .cfi_adjust_cfa_offset 8
1245 .cfi_offset %rbp,-24
1247 .cfi_adjust_cfa_offset 8
1248 .cfi_offset %r12,-32
1250 .cfi_adjust_cfa_offset 8
1251 .cfi_offset %r13,-40
1253 .cfi_adjust_cfa_offset 8
1254 .cfi_offset %r14,-48
1256 .cfi_adjust_cfa_offset 8
1257 .cfi_offset %r15,-56
# 152-byte local frame.
1261 .cfi_adjust_cfa_offset 128+24
1262 .Lmul_scatter4_body:
# r8 = scatter destination: base + index*8 (index arrives in r9).
1263 leaq (%r8,%r9,8),%r8
# Raw-encoded movq GPR->XMM stashes (66 0F 6E /r with REX): presumably
# %rdi->xmm0, %rdx->xmm1, %r8->xmm2 — TODO confirm operands.
1264 .byte 102,72,15,110,199
1265 .byte 102,72,15,110,202
1266 .byte 102,73,15,110,208
# CPU feature probe — presumably selects scalar vs MULX path.
1271 andl OPENSSL_ia32cap_P+8(%rip),%r11d
# Scalar path: recover pointers from xmm0/xmm1, reduce, join common tail.
1277 .byte 102,72,15,126,199
1278 .byte 102,72,15,126,205
1289 call __rsaz_512_reduce
1290 jmp .Lmul_scatter_tail
# MULX path: dedicated multiply helper, then MULX-based reduction.
1295 call __rsaz_512_mulx
1297 .byte 102,72,15,126,199
1298 .byte 102,72,15,126,205
1310 call __rsaz_512_reducex
# Recover the scatter pointer (presumably movq %xmm2,%rsi — TODO confirm),
# then do the final conditional subtract before scattering.
1321 .byte 102,72,15,126,214
1324 call __rsaz_512_subtract
# Epilogue.
1335 leaq 128+24+48(%rsp),%rax
1350 .cfi_def_cfa_register %rsp
1351 .Lmul_scatter4_epilogue:
1354 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
# rsaz_512_mul_by_one — Montgomery multiply by 1, i.e. convert a value out of
# Montgomery form via a bare reduction (no multiply loop needed).
# NOTE(review): sampled excerpt — most instructions are elided from this view.
1355 .globl rsaz_512_mul_by_one
1356 .type rsaz_512_mul_by_one,@function
1358 rsaz_512_mul_by_one:
# Prologue: push the six callee-saved registers with CFI records.
1361 .cfi_adjust_cfa_offset 8
1362 .cfi_offset %rbx,-16
1364 .cfi_adjust_cfa_offset 8
1365 .cfi_offset %rbp,-24
1367 .cfi_adjust_cfa_offset 8
1368 .cfi_offset %r12,-32
1370 .cfi_adjust_cfa_offset 8
1371 .cfi_offset %r13,-40
1373 .cfi_adjust_cfa_offset 8
1374 .cfi_offset %r14,-48
1376 .cfi_adjust_cfa_offset 8
1377 .cfi_offset %r15,-56
# 152-byte local frame.
1380 .cfi_adjust_cfa_offset 128+24
# Read the CPU capability word to pick the reduce vs reducex path below.
1382 movl OPENSSL_ia32cap_P+8(%rip),%eax
# Zero the upper half of the scratch product (xmm0 presumably zeroed just
# above this excerpt — TODO confirm), so reduction sees input * 1.
1397 movdqa %xmm0,16(%rsp)
1398 movdqa %xmm0,32(%rsp)
1399 movdqa %xmm0,48(%rsp)
1400 movdqa %xmm0,64(%rsp)
1401 movdqa %xmm0,80(%rsp)
1402 movdqa %xmm0,96(%rsp)
# One of the two reduction helpers runs depending on the feature bits.
1406 call __rsaz_512_reduce
1411 call __rsaz_512_reducex
# Epilogue.
1422 leaq 128+24+48(%rsp),%rax
1437 .cfi_def_cfa_register %rsp
1438 .Lmul_by_one_epilogue:
1441 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
# __rsaz_512_reduce — Montgomery reduction helper, classic imul/mul path.
# Local (non-exported) routine shared by the rsaz_512_* entry points.
# NOTE(review): sampled excerpt — the loop body is almost entirely elided.
1442 .type __rsaz_512_reduce,@function
# rbx *= the scalar saved at 128+8(%rsp) — presumably n0 = -m^-1 mod 2^64,
# producing the per-iteration Montgomery quotient digit; TODO confirm.
1446 imulq 128+8(%rsp),%rbx
1449 jmp .Lreduction_loop
# The same frame slot is reloaded each iteration.
1480 movq 128+8(%rsp),%rsi
# Iterate until the (elided) counter is exhausted.
1521 jne .Lreduction_loop
1524 .size __rsaz_512_reduce,.-__rsaz_512_reduce
# __rsaz_512_reducex — Montgomery reduction helper, MULX (BMI2) path.
# Same contract as __rsaz_512_reduce but built on flag-free mulx so the
# carry chains can use adcx/adox (elided from this view).
# NOTE(review): sampled excerpt — most of the loop body is elided.
1525 .type __rsaz_512_reducex,@function
1532 jmp .Lreduction_loopx
# Multiply the quotient digit (implicit rdx) by each modulus limb at rbp+8*i;
# mulxq writes low,high without touching flags.
1537 mulxq 0(%rbp),%rax,%r8
1541 mulxq 8(%rbp),%rax,%r9
1545 mulxq 16(%rbp),%rbx,%r10
1549 mulxq 24(%rbp),%rbx,%r11
# VEX-encoded mulx emitted as raw bytes (pre-BMI2 assembler compatibility).
1553 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
# Next quotient digit: multiply by the saved scalar at 128+8(%rsp)
# (presumably n0 — TODO confirm), result into rdx for the next round.
1559 mulxq 128+8(%rsp),%rbx,%rdx
1562 mulxq 40(%rbp),%rax,%r13
1566 .byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
1570 mulxq 56(%rbp),%rax,%r15
1577 jne .Lreduction_loopx
1580 .size __rsaz_512_reducex,.-__rsaz_512_reducex
# __rsaz_512_subtract — shared tail helper; presumably performs the final
# conditional subtraction of the modulus and stores the result — the entire
# body is elided from this view, so this is unconfirmed (TODO confirm).
1581 .type __rsaz_512_subtract,@function
1583 __rsaz_512_subtract:
1637 .size __rsaz_512_subtract,.-__rsaz_512_subtract
# __rsaz_512_mul — scalar 512x512 multiply helper (classic mul path);
# the entire body is elided from this view, only the symbol bounds remain.
1638 .type __rsaz_512_mul,@function
1779 .size __rsaz_512_mul,.-__rsaz_512_mul
# __rsaz_512_mulx — 512x512-bit multiply helper on the MULX (BMI2) path.
# Implicit multiplier limb lives in rdx; multiplicand limbs at rsi+8*i;
# partial results accumulate in r8..r15 and spill to the 8+64 scratch row.
# NOTE(review): sampled excerpt — the add/carry glue between these lines
# is elided from this view.
1780 .type __rsaz_512_mulx,@function
# First row of partial products: mulxq src,lo,hi (flag-free widening mul).
1783 mulxq (%rsi),%rbx,%r8
1786 mulxq 8(%rsi),%rax,%r9
1789 mulxq 16(%rsi),%rbx,%r10
1792 mulxq 24(%rsi),%rax,%r11
1795 mulxq 32(%rsi),%rbx,%r12
1798 mulxq 40(%rsi),%rax,%r13
1801 mulxq 48(%rsi),%rbx,%r14
1804 mulxq 56(%rsi),%rax,%r15
# Middle rows: same pattern, low parts folded into the accumulators via rax.
1816 mulxq (%rsi),%rax,%r8
1820 mulxq 8(%rsi),%rax,%r9
1824 mulxq 16(%rsi),%rax,%r10
1828 mulxq 24(%rsi),%rax,%r11
# Raw-encoded mulx with a 0x3e (DS) prefix pad — same instruction, sized to
# keep code alignment stable across assembler versions.
1832 .byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
1836 mulxq 40(%rsi),%rax,%r13
1840 mulxq 48(%rsi),%rax,%r14
1844 mulxq 56(%rsi),%rax,%r15
# Next multiplier limb from the second operand (rbp+64+8*i), and store the
# completed low limb of this column into the scratch row.
1845 movq 64(%rbp,%rcx,8),%rdx
1846 movq %rbx,8+64-8(%rsp,%rcx,8)
# Final row.
1855 mulxq (%rsi),%rax,%r8
# VEX-encoded mulx forms as raw bytes (pre-BMI2 assembler compatibility).
1859 .byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
1863 .byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
1867 mulxq 24(%rsi),%rax,%r11
1871 mulxq 32(%rsi),%rax,%r12
1875 mulxq 40(%rsi),%rax,%r13
1879 .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1883 .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
# Spill the upper half of the product into the 8+64 scratch area.
1888 movq %rbx,8+64-8(%rsp)
1890 movq %r9,8+64+8(%rsp)
1891 movq %r10,8+64+16(%rsp)
1892 movq %r11,8+64+24(%rsp)
1893 movq %r12,8+64+32(%rsp)
1894 movq %r13,8+64+40(%rsp)
1895 movq %r14,8+64+48(%rsp)
1896 movq %r15,8+64+56(%rsp)
1899 .size __rsaz_512_mulx,.-__rsaz_512_mulx
# rsaz_512_scatter4 — write a 512-bit value into a power table at a given
# index. NOTE(review): sampled excerpt — the store loop is elided.
1900 .globl rsaz_512_scatter4
1901 .type rsaz_512_scatter4,@function
# rdi = table base + index*8 (index in rdx, 8-byte stride).
1904 leaq (%rdi,%rdx,8),%rdi
1916 .size rsaz_512_scatter4,.-rsaz_512_scatter4
# rsaz_512_gather4 — read a 512-bit value out of a power table by index,
# touching every table slot so the memory access pattern is independent of
# the (secret) index — cache-timing-safe gather.
# NOTE(review): sampled excerpt — the mask/select instructions are elided.
1918 .globl rsaz_512_gather4
1919 .type rsaz_512_gather4,@function
# Build compare masks from the .Linc increment constants; pshufd $0
# broadcasts the index across all lanes of xmm8.
1923 movdqa .Linc+16(%rip),%xmm1
1924 movdqa .Linc(%rip),%xmm0
1926 pshufd $0,%xmm8,%xmm8
# Unconditionally load all eight 16-byte columns of the table row; the
# (elided) mask logic keeps only the wanted entry.
1952 movdqa 0(%rsi),%xmm8
1953 movdqa 16(%rsi),%xmm9
1954 movdqa 32(%rsi),%xmm10
1955 movdqa 48(%rsi),%xmm11
1957 movdqa 64(%rsi),%xmm12
1959 movdqa 80(%rsi),%xmm13
1961 movdqa 96(%rsi),%xmm14
1963 movdqa 112(%rsi),%xmm15
# Swap the two 64-bit halves of xmm8 to combine the selected qwords.
1977 pshufd $0x4e,%xmm8,%xmm9
# Windows SEH end marker emitted by the generator (harmless on ELF).
1984 .LSEH_end_rsaz_512_gather4:
1985 .size rsaz_512_gather4,.-rsaz_512_gather4