2 /* Do not modify. This file is auto-generated from poly1305-x86.pl. */
/*
 * poly1305_init (PIC copy; only part of the body is visible here).
 * The visible instructions form addresses of the block/emit routines as
 * offsets from .L001pic_point added to %ebx.
 * NOTE(review): assumes %ebx holds the run-time address of .L001pic_point
 * at this point -- the instructions that set it are not shown; confirm.
 */
7 .type poly1305_init,@function
10 .L_poly1305_init_begin:
/* Default choice: %eax = &poly1305_blocks, %edx = &poly1305_emit. */
30 leal poly1305_blocks-.L001pic_point(%ebx),%eax
31 leal poly1305_emit-.L001pic_point(%ebx),%edx
/* %edi = &OPENSSL_ia32cap_P, formed PC-relatively. */
32 leal OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
/*
 * Re-point %eax/%edx at the SSE2 routines. The test that guards this is
 * not visible; presumably a feature bit read through %edi -- confirm.
 */
37 leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
38 leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx
/* Re-point %eax at the AVX2 block routine (guarding test not visible). */
42 leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
66 .size poly1305_init,.-.L_poly1305_init_begin
/*
 * poly1305_blocks (PIC copy): exported scalar block routine. Only two
 * address computations from its body are visible in this excerpt.
 */
67 .globl poly1305_blocks
68 .type poly1305_blocks,@function
71 .L_poly1305_blocks_begin:
/* %ebp = %esi + %ecx (presumably input pointer + length, i.e. end of input -- confirm). */
85 leal (%esi,%ecx,1),%ebp
/* %edx = %edx * 5, computed as %edx + 4*%edx without touching flags. */
213 leal (%edx,%edx,4),%edx
234 .size poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit (PIC copy): only the symbol-type, begin-label and size
 * directives are visible here; the body is not part of this excerpt.
 */
236 .type poly1305_emit,@function
239 .L_poly1305_emit_begin:
294 .size poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2 (PIC copy; excerpt with gaps).
 * Visible data flow: a 16-byte value is loaded from 24(%edi), working
 * values are spilled to an aligned stack frame at (%esp), multiplied
 * against a table at (%edx), and the results are written back to the
 * context at 16..128(%edi).
 * NOTE(review): the arithmetic between the visible lines is not shown, so
 * the comments below describe operand movement only.
 */
296 .type _poly1305_init_sse2,@function
/* Unaligned 16-byte load from the context at %edi+24. */
299 movdqu 24(%edi),%xmm4
/* Spill %xmm1..%xmm4 to 16..64(%esp); movdqa requires these slots to be 16-byte aligned. */
322 movdqa %xmm1,16(%esp)
323 movdqa %xmm2,32(%esp)
324 movdqa %xmm3,48(%esp)
325 movdqa %xmm4,64(%esp)
/* Four further values (from %xmm6/%xmm5) are spilled to 80..128(%esp). */
332 movdqa %xmm6,80(%esp)
333 movdqa %xmm5,96(%esp)
340 movdqa %xmm6,112(%esp)
341 movdqa %xmm5,128(%esp)
/* pshufd $68 selects dwords 0,1,0,1: copy the low 64 bits into both halves. */
342 pshufd $68,%xmm0,%xmm6
344 pshufd $68,%xmm1,%xmm1
345 pshufd $68,%xmm2,%xmm2
346 pshufd $68,%xmm3,%xmm3
347 pshufd $68,%xmm4,%xmm4
/* Store the duplicated values as 16-byte table entries at 16..64(%edx). */
349 movdqa %xmm1,16(%edx)
350 movdqa %xmm2,32(%edx)
351 movdqa %xmm3,48(%edx)
352 movdqa %xmm4,64(%edx)
/*
 * 32x32->64 multiplies (pmuludq) of working registers by the table
 * entries at 16..64(%edx), interleaved with reloads of the values spilled
 * at 32..128(%esp). The additions that combine the products are not
 * visible in this excerpt.
 */
359 pmuludq 48(%edx),%xmm5
361 pmuludq 32(%edx),%xmm6
364 pmuludq 16(%edx),%xmm7
366 movdqa 80(%esp),%xmm6
369 pmuludq 64(%edx),%xmm6
370 movdqa 32(%esp),%xmm7
373 pmuludq 32(%edx),%xmm7
376 pmuludq 16(%edx),%xmm5
378 movdqa 96(%esp),%xmm7
382 pmuludq 64(%edx),%xmm7
384 pmuludq 48(%edx),%xmm5
385 movdqa 48(%esp),%xmm6
388 pmuludq 16(%edx),%xmm6
390 movdqa 112(%esp),%xmm5
394 pmuludq 64(%edx),%xmm5
397 pmuludq 48(%edx),%xmm6
399 pmuludq 32(%edx),%xmm7
400 movdqa 64(%esp),%xmm5
402 movdqa 128(%esp),%xmm6
406 pmuludq 64(%edx),%xmm6
409 pmuludq 16(%edx),%xmm7
412 pmuludq 32(%edx),%xmm5
414 pmuludq 48(%edx),%xmm6
/* Load a 16-byte constant from %ebx+64 (presumably the 0x3ffffff mask row of the constant table -- confirm). */
415 movdqa 64(%ebx),%xmm7
/* Keep each register's low qword and place the low qword of the matching stack slot in its high half. */
450 punpcklqdq (%esp),%xmm0
451 punpcklqdq 16(%esp),%xmm1
452 punpcklqdq 32(%esp),%xmm2
453 punpcklqdq 48(%esp),%xmm3
454 punpcklqdq 64(%esp),%xmm4
/* pshufd $141 reorders dwords to source order 1,3,0,2. */
467 pshufd $141,%xmm0,%xmm0
468 pshufd $141,%xmm1,%xmm1
469 pshufd $141,%xmm2,%xmm2
470 pshufd $141,%xmm3,%xmm3
471 pshufd $141,%xmm4,%xmm4
/* Write the results back to the context with unaligned stores at 16..64(%edi). */
473 movdqu %xmm1,16(%edi)
474 movdqu %xmm2,32(%edi)
475 movdqu %xmm3,48(%edi)
476 movdqu %xmm4,64(%edi)
/* Four derived values (computed in lines not shown) go to 80..128(%edi). */
483 movdqu %xmm6,80(%edi)
484 movdqu %xmm5,96(%edi)
491 movdqu %xmm6,112(%edi)
492 movdqu %xmm5,128(%edi)
496 .size _poly1305_init_sse2,.-_poly1305_init_sse2
/*
 * _poly1305_blocks_sse2 (PIC copy; excerpt with gaps).
 * Visible structure: locate the constant table, call the SSE2 init
 * helper, expand the table stored at (%edi) into low-half/high-half
 * copies around %edx, then run multiply passes over input read through
 * %esi, and finally store five 32-bit results at -48..-32(%edi).
 * NOTE(review): branches, loop labels and the additions that combine the
 * products are not visible, so control flow is not described here.
 */
498 .type _poly1305_blocks_sse2,@function
500 _poly1305_blocks_sse2:
/* %ebx = &.Lconst_sse2 (assumes %ebx held the address of .L009pic_point -- confirm). */
520 leal .Lconst_sse2-.L009pic_point(%ebx),%ebx
523 call _poly1305_init_sse2
/* Constant at %ebx+64 (presumably the 0x3ffffff mask row -- confirm). */
551 movdqa 64(%ebx),%xmm7
/* Spill %xmm1..%xmm4 to the aligned frame. */
585 movdqa %xmm1,16(%esp)
586 movdqa %xmm2,32(%esp)
587 movdqa %xmm3,48(%esp)
588 movdqa %xmm4,64(%esp)
/* Multiply pass whose second operands are the values spilled at 16..64(%esp). */
596 pmuludq 48(%esp),%xmm5
598 pmuludq 32(%esp),%xmm6
601 pmuludq 16(%esp),%xmm7
606 pmuludq 64(%esp),%xmm6
610 pmuludq 32(%esp),%xmm7
613 pmuludq 16(%esp),%xmm5
619 pmuludq 64(%esp),%xmm7
621 pmuludq 48(%esp),%xmm5
625 pmuludq 16(%esp),%xmm6
631 pmuludq 64(%esp),%xmm5
634 pmuludq 48(%esp),%xmm6
636 pmuludq 32(%esp),%xmm7
643 pmuludq 64(%esp),%xmm6
646 pmuludq 16(%esp),%xmm7
649 pmuludq 32(%esp),%xmm5
651 pmuludq 48(%esp),%xmm6
652 movdqa 64(%ebx),%xmm7
/*
 * Table expansion. For each 16-byte entry read from N(%edi):
 *   pshufd $68  (dwords 0,1,0,1) duplicates its low qword  -> N(%edx)
 *   pshufd $238 (dwords 2,3,2,3) duplicates its high qword -> N-144(%edx)
 * %xmm5 and %xmm6 alternate as source/scratch so loads and stores overlap.
 */
692 pshufd $68,%xmm5,%xmm6
694 pshufd $238,%xmm5,%xmm5
697 movdqu 16(%edi),%xmm6
698 movdqa %xmm5,-144(%edx)
699 pshufd $68,%xmm6,%xmm5
700 pshufd $238,%xmm6,%xmm6
701 movdqa %xmm5,16(%edx)
702 movdqu 32(%edi),%xmm5
703 movdqa %xmm6,-128(%edx)
704 pshufd $68,%xmm5,%xmm6
705 pshufd $238,%xmm5,%xmm5
706 movdqa %xmm6,32(%edx)
707 movdqu 48(%edi),%xmm6
708 movdqa %xmm5,-112(%edx)
709 pshufd $68,%xmm6,%xmm5
710 pshufd $238,%xmm6,%xmm6
711 movdqa %xmm5,48(%edx)
712 movdqu 64(%edi),%xmm5
713 movdqa %xmm6,-96(%edx)
714 pshufd $68,%xmm5,%xmm6
715 pshufd $238,%xmm5,%xmm5
716 movdqa %xmm6,64(%edx)
717 movdqu 80(%edi),%xmm6
718 movdqa %xmm5,-80(%edx)
719 pshufd $68,%xmm6,%xmm5
720 pshufd $238,%xmm6,%xmm6
721 movdqa %xmm5,80(%edx)
722 movdqu 96(%edi),%xmm5
723 movdqa %xmm6,-64(%edx)
724 pshufd $68,%xmm5,%xmm6
725 pshufd $238,%xmm5,%xmm5
726 movdqa %xmm6,96(%edx)
727 movdqu 112(%edi),%xmm6
728 movdqa %xmm5,-48(%edx)
729 pshufd $68,%xmm6,%xmm5
730 pshufd $238,%xmm6,%xmm6
731 movdqa %xmm5,112(%edx)
732 movdqu 128(%edi),%xmm5
733 movdqa %xmm6,-32(%edx)
734 pshufd $68,%xmm5,%xmm6
735 pshufd $238,%xmm5,%xmm5
736 movdqa %xmm6,128(%edx)
737 movdqa %xmm5,-16(%edx)
/* Read 32 bytes of input at %esi+32 and %esi+48 (unaligned). */
738 movdqu 32(%esi),%xmm5
739 movdqu 48(%esi),%xmm6
/* Park %xmm2..%xmm4 at 112..144(%esp) while the new input is unpacked. */
741 movdqa %xmm2,112(%esp)
742 movdqa %xmm3,128(%esp)
743 movdqa %xmm4,144(%esp)
/* Interleave 64-bit halves of the two loaded inputs (and of %xmm2/%xmm3). */
749 punpcklqdq %xmm3,%xmm2
750 punpckhqdq %xmm6,%xmm4
751 punpcklqdq %xmm6,%xmm5
763 movdqa %xmm0,80(%esp)
764 movdqa %xmm1,96(%esp)
/* Multiply pass against the high-half table copies at -144..-16(%edx); operands are staged at 16..64(%eax). */
769 movdqa -144(%edx),%xmm7
770 movdqa %xmm6,16(%eax)
771 movdqa %xmm2,32(%eax)
772 movdqa %xmm3,48(%eax)
773 movdqa %xmm4,64(%eax)
781 pmuludq -16(%edx),%xmm0
783 pmuludq -128(%edx),%xmm1
786 pmuludq -112(%edx),%xmm7
789 pmuludq -96(%edx),%xmm5
791 movdqa 16(%eax),%xmm7
792 pmuludq -80(%edx),%xmm6
795 pmuludq -128(%edx),%xmm7
798 pmuludq -112(%edx),%xmm5
800 movdqa 32(%eax),%xmm7
801 pmuludq -96(%edx),%xmm6
804 pmuludq -32(%edx),%xmm7
807 pmuludq -16(%edx),%xmm5
810 pmuludq -128(%edx),%xmm6
812 movdqa 48(%eax),%xmm5
813 pmuludq -112(%edx),%xmm7
816 pmuludq -48(%edx),%xmm5
819 pmuludq -32(%edx),%xmm6
822 pmuludq -16(%edx),%xmm7
824 movdqa 64(%eax),%xmm6
825 pmuludq -128(%edx),%xmm5
828 pmuludq -16(%edx),%xmm6
831 pmuludq -64(%edx),%xmm7
834 pmuludq -48(%edx),%xmm5
836 movdqa 64(%ebx),%xmm7
837 pmuludq -32(%edx),%xmm6
/* Read the 32 bytes of input just below %esi (offsets -32 and -16). */
840 movdqu -32(%esi),%xmm5
841 movdqu -16(%esi),%xmm6
843 movdqa %xmm2,32(%esp)
844 movdqa %xmm3,48(%esp)
845 movdqa %xmm4,64(%esp)
851 punpcklqdq %xmm3,%xmm2
852 punpckhqdq %xmm6,%xmm4
853 punpcklqdq %xmm6,%xmm5
/* Add back the values parked at 112..144(%esp) (32-bit lane adds). */
869 paddd 112(%esp),%xmm2
870 paddd 128(%esp),%xmm3
871 paddd 144(%esp),%xmm4
875 movdqa %xmm1,16(%esp)
876 movdqa %xmm6,16(%eax)
877 movdqa %xmm2,32(%eax)
878 movdqa %xmm3,48(%eax)
879 movdqa %xmm4,64(%eax)
/* Multiply pass against the low-half table copies at 16..128(%edx). */
892 pmuludq 128(%edx),%xmm0
894 pmuludq 16(%edx),%xmm1
897 pmuludq 32(%edx),%xmm7
900 pmuludq 48(%edx),%xmm5
902 movdqa 16(%eax),%xmm7
903 pmuludq 64(%edx),%xmm6
906 pmuludq 16(%edx),%xmm7
909 pmuludq 32(%edx),%xmm5
911 movdqa 32(%eax),%xmm7
912 pmuludq 48(%edx),%xmm6
915 pmuludq 112(%edx),%xmm7
918 pmuludq 128(%edx),%xmm5
921 pmuludq 16(%edx),%xmm6
923 movdqa 48(%eax),%xmm5
924 pmuludq 32(%edx),%xmm7
927 pmuludq 96(%edx),%xmm5
930 pmuludq 112(%edx),%xmm6
933 pmuludq 128(%edx),%xmm7
935 movdqa 64(%eax),%xmm6
936 pmuludq 16(%edx),%xmm5
939 pmuludq 128(%edx),%xmm6
942 pmuludq 80(%edx),%xmm7
945 pmuludq 96(%edx),%xmm5
947 movdqa 64(%ebx),%xmm7
948 pmuludq 112(%edx),%xmm6
/* Same load / park / interleave sequence as above for the next input at %esi+32. */
981 movdqu 32(%esi),%xmm5
982 movdqu 48(%esi),%xmm6
984 movdqa %xmm2,112(%esp)
985 movdqa %xmm3,128(%esp)
986 movdqa %xmm4,144(%esp)
992 punpcklqdq %xmm3,%xmm2
993 punpckhqdq %xmm6,%xmm4
994 punpcklqdq %xmm6,%xmm5
1006 movdqa %xmm0,80(%esp)
1007 movdqa %xmm1,96(%esp)
/*
 * From here the table entries are fetched with pshufd $16 (dwords
 * 0,0,1,0), which places source dwords 0 and 1 in the low dword of each
 * qword -- the positions pmuludq reads.
 */
1010 pshufd $16,-144(%edx),%xmm7
1015 paddd 112(%esp),%xmm2
1016 paddd 128(%esp),%xmm3
1017 paddd 144(%esp),%xmm4
1020 movdqa %xmm6,16(%eax)
1021 movdqa %xmm2,32(%eax)
1022 movdqa %xmm3,48(%eax)
1023 movdqa %xmm4,64(%eax)
/* Multiply pass: re-shuffled high-half table entries times the operands staged at (%eax)..64(%eax). */
1028 pshufd $16,-128(%edx),%xmm5
1033 pmuludq 48(%eax),%xmm5
1035 pmuludq 32(%eax),%xmm6
1038 pmuludq 16(%eax),%xmm7
1040 pshufd $16,-64(%edx),%xmm6
1041 pmuludq (%eax),%xmm5
1043 pmuludq 64(%eax),%xmm6
1044 pshufd $16,-112(%edx),%xmm7
1047 pmuludq 32(%eax),%xmm7
1050 pmuludq 16(%eax),%xmm5
1052 pshufd $16,-48(%edx),%xmm7
1053 pmuludq (%eax),%xmm6
1056 pmuludq 64(%eax),%xmm7
1058 pmuludq 48(%eax),%xmm5
1059 pshufd $16,-96(%edx),%xmm6
1062 pmuludq 16(%eax),%xmm6
1064 pshufd $16,-32(%edx),%xmm5
1065 pmuludq (%eax),%xmm7
1068 pmuludq 64(%eax),%xmm5
1071 pmuludq 48(%eax),%xmm6
1073 pmuludq 32(%eax),%xmm7
1074 pshufd $16,-80(%edx),%xmm5
1076 pshufd $16,-16(%edx),%xmm6
1077 pmuludq (%eax),%xmm5
1080 pmuludq 64(%eax),%xmm6
1083 pmuludq 16(%eax),%xmm7
1086 pmuludq 32(%eax),%xmm5
1088 pmuludq 48(%eax),%xmm6
1089 movdqa 64(%ebx),%xmm7
1093 movdqu -32(%esi),%xmm5
1094 movdqu -16(%esi),%xmm6
1096 movdqa %xmm2,32(%esp)
1097 movdqa %xmm3,48(%esp)
1098 movdqa %xmm4,64(%esp)
1104 punpcklqdq %xmm3,%xmm2
1105 punpckhqdq %xmm6,%xmm4
1106 punpcklqdq %xmm6,%xmm5
1118 pshufd $16,(%edx),%xmm7
/* Fold in all five values parked at 80..144(%esp). */
1119 paddd 80(%esp),%xmm5
1120 paddd 96(%esp),%xmm6
1121 paddd 112(%esp),%xmm2
1122 paddd 128(%esp),%xmm3
1123 paddd 144(%esp),%xmm4
1126 movdqa %xmm6,16(%esp)
/* 64-bit adds of the values saved at 32..64(%esp); each slot is then reused for the next operand. */
1134 paddq 32(%esp),%xmm2
1135 movdqa %xmm5,32(%esp)
1136 pshufd $16,16(%edx),%xmm5
1137 paddq 48(%esp),%xmm3
1138 movdqa %xmm6,48(%esp)
1141 paddq 64(%esp),%xmm4
1142 movdqa %xmm6,64(%esp)
/* Multiply pass: re-shuffled low-half table entries times the operands now held at (%esp)..64(%esp). */
1144 pmuludq 48(%esp),%xmm5
1146 pmuludq 32(%esp),%xmm6
1149 pmuludq 16(%esp),%xmm7
1151 pshufd $16,80(%edx),%xmm6
1152 pmuludq (%esp),%xmm5
1154 pmuludq 64(%esp),%xmm6
1155 pshufd $16,32(%edx),%xmm7
1158 pmuludq 32(%esp),%xmm7
1161 pmuludq 16(%esp),%xmm5
1163 pshufd $16,96(%edx),%xmm7
1164 pmuludq (%esp),%xmm6
1167 pmuludq 64(%esp),%xmm7
1169 pmuludq 48(%esp),%xmm5
1170 pshufd $16,48(%edx),%xmm6
1173 pmuludq 16(%esp),%xmm6
1175 pshufd $16,112(%edx),%xmm5
1176 pmuludq (%esp),%xmm7
1179 pmuludq 64(%esp),%xmm5
1182 pmuludq 48(%esp),%xmm6
1184 pmuludq 32(%esp),%xmm7
1185 pshufd $16,64(%edx),%xmm5
1187 pshufd $16,128(%edx),%xmm6
1188 pmuludq (%esp),%xmm5
1191 pmuludq 64(%esp),%xmm6
1194 pmuludq 16(%esp),%xmm7
1197 pmuludq 32(%esp),%xmm5
1199 pmuludq 48(%esp),%xmm6
1200 movdqa 64(%ebx),%xmm7
/* pshufd $78 (dwords 2,3,0,1) swaps the two qwords; presumably feeds a horizontal add of the two lanes (the adds are not shown). */
1204 pshufd $78,%xmm4,%xmm6
1205 pshufd $78,%xmm3,%xmm5
1208 pshufd $78,%xmm0,%xmm6
1209 pshufd $78,%xmm1,%xmm5
1212 pshufd $78,%xmm2,%xmm6
/* Store the low dword of %xmm0..%xmm4 as five consecutive 32-bit words at -48..-32(%edi). */
1245 movd %xmm0,-48(%edi)
1246 movd %xmm1,-44(%edi)
1247 movd %xmm2,-40(%edi)
1248 movd %xmm3,-36(%edi)
1249 movd %xmm4,-32(%edi)
1257 .size _poly1305_blocks_sse2,.-_poly1305_blocks_sse2
/*
 * _poly1305_emit_sse2: file-local emit routine paired with the SSE2 block
 * routine. Only one instruction of its body is visible in this excerpt.
 */
1259 .type _poly1305_emit_sse2,@function
1261 _poly1305_emit_sse2:
/* %ebp = %edi * 5 (as %edi + 4*%edi), leaving flags untouched. */
1297 leal (%edi,%edi,4),%ebp
1350 .size _poly1305_emit_sse2,.-_poly1305_emit_sse2
/*
 * _poly1305_init_avx2: VEX-encoded counterpart of the SSE2 init helper.
 * It loads a 16-byte value from 24(%edi), splits it into 26-bit pieces,
 * multiplies the pieces against a table at (%edx) with carry
 * propagation, and stores the packed results at (%edi)..128(%edi).
 * Inputs read here: %edi (context), %ebx (constant table base),
 * %edx (16-byte-aligned scratch table), %esp (16-byte-aligned frame).
 * NOTE(review): a few lines (loop counter, labels, return) are missing
 * from this excerpt, so the iteration count is not documented.
 */
1352 .type _poly1305_init_avx2,@function
1354 _poly1305_init_avx2:
1355 vmovdqu 24(%edi),%xmm4
/* %xmm7 = constant at %ebx+64; used below as the mask after each 26-bit shift. */
1360 vmovdqa 64(%ebx),%xmm7
/*
 * Split the loaded value into pieces starting at bit offsets 0, 26, 52,
 * 78 and 104 (low 64-bit lane): 52 = 6 bytes + 4 bits, 78 = 6 bytes +
 * 30 bits, 104 = 13 bytes. The first four pieces are masked with %xmm7.
 */
1361 vpand %xmm7,%xmm4,%xmm0
1362 vpsrlq $26,%xmm4,%xmm1
1363 vpsrldq $6,%xmm4,%xmm3
1364 vpand %xmm7,%xmm1,%xmm1
1365 vpsrlq $4,%xmm3,%xmm2
1366 vpsrlq $30,%xmm3,%xmm3
1367 vpand %xmm7,%xmm2,%xmm2
1368 vpand %xmm7,%xmm3,%xmm3
1369 vpsrldq $13,%xmm4,%xmm4
/* Save the five pieces at (%esp)..64(%esp). */
1373 vmovdqa %xmm0,(%esp)
1374 vmovdqa %xmm1,16(%esp)
1375 vmovdqa %xmm2,32(%esp)
1376 vmovdqa %xmm3,48(%esp)
1377 vmovdqa %xmm4,64(%esp)
/* Compute 5*x as (x<<2)+x for pieces 1..4 and save at 80..128(%esp). */
1378 vpslld $2,%xmm1,%xmm6
1379 vpslld $2,%xmm2,%xmm5
1380 vpaddd %xmm1,%xmm6,%xmm6
1381 vpaddd %xmm2,%xmm5,%xmm5
1382 vmovdqa %xmm6,80(%esp)
1383 vmovdqa %xmm5,96(%esp)
1384 vpslld $2,%xmm3,%xmm6
1385 vpslld $2,%xmm4,%xmm5
1386 vpaddd %xmm3,%xmm6,%xmm6
1387 vpaddd %xmm4,%xmm5,%xmm5
1388 vmovdqa %xmm6,112(%esp)
1389 vmovdqa %xmm5,128(%esp)
/* vpshufd $68 (dwords 0,1,0,1): copy each piece's low qword into both halves, then store as the multiplier table at (%edx)..64(%edx). */
1390 vpshufd $68,%xmm0,%xmm5
1392 vpshufd $68,%xmm1,%xmm1
1393 vpshufd $68,%xmm2,%xmm2
1394 vpshufd $68,%xmm3,%xmm3
1395 vpshufd $68,%xmm4,%xmm4
1396 vmovdqa %xmm5,(%edx)
1397 vmovdqa %xmm1,16(%edx)
1398 vmovdqa %xmm2,32(%edx)
1399 vmovdqa %xmm3,48(%edx)
1400 vmovdqa %xmm4,64(%edx)
/* First row of products: every table entry times %xmm0. */
1401 vpmuludq %xmm0,%xmm4,%xmm4
1402 vpmuludq %xmm0,%xmm3,%xmm3
1403 vpmuludq %xmm0,%xmm2,%xmm2
1404 vpmuludq %xmm0,%xmm1,%xmm1
1405 vpmuludq %xmm0,%xmm5,%xmm0
/*
 * Remaining rows: each saved piece (or its 5x multiple, reloaded from the
 * stack) is multiplied by table entries at (%edx)..64(%edx) and the
 * 64-bit products are accumulated into %xmm0..%xmm4.
 */
1406 vpmuludq 48(%edx),%xmm6,%xmm5
1407 vpaddq %xmm5,%xmm4,%xmm4
1408 vpmuludq 32(%edx),%xmm6,%xmm7
1409 vpaddq %xmm7,%xmm3,%xmm3
1410 vpmuludq 16(%edx),%xmm6,%xmm5
1411 vpaddq %xmm5,%xmm2,%xmm2
1412 vmovdqa 80(%esp),%xmm7
1413 vpmuludq (%edx),%xmm6,%xmm6
1414 vpaddq %xmm6,%xmm1,%xmm1
1415 vmovdqa 32(%esp),%xmm5
1416 vpmuludq 64(%edx),%xmm7,%xmm7
1417 vpaddq %xmm7,%xmm0,%xmm0
1418 vpmuludq 32(%edx),%xmm5,%xmm6
1419 vpaddq %xmm6,%xmm4,%xmm4
1420 vpmuludq 16(%edx),%xmm5,%xmm7
1421 vpaddq %xmm7,%xmm3,%xmm3
1422 vmovdqa 96(%esp),%xmm6
1423 vpmuludq (%edx),%xmm5,%xmm5
1424 vpaddq %xmm5,%xmm2,%xmm2
1425 vpmuludq 64(%edx),%xmm6,%xmm7
1426 vpaddq %xmm7,%xmm1,%xmm1
1427 vmovdqa 48(%esp),%xmm5
1428 vpmuludq 48(%edx),%xmm6,%xmm6
1429 vpaddq %xmm6,%xmm0,%xmm0
1430 vpmuludq 16(%edx),%xmm5,%xmm7
1431 vpaddq %xmm7,%xmm4,%xmm4
1432 vmovdqa 112(%esp),%xmm6
1433 vpmuludq (%edx),%xmm5,%xmm5
1434 vpaddq %xmm5,%xmm3,%xmm3
1435 vpmuludq 64(%edx),%xmm6,%xmm7
1436 vpaddq %xmm7,%xmm2,%xmm2
1437 vpmuludq 48(%edx),%xmm6,%xmm5
1438 vpaddq %xmm5,%xmm1,%xmm1
1439 vmovdqa 64(%esp),%xmm7
1440 vpmuludq 32(%edx),%xmm6,%xmm6
1441 vpaddq %xmm6,%xmm0,%xmm0
1442 vmovdqa 128(%esp),%xmm5
1443 vpmuludq (%edx),%xmm7,%xmm7
1444 vpaddq %xmm7,%xmm4,%xmm4
1445 vpmuludq 64(%edx),%xmm5,%xmm6
1446 vpaddq %xmm6,%xmm3,%xmm3
1447 vpmuludq 16(%edx),%xmm5,%xmm7
1448 vpaddq %xmm7,%xmm0,%xmm0
1449 vpmuludq 32(%edx),%xmm5,%xmm6
1450 vpaddq %xmm6,%xmm1,%xmm1
/* Reload the mask; %xmm7 was used as scratch above. */
1451 vmovdqa 64(%ebx),%xmm7
1452 vpmuludq 48(%edx),%xmm5,%xmm5
1453 vpaddq %xmm5,%xmm2,%xmm2
/*
 * Carry propagation: shift each accumulator right by 26, mask it, and add
 * the shifted-out part to the next accumulator (3->4, 0->1, 4->0, 1->2,
 * 2->3, then 0->1 and 3->4 again).
 */
1454 vpsrlq $26,%xmm3,%xmm5
1455 vpand %xmm7,%xmm3,%xmm3
1456 vpsrlq $26,%xmm0,%xmm6
1457 vpand %xmm7,%xmm0,%xmm0
1458 vpaddq %xmm5,%xmm4,%xmm4
1459 vpaddq %xmm6,%xmm1,%xmm1
1460 vpsrlq $26,%xmm4,%xmm5
1461 vpand %xmm7,%xmm4,%xmm4
1462 vpsrlq $26,%xmm1,%xmm6
1463 vpand %xmm7,%xmm1,%xmm1
1464 vpaddq %xmm6,%xmm2,%xmm2
/* The carry out of %xmm4 is added to %xmm0 once, then again shifted left by 2: %xmm0 += 5*carry. */
1465 vpaddd %xmm5,%xmm0,%xmm0
1466 vpsllq $2,%xmm5,%xmm5
1467 vpsrlq $26,%xmm2,%xmm6
1468 vpand %xmm7,%xmm2,%xmm2
1469 vpaddd %xmm5,%xmm0,%xmm0
1470 vpaddd %xmm6,%xmm3,%xmm3
1471 vpsrlq $26,%xmm3,%xmm6
1472 vpsrlq $26,%xmm0,%xmm5
1473 vpand %xmm7,%xmm0,%xmm0
1474 vpand %xmm7,%xmm3,%xmm3
1475 vpaddd %xmm5,%xmm1,%xmm1
1476 vpaddd %xmm6,%xmm4,%xmm4
/* Leave the squaring sequence when the preceding (not shown) instruction set ZF. */
1478 jz .L019square_break
/* Keep each result's low qword and put the low qword of the saved piece from the stack in the high half. */
1479 vpunpcklqdq (%esp),%xmm0,%xmm0
1480 vpunpcklqdq 16(%esp),%xmm1,%xmm1
1481 vpunpcklqdq 32(%esp),%xmm2,%xmm2
1482 vpunpcklqdq 48(%esp),%xmm3,%xmm3
1483 vpunpcklqdq 64(%esp),%xmm4,%xmm4
/* Move each 64-bit lane's low dword to the high dword and OR in the values held at (%esp)..64(%esp). */
1486 vpsllq $32,%xmm0,%xmm0
1487 vpsllq $32,%xmm1,%xmm1
1488 vpsllq $32,%xmm2,%xmm2
1489 vpsllq $32,%xmm3,%xmm3
1490 vpsllq $32,%xmm4,%xmm4
1491 vpor (%esp),%xmm0,%xmm0
1492 vpor 16(%esp),%xmm1,%xmm1
1493 vpor 32(%esp),%xmm2,%xmm2
1494 vpor 48(%esp),%xmm3,%xmm3
1495 vpor 64(%esp),%xmm4,%xmm4
/* vpshufd $141 reorders dwords to source order 1,3,0,2. */
1496 vpshufd $141,%xmm0,%xmm0
1497 vpshufd $141,%xmm1,%xmm1
1498 vpshufd $141,%xmm2,%xmm2
1499 vpshufd $141,%xmm3,%xmm3
1500 vpshufd $141,%xmm4,%xmm4
/* Store the five packed rows to the context at (%edi)..64(%edi). */
1501 vmovdqu %xmm0,(%edi)
1502 vmovdqu %xmm1,16(%edi)
1503 vmovdqu %xmm2,32(%edi)
1504 vmovdqu %xmm3,48(%edi)
1505 vmovdqu %xmm4,64(%edi)
/* Store 5x multiples ((x<<2)+x) of rows 1..4 at 80..128(%edi). */
1506 vpslld $2,%xmm1,%xmm6
1507 vpslld $2,%xmm2,%xmm5
1508 vpaddd %xmm1,%xmm6,%xmm6
1509 vpaddd %xmm2,%xmm5,%xmm5
1510 vmovdqu %xmm6,80(%edi)
1511 vmovdqu %xmm5,96(%edi)
1512 vpslld $2,%xmm3,%xmm6
1513 vpslld $2,%xmm4,%xmm5
1514 vpaddd %xmm3,%xmm6,%xmm6
1515 vpaddd %xmm4,%xmm5,%xmm5
1516 vmovdqu %xmm6,112(%edi)
1517 vmovdqu %xmm5,128(%edi)
1521 .size _poly1305_init_avx2,.-_poly1305_init_avx2
/*
 * _poly1305_blocks_avx2: 256-bit block routine (excerpt with gaps).
 * Visible structure: locate the constant table, call the AVX2 init
 * helper, build a table of 32-byte rows around %edx from the context at
 * %edi, then repeatedly load 64 bytes of input via %esi, split it into
 * 26-bit pieces, multiply-accumulate against the table and propagate
 * carries. The final pass reads the table at +4 byte offsets, sums the
 * lanes horizontally and stores five 32-bit words at -48..-32(%edi).
 * NOTE(review): loop labels, branches and pointer updates are missing
 * from this excerpt; offsets are relative to register values at that
 * point, which hidden lines may have adjusted.
 */
1523 .type _poly1305_blocks_avx2,@function
1525 _poly1305_blocks_avx2:
/* %ebx = &.Lconst_sse2 (assumes %ebx held the address of .L022pic_point -- confirm). */
1545 leal .Lconst_sse2-.L022pic_point(%ebx),%ebx
1548 call _poly1305_init_avx2
/* Load five 16-byte context rows at 48..112(%edi). */
1573 vmovdqu 48(%edi),%xmm0
1575 vmovdqu 64(%edi),%xmm1
1576 vmovdqu 80(%edi),%xmm2
1577 vmovdqu 96(%edi),%xmm3
1578 vmovdqu 112(%edi),%xmm4
/* Spread each row across 256 bits: vpermq $64 (qwords 0,0,0,1) then vpshufd $200 (dwords 0,2,0,3 per 128-bit lane). */
1580 vpermq $64,%ymm0,%ymm0
1581 vpermq $64,%ymm1,%ymm1
1582 vpermq $64,%ymm2,%ymm2
1583 vpermq $64,%ymm3,%ymm3
1584 vpermq $64,%ymm4,%ymm4
1585 vpshufd $200,%ymm0,%ymm0
1586 vpshufd $200,%ymm1,%ymm1
1587 vpshufd $200,%ymm2,%ymm2
1588 vpshufd $200,%ymm3,%ymm3
1589 vpshufd $200,%ymm4,%ymm4
/* Store them as 32-byte table rows at -128..0(%edx) while loading four more rows from 80..128(%edi). */
1590 vmovdqa %ymm0,-128(%edx)
1591 vmovdqu 80(%edi),%xmm0
1592 vmovdqa %ymm1,-96(%edx)
1593 vmovdqu 96(%edi),%xmm1
1594 vmovdqa %ymm2,-64(%edx)
1595 vmovdqu 112(%edi),%xmm2
1596 vmovdqa %ymm3,-32(%edx)
1597 vmovdqu 128(%edi),%xmm3
1598 vmovdqa %ymm4,(%edx)
1599 vpermq $64,%ymm0,%ymm0
1600 vpermq $64,%ymm1,%ymm1
1601 vpermq $64,%ymm2,%ymm2
1602 vpermq $64,%ymm3,%ymm3
1603 vpshufd $200,%ymm0,%ymm0
1604 vpshufd $200,%ymm1,%ymm1
1605 vpshufd $200,%ymm2,%ymm2
1606 vpshufd $200,%ymm3,%ymm3
/* Rows 32..128(%edx) are stored while five 32-bit words at -48..-32(%edi) are loaded into %xmm0..%xmm4 (the same slots written at the end of this routine). */
1607 vmovdqa %ymm0,32(%edx)
1608 vmovd -48(%edi),%xmm0
1609 vmovdqa %ymm1,64(%edx)
1610 vmovd -44(%edi),%xmm1
1611 vmovdqa %ymm2,96(%edx)
1612 vmovd -40(%edi),%xmm2
1613 vmovdqa %ymm3,128(%edx)
1614 vmovd -36(%edi),%xmm3
1615 vmovd -32(%edi),%xmm4
/* %ymm7 = 32-byte constant at %ebx+64, used as the mask after 26-bit shifts below. */
1616 vmovdqa 64(%ebx),%ymm7
/* Input loads for a path whose surrounding branches are not visible here. */
1623 vmovdqu (%esi),%xmm5
1626 vmovdqu 16(%esi),%xmm6
1628 vinserti128 $1,32(%esi),%ymm5,%ymm5
1640 vpxor %ymm6,%ymm6,%ymm6
/* %ebx = %ebx + 8*%eax + 32: select a 32-byte-offset position within the constant table based on %eax. */
1641 leal 32(%ebx,%eax,8),%ebx
/* Load 64 input bytes: (%esi)/16(%esi) into the low lanes, 32(%esi)/48(%esi) into the high lanes. */
1646 vmovdqu (%esi),%xmm5
1647 vmovdqu 16(%esi),%xmm6
1648 vinserti128 $1,32(%esi),%ymm5,%ymm5
1649 vinserti128 $1,48(%esi),%ymm6,%ymm6
/* Park the %ymm0..%ymm2 accumulators on the stack while the input is split. */
1654 vmovdqa %ymm2,64(%esp)
1655 vpsrldq $6,%ymm5,%ymm2
1656 vmovdqa %ymm0,(%esp)
1657 vpsrldq $6,%ymm6,%ymm0
1658 vmovdqa %ymm1,32(%esp)
/* Transpose qwords, then cut 26-bit pieces with shifts of 30/4/26/40 and the mask in %ymm7. */
1659 vpunpckhqdq %ymm6,%ymm5,%ymm1
1660 vpunpcklqdq %ymm6,%ymm5,%ymm5
1661 vpunpcklqdq %ymm0,%ymm2,%ymm2
1662 vpsrlq $30,%ymm2,%ymm0
1663 vpsrlq $4,%ymm2,%ymm2
1664 vpsrlq $26,%ymm5,%ymm6
1665 vpsrlq $40,%ymm1,%ymm1
1666 vpand %ymm7,%ymm2,%ymm2
1667 vpand %ymm7,%ymm5,%ymm5
1668 vpand %ymm7,%ymm6,%ymm6
1669 vpand %ymm7,%ymm0,%ymm0
/* OR a table constant into the top piece (presumably the per-block pad bit -- confirm). */
1670 vpor (%ebx),%ymm1,%ymm1
/* Add the parked accumulators (and %ymm3/%ymm4) to the new input pieces. */
1671 vpaddq 64(%esp),%ymm2,%ymm2
1672 vpaddq (%esp),%ymm5,%ymm5
1673 vpaddq 32(%esp),%ymm6,%ymm6
1674 vpaddq %ymm3,%ymm0,%ymm0
1675 vpaddq %ymm4,%ymm1,%ymm1
/*
 * Multiply-accumulate: each of the five pieces (%ymm2, %ymm5, and the
 * three staged at 32/96/128(%esp)) is multiplied by table rows around
 * %edx; products are summed into %ymm0..%ymm4.
 */
1676 vpmuludq -96(%edx),%ymm2,%ymm3
1677 vmovdqa %ymm6,32(%esp)
1678 vpmuludq -64(%edx),%ymm2,%ymm4
1679 vmovdqa %ymm0,96(%esp)
1680 vpmuludq 96(%edx),%ymm2,%ymm0
1681 vmovdqa %ymm1,128(%esp)
1682 vpmuludq 128(%edx),%ymm2,%ymm1
1683 vpmuludq -128(%edx),%ymm2,%ymm2
1684 vpmuludq -32(%edx),%ymm5,%ymm7
1685 vpaddq %ymm7,%ymm3,%ymm3
1686 vpmuludq (%edx),%ymm5,%ymm6
1687 vpaddq %ymm6,%ymm4,%ymm4
1688 vpmuludq -128(%edx),%ymm5,%ymm7
1689 vpaddq %ymm7,%ymm0,%ymm0
1690 vmovdqa 32(%esp),%ymm7
1691 vpmuludq -96(%edx),%ymm5,%ymm6
1692 vpaddq %ymm6,%ymm1,%ymm1
1693 vpmuludq -64(%edx),%ymm5,%ymm5
1694 vpaddq %ymm5,%ymm2,%ymm2
1695 vpmuludq -64(%edx),%ymm7,%ymm6
1696 vpaddq %ymm6,%ymm3,%ymm3
1697 vpmuludq -32(%edx),%ymm7,%ymm5
1698 vpaddq %ymm5,%ymm4,%ymm4
1699 vpmuludq 128(%edx),%ymm7,%ymm6
1700 vpaddq %ymm6,%ymm0,%ymm0
1701 vmovdqa 96(%esp),%ymm6
1702 vpmuludq -128(%edx),%ymm7,%ymm5
1703 vpaddq %ymm5,%ymm1,%ymm1
1704 vpmuludq -96(%edx),%ymm7,%ymm7
1705 vpaddq %ymm7,%ymm2,%ymm2
1706 vpmuludq -128(%edx),%ymm6,%ymm5
1707 vpaddq %ymm5,%ymm3,%ymm3
1708 vpmuludq -96(%edx),%ymm6,%ymm7
1709 vpaddq %ymm7,%ymm4,%ymm4
1710 vpmuludq 64(%edx),%ymm6,%ymm5
1711 vpaddq %ymm5,%ymm0,%ymm0
1712 vmovdqa 128(%esp),%ymm5
1713 vpmuludq 96(%edx),%ymm6,%ymm7
1714 vpaddq %ymm7,%ymm1,%ymm1
1715 vpmuludq 128(%edx),%ymm6,%ymm6
1716 vpaddq %ymm6,%ymm2,%ymm2
1717 vpmuludq 128(%edx),%ymm5,%ymm7
1718 vpaddq %ymm7,%ymm3,%ymm3
1719 vpmuludq 32(%edx),%ymm5,%ymm6
1720 vpaddq %ymm6,%ymm0,%ymm0
1721 vpmuludq -128(%edx),%ymm5,%ymm7
1722 vpaddq %ymm7,%ymm4,%ymm4
/* Reload the mask; %ymm7 was used as scratch above. */
1723 vmovdqa 64(%ebx),%ymm7
1724 vpmuludq 64(%edx),%ymm5,%ymm6
1725 vpaddq %ymm6,%ymm1,%ymm1
1726 vpmuludq 96(%edx),%ymm5,%ymm5
1727 vpaddq %ymm5,%ymm2,%ymm2
/* Carry propagation in 26-bit steps; the carry out of %ymm4 is folded into %ymm0 as carry + (carry<<2) = 5*carry. */
1728 vpsrlq $26,%ymm3,%ymm5
1729 vpand %ymm7,%ymm3,%ymm3
1730 vpsrlq $26,%ymm0,%ymm6
1731 vpand %ymm7,%ymm0,%ymm0
1732 vpaddq %ymm5,%ymm4,%ymm4
1733 vpaddq %ymm6,%ymm1,%ymm1
1734 vpsrlq $26,%ymm4,%ymm5
1735 vpand %ymm7,%ymm4,%ymm4
1736 vpsrlq $26,%ymm1,%ymm6
1737 vpand %ymm7,%ymm1,%ymm1
1738 vpaddq %ymm6,%ymm2,%ymm2
1739 vpaddq %ymm5,%ymm0,%ymm0
1740 vpsllq $2,%ymm5,%ymm5
1741 vpsrlq $26,%ymm2,%ymm6
1742 vpand %ymm7,%ymm2,%ymm2
1743 vpaddq %ymm5,%ymm0,%ymm0
1744 vpaddq %ymm6,%ymm3,%ymm3
1745 vpsrlq $26,%ymm3,%ymm6
1746 vpsrlq $26,%ymm0,%ymm5
1747 vpand %ymm7,%ymm0,%ymm0
1748 vpand %ymm7,%ymm3,%ymm3
1749 vpaddq %ymm5,%ymm1,%ymm1
1750 vpaddq %ymm6,%ymm4,%ymm4
/* Next 64 bytes of input: same load / split / add sequence as above. */
1751 vmovdqu (%esi),%xmm5
1752 vmovdqu 16(%esi),%xmm6
1753 vinserti128 $1,32(%esi),%ymm5,%ymm5
1754 vinserti128 $1,48(%esi),%ymm6,%ymm6
1759 vmovdqa %ymm2,64(%esp)
1760 vpsrldq $6,%ymm5,%ymm2
1761 vmovdqa %ymm0,(%esp)
1762 vpsrldq $6,%ymm6,%ymm0
1763 vmovdqa %ymm1,32(%esp)
1764 vpunpckhqdq %ymm6,%ymm5,%ymm1
1765 vpunpcklqdq %ymm6,%ymm5,%ymm5
1766 vpunpcklqdq %ymm0,%ymm2,%ymm2
1767 vpsrlq $30,%ymm2,%ymm0
1768 vpsrlq $4,%ymm2,%ymm2
1769 vpsrlq $26,%ymm5,%ymm6
1770 vpsrlq $40,%ymm1,%ymm1
1771 vpand %ymm7,%ymm2,%ymm2
1772 vpand %ymm7,%ymm5,%ymm5
1773 vpand %ymm7,%ymm6,%ymm6
1774 vpand %ymm7,%ymm0,%ymm0
1775 vpor (%ebx),%ymm1,%ymm1
1777 vpaddq 64(%esp),%ymm2,%ymm2
1778 vpaddq (%esp),%ymm5,%ymm5
1779 vpaddq 32(%esp),%ymm6,%ymm6
1780 vpaddq %ymm3,%ymm0,%ymm0
1781 vpaddq %ymm4,%ymm1,%ymm1
/*
 * Tail multiply: same product pattern as above, but every table row is
 * read 4 bytes further on (-92 instead of -96, 100 instead of 96, ...),
 * so each 64-bit lane picks up the neighbouring dword of the row. These
 * loads are not 32-byte aligned, which vpmuludq's memory operand permits.
 */
1782 vpmuludq -92(%edx),%ymm2,%ymm3
1783 vmovdqa %ymm6,32(%esp)
1784 vpmuludq -60(%edx),%ymm2,%ymm4
1785 vmovdqa %ymm0,96(%esp)
1786 vpmuludq 100(%edx),%ymm2,%ymm0
1787 vmovdqa %ymm1,128(%esp)
1788 vpmuludq 132(%edx),%ymm2,%ymm1
1789 vpmuludq -124(%edx),%ymm2,%ymm2
1790 vpmuludq -28(%edx),%ymm5,%ymm7
1791 vpaddq %ymm7,%ymm3,%ymm3
1792 vpmuludq 4(%edx),%ymm5,%ymm6
1793 vpaddq %ymm6,%ymm4,%ymm4
1794 vpmuludq -124(%edx),%ymm5,%ymm7
1795 vpaddq %ymm7,%ymm0,%ymm0
1796 vmovdqa 32(%esp),%ymm7
1797 vpmuludq -92(%edx),%ymm5,%ymm6
1798 vpaddq %ymm6,%ymm1,%ymm1
1799 vpmuludq -60(%edx),%ymm5,%ymm5
1800 vpaddq %ymm5,%ymm2,%ymm2
1801 vpmuludq -60(%edx),%ymm7,%ymm6
1802 vpaddq %ymm6,%ymm3,%ymm3
1803 vpmuludq -28(%edx),%ymm7,%ymm5
1804 vpaddq %ymm5,%ymm4,%ymm4
1805 vpmuludq 132(%edx),%ymm7,%ymm6
1806 vpaddq %ymm6,%ymm0,%ymm0
1807 vmovdqa 96(%esp),%ymm6
1808 vpmuludq -124(%edx),%ymm7,%ymm5
1809 vpaddq %ymm5,%ymm1,%ymm1
1810 vpmuludq -92(%edx),%ymm7,%ymm7
1811 vpaddq %ymm7,%ymm2,%ymm2
1812 vpmuludq -124(%edx),%ymm6,%ymm5
1813 vpaddq %ymm5,%ymm3,%ymm3
1814 vpmuludq -92(%edx),%ymm6,%ymm7
1815 vpaddq %ymm7,%ymm4,%ymm4
1816 vpmuludq 68(%edx),%ymm6,%ymm5
1817 vpaddq %ymm5,%ymm0,%ymm0
1818 vmovdqa 128(%esp),%ymm5
1819 vpmuludq 100(%edx),%ymm6,%ymm7
1820 vpaddq %ymm7,%ymm1,%ymm1
1821 vpmuludq 132(%edx),%ymm6,%ymm6
1822 vpaddq %ymm6,%ymm2,%ymm2
1823 vpmuludq 132(%edx),%ymm5,%ymm7
1824 vpaddq %ymm7,%ymm3,%ymm3
1825 vpmuludq 36(%edx),%ymm5,%ymm6
1826 vpaddq %ymm6,%ymm0,%ymm0
1827 vpmuludq -124(%edx),%ymm5,%ymm7
1828 vpaddq %ymm7,%ymm4,%ymm4
1829 vmovdqa 64(%ebx),%ymm7
1830 vpmuludq 68(%edx),%ymm5,%ymm6
1831 vpaddq %ymm6,%ymm1,%ymm1
1832 vpmuludq 100(%edx),%ymm5,%ymm5
1833 vpaddq %ymm5,%ymm2,%ymm2
/* Horizontal sum, step 1: add the upper qword of each 128-bit lane to its lower qword. */
1834 vpsrldq $8,%ymm4,%ymm5
1835 vpsrldq $8,%ymm3,%ymm6
1836 vpaddq %ymm5,%ymm4,%ymm4
1837 vpsrldq $8,%ymm0,%ymm5
1838 vpaddq %ymm6,%ymm3,%ymm3
1839 vpsrldq $8,%ymm1,%ymm6
1840 vpaddq %ymm5,%ymm0,%ymm0
1841 vpsrldq $8,%ymm2,%ymm5
1842 vpaddq %ymm6,%ymm1,%ymm1
/* Step 2: vpermq $2 brings qword 2 (low qword of the upper lane) down to qword 0 and it is added, so qword 0 holds the total of all four lanes. */
1843 vpermq $2,%ymm4,%ymm6
1844 vpaddq %ymm5,%ymm2,%ymm2
1845 vpermq $2,%ymm3,%ymm5
1846 vpaddq %ymm6,%ymm4,%ymm4
1847 vpermq $2,%ymm0,%ymm6
1848 vpaddq %ymm5,%ymm3,%ymm3
1849 vpermq $2,%ymm1,%ymm5
1850 vpaddq %ymm6,%ymm0,%ymm0
1851 vpermq $2,%ymm2,%ymm6
1852 vpaddq %ymm5,%ymm1,%ymm1
1853 vpaddq %ymm6,%ymm2,%ymm2
/* Final carry propagation, same pattern as after the main multiply. */
1854 vpsrlq $26,%ymm3,%ymm5
1855 vpand %ymm7,%ymm3,%ymm3
1856 vpsrlq $26,%ymm0,%ymm6
1857 vpand %ymm7,%ymm0,%ymm0
1858 vpaddq %ymm5,%ymm4,%ymm4
1859 vpaddq %ymm6,%ymm1,%ymm1
1860 vpsrlq $26,%ymm4,%ymm5
1861 vpand %ymm7,%ymm4,%ymm4
1862 vpsrlq $26,%ymm1,%ymm6
1863 vpand %ymm7,%ymm1,%ymm1
1864 vpaddq %ymm6,%ymm2,%ymm2
1865 vpaddq %ymm5,%ymm0,%ymm0
1866 vpsllq $2,%ymm5,%ymm5
1867 vpsrlq $26,%ymm2,%ymm6
1868 vpand %ymm7,%ymm2,%ymm2
1869 vpaddq %ymm5,%ymm0,%ymm0
1870 vpaddq %ymm6,%ymm3,%ymm3
1871 vpsrlq $26,%ymm3,%ymm6
1872 vpsrlq $26,%ymm0,%ymm5
1873 vpand %ymm7,%ymm0,%ymm0
1874 vpand %ymm7,%ymm3,%ymm3
1875 vpaddq %ymm5,%ymm1,%ymm1
1876 vpaddq %ymm6,%ymm4,%ymm4
/* vpshufd $252 (dwords 0,3,3,3): keep dword 0 in place and fill the other positions with dword 3. */
1879 vpshufd $252,%xmm0,%xmm0
1881 vpshufd $252,%xmm1,%xmm1
1882 vpshufd $252,%xmm2,%xmm2
1883 vpshufd $252,%xmm3,%xmm3
1884 vpshufd $252,%xmm4,%xmm4
/* Store the low dword of each result as five consecutive 32-bit words at -48..-32(%edi). */
1888 vmovd %xmm0,-48(%edi)
1889 vmovd %xmm1,-44(%edi)
1890 vmovd %xmm2,-40(%edi)
1891 vmovd %xmm3,-36(%edi)
1892 vmovd %xmm4,-32(%edi)
1901 .size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * Constant data (the table's label is not visible in this excerpt;
 * presumably .Lconst_sse2 -- confirm).
 * Row of four 64-bit lanes each holding 16777216 = 1<<24.
 */
1904 .long 16777216,0,16777216,0,16777216,0,16777216,0
/* Row of zeros. */
1905 .long 0,0,0,0,0,0,0,0
/* Row of four 64-bit lanes each holding 67108863 = 0x3ffffff (26 one-bits). */
1906 .long 67108863,0,67108863,0,67108863,0,67108863,0
/* 0x0fffffff followed by three 0x0ffffffc words. */
1907 .long 268435455,268435452,268435452,268435452
/* ASCII identification string: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.o" (continues past this excerpt). */
1908 .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
1909 .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1910 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
/* Common symbol OPENSSL_ia32cap_P: 16 bytes, 4-byte alignment. */
1913 .comm OPENSSL_ia32cap_P,16,4
/*
 * poly1305_init (second copy in the file; excerpt with gaps).
 * This copy differs from the earlier one in taking OPENSSL_ia32cap_P by
 * absolute address, which suggests it is the non-PIC build variant
 * (the preprocessor guard is not visible -- confirm).
 */
1917 .globl poly1305_init
1918 .type poly1305_init,@function
1921 .L_poly1305_init_begin:
/* Default choice: %eax = &poly1305_blocks, %edx = &poly1305_emit, formed relative to .L001pic_point in %ebx. */
1941 leal poly1305_blocks-.L001pic_point(%ebx),%eax
1942 leal poly1305_emit-.L001pic_point(%ebx),%edx
/* %edi = absolute address of OPENSSL_ia32cap_P. */
1943 leal OPENSSL_ia32cap_P,%edi
/* Re-point %eax/%edx at the SSE2 routines (guarding test not visible). */
1948 leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
1949 leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx
/* Re-point %eax at the AVX2 block routine (guarding test not visible). */
1953 leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
/*
 * Mask four 32-bit words: clear the top 4 bits of each, and also the low
 * 2 bits of the last three (0x0fffffff, 0x0ffffffc x3). Presumably the
 * Poly1305 clamp of the key half r -- the loads are not shown; confirm.
 */
1962 andl $268435455,%eax
1963 andl $268435452,%ebx
1964 andl $268435452,%ecx
1965 andl $268435452,%edx
1977 .size poly1305_init,.-.L_poly1305_init_begin
/*
 * poly1305_blocks (second copy in the file): exported scalar block
 * routine. Only two address computations from its body are visible here.
 */
1978 .globl poly1305_blocks
1979 .type poly1305_blocks,@function
1982 .L_poly1305_blocks_begin:
/* %ebp = %esi + %ecx (presumably input pointer + length, i.e. end of input -- confirm). */
1996 leal (%esi,%ecx,1),%ebp
/* %edx = %edx * 5, computed as %edx + 4*%edx without touching flags. */
2124 leal (%edx,%edx,4),%edx
2145 .size poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit (second copy in the file): exported symbol. Only the
 * visibility, type, begin-label and size directives are visible here;
 * the body is not part of this excerpt.
 */
2146 .globl poly1305_emit
2147 .type poly1305_emit,@function
2150 .L_poly1305_emit_begin:
2205 .size poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2 (second copy in the file; excerpt with gaps).
 * Visible data flow: a 16-byte value is loaded from 24(%edi), working
 * values are spilled to an aligned stack frame at (%esp), multiplied
 * against a table at (%edx), and the results are written back to the
 * context at 16..128(%edi).
 * NOTE(review): the arithmetic between the visible lines is not shown, so
 * the comments below describe operand movement only.
 */
2207 .type _poly1305_init_sse2,@function
2209 _poly1305_init_sse2:
/* Unaligned 16-byte load from the context at %edi+24. */
2210 movdqu 24(%edi),%xmm4
/* Spill %xmm1..%xmm4 to 16..64(%esp); movdqa requires these slots to be 16-byte aligned. */
2233 movdqa %xmm1,16(%esp)
2234 movdqa %xmm2,32(%esp)
2235 movdqa %xmm3,48(%esp)
2236 movdqa %xmm4,64(%esp)
/* Four further values (from %xmm6/%xmm5) are spilled to 80..128(%esp). */
2243 movdqa %xmm6,80(%esp)
2244 movdqa %xmm5,96(%esp)
2251 movdqa %xmm6,112(%esp)
2252 movdqa %xmm5,128(%esp)
/* pshufd $68 selects dwords 0,1,0,1: copy the low 64 bits into both halves. */
2253 pshufd $68,%xmm0,%xmm6
2255 pshufd $68,%xmm1,%xmm1
2256 pshufd $68,%xmm2,%xmm2
2257 pshufd $68,%xmm3,%xmm3
2258 pshufd $68,%xmm4,%xmm4
/* Store the duplicated values as 16-byte table entries at 16..64(%edx). */
2260 movdqa %xmm1,16(%edx)
2261 movdqa %xmm2,32(%edx)
2262 movdqa %xmm3,48(%edx)
2263 movdqa %xmm4,64(%edx)
/*
 * 32x32->64 multiplies (pmuludq) of working registers by the table
 * entries at (%edx)..64(%edx), interleaved with reloads of the values
 * spilled at 32..128(%esp). The additions that combine the products are
 * not visible in this excerpt.
 */
2270 pmuludq 48(%edx),%xmm5
2272 pmuludq 32(%edx),%xmm6
2275 pmuludq 16(%edx),%xmm7
2277 movdqa 80(%esp),%xmm6
2278 pmuludq (%edx),%xmm5
2280 pmuludq 64(%edx),%xmm6
2281 movdqa 32(%esp),%xmm7
2284 pmuludq 32(%edx),%xmm7
2287 pmuludq 16(%edx),%xmm5
2289 movdqa 96(%esp),%xmm7
2290 pmuludq (%edx),%xmm6
2293 pmuludq 64(%edx),%xmm7
2295 pmuludq 48(%edx),%xmm5
2296 movdqa 48(%esp),%xmm6
2299 pmuludq 16(%edx),%xmm6
2301 movdqa 112(%esp),%xmm5
2302 pmuludq (%edx),%xmm7
2305 pmuludq 64(%edx),%xmm5
2308 pmuludq 48(%edx),%xmm6
2310 pmuludq 32(%edx),%xmm7
2311 movdqa 64(%esp),%xmm5
2313 movdqa 128(%esp),%xmm6
2314 pmuludq (%edx),%xmm5
2317 pmuludq 64(%edx),%xmm6
2320 pmuludq 16(%edx),%xmm7
2323 pmuludq 32(%edx),%xmm5
2325 pmuludq 48(%edx),%xmm6
/* Load a 16-byte constant from %ebx+64 (presumably the 0x3ffffff mask row of the constant table -- confirm). */
2326 movdqa 64(%ebx),%xmm7
/* Leave the squaring sequence when the preceding (not shown) instruction set ZF. */
2360 jz .L006square_break
/* Keep each register's low qword and place the low qword of the matching stack slot in its high half. */
2361 punpcklqdq (%esp),%xmm0
2362 punpcklqdq 16(%esp),%xmm1
2363 punpcklqdq 32(%esp),%xmm2
2364 punpcklqdq 48(%esp),%xmm3
2365 punpcklqdq 64(%esp),%xmm4
/* pshufd $141 reorders dwords to source order 1,3,0,2. */
2378 pshufd $141,%xmm0,%xmm0
2379 pshufd $141,%xmm1,%xmm1
2380 pshufd $141,%xmm2,%xmm2
2381 pshufd $141,%xmm3,%xmm3
2382 pshufd $141,%xmm4,%xmm4
/* Write the results back to the context with unaligned stores at 16..64(%edi). */
2384 movdqu %xmm1,16(%edi)
2385 movdqu %xmm2,32(%edi)
2386 movdqu %xmm3,48(%edi)
2387 movdqu %xmm4,64(%edi)
/* Four derived values (computed in lines not shown) go to 80..128(%edi). */
2394 movdqu %xmm6,80(%edi)
2395 movdqu %xmm5,96(%edi)
2402 movdqu %xmm6,112(%edi)
2403 movdqu %xmm5,128(%edi)
2407 .size _poly1305_init_sse2,.-_poly1305_init_sse2
2409 .type _poly1305_blocks_sse2,@function
2411 _poly1305_blocks_sse2:
2431 leal .Lconst_sse2-.L009pic_point(%ebx),%ebx
2434 call _poly1305_init_sse2
2462 movdqa 64(%ebx),%xmm7
2496 movdqa %xmm1,16(%esp)
2497 movdqa %xmm2,32(%esp)
2498 movdqa %xmm3,48(%esp)
2499 movdqa %xmm4,64(%esp)
2507 pmuludq 48(%esp),%xmm5
2509 pmuludq 32(%esp),%xmm6
2512 pmuludq 16(%esp),%xmm7
2515 pmuludq (%esp),%xmm5
2517 pmuludq 64(%esp),%xmm6
2521 pmuludq 32(%esp),%xmm7
2524 pmuludq 16(%esp),%xmm5
2526 movd 108(%edi),%xmm7
2527 pmuludq (%esp),%xmm6
2530 pmuludq 64(%esp),%xmm7
2532 pmuludq 48(%esp),%xmm5
2536 pmuludq 16(%esp),%xmm6
2538 movd 124(%edi),%xmm5
2539 pmuludq (%esp),%xmm7
2542 pmuludq 64(%esp),%xmm5
2545 pmuludq 48(%esp),%xmm6
2547 pmuludq 32(%esp),%xmm7
2550 movd 140(%edi),%xmm6
2551 pmuludq (%esp),%xmm5
2554 pmuludq 64(%esp),%xmm6
2557 pmuludq 16(%esp),%xmm7
2560 pmuludq 32(%esp),%xmm5
2562 pmuludq 48(%esp),%xmm6
2563 movdqa 64(%ebx),%xmm7
2603 pshufd $68,%xmm5,%xmm6
2605 pshufd $238,%xmm5,%xmm5
2608 movdqu 16(%edi),%xmm6
2609 movdqa %xmm5,-144(%edx)
2610 pshufd $68,%xmm6,%xmm5
2611 pshufd $238,%xmm6,%xmm6
2612 movdqa %xmm5,16(%edx)
2613 movdqu 32(%edi),%xmm5
2614 movdqa %xmm6,-128(%edx)
2615 pshufd $68,%xmm5,%xmm6
2616 pshufd $238,%xmm5,%xmm5
2617 movdqa %xmm6,32(%edx)
2618 movdqu 48(%edi),%xmm6
2619 movdqa %xmm5,-112(%edx)
2620 pshufd $68,%xmm6,%xmm5
2621 pshufd $238,%xmm6,%xmm6
2622 movdqa %xmm5,48(%edx)
2623 movdqu 64(%edi),%xmm5
2624 movdqa %xmm6,-96(%edx)
2625 pshufd $68,%xmm5,%xmm6
2626 pshufd $238,%xmm5,%xmm5
2627 movdqa %xmm6,64(%edx)
2628 movdqu 80(%edi),%xmm6
2629 movdqa %xmm5,-80(%edx)
2630 pshufd $68,%xmm6,%xmm5
2631 pshufd $238,%xmm6,%xmm6
2632 movdqa %xmm5,80(%edx)
2633 movdqu 96(%edi),%xmm5
2634 movdqa %xmm6,-64(%edx)
2635 pshufd $68,%xmm5,%xmm6
2636 pshufd $238,%xmm5,%xmm5
2637 movdqa %xmm6,96(%edx)
2638 movdqu 112(%edi),%xmm6
2639 movdqa %xmm5,-48(%edx)
2640 pshufd $68,%xmm6,%xmm5
2641 pshufd $238,%xmm6,%xmm6
2642 movdqa %xmm5,112(%edx)
2643 movdqu 128(%edi),%xmm5
2644 movdqa %xmm6,-32(%edx)
2645 pshufd $68,%xmm5,%xmm6
2646 pshufd $238,%xmm5,%xmm5
2647 movdqa %xmm6,128(%edx)
2648 movdqa %xmm5,-16(%edx)
2649 movdqu 32(%esi),%xmm5
2650 movdqu 48(%esi),%xmm6
2652 movdqa %xmm2,112(%esp)
2653 movdqa %xmm3,128(%esp)
2654 movdqa %xmm4,144(%esp)
2660 punpcklqdq %xmm3,%xmm2
2661 punpckhqdq %xmm6,%xmm4
2662 punpcklqdq %xmm6,%xmm5
2674 movdqa %xmm0,80(%esp)
2675 movdqa %xmm1,96(%esp)
2680 movdqa -144(%edx),%xmm7
2681 movdqa %xmm6,16(%eax)
2682 movdqa %xmm2,32(%eax)
2683 movdqa %xmm3,48(%eax)
2684 movdqa %xmm4,64(%eax)
2692 pmuludq -16(%edx),%xmm0
2694 pmuludq -128(%edx),%xmm1
2697 pmuludq -112(%edx),%xmm7
2700 pmuludq -96(%edx),%xmm5
2702 movdqa 16(%eax),%xmm7
2703 pmuludq -80(%edx),%xmm6
2706 pmuludq -128(%edx),%xmm7
2709 pmuludq -112(%edx),%xmm5
2711 movdqa 32(%eax),%xmm7
2712 pmuludq -96(%edx),%xmm6
2715 pmuludq -32(%edx),%xmm7
2718 pmuludq -16(%edx),%xmm5
2721 pmuludq -128(%edx),%xmm6
2723 movdqa 48(%eax),%xmm5
2724 pmuludq -112(%edx),%xmm7
2727 pmuludq -48(%edx),%xmm5
2730 pmuludq -32(%edx),%xmm6
2733 pmuludq -16(%edx),%xmm7
2735 movdqa 64(%eax),%xmm6
2736 pmuludq -128(%edx),%xmm5
2739 pmuludq -16(%edx),%xmm6
2742 pmuludq -64(%edx),%xmm7
2745 pmuludq -48(%edx),%xmm5
2747 movdqa 64(%ebx),%xmm7
2748 pmuludq -32(%edx),%xmm6
2751 movdqu -32(%esi),%xmm5
2752 movdqu -16(%esi),%xmm6
2754 movdqa %xmm2,32(%esp)
2755 movdqa %xmm3,48(%esp)
2756 movdqa %xmm4,64(%esp)
2762 punpcklqdq %xmm3,%xmm2
2763 punpckhqdq %xmm6,%xmm4
2764 punpcklqdq %xmm6,%xmm5
2778 paddd 80(%esp),%xmm5
2779 paddd 96(%esp),%xmm6
2780 paddd 112(%esp),%xmm2
2781 paddd 128(%esp),%xmm3
2782 paddd 144(%esp),%xmm4
2786 movdqa %xmm1,16(%esp)
2787 movdqa %xmm6,16(%eax)
2788 movdqa %xmm2,32(%eax)
2789 movdqa %xmm3,48(%eax)
2790 movdqa %xmm4,64(%eax)
2799 paddq 16(%esp),%xmm6
2800 paddq 32(%esp),%xmm2
2801 paddq 48(%esp),%xmm3
2802 paddq 64(%esp),%xmm4
2803 pmuludq 128(%edx),%xmm0
2805 pmuludq 16(%edx),%xmm1
2808 pmuludq 32(%edx),%xmm7
2811 pmuludq 48(%edx),%xmm5
2813 movdqa 16(%eax),%xmm7
2814 pmuludq 64(%edx),%xmm6
2817 pmuludq 16(%edx),%xmm7
2820 pmuludq 32(%edx),%xmm5
2822 movdqa 32(%eax),%xmm7
2823 pmuludq 48(%edx),%xmm6
2826 pmuludq 112(%edx),%xmm7
2829 pmuludq 128(%edx),%xmm5
2832 pmuludq 16(%edx),%xmm6
2834 movdqa 48(%eax),%xmm5
2835 pmuludq 32(%edx),%xmm7
2838 pmuludq 96(%edx),%xmm5
2841 pmuludq 112(%edx),%xmm6
2844 pmuludq 128(%edx),%xmm7
2846 movdqa 64(%eax),%xmm6
2847 pmuludq 16(%edx),%xmm5
2850 pmuludq 128(%edx),%xmm6
2853 pmuludq 80(%edx),%xmm7
2856 pmuludq 96(%edx),%xmm5
2858 movdqa 64(%ebx),%xmm7
2859 pmuludq 112(%edx),%xmm6
2892 movdqu 32(%esi),%xmm5
2893 movdqu 48(%esi),%xmm6
2895 movdqa %xmm2,112(%esp)
2896 movdqa %xmm3,128(%esp)
2897 movdqa %xmm4,144(%esp)
2903 punpcklqdq %xmm3,%xmm2
2904 punpckhqdq %xmm6,%xmm4
2905 punpcklqdq %xmm6,%xmm5
2917 movdqa %xmm0,80(%esp)
2918 movdqa %xmm1,96(%esp)
2921 pshufd $16,-144(%edx),%xmm7
2926 paddd 112(%esp),%xmm2
2927 paddd 128(%esp),%xmm3
2928 paddd 144(%esp),%xmm4
2931 movdqa %xmm6,16(%eax)
2932 movdqa %xmm2,32(%eax)
2933 movdqa %xmm3,48(%eax)
2934 movdqa %xmm4,64(%eax)
2939 pshufd $16,-128(%edx),%xmm5
2944 pmuludq 48(%eax),%xmm5
2946 pmuludq 32(%eax),%xmm6
2949 pmuludq 16(%eax),%xmm7
2951 pshufd $16,-64(%edx),%xmm6
2952 pmuludq (%eax),%xmm5
2954 pmuludq 64(%eax),%xmm6
2955 pshufd $16,-112(%edx),%xmm7
2958 pmuludq 32(%eax),%xmm7
2961 pmuludq 16(%eax),%xmm5
2963 pshufd $16,-48(%edx),%xmm7
2964 pmuludq (%eax),%xmm6
2967 pmuludq 64(%eax),%xmm7
2969 pmuludq 48(%eax),%xmm5
2970 pshufd $16,-96(%edx),%xmm6
2973 pmuludq 16(%eax),%xmm6
2975 pshufd $16,-32(%edx),%xmm5
2976 pmuludq (%eax),%xmm7
2979 pmuludq 64(%eax),%xmm5
2982 pmuludq 48(%eax),%xmm6
2984 pmuludq 32(%eax),%xmm7
2985 pshufd $16,-80(%edx),%xmm5
2987 pshufd $16,-16(%edx),%xmm6
2988 pmuludq (%eax),%xmm5
2991 pmuludq 64(%eax),%xmm6
2994 pmuludq 16(%eax),%xmm7
2997 pmuludq 32(%eax),%xmm5
2999 pmuludq 48(%eax),%xmm6
3000 movdqa 64(%ebx),%xmm7
3004 movdqu -32(%esi),%xmm5
3005 movdqu -16(%esi),%xmm6
3007 movdqa %xmm2,32(%esp)
3008 movdqa %xmm3,48(%esp)
3009 movdqa %xmm4,64(%esp)
3015 punpcklqdq %xmm3,%xmm2
3016 punpckhqdq %xmm6,%xmm4
3017 punpcklqdq %xmm6,%xmm5
3029 pshufd $16,(%edx),%xmm7
3030 paddd 80(%esp),%xmm5
3031 paddd 96(%esp),%xmm6
3032 paddd 112(%esp),%xmm2
3033 paddd 128(%esp),%xmm3
3034 paddd 144(%esp),%xmm4
3037 movdqa %xmm6,16(%esp)
3045 paddq 32(%esp),%xmm2
3046 movdqa %xmm5,32(%esp)
3047 pshufd $16,16(%edx),%xmm5
3048 paddq 48(%esp),%xmm3
3049 movdqa %xmm6,48(%esp)
3052 paddq 64(%esp),%xmm4
3053 movdqa %xmm6,64(%esp)
3055 pmuludq 48(%esp),%xmm5
3057 pmuludq 32(%esp),%xmm6
3060 pmuludq 16(%esp),%xmm7
3062 pshufd $16,80(%edx),%xmm6
3063 pmuludq (%esp),%xmm5
3065 pmuludq 64(%esp),%xmm6
3066 pshufd $16,32(%edx),%xmm7
3069 pmuludq 32(%esp),%xmm7
3072 pmuludq 16(%esp),%xmm5
3074 pshufd $16,96(%edx),%xmm7
3075 pmuludq (%esp),%xmm6
3078 pmuludq 64(%esp),%xmm7
3080 pmuludq 48(%esp),%xmm5
3081 pshufd $16,48(%edx),%xmm6
3084 pmuludq 16(%esp),%xmm6
3086 pshufd $16,112(%edx),%xmm5
3087 pmuludq (%esp),%xmm7
3090 pmuludq 64(%esp),%xmm5
3093 pmuludq 48(%esp),%xmm6
3095 pmuludq 32(%esp),%xmm7
3096 pshufd $16,64(%edx),%xmm5
3098 pshufd $16,128(%edx),%xmm6
3099 pmuludq (%esp),%xmm5
3102 pmuludq 64(%esp),%xmm6
3105 pmuludq 16(%esp),%xmm7
3108 pmuludq 32(%esp),%xmm5
3110 pmuludq 48(%esp),%xmm6
3111 movdqa 64(%ebx),%xmm7
3115 pshufd $78,%xmm4,%xmm6
3116 pshufd $78,%xmm3,%xmm5
3119 pshufd $78,%xmm0,%xmm6
3120 pshufd $78,%xmm1,%xmm5
3123 pshufd $78,%xmm2,%xmm6
3156 movd %xmm0,-48(%edi)
3157 movd %xmm1,-44(%edi)
3158 movd %xmm2,-40(%edi)
3159 movd %xmm3,-36(%edi)
3160 movd %xmm4,-32(%edi)
3168 .size _poly1305_blocks_sse2,.-_poly1305_blocks_sse2
/*
 * _poly1305_emit_sse2
 *
 * NOTE(review): only the symbol bookkeeping and a single instruction of
 * this routine are present in this listing (the embedded line numbers
 * jump from 3172 to 3208 to 3261), so its inputs, outputs and clobbers
 * cannot be documented from here.
 */
3170 .type _poly1305_emit_sse2,@function
3172 _poly1305_emit_sse2:
/* ebp = edi + edi*4, i.e. edi*5; leal leaves the flags untouched. */
3208 leal (%edi,%edi,4),%ebp
/* Symbol size = distance from the entry label to this point. */
3261 .size _poly1305_emit_sse2,.-_poly1305_emit_sse2
/*
 * _poly1305_init_avx2
 *
 * Reads 16 bytes at 24(%edi), splits them into five masked pieces,
 * multiplies them out with a 5x5 vpmuludq schedule followed by a carry
 * pass, and finally stores five result vectors plus four "times 5"
 * vectors at (%edi)..128(%edi).
 *
 * Visible register usage:
 *   edi  - base of the context being read and written
 *   ebx  - base of a constant table; 64(%ebx) is used as the AND mask
 *   esp  - scratch area, accessed with aligned stores at 0..128(%esp)
 *   edx  - second scratch area, accessed at 0..64(%edx)
 *
 * NOTE(review): the embedded line numbers are not contiguous, so the
 * stack set-up, the loop label/counter, the flag-setting instruction in
 * front of the jz, and the epilogue are not part of this listing.
 * Comments below describe only the instructions that are shown.
 */
3263 .type _poly1305_init_avx2,@function
3265 _poly1305_init_avx2:
/* Unaligned 128-bit load of the value to be expanded. */
3266 vmovdqu 24(%edi),%xmm4
/* xmm7 = mask; presumably 2^26-1 per 64-bit lane -- confirm against the
   constant table that ebx points to. */
3271 vmovdqa 64(%ebx),%xmm7
/* Cut the loaded value into five pieces starting at bit offsets
   0, 26, 52 (6 bytes + 4 bits), 78 (6 bytes + 30 bits) and 104 (13 bytes);
   the first four are masked with xmm7. */
3272 vpand %xmm7,%xmm4,%xmm0
3273 vpsrlq $26,%xmm4,%xmm1
3274 vpsrldq $6,%xmm4,%xmm3
3275 vpand %xmm7,%xmm1,%xmm1
3276 vpsrlq $4,%xmm3,%xmm2
3277 vpsrlq $30,%xmm3,%xmm3
3278 vpand %xmm7,%xmm2,%xmm2
3279 vpand %xmm7,%xmm3,%xmm3
3280 vpsrldq $13,%xmm4,%xmm4
/* Spill the five pieces to 0..64(%esp). vmovdqa needs 16-byte aligned
   addresses, so esp is assumed to have been aligned by code not shown. */
3284 vmovdqa %xmm0,(%esp)
3285 vmovdqa %xmm1,16(%esp)
3286 vmovdqa %xmm2,32(%esp)
3287 vmovdqa %xmm3,48(%esp)
3288 vmovdqa %xmm4,64(%esp)
/* 5*x computed as (x<<2)+x for pieces 1..4, spilled to 80..128(%esp). */
3289 vpslld $2,%xmm1,%xmm6
3290 vpslld $2,%xmm2,%xmm5
3291 vpaddd %xmm1,%xmm6,%xmm6
3292 vpaddd %xmm2,%xmm5,%xmm5
3293 vmovdqa %xmm6,80(%esp)
3294 vmovdqa %xmm5,96(%esp)
3295 vpslld $2,%xmm3,%xmm6
3296 vpslld $2,%xmm4,%xmm5
3297 vpaddd %xmm3,%xmm6,%xmm6
3298 vpaddd %xmm4,%xmm5,%xmm5
3299 vmovdqa %xmm6,112(%esp)
3300 vmovdqa %xmm5,128(%esp)
/* vpshufd $68 selects dwords (0,1,0,1): the low qword is duplicated into
   both halves. The duplicated pieces are stored at 0..64(%edx) and serve
   as the memory operands of the multiplies below. */
3301 vpshufd $68,%xmm0,%xmm5
/* NOTE(review): embedded line 3302 is absent from this listing; the value
   xmm6 holds when it is first multiplied below cannot be confirmed from
   the visible lines. */
3303 vpshufd $68,%xmm1,%xmm1
3304 vpshufd $68,%xmm2,%xmm2
3305 vpshufd $68,%xmm3,%xmm3
3306 vpshufd $68,%xmm4,%xmm4
3307 vmovdqa %xmm5,(%edx)
3308 vmovdqa %xmm1,16(%edx)
3309 vmovdqa %xmm2,32(%edx)
3310 vmovdqa %xmm3,48(%edx)
3311 vmovdqa %xmm4,64(%edx)
/* Multiply-accumulate into xmm0..xmm4. vpmuludq multiplies the low 32
   bits of each 64-bit lane and yields 64-bit products, which vpaddq sums.
   First column: every duplicated piece times xmm0. */
3312 vpmuludq %xmm0,%xmm4,%xmm4
3313 vpmuludq %xmm0,%xmm3,%xmm3
3314 vpmuludq %xmm0,%xmm2,%xmm2
3315 vpmuludq %xmm0,%xmm1,%xmm1
3316 vpmuludq %xmm0,%xmm5,%xmm0
/* Column using xmm6 as the multiplier; the wrap-around term uses the
   5x value reloaded from 80(%esp). */
3317 vpmuludq 48(%edx),%xmm6,%xmm5
3318 vpaddq %xmm5,%xmm4,%xmm4
3319 vpmuludq 32(%edx),%xmm6,%xmm7
3320 vpaddq %xmm7,%xmm3,%xmm3
3321 vpmuludq 16(%edx),%xmm6,%xmm5
3322 vpaddq %xmm5,%xmm2,%xmm2
3323 vmovdqa 80(%esp),%xmm7
3324 vpmuludq (%edx),%xmm6,%xmm6
3325 vpaddq %xmm6,%xmm1,%xmm1
/* Column for the piece saved at 32(%esp), with its 5x form from
   96(%esp) for the wrap-around terms. */
3326 vmovdqa 32(%esp),%xmm5
3327 vpmuludq 64(%edx),%xmm7,%xmm7
3328 vpaddq %xmm7,%xmm0,%xmm0
3329 vpmuludq 32(%edx),%xmm5,%xmm6
3330 vpaddq %xmm6,%xmm4,%xmm4
3331 vpmuludq 16(%edx),%xmm5,%xmm7
3332 vpaddq %xmm7,%xmm3,%xmm3
3333 vmovdqa 96(%esp),%xmm6
3334 vpmuludq (%edx),%xmm5,%xmm5
3335 vpaddq %xmm5,%xmm2,%xmm2
3336 vpmuludq 64(%edx),%xmm6,%xmm7
3337 vpaddq %xmm7,%xmm1,%xmm1
/* Column for the piece saved at 48(%esp), 5x form from 112(%esp). */
3338 vmovdqa 48(%esp),%xmm5
3339 vpmuludq 48(%edx),%xmm6,%xmm6
3340 vpaddq %xmm6,%xmm0,%xmm0
3341 vpmuludq 16(%edx),%xmm5,%xmm7
3342 vpaddq %xmm7,%xmm4,%xmm4
3343 vmovdqa 112(%esp),%xmm6
3344 vpmuludq (%edx),%xmm5,%xmm5
3345 vpaddq %xmm5,%xmm3,%xmm3
3346 vpmuludq 64(%edx),%xmm6,%xmm7
3347 vpaddq %xmm7,%xmm2,%xmm2
3348 vpmuludq 48(%edx),%xmm6,%xmm5
3349 vpaddq %xmm5,%xmm1,%xmm1
/* Column for the piece saved at 64(%esp), 5x form from 128(%esp). */
3350 vmovdqa 64(%esp),%xmm7
3351 vpmuludq 32(%edx),%xmm6,%xmm6
3352 vpaddq %xmm6,%xmm0,%xmm0
3353 vmovdqa 128(%esp),%xmm5
3354 vpmuludq (%edx),%xmm7,%xmm7
3355 vpaddq %xmm7,%xmm4,%xmm4
3356 vpmuludq 64(%edx),%xmm5,%xmm6
3357 vpaddq %xmm6,%xmm3,%xmm3
3358 vpmuludq 16(%edx),%xmm5,%xmm7
3359 vpaddq %xmm7,%xmm0,%xmm0
3360 vpmuludq 32(%edx),%xmm5,%xmm6
3361 vpaddq %xmm6,%xmm1,%xmm1
/* Reload the mask (xmm7 was used as a temporary above) while the last
   product is being formed. */
3362 vmovdqa 64(%ebx),%xmm7
3363 vpmuludq 48(%edx),%xmm5,%xmm5
3364 vpaddq %xmm5,%xmm2,%xmm2
/* Carry pass: shift each accumulator right by 26, keep the masked low
   part, and add the shifted-out part to the next accumulator. Two chains
   (3->4 and 0->1->2->3) are interleaved. */
3365 vpsrlq $26,%xmm3,%xmm5
3366 vpand %xmm7,%xmm3,%xmm3
3367 vpsrlq $26,%xmm0,%xmm6
3368 vpand %xmm7,%xmm0,%xmm0
3369 vpaddq %xmm5,%xmm4,%xmm4
3370 vpaddq %xmm6,%xmm1,%xmm1
3371 vpsrlq $26,%xmm4,%xmm5
3372 vpand %xmm7,%xmm4,%xmm4
3373 vpsrlq $26,%xmm1,%xmm6
3374 vpand %xmm7,%xmm1,%xmm1
3375 vpaddq %xmm6,%xmm2,%xmm2
/* The carry out of xmm4 is folded back into xmm0 multiplied by 5:
   it is added once as-is and once shifted left by 2. */
3376 vpaddd %xmm5,%xmm0,%xmm0
3377 vpsllq $2,%xmm5,%xmm5
3378 vpsrlq $26,%xmm2,%xmm6
3379 vpand %xmm7,%xmm2,%xmm2
3380 vpaddd %xmm5,%xmm0,%xmm0
3381 vpaddd %xmm6,%xmm3,%xmm3
3382 vpsrlq $26,%xmm3,%xmm6
3383 vpsrlq $26,%xmm0,%xmm5
3384 vpand %xmm7,%xmm0,%xmm0
3385 vpand %xmm7,%xmm3,%xmm3
3386 vpaddd %xmm5,%xmm1,%xmm1
3387 vpaddd %xmm6,%xmm4,%xmm4
/* Leave the loop when ZF is set. NOTE(review): the instruction that sets
   the flags (embedded line 3388) and the branch target label are not in
   this listing. */
3389 jz .L019square_break
/* Combine the low qword of each fresh result with the low qword of the
   value saved on the stack for the same piece. */
3390 vpunpcklqdq (%esp),%xmm0,%xmm0
3391 vpunpcklqdq 16(%esp),%xmm1,%xmm1
3392 vpunpcklqdq 32(%esp),%xmm2,%xmm2
3393 vpunpcklqdq 48(%esp),%xmm3,%xmm3
3394 vpunpcklqdq 64(%esp),%xmm4,%xmm4
/* Move each result into the upper dword of its qword, OR in the values
   saved on the stack, then reorder the dwords as (1,3,0,2) with
   vpshufd $141. */
3397 vpsllq $32,%xmm0,%xmm0
3398 vpsllq $32,%xmm1,%xmm1
3399 vpsllq $32,%xmm2,%xmm2
3400 vpsllq $32,%xmm3,%xmm3
3401 vpsllq $32,%xmm4,%xmm4
3402 vpor (%esp),%xmm0,%xmm0
3403 vpor 16(%esp),%xmm1,%xmm1
3404 vpor 32(%esp),%xmm2,%xmm2
3405 vpor 48(%esp),%xmm3,%xmm3
3406 vpor 64(%esp),%xmm4,%xmm4
3407 vpshufd $141,%xmm0,%xmm0
3408 vpshufd $141,%xmm1,%xmm1
3409 vpshufd $141,%xmm2,%xmm2
3410 vpshufd $141,%xmm3,%xmm3
3411 vpshufd $141,%xmm4,%xmm4
/* Write the five packed vectors to the context (unaligned stores). */
3412 vmovdqu %xmm0,(%edi)
3413 vmovdqu %xmm1,16(%edi)
3414 vmovdqu %xmm2,32(%edi)
3415 vmovdqu %xmm3,48(%edi)
3416 vmovdqu %xmm4,64(%edi)
/* Also store 5*x, again as (x<<2)+x, for vectors 1..4 at 80..128(%edi). */
3417 vpslld $2,%xmm1,%xmm6
3418 vpslld $2,%xmm2,%xmm5
3419 vpaddd %xmm1,%xmm6,%xmm6
3420 vpaddd %xmm2,%xmm5,%xmm5
3421 vmovdqu %xmm6,80(%edi)
3422 vmovdqu %xmm5,96(%edi)
3423 vpslld $2,%xmm3,%xmm6
3424 vpslld $2,%xmm4,%xmm5
3425 vpaddd %xmm3,%xmm6,%xmm6
3426 vpaddd %xmm4,%xmm5,%xmm5
3427 vmovdqu %xmm6,112(%edi)
3428 vmovdqu %xmm5,128(%edi)
/* Symbol size = distance from the entry label to this point. */
3432 .size _poly1305_init_avx2,.-_poly1305_init_avx2
/*
 * _poly1305_blocks_avx2
 *
 * AVX2 block-processing routine. The visible code
 *   - points ebx at .Lconst_sse2 and calls _poly1305_init_avx2,
 *   - copies nine vectors from the context at 48..128(%edi) into a
 *     32-byte-aligned table at -128..128(%edx),
 *   - loads five 32-bit words from -48..-32(%edi) into xmm0..xmm4,
 *   - reads input from (%esi) 64 bytes at a time, splits it into five
 *     masked pieces, adds them to ymm0..ymm4, multiplies by the table
 *     and performs a carry pass,
 *   - sums the four 64-bit lanes of each result, performs a final carry
 *     pass, and writes five 32-bit words back to -48..-32(%edi).
 *
 * Visible register usage:
 *   edi - context base, esi - input pointer, ebx - constant table,
 *   edx - table base, esp - 32-byte-aligned scratch at 0..128(%esp).
 *
 * NOTE(review): the embedded line numbers are not contiguous. The
 * prologue, stack set-up, branches, loop labels, pointer updates and the
 * epilogue are missing from this listing, so control flow between the
 * groups of instructions below cannot be documented from here.
 */
3434 .type _poly1305_blocks_avx2,@function
3436 _poly1305_blocks_avx2:
/* PIC: turn the pic-point address in ebx into the address of
   .Lconst_sse2. */
3456 leal .Lconst_sse2-.L022pic_point(%ebx),%ebx
/* NOTE(review): the condition under which this call is made is not
   visible in this listing. */
3459 call _poly1305_init_avx2
/* Load five 128-bit vectors from the context. */
3484 vmovdqu 48(%edi),%xmm0
3486 vmovdqu 64(%edi),%xmm1
3487 vmovdqu 80(%edi),%xmm2
3488 vmovdqu 96(%edi),%xmm3
3489 vmovdqu 112(%edi),%xmm4
/* Spread each vector over 256 bits: vpermq $64 selects qwords (0,0,0,1),
   then vpshufd $200 selects dwords (0,2,0,3) inside each 128-bit lane. */
3491 vpermq $64,%ymm0,%ymm0
3492 vpermq $64,%ymm1,%ymm1
3493 vpermq $64,%ymm2,%ymm2
3494 vpermq $64,%ymm3,%ymm3
3495 vpermq $64,%ymm4,%ymm4
3496 vpshufd $200,%ymm0,%ymm0
3497 vpshufd $200,%ymm1,%ymm1
3498 vpshufd $200,%ymm2,%ymm2
3499 vpshufd $200,%ymm3,%ymm3
3500 vpshufd $200,%ymm4,%ymm4
/* Store the first five table rows at -128..0(%edx), interleaved with
   loads of the next four context vectors. */
3501 vmovdqa %ymm0,-128(%edx)
3502 vmovdqu 80(%edi),%xmm0
3503 vmovdqa %ymm1,-96(%edx)
3504 vmovdqu 96(%edi),%xmm1
3505 vmovdqa %ymm2,-64(%edx)
3506 vmovdqu 112(%edi),%xmm2
3507 vmovdqa %ymm3,-32(%edx)
3508 vmovdqu 128(%edi),%xmm3
3509 vmovdqa %ymm4,(%edx)
/* Same spreading for the remaining four vectors. */
3510 vpermq $64,%ymm0,%ymm0
3511 vpermq $64,%ymm1,%ymm1
3512 vpermq $64,%ymm2,%ymm2
3513 vpermq $64,%ymm3,%ymm3
3514 vpshufd $200,%ymm0,%ymm0
3515 vpshufd $200,%ymm1,%ymm1
3516 vpshufd $200,%ymm2,%ymm2
3517 vpshufd $200,%ymm3,%ymm3
/* Store table rows at 32..128(%edx), interleaved with loading five
   32-bit words from -48..-32(%edi) into the low dword of xmm0..xmm4. */
3518 vmovdqa %ymm0,32(%edx)
3519 vmovd -48(%edi),%xmm0
3520 vmovdqa %ymm1,64(%edx)
3521 vmovd -44(%edi),%xmm1
3522 vmovdqa %ymm2,96(%edx)
3523 vmovd -40(%edi),%xmm2
3524 vmovdqa %ymm3,128(%edx)
3525 vmovd -36(%edi),%xmm3
3526 vmovd -32(%edi),%xmm4
/* ymm7 = AND mask taken from the constant table. */
3527 vmovdqa 64(%ebx),%ymm7
/* Input loads belonging to a path whose surrounding branches are not in
   this listing. */
3534 vmovdqu (%esi),%xmm5
3537 vmovdqu 16(%esi),%xmm6
3539 vinserti128 $1,32(%esi),%ymm5,%ymm5
3551 vpxor %ymm6,%ymm6,%ymm6
/* ebx = ebx + eax*8 + 32. NOTE(review): the value of eax is set by code
   not shown; (%ebx) is later OR-ed into the top piece of each block. */
3552 leal 32(%ebx,%eax,8),%ebx
/* Load 64 bytes of input: ymm5 = bytes 0..15 | 32..47,
   ymm6 = bytes 16..31 | 48..63. */
3557 vmovdqu (%esi),%xmm5
3558 vmovdqu 16(%esi),%xmm6
3559 vinserti128 $1,32(%esi),%ymm5,%ymm5
3560 vinserti128 $1,48(%esi),%ymm6,%ymm6
/* Save ymm0..ymm2 to the stack so the registers can be reused, and split
   the input into five pieces at bit offsets 0, 26, 52, 78 and 104 of each
   16-byte block (6-byte shift + 4/30 bits; high qword >> 40). */
3565 vmovdqa %ymm2,64(%esp)
3566 vpsrldq $6,%ymm5,%ymm2
3567 vmovdqa %ymm0,(%esp)
3568 vpsrldq $6,%ymm6,%ymm0
3569 vmovdqa %ymm1,32(%esp)
3570 vpunpckhqdq %ymm6,%ymm5,%ymm1
3571 vpunpcklqdq %ymm6,%ymm5,%ymm5
3572 vpunpcklqdq %ymm0,%ymm2,%ymm2
3573 vpsrlq $30,%ymm2,%ymm0
3574 vpsrlq $4,%ymm2,%ymm2
3575 vpsrlq $26,%ymm5,%ymm6
3576 vpsrlq $40,%ymm1,%ymm1
3577 vpand %ymm7,%ymm2,%ymm2
3578 vpand %ymm7,%ymm5,%ymm5
3579 vpand %ymm7,%ymm6,%ymm6
3580 vpand %ymm7,%ymm0,%ymm0
/* OR a constant row into the top piece (presumably the pad bit --
   confirm against the table ebx points into). */
3581 vpor (%ebx),%ymm1,%ymm1
/* Add the saved/previous values to the freshly split input pieces:
   ymm5 += old ymm0, ymm6 += old ymm1, ymm2 += old ymm2,
   ymm0 += ymm3, ymm1 += ymm4. */
3582 vpaddq 64(%esp),%ymm2,%ymm2
3583 vpaddq (%esp),%ymm5,%ymm5
3584 vpaddq 32(%esp),%ymm6,%ymm6
3585 vpaddq %ymm3,%ymm0,%ymm0
3586 vpaddq %ymm4,%ymm1,%ymm1
/* 5x5 multiply-accumulate against the table at -128..128(%edx); products
   are gathered in ymm0..ymm4. Three of the summed pieces are spilled to
   32/96/128(%esp) and reloaded when their column is processed.
   Column for ymm2: */
3587 vpmuludq -96(%edx),%ymm2,%ymm3
3588 vmovdqa %ymm6,32(%esp)
3589 vpmuludq -64(%edx),%ymm2,%ymm4
3590 vmovdqa %ymm0,96(%esp)
3591 vpmuludq 96(%edx),%ymm2,%ymm0
3592 vmovdqa %ymm1,128(%esp)
3593 vpmuludq 128(%edx),%ymm2,%ymm1
3594 vpmuludq -128(%edx),%ymm2,%ymm2
/* Column for ymm5: */
3595 vpmuludq -32(%edx),%ymm5,%ymm7
3596 vpaddq %ymm7,%ymm3,%ymm3
3597 vpmuludq (%edx),%ymm5,%ymm6
3598 vpaddq %ymm6,%ymm4,%ymm4
3599 vpmuludq -128(%edx),%ymm5,%ymm7
3600 vpaddq %ymm7,%ymm0,%ymm0
3601 vmovdqa 32(%esp),%ymm7
3602 vpmuludq -96(%edx),%ymm5,%ymm6
3603 vpaddq %ymm6,%ymm1,%ymm1
3604 vpmuludq -64(%edx),%ymm5,%ymm5
3605 vpaddq %ymm5,%ymm2,%ymm2
/* Column for the piece reloaded from 32(%esp): */
3606 vpmuludq -64(%edx),%ymm7,%ymm6
3607 vpaddq %ymm6,%ymm3,%ymm3
3608 vpmuludq -32(%edx),%ymm7,%ymm5
3609 vpaddq %ymm5,%ymm4,%ymm4
3610 vpmuludq 128(%edx),%ymm7,%ymm6
3611 vpaddq %ymm6,%ymm0,%ymm0
3612 vmovdqa 96(%esp),%ymm6
3613 vpmuludq -128(%edx),%ymm7,%ymm5
3614 vpaddq %ymm5,%ymm1,%ymm1
3615 vpmuludq -96(%edx),%ymm7,%ymm7
3616 vpaddq %ymm7,%ymm2,%ymm2
/* Column for the piece reloaded from 96(%esp): */
3617 vpmuludq -128(%edx),%ymm6,%ymm5
3618 vpaddq %ymm5,%ymm3,%ymm3
3619 vpmuludq -96(%edx),%ymm6,%ymm7
3620 vpaddq %ymm7,%ymm4,%ymm4
3621 vpmuludq 64(%edx),%ymm6,%ymm5
3622 vpaddq %ymm5,%ymm0,%ymm0
3623 vmovdqa 128(%esp),%ymm5
3624 vpmuludq 96(%edx),%ymm6,%ymm7
3625 vpaddq %ymm7,%ymm1,%ymm1
3626 vpmuludq 128(%edx),%ymm6,%ymm6
3627 vpaddq %ymm6,%ymm2,%ymm2
/* Column for the piece reloaded from 128(%esp); the mask is reloaded
   into ymm7 part-way through because ymm7 was used as a temporary. */
3628 vpmuludq 128(%edx),%ymm5,%ymm7
3629 vpaddq %ymm7,%ymm3,%ymm3
3630 vpmuludq 32(%edx),%ymm5,%ymm6
3631 vpaddq %ymm6,%ymm0,%ymm0
3632 vpmuludq -128(%edx),%ymm5,%ymm7
3633 vpaddq %ymm7,%ymm4,%ymm4
3634 vmovdqa 64(%ebx),%ymm7
3635 vpmuludq 64(%edx),%ymm5,%ymm6
3636 vpaddq %ymm6,%ymm1,%ymm1
3637 vpmuludq 96(%edx),%ymm5,%ymm5
3638 vpaddq %ymm5,%ymm2,%ymm2
/* Carry pass: shift right by 26, mask, add the carry to the next
   accumulator (chains 3->4 and 0->1->2->3 interleaved). The carry out of
   ymm4 is added to ymm0 once as-is and once shifted left by 2, i.e.
   multiplied by 5. */
3639 vpsrlq $26,%ymm3,%ymm5
3640 vpand %ymm7,%ymm3,%ymm3
3641 vpsrlq $26,%ymm0,%ymm6
3642 vpand %ymm7,%ymm0,%ymm0
3643 vpaddq %ymm5,%ymm4,%ymm4
3644 vpaddq %ymm6,%ymm1,%ymm1
3645 vpsrlq $26,%ymm4,%ymm5
3646 vpand %ymm7,%ymm4,%ymm4
3647 vpsrlq $26,%ymm1,%ymm6
3648 vpand %ymm7,%ymm1,%ymm1
3649 vpaddq %ymm6,%ymm2,%ymm2
3650 vpaddq %ymm5,%ymm0,%ymm0
3651 vpsllq $2,%ymm5,%ymm5
3652 vpsrlq $26,%ymm2,%ymm6
3653 vpand %ymm7,%ymm2,%ymm2
3654 vpaddq %ymm5,%ymm0,%ymm0
3655 vpaddq %ymm6,%ymm3,%ymm3
3656 vpsrlq $26,%ymm3,%ymm6
3657 vpsrlq $26,%ymm0,%ymm5
3658 vpand %ymm7,%ymm0,%ymm0
3659 vpand %ymm7,%ymm3,%ymm3
3660 vpaddq %ymm5,%ymm1,%ymm1
3661 vpaddq %ymm6,%ymm4,%ymm4
/* Second copy of the load/split/add sequence. NOTE(review): the labels
   and branches separating it from the copy above are not shown. */
3662 vmovdqu (%esi),%xmm5
3663 vmovdqu 16(%esi),%xmm6
3664 vinserti128 $1,32(%esi),%ymm5,%ymm5
3665 vinserti128 $1,48(%esi),%ymm6,%ymm6
3670 vmovdqa %ymm2,64(%esp)
3671 vpsrldq $6,%ymm5,%ymm2
3672 vmovdqa %ymm0,(%esp)
3673 vpsrldq $6,%ymm6,%ymm0
3674 vmovdqa %ymm1,32(%esp)
3675 vpunpckhqdq %ymm6,%ymm5,%ymm1
3676 vpunpcklqdq %ymm6,%ymm5,%ymm5
3677 vpunpcklqdq %ymm0,%ymm2,%ymm2
3678 vpsrlq $30,%ymm2,%ymm0
3679 vpsrlq $4,%ymm2,%ymm2
3680 vpsrlq $26,%ymm5,%ymm6
3681 vpsrlq $40,%ymm1,%ymm1
3682 vpand %ymm7,%ymm2,%ymm2
3683 vpand %ymm7,%ymm5,%ymm5
3684 vpand %ymm7,%ymm6,%ymm6
3685 vpand %ymm7,%ymm0,%ymm0
3686 vpor (%ebx),%ymm1,%ymm1
3688 vpaddq 64(%esp),%ymm2,%ymm2
3689 vpaddq (%esp),%ymm5,%ymm5
3690 vpaddq 32(%esp),%ymm6,%ymm6
3691 vpaddq %ymm3,%ymm0,%ymm0
3692 vpaddq %ymm4,%ymm1,%ymm1
/* Same multiply schedule as the first copy, except that every table
   displacement is 4 bytes higher (-92 instead of -96, and so on), so each
   64-bit lane's multiplier comes from the neighbouring dword of the
   table. */
3693 vpmuludq -92(%edx),%ymm2,%ymm3
3694 vmovdqa %ymm6,32(%esp)
3695 vpmuludq -60(%edx),%ymm2,%ymm4
3696 vmovdqa %ymm0,96(%esp)
3697 vpmuludq 100(%edx),%ymm2,%ymm0
3698 vmovdqa %ymm1,128(%esp)
3699 vpmuludq 132(%edx),%ymm2,%ymm1
3700 vpmuludq -124(%edx),%ymm2,%ymm2
3701 vpmuludq -28(%edx),%ymm5,%ymm7
3702 vpaddq %ymm7,%ymm3,%ymm3
3703 vpmuludq 4(%edx),%ymm5,%ymm6
3704 vpaddq %ymm6,%ymm4,%ymm4
3705 vpmuludq -124(%edx),%ymm5,%ymm7
3706 vpaddq %ymm7,%ymm0,%ymm0
3707 vmovdqa 32(%esp),%ymm7
3708 vpmuludq -92(%edx),%ymm5,%ymm6
3709 vpaddq %ymm6,%ymm1,%ymm1
3710 vpmuludq -60(%edx),%ymm5,%ymm5
3711 vpaddq %ymm5,%ymm2,%ymm2
3712 vpmuludq -60(%edx),%ymm7,%ymm6
3713 vpaddq %ymm6,%ymm3,%ymm3
3714 vpmuludq -28(%edx),%ymm7,%ymm5
3715 vpaddq %ymm5,%ymm4,%ymm4
3716 vpmuludq 132(%edx),%ymm7,%ymm6
3717 vpaddq %ymm6,%ymm0,%ymm0
3718 vmovdqa 96(%esp),%ymm6
3719 vpmuludq -124(%edx),%ymm7,%ymm5
3720 vpaddq %ymm5,%ymm1,%ymm1
3721 vpmuludq -92(%edx),%ymm7,%ymm7
3722 vpaddq %ymm7,%ymm2,%ymm2
3723 vpmuludq -124(%edx),%ymm6,%ymm5
3724 vpaddq %ymm5,%ymm3,%ymm3
3725 vpmuludq -92(%edx),%ymm6,%ymm7
3726 vpaddq %ymm7,%ymm4,%ymm4
3727 vpmuludq 68(%edx),%ymm6,%ymm5
3728 vpaddq %ymm5,%ymm0,%ymm0
3729 vmovdqa 128(%esp),%ymm5
3730 vpmuludq 100(%edx),%ymm6,%ymm7
3731 vpaddq %ymm7,%ymm1,%ymm1
3732 vpmuludq 132(%edx),%ymm6,%ymm6
3733 vpaddq %ymm6,%ymm2,%ymm2
3734 vpmuludq 132(%edx),%ymm5,%ymm7
3735 vpaddq %ymm7,%ymm3,%ymm3
3736 vpmuludq 36(%edx),%ymm5,%ymm6
3737 vpaddq %ymm6,%ymm0,%ymm0
3738 vpmuludq -124(%edx),%ymm5,%ymm7
3739 vpaddq %ymm7,%ymm4,%ymm4
3740 vmovdqa 64(%ebx),%ymm7
3741 vpmuludq 68(%edx),%ymm5,%ymm6
3742 vpaddq %ymm6,%ymm1,%ymm1
3743 vpmuludq 100(%edx),%ymm5,%ymm5
3744 vpaddq %ymm5,%ymm2,%ymm2
/* Horizontal sum, step 1: add the high qword of each 128-bit lane to its
   low qword (byte shift right by 8, then add). */
3745 vpsrldq $8,%ymm4,%ymm5
3746 vpsrldq $8,%ymm3,%ymm6
3747 vpaddq %ymm5,%ymm4,%ymm4
3748 vpsrldq $8,%ymm0,%ymm5
3749 vpaddq %ymm6,%ymm3,%ymm3
3750 vpsrldq $8,%ymm1,%ymm6
3751 vpaddq %ymm5,%ymm0,%ymm0
3752 vpsrldq $8,%ymm2,%ymm5
3753 vpaddq %ymm6,%ymm1,%ymm1
/* Step 2: vpermq $2 moves qword 2 (low qword of the upper lane) into
   position 0, and the add leaves the total of all four lanes in qword 0. */
3754 vpermq $2,%ymm4,%ymm6
3755 vpaddq %ymm5,%ymm2,%ymm2
3756 vpermq $2,%ymm3,%ymm5
3757 vpaddq %ymm6,%ymm4,%ymm4
3758 vpermq $2,%ymm0,%ymm6
3759 vpaddq %ymm5,%ymm3,%ymm3
3760 vpermq $2,%ymm1,%ymm5
3761 vpaddq %ymm6,%ymm0,%ymm0
3762 vpermq $2,%ymm2,%ymm6
3763 vpaddq %ymm5,%ymm1,%ymm1
3764 vpaddq %ymm6,%ymm2,%ymm2
/* Final carry pass, same shape as the one after the first multiply. */
3765 vpsrlq $26,%ymm3,%ymm5
3766 vpand %ymm7,%ymm3,%ymm3
3767 vpsrlq $26,%ymm0,%ymm6
3768 vpand %ymm7,%ymm0,%ymm0
3769 vpaddq %ymm5,%ymm4,%ymm4
3770 vpaddq %ymm6,%ymm1,%ymm1
3771 vpsrlq $26,%ymm4,%ymm5
3772 vpand %ymm7,%ymm4,%ymm4
3773 vpsrlq $26,%ymm1,%ymm6
3774 vpand %ymm7,%ymm1,%ymm1
3775 vpaddq %ymm6,%ymm2,%ymm2
3776 vpaddq %ymm5,%ymm0,%ymm0
3777 vpsllq $2,%ymm5,%ymm5
3778 vpsrlq $26,%ymm2,%ymm6
3779 vpand %ymm7,%ymm2,%ymm2
3780 vpaddq %ymm5,%ymm0,%ymm0
3781 vpaddq %ymm6,%ymm3,%ymm3
3782 vpsrlq $26,%ymm3,%ymm6
3783 vpsrlq $26,%ymm0,%ymm5
3784 vpand %ymm7,%ymm0,%ymm0
3785 vpand %ymm7,%ymm3,%ymm3
3786 vpaddq %ymm5,%ymm1,%ymm1
3787 vpaddq %ymm6,%ymm4,%ymm4
/* vpshufd $252 selects dwords (0,3,3,3): dword 0 is kept in place and
   the other three positions are filled with dword 3. NOTE(review): the
   branch that decides whether these shuffles run is not shown. */
3790 vpshufd $252,%xmm0,%xmm0
3792 vpshufd $252,%xmm1,%xmm1
3793 vpshufd $252,%xmm2,%xmm2
3794 vpshufd $252,%xmm3,%xmm3
3795 vpshufd $252,%xmm4,%xmm4
/* Write the low dword of each result back to the same five context
   slots that were read at the start. */
3799 vmovd %xmm0,-48(%edi)
3800 vmovd %xmm1,-44(%edi)
3801 vmovd %xmm2,-40(%edi)
3802 vmovd %xmm3,-36(%edi)
3803 vmovd %xmm4,-32(%edi)
/* Symbol size = distance from the entry label to this point. */
3812 .size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * Constant rows used by the SIMD code paths.
 * NOTE(review): the label in front of this table is not part of this
 * listing; it is presumably .Lconst_sse2, in which case the rows sit at
 * offsets 0, 32, 64 and 96 from that label -- confirm.
 */
/* 16777216 = 1<<24 in the low dword of each of four qwords. */
3815 .long 16777216,0,16777216,0,16777216,0,16777216,0
/* Row of zeros. */
3816 .long 0,0,0,0,0,0,0,0
/* 67108863 = (1<<26)-1 in the low dword of each of four qwords; this
   matches the AND mask loaded from 64(%ebx) if ebx points at row 0. */
3817 .long 67108863,0,67108863,0,67108863,0,67108863,0
/* 268435455 = 2^28-1, followed by three times 268435452 = 2^28-4. */
3818 .long 268435455,268435452,268435452,268435452
/* ASCII text: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.o"
   (the listing ends mid-string; the remaining bytes are not shown). */
3819 .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
3820 .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
3821 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
/* Common symbol OPENSSL_ia32cap_P: 16 bytes, 4-byte alignment. */
3824 .comm OPENSSL_ia32cap_P,16,4