2 /* Do not modify. This file is auto-generated from poly1305-x86_64.pl. */
/* Give the poly1305_blocks symbol hidden ELF visibility. */
10 .hidden poly1305_blocks
/*
 * poly1305_init (excerpt).
 * NOTE(review): the leading number on each line looks like the line number in
 * the generated file; the gaps mean the instructions in between are not shown
 * here, so the comments below describe only the visible lines.
 */
14 .type poly1305_init,@function
/* r10/r11 = RIP-relative addresses of the scalar blocks/emit routines. */
26 leaq poly1305_blocks(%rip),%r10
27 leaq poly1305_emit(%rip),%r11
/* r9 = 8 bytes read from OPENSSL_ia32cap_P+4. */
28 movq OPENSSL_ia32cap_P+4(%rip),%r9
/* rax/rcx = addresses of the AVX blocks/emit routines. Presumably they replace
   r10/r11 depending on bits of r9; the selecting instructions are not shown. */
29 leaq poly1305_blocks_avx(%rip),%rax
30 leaq poly1305_emit_avx(%rip),%rcx
/* rax = address of the AVX2 blocks routine (rax is reused). */
34 leaq poly1305_blocks_avx2(%rip),%rax
/* Two 64-bit constants loaded into rax/rcx. Presumably the Poly1305 key clamp
   masks; the instructions that apply them are not shown - TODO confirm. */
37 movq $0x0ffffffc0fffffff,%rax
38 movq $0x0ffffffc0ffffffc,%rcx
/* Symbol size = distance from the poly1305_init label to this point. */
49 .size poly1305_init,.-poly1305_init
/*
 * poly1305_blocks (excerpt): only the unwind annotations are visible.
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * the instructions between them are not shown.
 */
51 .type poly1305_blocks,@function
/* Six +8 adjustments of the CFA offset (6 * 8 = 48 bytes in total);
   presumably one per register push, which is not shown. */
60 .cfi_adjust_cfa_offset 8
63 .cfi_adjust_cfa_offset 8
66 .cfi_adjust_cfa_offset 8
69 .cfi_adjust_cfa_offset 8
72 .cfi_adjust_cfa_offset 8
75 .cfi_adjust_cfa_offset 8
/* A single -48 adjustment cancels the six +8 adjustments above. */
159 .cfi_adjust_cfa_offset -48
164 .size poly1305_blocks,.-poly1305_blocks
/* poly1305_emit: only the ELF symbol type and size directives are visible
   here; the body of the routine is not shown in this listing. */
166 .type poly1305_emit,@function
191 .size poly1305_emit,.-poly1305_emit
/* __poly1305_block: helper that the other routines in this file reach with
   `call`. Only its symbol type and size directives are visible here. */
192 .type __poly1305_block,@function
236 .size __poly1305_block,.-__poly1305_block
/*
 * __poly1305_init_avx (excerpt).
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * instructions between the visible lines are not shown.
 * Visible shape: rdi is advanced by 112 bytes on entry and moved back by the
 * same amount at the end, __poly1305_block is called three times, and many
 * values are multiplied by 5 with `leal (reg,reg,4)`.
 */
238 .type __poly1305_init_avx,@function
/* rdi += 48+64 (112). */
246 leaq 48+64(%rdi),%rdi
249 call __poly1305_block
/* eax = eax*5 and edx = edx*5 (32-bit results). Presumably the 5*r limb
   multiples used for reduction modulo 2^130-5 - TODO confirm. */
267 leal (%rax,%rax,4),%eax
269 leal (%rdx,%rdx,4),%edx
284 leal (%rax,%rax,4),%eax
286 leal (%rdx,%rdx,4),%edx
299 leal (%rax,%rax,4),%eax
301 leal (%rdx,%rdx,4),%edx
/* Second call, followed by the same multiply-by-5 pattern. */
318 call __poly1305_block
329 leal (%rdx,%rdx,4),%edx
338 leal (%rax,%rax,4),%eax
346 leal (%rdx,%rdx,4),%edx
/* Third call, followed by the same multiply-by-5 pattern. */
358 call __poly1305_block
369 leal (%rdx,%rdx,4),%edx
378 leal (%rax,%rax,4),%eax
386 leal (%rdx,%rdx,4),%edx
/* rdi -= 48+64: undoes the adjustment made at the top. */
397 leaq -48-64(%rdi),%rdi
400 .size __poly1305_init_avx,.-__poly1305_init_avx
/*
 * poly1305_blocks_avx (excerpt): 128-bit AVX code path.
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * labels and instructions between the visible lines are not shown, so the
 * comments describe only what the visible lines do.
 * Recurring idioms:
 *   - vpmuludq multiplies the low 32 bits of each 64-bit lane and yields
 *     64-bit products; vpaddq accumulates them in xmm10..xmm14.
 *   - vpsrlq $26 / vpand xmm15 / vpaddq moves the bits above bit 25 of one
 *     value into the next value (xmm15 is loaded from 64(%rcx)).
 */
402 .type poly1305_blocks_avx,@function
/* Six +8 CFA adjustments (48 bytes); presumably one per push (not shown). */
425 .cfi_adjust_cfa_offset 8
428 .cfi_adjust_cfa_offset 8
431 .cfi_adjust_cfa_offset 8
434 .cfi_adjust_cfa_offset 8
437 .cfi_adjust_cfa_offset 8
440 .cfi_adjust_cfa_offset 8
/* Clear bits 0..30 of r8 and r9; bits 31..63 are kept. */
455 andq $-2147483648,%r8
458 andq $-2147483648,%r9
494 call __poly1305_block
/* Two alternative store paths, chosen by the zero flag. */
497 jz .Lstore_base2_64_avx
518 jz .Lstore_base2_26_avx
528 .Lstore_base2_64_avx:
535 .Lstore_base2_26_avx:
/* Cancels the six +8 CFA adjustments above. */
556 .cfi_adjust_cfa_offset -48
558 .Lblocks_avx_epilogue:
/* A second set of six +8 CFA adjustments for another path. */
566 .cfi_adjust_cfa_offset 8
569 .cfi_adjust_cfa_offset 8
572 .cfi_adjust_cfa_offset 8
575 .cfi_adjust_cfa_offset 8
578 .cfi_adjust_cfa_offset 8
581 .cfi_adjust_cfa_offset 8
608 call __poly1305_block
636 call __poly1305_init_avx
655 .cfi_adjust_cfa_offset -48
656 .Lbase2_64_avx_epilogue:
/* From here on the CFA is described as r11 + 0x60. */
671 .cfi_def_cfa %r11,0x60
677 vmovdqu 48(%rdi),%xmm14
/* rcx = address of the constant pool .Lconst. */
679 leaq .Lconst(%rip),%rcx
/* Load 32 input bytes from rsi+32 and rsi+48; xmm15 = mask at .Lconst+64. */
683 vmovdqu 32(%rsi),%xmm5
684 vmovdqu 48(%rsi),%xmm6
685 vmovdqa 64(%rcx),%xmm15
/* Split the two 16-byte inputs into five values per input (xmm5..xmm9),
   using shifts of 26, 4, 30 and 40 bits and the xmm15 mask. */
687 vpsrldq $6,%xmm5,%xmm7
688 vpsrldq $6,%xmm6,%xmm8
689 vpunpckhqdq %xmm6,%xmm5,%xmm9
690 vpunpcklqdq %xmm6,%xmm5,%xmm5
691 vpunpcklqdq %xmm8,%xmm7,%xmm8
693 vpsrlq $40,%xmm9,%xmm9
694 vpsrlq $26,%xmm5,%xmm6
695 vpand %xmm15,%xmm5,%xmm5
696 vpsrlq $4,%xmm8,%xmm7
697 vpand %xmm15,%xmm6,%xmm6
698 vpsrlq $30,%xmm8,%xmm8
699 vpand %xmm15,%xmm7,%xmm7
700 vpand %xmm15,%xmm8,%xmm8
/* OR the constant at .Lconst+32 into the top value. Presumably this is the
   2^24 padding bit - TODO confirm against the label layout of .Lconst. */
701 vpor 32(%rcx),%xmm9,%xmm9
/* Load table entries from -48(%rdi)..64(%rdi) and split each one:
   vpshufd $0xEE replicates the upper 64 bits, stored at -144..-16(%r11);
   vpshufd $0x44 replicates the lower 64 bits, stored at 0..128(%rsp). */
706 vmovdqu -48(%rdi),%xmm11
707 vmovdqu -32(%rdi),%xmm12
708 vpshufd $0xEE,%xmm14,%xmm13
709 vpshufd $0x44,%xmm14,%xmm10
710 vmovdqa %xmm13,-144(%r11)
711 vmovdqa %xmm10,0(%rsp)
712 vpshufd $0xEE,%xmm11,%xmm14
713 vmovdqu -16(%rdi),%xmm10
714 vpshufd $0x44,%xmm11,%xmm11
715 vmovdqa %xmm14,-128(%r11)
716 vmovdqa %xmm11,16(%rsp)
717 vpshufd $0xEE,%xmm12,%xmm13
718 vmovdqu 0(%rdi),%xmm11
719 vpshufd $0x44,%xmm12,%xmm12
720 vmovdqa %xmm13,-112(%r11)
721 vmovdqa %xmm12,32(%rsp)
722 vpshufd $0xEE,%xmm10,%xmm14
723 vmovdqu 16(%rdi),%xmm12
724 vpshufd $0x44,%xmm10,%xmm10
725 vmovdqa %xmm14,-96(%r11)
726 vmovdqa %xmm10,48(%rsp)
727 vpshufd $0xEE,%xmm11,%xmm13
728 vmovdqu 32(%rdi),%xmm10
729 vpshufd $0x44,%xmm11,%xmm11
730 vmovdqa %xmm13,-80(%r11)
731 vmovdqa %xmm11,64(%rsp)
732 vpshufd $0xEE,%xmm12,%xmm14
733 vmovdqu 48(%rdi),%xmm11
734 vpshufd $0x44,%xmm12,%xmm12
735 vmovdqa %xmm14,-64(%r11)
736 vmovdqa %xmm12,80(%rsp)
737 vpshufd $0xEE,%xmm10,%xmm13
738 vmovdqu 64(%rdi),%xmm12
739 vpshufd $0x44,%xmm10,%xmm10
740 vmovdqa %xmm13,-48(%r11)
741 vmovdqa %xmm10,96(%rsp)
742 vpshufd $0xEE,%xmm11,%xmm14
743 vpshufd $0x44,%xmm11,%xmm11
744 vmovdqa %xmm14,-32(%r11)
745 vmovdqa %xmm11,112(%rsp)
746 vpshufd $0xEE,%xmm12,%xmm13
/* Reload the first low-half entry into xmm14 for the multiplications. */
747 vmovdqa 0(%rsp),%xmm14
748 vpshufd $0x44,%xmm12,%xmm12
749 vmovdqa %xmm13,-16(%r11)
750 vmovdqa %xmm12,128(%rsp)
/* Multiply xmm5..xmm9 by the entries stored at 0..128(%rsp), accumulating in
   xmm10..xmm14. The current xmm0..xmm4 are spilled to 0..64(%r11) in between
   to free those registers for temporaries. */
776 vpmuludq %xmm5,%xmm14,%xmm10
777 vpmuludq %xmm6,%xmm14,%xmm11
778 vmovdqa %xmm2,32(%r11)
779 vpmuludq %xmm7,%xmm14,%xmm12
780 vmovdqa 16(%rsp),%xmm2
781 vpmuludq %xmm8,%xmm14,%xmm13
782 vpmuludq %xmm9,%xmm14,%xmm14
784 vmovdqa %xmm0,0(%r11)
785 vpmuludq 32(%rsp),%xmm9,%xmm0
786 vmovdqa %xmm1,16(%r11)
787 vpmuludq %xmm8,%xmm2,%xmm1
788 vpaddq %xmm0,%xmm10,%xmm10
789 vpaddq %xmm1,%xmm14,%xmm14
790 vmovdqa %xmm3,48(%r11)
791 vpmuludq %xmm7,%xmm2,%xmm0
792 vpmuludq %xmm6,%xmm2,%xmm1
793 vpaddq %xmm0,%xmm13,%xmm13
794 vmovdqa 48(%rsp),%xmm3
795 vpaddq %xmm1,%xmm12,%xmm12
796 vmovdqa %xmm4,64(%r11)
797 vpmuludq %xmm5,%xmm2,%xmm2
798 vpmuludq %xmm7,%xmm3,%xmm0
799 vpaddq %xmm2,%xmm11,%xmm11
801 vmovdqa 64(%rsp),%xmm4
802 vpaddq %xmm0,%xmm14,%xmm14
803 vpmuludq %xmm6,%xmm3,%xmm1
804 vpmuludq %xmm5,%xmm3,%xmm3
805 vpaddq %xmm1,%xmm13,%xmm13
806 vmovdqa 80(%rsp),%xmm2
807 vpaddq %xmm3,%xmm12,%xmm12
808 vpmuludq %xmm9,%xmm4,%xmm0
809 vpmuludq %xmm8,%xmm4,%xmm4
810 vpaddq %xmm0,%xmm11,%xmm11
811 vmovdqa 96(%rsp),%xmm3
812 vpaddq %xmm4,%xmm10,%xmm10
814 vmovdqa 128(%rsp),%xmm4
815 vpmuludq %xmm6,%xmm2,%xmm1
816 vpmuludq %xmm5,%xmm2,%xmm2
817 vpaddq %xmm1,%xmm14,%xmm14
818 vpaddq %xmm2,%xmm13,%xmm13
819 vpmuludq %xmm9,%xmm3,%xmm0
820 vpmuludq %xmm8,%xmm3,%xmm1
821 vpaddq %xmm0,%xmm12,%xmm12
/* Loads of the input at rsi+0 / rsi+16 are interleaved with the multiplies. */
822 vmovdqu 0(%rsi),%xmm0
823 vpaddq %xmm1,%xmm11,%xmm11
824 vpmuludq %xmm7,%xmm3,%xmm3
825 vpmuludq %xmm7,%xmm4,%xmm7
826 vpaddq %xmm3,%xmm10,%xmm10
828 vmovdqu 16(%rsi),%xmm1
829 vpaddq %xmm7,%xmm11,%xmm11
830 vpmuludq %xmm8,%xmm4,%xmm8
831 vpmuludq %xmm9,%xmm4,%xmm9
832 vpsrldq $6,%xmm0,%xmm2
833 vpaddq %xmm8,%xmm12,%xmm12
834 vpaddq %xmm9,%xmm13,%xmm13
835 vpsrldq $6,%xmm1,%xmm3
836 vpmuludq 112(%rsp),%xmm5,%xmm9
837 vpmuludq %xmm6,%xmm4,%xmm5
838 vpunpckhqdq %xmm1,%xmm0,%xmm4
839 vpaddq %xmm9,%xmm14,%xmm14
/* xmm9 = first high-half table entry, used by the next multiply group. */
840 vmovdqa -144(%r11),%xmm9
841 vpaddq %xmm5,%xmm10,%xmm10
/* Split the rsi+0 / rsi+16 input into xmm0..xmm4. Here the top value is
   shifted by 5 bytes and masked with the constant at 0(%rcx). */
843 vpunpcklqdq %xmm1,%xmm0,%xmm0
844 vpunpcklqdq %xmm3,%xmm2,%xmm3
847 vpsrldq $5,%xmm4,%xmm4
848 vpsrlq $26,%xmm0,%xmm1
849 vpand %xmm15,%xmm0,%xmm0
850 vpsrlq $4,%xmm3,%xmm2
851 vpand %xmm15,%xmm1,%xmm1
852 vpand 0(%rcx),%xmm4,%xmm4
853 vpsrlq $30,%xmm3,%xmm3
854 vpand %xmm15,%xmm2,%xmm2
855 vpand %xmm15,%xmm3,%xmm3
856 vpor 32(%rcx),%xmm4,%xmm4
/* Add the values spilled to 0..64(%r11) to the freshly split input. */
858 vpaddq 0(%r11),%xmm0,%xmm0
859 vpaddq 16(%r11),%xmm1,%xmm1
860 vpaddq 32(%r11),%xmm2,%xmm2
861 vpaddq 48(%r11),%xmm3,%xmm3
862 vpaddq 64(%r11),%xmm4,%xmm4
/* Multiply xmm0..xmm4 by the high-half entries at -144..-16(%r11) and keep
   accumulating in xmm10..xmm14. */
878 vpmuludq %xmm0,%xmm9,%xmm5
879 vpmuludq %xmm1,%xmm9,%xmm6
880 vpaddq %xmm5,%xmm10,%xmm10
881 vpaddq %xmm6,%xmm11,%xmm11
882 vmovdqa -128(%r11),%xmm7
883 vpmuludq %xmm2,%xmm9,%xmm5
884 vpmuludq %xmm3,%xmm9,%xmm6
885 vpaddq %xmm5,%xmm12,%xmm12
886 vpaddq %xmm6,%xmm13,%xmm13
887 vpmuludq %xmm4,%xmm9,%xmm9
888 vpmuludq -112(%r11),%xmm4,%xmm5
889 vpaddq %xmm9,%xmm14,%xmm14
891 vpaddq %xmm5,%xmm10,%xmm10
892 vpmuludq %xmm2,%xmm7,%xmm6
893 vpmuludq %xmm3,%xmm7,%xmm5
894 vpaddq %xmm6,%xmm13,%xmm13
895 vmovdqa -96(%r11),%xmm8
896 vpaddq %xmm5,%xmm14,%xmm14
897 vpmuludq %xmm1,%xmm7,%xmm6
898 vpmuludq %xmm0,%xmm7,%xmm7
899 vpaddq %xmm6,%xmm12,%xmm12
900 vpaddq %xmm7,%xmm11,%xmm11
902 vmovdqa -80(%r11),%xmm9
903 vpmuludq %xmm2,%xmm8,%xmm5
904 vpmuludq %xmm1,%xmm8,%xmm6
905 vpaddq %xmm5,%xmm14,%xmm14
906 vpaddq %xmm6,%xmm13,%xmm13
907 vmovdqa -64(%r11),%xmm7
908 vpmuludq %xmm0,%xmm8,%xmm8
909 vpmuludq %xmm4,%xmm9,%xmm5
910 vpaddq %xmm8,%xmm12,%xmm12
911 vpaddq %xmm5,%xmm11,%xmm11
912 vmovdqa -48(%r11),%xmm8
913 vpmuludq %xmm3,%xmm9,%xmm9
914 vpmuludq %xmm1,%xmm7,%xmm6
915 vpaddq %xmm9,%xmm10,%xmm10
917 vmovdqa -16(%r11),%xmm9
918 vpaddq %xmm6,%xmm14,%xmm14
919 vpmuludq %xmm0,%xmm7,%xmm7
920 vpmuludq %xmm4,%xmm8,%xmm5
921 vpaddq %xmm7,%xmm13,%xmm13
922 vpaddq %xmm5,%xmm12,%xmm12
/* Loads of the input at rsi+32 / rsi+48 are interleaved here. */
923 vmovdqu 32(%rsi),%xmm5
924 vpmuludq %xmm3,%xmm8,%xmm7
925 vpmuludq %xmm2,%xmm8,%xmm8
926 vpaddq %xmm7,%xmm11,%xmm11
927 vmovdqu 48(%rsi),%xmm6
928 vpaddq %xmm8,%xmm10,%xmm10
930 vpmuludq %xmm2,%xmm9,%xmm2
931 vpmuludq %xmm3,%xmm9,%xmm3
932 vpsrldq $6,%xmm5,%xmm7
933 vpaddq %xmm2,%xmm11,%xmm11
934 vpmuludq %xmm4,%xmm9,%xmm4
935 vpsrldq $6,%xmm6,%xmm8
/* The final sums land in xmm2, xmm3, xmm4 and xmm0 (xmm11 stays as is). */
936 vpaddq %xmm3,%xmm12,%xmm2
937 vpaddq %xmm4,%xmm13,%xmm3
938 vpmuludq -32(%r11),%xmm0,%xmm4
939 vpmuludq %xmm1,%xmm9,%xmm0
940 vpunpckhqdq %xmm6,%xmm5,%xmm9
941 vpaddq %xmm4,%xmm14,%xmm4
942 vpaddq %xmm0,%xmm10,%xmm0
/* Split the rsi+32 / rsi+48 input into xmm5..xmm9. */
944 vpunpcklqdq %xmm6,%xmm5,%xmm5
945 vpunpcklqdq %xmm8,%xmm7,%xmm8
948 vpsrldq $5,%xmm9,%xmm9
949 vpsrlq $26,%xmm5,%xmm6
950 vmovdqa 0(%rsp),%xmm14
951 vpand %xmm15,%xmm5,%xmm5
952 vpsrlq $4,%xmm8,%xmm7
953 vpand %xmm15,%xmm6,%xmm6
954 vpand 0(%rcx),%xmm9,%xmm9
955 vpsrlq $30,%xmm8,%xmm8
956 vpand %xmm15,%xmm7,%xmm7
957 vpand %xmm15,%xmm8,%xmm8
958 vpor 32(%rcx),%xmm9,%xmm9
/* Carry propagation: xmm3 -> xmm4, xmm0 -> xmm1 (taken from xmm11),
   xmm4 -> carry in xmm10, xmm1 -> xmm2. */
964 vpsrlq $26,%xmm3,%xmm13
965 vpand %xmm15,%xmm3,%xmm3
966 vpaddq %xmm13,%xmm4,%xmm4
968 vpsrlq $26,%xmm0,%xmm10
969 vpand %xmm15,%xmm0,%xmm0
970 vpaddq %xmm10,%xmm11,%xmm1
972 vpsrlq $26,%xmm4,%xmm10
973 vpand %xmm15,%xmm4,%xmm4
975 vpsrlq $26,%xmm1,%xmm11
976 vpand %xmm15,%xmm1,%xmm1
977 vpaddq %xmm11,%xmm2,%xmm2
/* The carry out of xmm4 is added to xmm0 once, then again shifted left by 2,
   so xmm0 grows by 5 times that carry. */
979 vpaddq %xmm10,%xmm0,%xmm0
980 vpsllq $2,%xmm10,%xmm10
981 vpaddq %xmm10,%xmm0,%xmm0
983 vpsrlq $26,%xmm2,%xmm12
984 vpand %xmm15,%xmm2,%xmm2
985 vpaddq %xmm12,%xmm3,%xmm3
987 vpsrlq $26,%xmm0,%xmm10
988 vpand %xmm15,%xmm0,%xmm0
989 vpaddq %xmm10,%xmm1,%xmm1
991 vpsrlq $26,%xmm3,%xmm13
992 vpand %xmm15,%xmm3,%xmm3
993 vpaddq %xmm13,%xmm4,%xmm4
/* vpshufd $0x10 puts source dwords 0 and 1 into the low dword of each
   64-bit lane, which is the part vpmuludq reads. */
1001 vpshufd $0x10,%xmm14,%xmm14
/* Add xmm0..xmm4 to the split input in xmm5..xmm9. */
1005 vpaddq %xmm2,%xmm7,%xmm7
1006 vpaddq %xmm0,%xmm5,%xmm5
1007 vpaddq %xmm1,%xmm6,%xmm6
1008 vpaddq %xmm3,%xmm8,%xmm8
1009 vpaddq %xmm4,%xmm9,%xmm9
/* Save xmm0..xmm4 at 0..64(%r11). */
1012 vmovdqa %xmm2,32(%r11)
1013 vmovdqa %xmm0,0(%r11)
1014 vmovdqa %xmm1,16(%r11)
1015 vmovdqa %xmm3,48(%r11)
1016 vmovdqa %xmm4,64(%r11)
/* Multiply xmm5..xmm9 by entries read straight from -48(%rdi)..64(%rdi) via
   vpshufd $0x10 (source dwords 0 and 1), accumulating in xmm10..xmm14. */
1024 vpmuludq %xmm7,%xmm14,%xmm12
1025 vpmuludq %xmm5,%xmm14,%xmm10
1026 vpshufd $0x10,-48(%rdi),%xmm2
1027 vpmuludq %xmm6,%xmm14,%xmm11
1028 vpmuludq %xmm8,%xmm14,%xmm13
1029 vpmuludq %xmm9,%xmm14,%xmm14
1031 vpmuludq %xmm8,%xmm2,%xmm0
1032 vpaddq %xmm0,%xmm14,%xmm14
1033 vpshufd $0x10,-32(%rdi),%xmm3
1034 vpmuludq %xmm7,%xmm2,%xmm1
1035 vpaddq %xmm1,%xmm13,%xmm13
1036 vpshufd $0x10,-16(%rdi),%xmm4
1037 vpmuludq %xmm6,%xmm2,%xmm0
1038 vpaddq %xmm0,%xmm12,%xmm12
1039 vpmuludq %xmm5,%xmm2,%xmm2
1040 vpaddq %xmm2,%xmm11,%xmm11
1041 vpmuludq %xmm9,%xmm3,%xmm3
1042 vpaddq %xmm3,%xmm10,%xmm10
1044 vpshufd $0x10,0(%rdi),%xmm2
1045 vpmuludq %xmm7,%xmm4,%xmm1
1046 vpaddq %xmm1,%xmm14,%xmm14
1047 vpmuludq %xmm6,%xmm4,%xmm0
1048 vpaddq %xmm0,%xmm13,%xmm13
1049 vpshufd $0x10,16(%rdi),%xmm3
1050 vpmuludq %xmm5,%xmm4,%xmm4
1051 vpaddq %xmm4,%xmm12,%xmm12
1052 vpmuludq %xmm9,%xmm2,%xmm1
1053 vpaddq %xmm1,%xmm11,%xmm11
1054 vpshufd $0x10,32(%rdi),%xmm4
1055 vpmuludq %xmm8,%xmm2,%xmm2
1056 vpaddq %xmm2,%xmm10,%xmm10
1058 vpmuludq %xmm6,%xmm3,%xmm0
1059 vpaddq %xmm0,%xmm14,%xmm14
1060 vpmuludq %xmm5,%xmm3,%xmm3
1061 vpaddq %xmm3,%xmm13,%xmm13
1062 vpshufd $0x10,48(%rdi),%xmm2
1063 vpmuludq %xmm9,%xmm4,%xmm1
1064 vpaddq %xmm1,%xmm12,%xmm12
1065 vpshufd $0x10,64(%rdi),%xmm3
1066 vpmuludq %xmm8,%xmm4,%xmm0
1067 vpaddq %xmm0,%xmm11,%xmm11
1068 vpmuludq %xmm7,%xmm4,%xmm4
1069 vpaddq %xmm4,%xmm10,%xmm10
1071 vpmuludq %xmm5,%xmm2,%xmm2
1072 vpaddq %xmm2,%xmm14,%xmm14
1073 vpmuludq %xmm9,%xmm3,%xmm1
1074 vpaddq %xmm1,%xmm13,%xmm13
1075 vpmuludq %xmm8,%xmm3,%xmm0
1076 vpaddq %xmm0,%xmm12,%xmm12
1077 vpmuludq %xmm7,%xmm3,%xmm1
1078 vpaddq %xmm1,%xmm11,%xmm11
1079 vpmuludq %xmm6,%xmm3,%xmm3
1080 vpaddq %xmm3,%xmm10,%xmm10
/* Load and split the 32 input bytes at rsi+0 / rsi+16 into xmm0..xmm4. */
1084 vmovdqu 0(%rsi),%xmm0
1085 vmovdqu 16(%rsi),%xmm1
1087 vpsrldq $6,%xmm0,%xmm2
1088 vpsrldq $6,%xmm1,%xmm3
1089 vpunpckhqdq %xmm1,%xmm0,%xmm4
1090 vpunpcklqdq %xmm1,%xmm0,%xmm0
1091 vpunpcklqdq %xmm3,%xmm2,%xmm3
1093 vpsrlq $40,%xmm4,%xmm4
1094 vpsrlq $26,%xmm0,%xmm1
1095 vpand %xmm15,%xmm0,%xmm0
1096 vpsrlq $4,%xmm3,%xmm2
1097 vpand %xmm15,%xmm1,%xmm1
1098 vpsrlq $30,%xmm3,%xmm3
1099 vpand %xmm15,%xmm2,%xmm2
1100 vpand %xmm15,%xmm3,%xmm3
1101 vpor 32(%rcx),%xmm4,%xmm4
/* vpshufd $0x32 puts source dwords 2 and 3 into the low dword of each
   64-bit lane. The saved values at 0..64(%r11) are added to the input. */
1103 vpshufd $0x32,-64(%rdi),%xmm9
1104 vpaddq 0(%r11),%xmm0,%xmm0
1105 vpaddq 16(%r11),%xmm1,%xmm1
1106 vpaddq 32(%r11),%xmm2,%xmm2
1107 vpaddq 48(%r11),%xmm3,%xmm3
1108 vpaddq 64(%r11),%xmm4,%xmm4
/* Multiply xmm0..xmm4 by entries read via vpshufd $0x32 from
   -64(%rdi)..64(%rdi), accumulating in xmm10..xmm14. */
1113 vpmuludq %xmm0,%xmm9,%xmm5
1114 vpaddq %xmm5,%xmm10,%xmm10
1115 vpmuludq %xmm1,%xmm9,%xmm6
1116 vpaddq %xmm6,%xmm11,%xmm11
1117 vpmuludq %xmm2,%xmm9,%xmm5
1118 vpaddq %xmm5,%xmm12,%xmm12
1119 vpshufd $0x32,-48(%rdi),%xmm7
1120 vpmuludq %xmm3,%xmm9,%xmm6
1121 vpaddq %xmm6,%xmm13,%xmm13
1122 vpmuludq %xmm4,%xmm9,%xmm9
1123 vpaddq %xmm9,%xmm14,%xmm14
1125 vpmuludq %xmm3,%xmm7,%xmm5
1126 vpaddq %xmm5,%xmm14,%xmm14
1127 vpshufd $0x32,-32(%rdi),%xmm8
1128 vpmuludq %xmm2,%xmm7,%xmm6
1129 vpaddq %xmm6,%xmm13,%xmm13
1130 vpshufd $0x32,-16(%rdi),%xmm9
1131 vpmuludq %xmm1,%xmm7,%xmm5
1132 vpaddq %xmm5,%xmm12,%xmm12
1133 vpmuludq %xmm0,%xmm7,%xmm7
1134 vpaddq %xmm7,%xmm11,%xmm11
1135 vpmuludq %xmm4,%xmm8,%xmm8
1136 vpaddq %xmm8,%xmm10,%xmm10
1138 vpshufd $0x32,0(%rdi),%xmm7
1139 vpmuludq %xmm2,%xmm9,%xmm6
1140 vpaddq %xmm6,%xmm14,%xmm14
1141 vpmuludq %xmm1,%xmm9,%xmm5
1142 vpaddq %xmm5,%xmm13,%xmm13
1143 vpshufd $0x32,16(%rdi),%xmm8
1144 vpmuludq %xmm0,%xmm9,%xmm9
1145 vpaddq %xmm9,%xmm12,%xmm12
1146 vpmuludq %xmm4,%xmm7,%xmm6
1147 vpaddq %xmm6,%xmm11,%xmm11
1148 vpshufd $0x32,32(%rdi),%xmm9
1149 vpmuludq %xmm3,%xmm7,%xmm7
1150 vpaddq %xmm7,%xmm10,%xmm10
1152 vpmuludq %xmm1,%xmm8,%xmm5
1153 vpaddq %xmm5,%xmm14,%xmm14
1154 vpmuludq %xmm0,%xmm8,%xmm8
1155 vpaddq %xmm8,%xmm13,%xmm13
1156 vpshufd $0x32,48(%rdi),%xmm7
1157 vpmuludq %xmm4,%xmm9,%xmm6
1158 vpaddq %xmm6,%xmm12,%xmm12
1159 vpshufd $0x32,64(%rdi),%xmm8
1160 vpmuludq %xmm3,%xmm9,%xmm5
1161 vpaddq %xmm5,%xmm11,%xmm11
1162 vpmuludq %xmm2,%xmm9,%xmm9
1163 vpaddq %xmm9,%xmm10,%xmm10
1165 vpmuludq %xmm0,%xmm7,%xmm7
1166 vpaddq %xmm7,%xmm14,%xmm14
1167 vpmuludq %xmm4,%xmm8,%xmm6
1168 vpaddq %xmm6,%xmm13,%xmm13
1169 vpmuludq %xmm3,%xmm8,%xmm5
1170 vpaddq %xmm5,%xmm12,%xmm12
1171 vpmuludq %xmm2,%xmm8,%xmm6
1172 vpaddq %xmm6,%xmm11,%xmm11
1173 vpmuludq %xmm1,%xmm8,%xmm8
1174 vpaddq %xmm8,%xmm10,%xmm10
/* Horizontal add: move the upper 64-bit lane down by 8 bytes and add it to
   the lower lane, for each of xmm10..xmm14. */
1180 vpsrldq $8,%xmm14,%xmm9
1181 vpsrldq $8,%xmm13,%xmm8
1182 vpsrldq $8,%xmm11,%xmm6
1183 vpsrldq $8,%xmm10,%xmm5
1184 vpsrldq $8,%xmm12,%xmm7
1185 vpaddq %xmm8,%xmm13,%xmm13
1186 vpaddq %xmm9,%xmm14,%xmm14
1187 vpaddq %xmm5,%xmm10,%xmm10
1188 vpaddq %xmm6,%xmm11,%xmm11
1189 vpaddq %xmm7,%xmm12,%xmm12
/* Carry propagation across xmm10..xmm14, same shape as the one above; the
   carry out of xmm14 is again folded into xmm10 multiplied by 5. */
1194 vpsrlq $26,%xmm13,%xmm3
1195 vpand %xmm15,%xmm13,%xmm13
1196 vpaddq %xmm3,%xmm14,%xmm14
1198 vpsrlq $26,%xmm10,%xmm0
1199 vpand %xmm15,%xmm10,%xmm10
1200 vpaddq %xmm0,%xmm11,%xmm11
1202 vpsrlq $26,%xmm14,%xmm4
1203 vpand %xmm15,%xmm14,%xmm14
1205 vpsrlq $26,%xmm11,%xmm1
1206 vpand %xmm15,%xmm11,%xmm11
1207 vpaddq %xmm1,%xmm12,%xmm12
1209 vpaddq %xmm4,%xmm10,%xmm10
1210 vpsllq $2,%xmm4,%xmm4
1211 vpaddq %xmm4,%xmm10,%xmm10
1213 vpsrlq $26,%xmm12,%xmm2
1214 vpand %xmm15,%xmm12,%xmm12
1215 vpaddq %xmm2,%xmm13,%xmm13
1217 vpsrlq $26,%xmm10,%xmm0
1218 vpand %xmm15,%xmm10,%xmm10
1219 vpaddq %xmm0,%xmm11,%xmm11
1221 vpsrlq $26,%xmm13,%xmm3
1222 vpand %xmm15,%xmm13,%xmm13
1223 vpaddq %xmm3,%xmm14,%xmm14
/* Store the low 32 bits of xmm10..xmm14 into five consecutive dwords at
   -112(%rdi)..-96(%rdi). */
1225 vmovd %xmm10,-112(%rdi)
1226 vmovd %xmm11,-108(%rdi)
1227 vmovd %xmm12,-104(%rdi)
1228 vmovd %xmm13,-100(%rdi)
1229 vmovd %xmm14,-96(%rdi)
1235 .size poly1305_blocks_avx,.-poly1305_blocks_avx
/* poly1305_emit_avx: only the ELF symbol type and size directives are visible
   here; the body of the routine is not shown in this listing. */
1237 .type poly1305_emit_avx,@function
1292 .size poly1305_emit_avx,.-poly1305_emit_avx
/*
 * poly1305_blocks_avx2 (excerpt): 256-bit AVX2 code path.
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * instructions between the visible lines are not shown, so the comments
 * describe only what the visible lines do.
 * Recurring idioms:
 *   - vpmuludq multiplies the low 32 bits of each 64-bit lane and yields
 *     64-bit products; vpaddq accumulates them in ymm11..ymm15.
 *   - vpsrlq $26 / vpand ymm5 / vpaddq moves the bits above bit 25 of one
 *     value into the next value (ymm5 is loaded from 64(%rcx)).
 */
1293 .type poly1305_blocks_avx2,@function
1295 poly1305_blocks_avx2:
/* Unwind info: each +8 CFA adjustment is paired with the CFA-relative slot
   where a callee-saved register (rbx, rbp, r12..r15) is recorded. */
1316 .cfi_adjust_cfa_offset 8
1317 .cfi_offset %rbx,-16
1319 .cfi_adjust_cfa_offset 8
1320 .cfi_offset %rbp,-24
1322 .cfi_adjust_cfa_offset 8
1323 .cfi_offset %r12,-32
1325 .cfi_adjust_cfa_offset 8
1326 .cfi_offset %r13,-40
1328 .cfi_adjust_cfa_offset 8
1329 .cfi_offset %r14,-48
1331 .cfi_adjust_cfa_offset 8
1332 .cfi_offset %r15,-56
/* Clear bits 0..30 of r8 and r9; bits 31..63 are kept. */
1346 andq $-2147483648,%r8
1349 andq $-2147483648,%r9
/* Loop that calls the scalar block routine and repeats while the zero flag
   is clear; the loop condition itself is not shown. */
1380 .Lbase2_26_pre_avx2:
1387 call __poly1305_block
1391 jnz .Lbase2_26_pre_avx2
1394 jz .Lstore_base2_64_avx2
/* Keep only the low 26 bits (0x3ffffff = 2^26-1) of rax, rdx, r14, rbx. */
1403 andq $0x3ffffff,%rax
1405 andq $0x3ffffff,%rdx
1409 andq $0x3ffffff,%r14
1411 andq $0x3ffffff,%rbx
1415 jz .Lstore_base2_26_avx2
1425 .Lstore_base2_64_avx2:
1432 .Lstore_base2_26_avx2:
/* Cancels the six +8 CFA adjustments above. */
1453 .cfi_adjust_cfa_offset -48
1455 .Lblocks_avx2_epilogue:
/* The same unwind annotations, repeated for a second path. */
1463 .cfi_adjust_cfa_offset 8
1464 .cfi_offset %rbx,-16
1466 .cfi_adjust_cfa_offset 8
1467 .cfi_offset %rbp,-24
1469 .cfi_adjust_cfa_offset 8
1470 .cfi_offset %r12,-32
1472 .cfi_adjust_cfa_offset 8
1473 .cfi_offset %r13,-40
1475 .cfi_adjust_cfa_offset 8
1476 .cfi_offset %r14,-48
1478 .cfi_adjust_cfa_offset 8
1479 .cfi_offset %r15,-56
1480 .Lbase2_64_avx2_body:
1499 .Lbase2_64_pre_avx2:
1506 call __poly1305_block
1510 jnz .Lbase2_64_pre_avx2
/* Keep only the low 26 bits of rax, rdx, r14, rbx. */
1520 andq $0x3ffffff,%rax
1522 andq $0x3ffffff,%rdx
1526 andq $0x3ffffff,%r14
1528 andq $0x3ffffff,%rbx
1538 call __poly1305_init_avx
/* r10d = dword at OPENSSL_ia32cap_P+8; r11d = 3221291008 (0xC0010000).
   Presumably a capability-bit mask compared against r10d; the comparison is
   not shown - TODO confirm. */
1542 movl OPENSSL_ia32cap_P+8(%rip),%r10d
1543 movl $3221291008,%r11d
1559 .cfi_adjust_cfa_offset -48
1560 .Lbase2_64_avx2_epilogue:
1567 movl OPENSSL_ia32cap_P+8(%rip),%r10d
/* Load the 32-bit values at rdi+12 and rdi+16 into xmm3 / xmm4. */
1571 vmovd 12(%rdi),%xmm3
1572 vmovd 16(%rdi),%xmm4
/* From here on the CFA is described as r11 + 16. */
1576 .cfi_def_cfa %r11,16
/* rcx = constant pool; rdi += 112; ymm7 = 32 bytes at .Lconst+96, used below
   as the vpermd index vector. */
1578 leaq .Lconst(%rip),%rcx
1579 leaq 48+64(%rdi),%rdi
1580 vmovdqa 96(%rcx),%ymm7
/* Load nine 16-byte table entries from -64(%rdi)..64(%rdi), permute each one
   with vpermd, and store the 32-byte results at 0(%rsp) and at
   32-144(%rax)..256-144(%rax). */
1583 vmovdqu -64(%rdi),%xmm9
1585 vmovdqu -48(%rdi),%xmm10
1586 vmovdqu -32(%rdi),%xmm6
1587 vmovdqu -16(%rdi),%xmm11
1588 vmovdqu 0(%rdi),%xmm12
1589 vmovdqu 16(%rdi),%xmm13
1591 vmovdqu 32(%rdi),%xmm14
1592 vpermd %ymm9,%ymm7,%ymm9
1593 vmovdqu 48(%rdi),%xmm15
1594 vpermd %ymm10,%ymm7,%ymm10
1595 vmovdqu 64(%rdi),%xmm5
1596 vpermd %ymm6,%ymm7,%ymm6
1597 vmovdqa %ymm9,0(%rsp)
1598 vpermd %ymm11,%ymm7,%ymm11
1599 vmovdqa %ymm10,32-144(%rax)
1600 vpermd %ymm12,%ymm7,%ymm12
1601 vmovdqa %ymm6,64-144(%rax)
1602 vpermd %ymm13,%ymm7,%ymm13
1603 vmovdqa %ymm11,96-144(%rax)
1604 vpermd %ymm14,%ymm7,%ymm14
1605 vmovdqa %ymm12,128-144(%rax)
1606 vpermd %ymm15,%ymm7,%ymm15
1607 vmovdqa %ymm13,160-144(%rax)
1608 vpermd %ymm5,%ymm7,%ymm5
1609 vmovdqa %ymm14,192-144(%rax)
1610 vmovdqa %ymm15,224-144(%rax)
1611 vmovdqa %ymm5,256-144(%rax)
/* ymm5 = mask at .Lconst+64. */
1612 vmovdqa 64(%rcx),%ymm5
/* Load 64 input bytes: rsi+0 / rsi+16 into the low 128-bit lanes and
   rsi+32 / rsi+48 into the high lanes of ymm7 / ymm8. */
1616 vmovdqu 0(%rsi),%xmm7
1617 vmovdqu 16(%rsi),%xmm8
1618 vinserti128 $1,32(%rsi),%ymm7,%ymm7
1619 vinserti128 $1,48(%rsi),%ymm8,%ymm8
/* Split the input into five values (ymm7, ymm8, ymm9, ymm10, ymm6) using
   shifts of 26, 4, 30 and 40 bits and the ymm5 mask. */
1622 vpsrldq $6,%ymm7,%ymm9
1623 vpsrldq $6,%ymm8,%ymm10
1624 vpunpckhqdq %ymm8,%ymm7,%ymm6
1625 vpunpcklqdq %ymm10,%ymm9,%ymm9
1626 vpunpcklqdq %ymm8,%ymm7,%ymm7
1628 vpsrlq $30,%ymm9,%ymm10
1629 vpsrlq $4,%ymm9,%ymm9
1630 vpsrlq $26,%ymm7,%ymm8
1631 vpsrlq $40,%ymm6,%ymm6
1632 vpand %ymm5,%ymm9,%ymm9
1633 vpand %ymm5,%ymm7,%ymm7
1634 vpand %ymm5,%ymm8,%ymm8
1635 vpand %ymm5,%ymm10,%ymm10
/* OR the constant at .Lconst+32 into the top value. */
1636 vpor 32(%rcx),%ymm6,%ymm6
/* Add the split input to ymm0..ymm4 while loading the multipliers into
   ymm7, ymm8, ymm9, ymm10, ymm5 from the stack table. */
1638 vpaddq %ymm2,%ymm9,%ymm2
1653 vpaddq %ymm0,%ymm7,%ymm0
1654 vmovdqa 0(%rsp),%ymm7
1655 vpaddq %ymm1,%ymm8,%ymm1
1656 vmovdqa 32(%rsp),%ymm8
1657 vpaddq %ymm3,%ymm10,%ymm3
1658 vmovdqa 96(%rsp),%ymm9
1659 vpaddq %ymm4,%ymm6,%ymm4
1660 vmovdqa 48(%rax),%ymm10
1661 vmovdqa 112(%rax),%ymm5
/* Start the five accumulators with the products of ymm2. */
1678 vpmuludq %ymm2,%ymm7,%ymm13
1679 vpmuludq %ymm2,%ymm8,%ymm14
1680 vpmuludq %ymm2,%ymm9,%ymm15
1681 vpmuludq %ymm2,%ymm10,%ymm11
1682 vpmuludq %ymm2,%ymm5,%ymm12
/* Remaining products, accumulated in ymm11..ymm15. Loading and splitting of
   the next 64 input bytes is interleaved with the multiplies. */
1684 vpmuludq %ymm0,%ymm8,%ymm6
1685 vpmuludq %ymm1,%ymm8,%ymm2
1686 vpaddq %ymm6,%ymm12,%ymm12
1687 vpaddq %ymm2,%ymm13,%ymm13
1688 vpmuludq %ymm3,%ymm8,%ymm6
1689 vpmuludq 64(%rsp),%ymm4,%ymm2
1690 vpaddq %ymm6,%ymm15,%ymm15
1691 vpaddq %ymm2,%ymm11,%ymm11
1692 vmovdqa -16(%rax),%ymm8
1694 vpmuludq %ymm0,%ymm7,%ymm6
1695 vpmuludq %ymm1,%ymm7,%ymm2
1696 vpaddq %ymm6,%ymm11,%ymm11
1697 vpaddq %ymm2,%ymm12,%ymm12
1698 vpmuludq %ymm3,%ymm7,%ymm6
1699 vpmuludq %ymm4,%ymm7,%ymm2
1700 vmovdqu 0(%rsi),%xmm7
1701 vpaddq %ymm6,%ymm14,%ymm14
1702 vpaddq %ymm2,%ymm15,%ymm15
1703 vinserti128 $1,32(%rsi),%ymm7,%ymm7
1705 vpmuludq %ymm3,%ymm8,%ymm6
1706 vpmuludq %ymm4,%ymm8,%ymm2
1707 vmovdqu 16(%rsi),%xmm8
1708 vpaddq %ymm6,%ymm11,%ymm11
1709 vpaddq %ymm2,%ymm12,%ymm12
1710 vmovdqa 16(%rax),%ymm2
1711 vpmuludq %ymm1,%ymm9,%ymm6
1712 vpmuludq %ymm0,%ymm9,%ymm9
1713 vpaddq %ymm6,%ymm14,%ymm14
1714 vpaddq %ymm9,%ymm13,%ymm13
1715 vinserti128 $1,48(%rsi),%ymm8,%ymm8
1718 vpmuludq %ymm1,%ymm2,%ymm6
1719 vpmuludq %ymm0,%ymm2,%ymm2
1720 vpsrldq $6,%ymm7,%ymm9
1721 vpaddq %ymm6,%ymm15,%ymm15
1722 vpaddq %ymm2,%ymm14,%ymm14
1723 vpmuludq %ymm3,%ymm10,%ymm6
1724 vpmuludq %ymm4,%ymm10,%ymm2
1725 vpsrldq $6,%ymm8,%ymm10
1726 vpaddq %ymm6,%ymm12,%ymm12
1727 vpaddq %ymm2,%ymm13,%ymm13
1728 vpunpckhqdq %ymm8,%ymm7,%ymm6
/* The final sums land in ymm2, ymm3, ymm4 and ymm0 (ymm12 stays as is). */
1730 vpmuludq %ymm3,%ymm5,%ymm3
1731 vpmuludq %ymm4,%ymm5,%ymm4
1732 vpunpcklqdq %ymm8,%ymm7,%ymm7
1733 vpaddq %ymm3,%ymm13,%ymm2
1734 vpaddq %ymm4,%ymm14,%ymm3
1735 vpunpcklqdq %ymm10,%ymm9,%ymm10
1736 vpmuludq 80(%rax),%ymm0,%ymm4
1737 vpmuludq %ymm1,%ymm5,%ymm0
/* Reload the mask into ymm5, which was used as a multiplier above. */
1738 vmovdqa 64(%rcx),%ymm5
1739 vpaddq %ymm4,%ymm15,%ymm4
1740 vpaddq %ymm0,%ymm11,%ymm0
/* Carry propagation across ymm0..ymm4, interleaved with the splitting of the
   input just loaded (ymm7, ymm8, ymm9, ymm10, ymm6). */
1745 vpsrlq $26,%ymm3,%ymm14
1746 vpand %ymm5,%ymm3,%ymm3
1747 vpaddq %ymm14,%ymm4,%ymm4
1749 vpsrlq $26,%ymm0,%ymm11
1750 vpand %ymm5,%ymm0,%ymm0
1751 vpaddq %ymm11,%ymm12,%ymm1
1753 vpsrlq $26,%ymm4,%ymm15
1754 vpand %ymm5,%ymm4,%ymm4
1756 vpsrlq $4,%ymm10,%ymm9
1758 vpsrlq $26,%ymm1,%ymm12
1759 vpand %ymm5,%ymm1,%ymm1
1760 vpaddq %ymm12,%ymm2,%ymm2
/* The carry out of ymm4 is added to ymm0 once, then again shifted left by 2,
   so ymm0 grows by 5 times that carry. */
1762 vpaddq %ymm15,%ymm0,%ymm0
1763 vpsllq $2,%ymm15,%ymm15
1764 vpaddq %ymm15,%ymm0,%ymm0
1766 vpand %ymm5,%ymm9,%ymm9
1767 vpsrlq $26,%ymm7,%ymm8
1769 vpsrlq $26,%ymm2,%ymm13
1770 vpand %ymm5,%ymm2,%ymm2
1771 vpaddq %ymm13,%ymm3,%ymm3
1773 vpaddq %ymm9,%ymm2,%ymm2
1774 vpsrlq $30,%ymm10,%ymm10
1776 vpsrlq $26,%ymm0,%ymm11
1777 vpand %ymm5,%ymm0,%ymm0
1778 vpaddq %ymm11,%ymm1,%ymm1
1780 vpsrlq $40,%ymm6,%ymm6
1782 vpsrlq $26,%ymm3,%ymm14
1783 vpand %ymm5,%ymm3,%ymm3
1784 vpaddq %ymm14,%ymm4,%ymm4
1786 vpand %ymm5,%ymm7,%ymm7
1787 vpand %ymm5,%ymm8,%ymm8
1788 vpand %ymm5,%ymm10,%ymm10
1789 vpor 32(%rcx),%ymm6,%ymm6
/* Same add-and-load step as before, but the multipliers are now read with
   unaligned loads 4 bytes past the earlier addresses (4(%rsp) instead of
   0(%rsp), 52(%rax) instead of 48(%rax), and so on). */
1803 vpaddq %ymm0,%ymm7,%ymm0
1804 vmovdqu 4(%rsp),%ymm7
1805 vpaddq %ymm1,%ymm8,%ymm1
1806 vmovdqu 36(%rsp),%ymm8
1807 vpaddq %ymm3,%ymm10,%ymm3
1808 vmovdqu 100(%rsp),%ymm9
1809 vpaddq %ymm4,%ymm6,%ymm4
1810 vmovdqu 52(%rax),%ymm10
1811 vmovdqu 116(%rax),%ymm5
/* Multiply-accumulate with the shifted multipliers, without interleaved
   input loads. */
1813 vpmuludq %ymm2,%ymm7,%ymm13
1814 vpmuludq %ymm2,%ymm8,%ymm14
1815 vpmuludq %ymm2,%ymm9,%ymm15
1816 vpmuludq %ymm2,%ymm10,%ymm11
1817 vpmuludq %ymm2,%ymm5,%ymm12
1819 vpmuludq %ymm0,%ymm8,%ymm6
1820 vpmuludq %ymm1,%ymm8,%ymm2
1821 vpaddq %ymm6,%ymm12,%ymm12
1822 vpaddq %ymm2,%ymm13,%ymm13
1823 vpmuludq %ymm3,%ymm8,%ymm6
1824 vpmuludq 68(%rsp),%ymm4,%ymm2
1825 vpaddq %ymm6,%ymm15,%ymm15
1826 vpaddq %ymm2,%ymm11,%ymm11
1828 vpmuludq %ymm0,%ymm7,%ymm6
1829 vpmuludq %ymm1,%ymm7,%ymm2
1830 vpaddq %ymm6,%ymm11,%ymm11
1831 vmovdqu -12(%rax),%ymm8
1832 vpaddq %ymm2,%ymm12,%ymm12
1833 vpmuludq %ymm3,%ymm7,%ymm6
1834 vpmuludq %ymm4,%ymm7,%ymm2
1835 vpaddq %ymm6,%ymm14,%ymm14
1836 vpaddq %ymm2,%ymm15,%ymm15
1838 vpmuludq %ymm3,%ymm8,%ymm6
1839 vpmuludq %ymm4,%ymm8,%ymm2
1840 vpaddq %ymm6,%ymm11,%ymm11
1841 vpaddq %ymm2,%ymm12,%ymm12
1842 vmovdqu 20(%rax),%ymm2
1843 vpmuludq %ymm1,%ymm9,%ymm6
1844 vpmuludq %ymm0,%ymm9,%ymm9
1845 vpaddq %ymm6,%ymm14,%ymm14
1846 vpaddq %ymm9,%ymm13,%ymm13
1848 vpmuludq %ymm1,%ymm2,%ymm6
1849 vpmuludq %ymm0,%ymm2,%ymm2
1850 vpaddq %ymm6,%ymm15,%ymm15
1851 vpaddq %ymm2,%ymm14,%ymm14
1852 vpmuludq %ymm3,%ymm10,%ymm6
1853 vpmuludq %ymm4,%ymm10,%ymm2
1854 vpaddq %ymm6,%ymm12,%ymm12
1855 vpaddq %ymm2,%ymm13,%ymm13
1857 vpmuludq %ymm3,%ymm5,%ymm3
1858 vpmuludq %ymm4,%ymm5,%ymm4
1859 vpaddq %ymm3,%ymm13,%ymm2
1860 vpaddq %ymm4,%ymm14,%ymm3
1861 vpmuludq 84(%rax),%ymm0,%ymm4
1862 vpmuludq %ymm1,%ymm5,%ymm0
1863 vmovdqa 64(%rcx),%ymm5
1864 vpaddq %ymm4,%ymm15,%ymm4
1865 vpaddq %ymm0,%ymm11,%ymm0
/* Horizontal add, step 1: within each 128-bit lane, add the upper 64-bit
   element to the lower one. */
1870 vpsrldq $8,%ymm12,%ymm8
1871 vpsrldq $8,%ymm2,%ymm9
1872 vpsrldq $8,%ymm3,%ymm10
1873 vpsrldq $8,%ymm4,%ymm6
1874 vpsrldq $8,%ymm0,%ymm7
1875 vpaddq %ymm8,%ymm12,%ymm12
1876 vpaddq %ymm9,%ymm2,%ymm2
1877 vpaddq %ymm10,%ymm3,%ymm3
1878 vpaddq %ymm6,%ymm4,%ymm4
1879 vpaddq %ymm7,%ymm0,%ymm0
/* Step 2: vpermq $0x2 brings 64-bit element 2 (the low element of the high
   lane) down to element 0, and it is added to the low lane's sum. */
1881 vpermq $0x2,%ymm3,%ymm10
1882 vpermq $0x2,%ymm4,%ymm6
1883 vpermq $0x2,%ymm0,%ymm7
1884 vpermq $0x2,%ymm12,%ymm8
1885 vpermq $0x2,%ymm2,%ymm9
1886 vpaddq %ymm10,%ymm3,%ymm3
1887 vpaddq %ymm6,%ymm4,%ymm4
1888 vpaddq %ymm7,%ymm0,%ymm0
1889 vpaddq %ymm8,%ymm12,%ymm12
1890 vpaddq %ymm9,%ymm2,%ymm2
/* Final carry propagation across ymm0..ymm4, with the top carry again folded
   into ymm0 multiplied by 5. */
1895 vpsrlq $26,%ymm3,%ymm14
1896 vpand %ymm5,%ymm3,%ymm3
1897 vpaddq %ymm14,%ymm4,%ymm4
1899 vpsrlq $26,%ymm0,%ymm11
1900 vpand %ymm5,%ymm0,%ymm0
1901 vpaddq %ymm11,%ymm12,%ymm1
1903 vpsrlq $26,%ymm4,%ymm15
1904 vpand %ymm5,%ymm4,%ymm4
1906 vpsrlq $26,%ymm1,%ymm12
1907 vpand %ymm5,%ymm1,%ymm1
1908 vpaddq %ymm12,%ymm2,%ymm2
1910 vpaddq %ymm15,%ymm0,%ymm0
1911 vpsllq $2,%ymm15,%ymm15
1912 vpaddq %ymm15,%ymm0,%ymm0
1914 vpsrlq $26,%ymm2,%ymm13
1915 vpand %ymm5,%ymm2,%ymm2
1916 vpaddq %ymm13,%ymm3,%ymm3
1918 vpsrlq $26,%ymm0,%ymm11
1919 vpand %ymm5,%ymm0,%ymm0
1920 vpaddq %ymm11,%ymm1,%ymm1
1922 vpsrlq $26,%ymm3,%ymm14
1923 vpand %ymm5,%ymm3,%ymm3
1924 vpaddq %ymm14,%ymm4,%ymm4
/* Store the low 32 bits of xmm0..xmm4 into five consecutive dwords at
   -112(%rdi)..-96(%rdi). */
1926 vmovd %xmm0,-112(%rdi)
1927 vmovd %xmm1,-108(%rdi)
1928 vmovd %xmm2,-104(%rdi)
1929 vmovd %xmm3,-100(%rdi)
1930 vmovd %xmm4,-96(%rdi)
1936 .size poly1305_blocks_avx2,.-poly1305_blocks_avx2
/*
 * Constant tables (excerpt).
 * NOTE(review): the labels naming these tables are not shown, and the leading
 * numbers look like generated-file line numbers with gaps, so the offset of
 * each row from .Lconst cannot be confirmed from here.
 */
/* 0x0ffffff = 2^24-1 in the low dword of each of four 64-bit lanes. */
1940 .long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
/* 16777216 = 2^24 in each of four 64-bit lanes. */
1942 .long 16777216,0,16777216,0,16777216,0,16777216,0
/* 0x3ffffff = 2^26-1 in each of four 64-bit lanes. */
1944 .long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
/* Eight dword indices; presumably the vpermd index vector loaded from
   96(%rcx) by the AVX2 code - TODO confirm. */
1946 .long 2,2,2,3,2,0,2,1
/* Sixteen dwords: each index 0..7 preceded by a zero dword. */
1948 .long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
/* Eight more dword indices. */
1951 .long 0,1,1,2,2,3,7,7
/* 64-bit masks: 0xfffffffffff = 2^44-1, 0x3ffffffffff = 2^42-1. */
1955 .quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
/* Eight lanes of 2^44-1. */
1963 .quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
1964 .quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
/* Eight lanes of 2^42-1. */
1966 .quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
1967 .quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
/* NUL-terminated ASCII string:
   "Poly1305 for x86_64, CRYPTOGAMS by <appro@openssl.org>". */
1968 .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/*
 * xor128_encrypt_n_pad (excerpt): exported (global) function.
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * the instructions between the visible loads and stores are not shown.
 * Presumably they XOR the loaded data with a second buffer - TODO confirm.
 */
1970 .globl xor128_encrypt_n_pad
1971 .type xor128_encrypt_n_pad,@function
1973 xor128_encrypt_n_pad:
/* 16-byte path: load from rsi+rdx into xmm0, store xmm0 to rdi+rdx. */
1982 movdqu (%rsi,%rdx,1),%xmm0
1984 movdqu %xmm0,(%rdi,%rdx,1)
/* Byte path: load one byte from rsi+rdx into al, store al to rdi+rdx. */
1998 movb (%rsi,%rdx,1),%al
2000 movb %al,(%rdi,%rdx,1)
2017 .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
/*
 * xor128_decrypt_n_pad (excerpt): exported (global) function.
 * NOTE(review): the leading numbers look like generated-file line numbers;
 * the instructions between the visible loads and stores are not shown.
 * Unlike the encrypt variant, the stored register differs from the loaded
 * one, so the stored value is produced by instructions not visible here.
 */
2019 .globl xor128_decrypt_n_pad
2020 .type xor128_decrypt_n_pad,@function
2022 xor128_decrypt_n_pad:
/* 16-byte path: load from rsi+rdx into xmm0; xmm1 is stored to rdi+rdx. */
2031 movdqu (%rsi,%rdx,1),%xmm0
2034 movdqu %xmm1,(%rdi,%rdx,1)
/* Byte path: load one byte from rsi+rdx into r11b; al is stored to rdi+rdx. */
2050 movb (%rsi,%rdx,1),%r11b
2053 movb %al,(%rdi,%rdx,1)
2070 .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad