; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Verify that instcombine is able to fold identity shuffles.

define <16 x i8> @identity_test(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test(
; CHECK-NEXT:    ret <16 x i8> %InVec
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2(
; CHECK-NEXT:    ret <32 x i8> %InVec
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <64 x i8> @identity_test_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx512(
; CHECK-NEXT:    ret <64 x i8> %InVec
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <64 x i8> %1
}

; Verify that instcombine is able to fold byte shuffles with zero masks.

define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2(
; CHECK-NEXT:    ret <32 x i8> zeroinitializer
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

define <64 x i8> @fold_to_zero_vector_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx512(
; CHECK-NEXT:    ret <64 x i8> zeroinitializer
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <64 x i8> %1
}

; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

define <16 x i8> @splat_test(<16 x i8> %InVec) {
; CHECK-LABEL: @splat_test(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; In the test case below, elements in the low 128-bit lane of the result
; vector are equal to the lowest byte of %InVec (shuffle index 0).
; Elements in the high 128-bit lane of the result vector are equal to
; the lowest byte in the high 128-bit lane of %InVec (shuffle index 16).

define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @splat_test_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
  ret <32 x i8> %1
}

define <64 x i8> @splat_test_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @splat_test_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> zeroinitializer)
  ret <64 x i8> %1
}

; Each of the byte shuffles in the following tests is equivalent to a blend between
; vector %InVec and a vector of all zeroes.
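; In a pshufb control mask, any byte with its most significant bit set writes
; zero to the corresponding result byte; -128 is only the conventional choice,
; and the exact negative value is immaterial. In the folded shufflevector each
; zeroed position selects the first element of the matching 16-byte lane of the
; all-zeroes second operand, so instcombine's demanded-elements logic rewrites
; that operand as a constant with undef in every unreferenced lane.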
define <16 x i8> @blend1(<16 x i8> %InVec) {
; CHECK-LABEL: @blend1(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend2(<16 x i8> %InVec) {
; CHECK-LABEL: @blend2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend3(<16 x i8> %InVec) {
; CHECK-LABEL: @blend3(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend4(<16 x i8> %InVec) {
; CHECK-LABEL: @blend4(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend5(<16 x i8> %InVec) {
; CHECK-LABEL: @blend5(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <16 x i8> @blend6(<16 x i8> %InVec) {
; CHECK-LABEL: @blend6(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend1_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend2_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend3_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend4_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend5_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend6_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}
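
; The AVX-512 forms below repeat the same 16-byte blend patterns across four
; 128-bit lanes; in the folded shuffles the zeroed positions select the first
; element of the corresponding lane of the zero operand (indices 64, 80, 96
; and 112).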

define <64 x i8> @blend1_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend1_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 1, i32 64, i32 3, i32 64, i32 5, i32 64, i32 7, i32 64, i32 9, i32 64, i32 11, i32 64, i32 13, i32 64, i32 15, i32 80, i32 17, i32 80, i32 19, i32 80, i32 21, i32 80, i32 23, i32 80, i32 25, i32 80, i32 27, i32 80, i32 29, i32 80, i32 31, i32 96, i32 33, i32 96, i32 35, i32 96, i32 37, i32 96, i32 39, i32 96, i32 41, i32 96, i32 43, i32 96, i32 45, i32 96, i32 47, i32 112, i32 49, i32 112, i32 51, i32 112, i32 53, i32 112, i32 55, i32 112, i32 57, i32 112, i32 59, i32 112, i32 61, i32 112, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <64 x i8> %1
}

define <64 x i8> @blend2_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend2_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 2, i32 3, i32 64, i32 64, i32 6, i32 7, i32 64, i32 64, i32 10, i32 11, i32 64, i32 64, i32 14, i32 15, i32 80, i32 80, i32 18, i32 19, i32 80, i32 80, i32 22, i32 23, i32 80, i32 80, i32 26, i32 27, i32 80, i32 80, i32 30, i32 31, i32 96, i32 96, i32 34, i32 35, i32 96, i32 96, i32 38, i32 39, i32 96, i32 96, i32 42, i32 43, i32 96, i32 96, i32 46, i32 47, i32 112, i32 112, i32 50, i32 51, i32 112, i32 112, i32 54, i32 55, i32 112, i32 112, i32 58, i32 59, i32 112, i32 112, i32 62, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <64 x i8> %1
}

define <64 x i8> @blend3_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend3_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 20, i32 21, i32 22, i32 23, i32 80, i32 80, i32 80, i32 80, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96, i32 36, i32 37, i32 38, i32 39, i32 96, i32 96, i32 96, i32 96, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 52, i32 53, i32 54, i32 55, i32 112, i32 112, i32 112, i32 112, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <64 x i8> %1
}

define <64 x i8> @blend4_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend4_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <64 x i8> %1
}

define <64 x i8> @blend5_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend5_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 18, i32 19, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 34, i32 35, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 50, i32 51, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <64 x i8> %1
}

define <64 x i8> @blend6_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @blend6_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <64 x i8> %1
}

; movq idiom: keep the low 8 bytes of each 128-bit lane and zero the rest.

define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

define <64 x i8> @movq_idiom_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <64 x i8> %1
}

; Vector permutations using byte shuffles.
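; pshufb can only move bytes within a 128-bit lane: an in-range control byte is
; reduced to its low four bits and indexes relative to its own lane, so none of
; the folds below can produce a cross-lane shufflevector mask.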

define <16 x i8> @permute1(<16 x i8> %InVec) {
; CHECK-LABEL: @permute1(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @permute2(<16 x i8> %InVec) {
; CHECK-LABEL: @permute2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute1_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute2_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <32 x i8> %1
}

define <64 x i8> @permute1_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @permute1_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <64 x i8> %1
}

define <64 x i8> @permute2_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @permute2_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <64 x i8> %1
}

; Test that instcombine correctly folds a pshufb with mask values that are
; not -128 and that are not encoded in four bits.
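; For example, a control byte of 23 (0x17) has bit 7 clear and low four bits
; 0b0111, so it still selects byte 7 of its lane, while -16 (0xF0) has bit 7
; set and therefore produces zero no matter what its low bits are.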

define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test2_2(
; CHECK-NEXT:    ret <16 x i8> %InVec
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
  ret <16 x i8> %1
}

define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2_2(
; CHECK-NEXT:    ret <32 x i8> %InVec
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
  ret <32 x i8> %1
}

define <64 x i8> @identity_test_avx512_2(<64 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx512_2(
; CHECK-NEXT:    ret <64 x i8> %InVec
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
  ret <64 x i8> %1
}

define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_2(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2_2(
; CHECK-NEXT:    ret <32 x i8> zeroinitializer
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>)
  ret <32 x i8> %1
}

define <64 x i8> @fold_to_zero_vector_avx512_2(<64 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx512_2(
; CHECK-NEXT:    ret <64 x i8> zeroinitializer
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>)
  ret <64 x i8> %1
}

define <16 x i8> @permute3(<16 x i8> %InVec) {
; CHECK-LABEL: @permute3(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79>)
  ret <16 x i8> %1
}

define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute3_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79, i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79>)
  ret <32 x i8> %1
}

define <64 x i8> @permute3_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @permute3_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79, i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79, i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79, i8 20, i8 37, i8 54, i8 71, i8 20, i8 37, i8 54, i8 71, i8 28, i8 45, i8 62, i8 79, i8 28, i8 45, i8 62, i8 79>)
  ret <64 x i8> %1
}

; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements.
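; An undef control byte becomes an undef index in the folded shufflevector
; mask, which is the behavior the CHECK lines below expect.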

define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
  ret <32 x i8> %1
}

define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @fold_with_undef_elts_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 64, i32 undef, i32 64, i32 1, i32 64, i32 undef, i32 64, i32 2, i32 64, i32 undef, i32 64, i32 3, i32 64, i32 undef, i32 64, i32 16, i32 80, i32 undef, i32 80, i32 17, i32 80, i32 undef, i32 80, i32 18, i32 80, i32 undef, i32 80, i32 19, i32 80, i32 undef, i32 80, i32 32, i32 96, i32 undef, i32 96, i32 33, i32 96, i32 undef, i32 96, i32 34, i32 96, i32 undef, i32 96, i32 35, i32 96, i32 undef, i32 96, i32 48, i32 112, i32 undef, i32 112, i32 49, i32 112, i32 undef, i32 112, i32 50, i32 112, i32 undef, i32 112, i32 51, i32 112, i32 undef, i32 112>
; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
  ret <64 x i8> %1
}

define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts(
; CHECK-NEXT:    ret <16 x i8> undef
;
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
  ret <16 x i8> %1
}

define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts_avx2(
; CHECK-NEXT:    ret <32 x i8> undef
;
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
  ret <32 x i8> %1
}

define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
; CHECK-LABEL: @fold_with_allundef_elts_avx512(
; CHECK-NEXT:    ret <64 x i8> undef
;
  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> undef)
  ret <64 x i8> %1
}

; Demanded elts tests.

define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
; CHECK-LABEL: @demanded_elts_insertion(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
  %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %2)
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
  ret <16 x i8> %4
}

define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
; CHECK-LABEL: @demanded_elts_insertion_avx2(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
;
  %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
  %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %2)
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 undef, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %4
}

define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
; CHECK-LABEL: @demanded_elts_insertion_avx512(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
;
  %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
  %2 = insertelement <64 x i8> %1, i8 %M30, i32 30
  %3 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %2)
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %4
}

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)