1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
5 ; This test checks combinations of FNEG and FMA intrinsics on AVX-512 targets.
; test1: %c is negated (fsub from -0.0) and passed as the third operand of an
; unmasked (mask -1) vfmadd.ps.512 call. The CHECK line expects a single
; vfmsub213ps with no separate sign-flip instruction.
8 define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
14 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
15 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i16 -1, i32 4) #2
19 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
20 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
21 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; test2: the result of an unmasked vfmadd.ps.512 call is negated (fsub from
; -0.0). The CHECK line expects the negation to be absorbed into a single
; vfnmsub213ps.
24 define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
26 ; CHECK: # %bb.0: # %entry
27 ; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
30 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
31 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
32 ret <16 x float> %sub.i
; test3: the result of an unmasked vfnmadd.ps.512 call is negated (fsub from
; -0.0). The CHECK line expects a single vfmsub213ps.
35 define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
37 ; CHECK: # %bb.0: # %entry
38 ; CHECK-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
41 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
42 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
43 ret <16 x float> %sub.i
; test4: the result of an unmasked vfnmsub.ps.512 call is negated (fsub from
; -0.0). The CHECK line expects a single vfmadd213ps.
46 define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
48 ; CHECK: # %bb.0: # %entry
49 ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
52 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
53 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
54 ret <16 x float> %sub.i
; test5: negated %c is the third operand of an unmasked vfmadd.ps.512 call
; whose last (i32) operand is 2. The CHECK line expects a single vfmsub213ps
; that carries the {ru-sae} embedded-rounding annotation.
57 define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
59 ; CHECK: # %bb.0: # %entry
60 ; CHECK-NEXT: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
63 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
64 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i16 -1, i32 2) #2
; test6: the result of an unmasked vfnmsub.ps.512 call (last i32 operand 2) is
; negated. The CHECK line expects a single vfmadd213ps that keeps the {ru-sae}
; embedded-rounding annotation.
68 define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
70 ; CHECK: # %bb.0: # %entry
71 ; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
74 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
75 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
76 ret <16 x float> %sub.i
; test7: 256-bit case. The result of llvm.x86.fma.vfmsub.ps.256 is negated
; (fsub from -0.0). The CHECK line expects a single vfnmadd213ps on ymm
; registers.
80 define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
82 ; CHECK: # %bb.0: # %entry
83 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
86 %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
87 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
88 ret <8 x float> %sub.i
; test8: negated %c is the third operand of llvm.x86.fma.vfmsub.ps.256. The
; recorded assertions keep the negation as an explicit vxorps in front of
; vfmsub213ps. SKX xors with a {1to8} broadcast memory operand; KNL first
; materializes the sign-bit constant with vbroadcastss.
91 define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
93 ; SKX: # %bb.0: # %entry
94 ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
95 ; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
99 ; KNL: # %bb.0: # %entry
100 ; KNL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
101 ; KNL-NEXT: vxorps %ymm3, %ymm2, %ymm2
102 ; KNL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
105 %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
106 %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
110 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
; test9: double-precision case. The result of an unmasked (mask -1)
; vfmadd.pd.512 call is negated. The CHECK line expects a single vfnmsub213pd.
113 define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
114 ; CHECK-LABEL: test9:
115 ; CHECK: # %bb.0: # %entry
116 ; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
119 %0 = tail call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 -1, i32 4) #2
120 %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
121 ret <8 x double> %sub.i
124 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8, i32)
; test10: the whole <2 x double> result of the scalar vfmadd.sd intrinsic
; (mask -1) is negated. The recorded assertions keep the negation separate:
; vfmadd213sd followed by a vxorpd with a constant-pool operand.
126 define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
127 ; CHECK-LABEL: test10:
128 ; CHECK: # %bb.0: # %entry
129 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
130 ; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
133 %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
134 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
135 ret <2 x double> %sub.i
138 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)
; test11: negated %c is the third operand of mask3.vfmadd.ss with a variable
; mask. The recorded assertions keep an explicit vxorps on %xmm2 in front of a
; masked vfmadd231ss, then copy %xmm2 to %xmm0.
140 define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
142 ; SKX: # %bb.0: # %entry
143 ; SKX-NEXT: vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm2
144 ; SKX-NEXT: kmovd %edi, %k1
145 ; SKX-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
146 ; SKX-NEXT: vmovaps %xmm2, %xmm0
150 ; KNL: # %bb.0: # %entry
151 ; KNL-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
152 ; KNL-NEXT: vxorps %xmm3, %xmm2, %xmm2
153 ; KNL-NEXT: kmovw %edi, %k1
154 ; KNL-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
155 ; KNL-NEXT: vmovaps %xmm2, %xmm0
158 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
159 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
163 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; test11b: negated %c is the third operand of mask.vfmadd.ss (the "mask"
; variant rather than "mask3") with a variable mask. The assertions expect a
; masked vfmsub213ss with no separate xor.
165 define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
166 ; SKX-LABEL: test11b:
167 ; SKX: # %bb.0: # %entry
168 ; SKX-NEXT: kmovd %edi, %k1
169 ; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
172 ; KNL-LABEL: test11b:
173 ; KNL: # %bb.0: # %entry
174 ; KNL-NEXT: kmovw %edi, %k1
175 ; KNL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
178 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
179 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
183 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; test12: the result of a vfmadd.pd.512 call with a variable mask is negated.
; The recorded assertions keep the negation separate: a masked vfmadd132pd
; followed by an xor with a {1to8} broadcast operand (vxorpd on SKX, vpxorq on
; KNL).
185 define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
187 ; SKX: # %bb.0: # %entry
188 ; SKX-NEXT: kmovd %edi, %k1
189 ; SKX-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
190 ; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
194 ; KNL: # %bb.0: # %entry
195 ; KNL-NEXT: kmovw %edi, %k1
196 ; KNL-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
197 ; KNL-NEXT: vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
200 %0 = tail call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4) #2
201 %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
202 ret <8 x double> %sub.i
; test13: negated %a is the first operand of vfmadd.sd with a variable mask.
; The recorded assertions keep the negation as an explicit vxorpd on %xmm0 in
; front of the masked vfmadd213sd.
205 define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
207 ; SKX: # %bb.0: # %entry
208 ; SKX-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
209 ; SKX-NEXT: kmovd %edi, %k1
210 ; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
214 ; KNL: # %bb.0: # %entry
215 ; KNL-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
216 ; KNL-NEXT: kmovw %edi, %k1
217 ; KNL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
221 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
222 %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
; test14: the result of a vfnmsub.ps.512 call with a variable mask and last
; i32 operand 2 is negated. The recorded assertions keep the negation
; separate: a masked vfnmsub132ps {ru-sae} followed by an xor with a {1to16}
; broadcast operand (vxorps on SKX, vpxord on KNL).
226 define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
228 ; SKX: # %bb.0: # %entry
229 ; SKX-NEXT: kmovd %edi, %k1
230 ; SKX-NEXT: vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
231 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
235 ; KNL: # %bb.0: # %entry
236 ; KNL-NEXT: kmovw %edi, %k1
237 ; KNL-NEXT: vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
238 ; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
241 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
242 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
243 ret <16 x float> %sub.i
; test15: one negated value (%sub.i = -%a) feeds two chained masked
; vfmadd.ps.512 calls. It is the first operand of the first call (last i32
; operand 2) and the second operand of the second call (last i32 operand 1),
; which takes the first call's result as its first operand. The recorded
; assertions show an xor into %zmm3, vfnmadd213ps {ru-sae}, a masked vmovaps,
; and a masked vfnmadd132ps {rd-sae}.
246 define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
248 ; SKX: # %bb.0: # %entry
249 ; SKX-NEXT: kmovd %edi, %k1
250 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
251 ; SKX-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
252 ; SKX-NEXT: vmovaps %zmm1, %zmm3 {%k1}
253 ; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
254 ; SKX-NEXT: vmovaps %zmm3, %zmm0
258 ; KNL: # %bb.0: # %entry
259 ; KNL-NEXT: kmovw %edi, %k1
260 ; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
261 ; KNL-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
262 ; KNL-NEXT: vmovaps %zmm1, %zmm3 {%k1}
263 ; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
264 ; KNL-NEXT: vmovaps %zmm3, %zmm0
267 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
268 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2)
269 %1 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %0, <16 x float> %sub.i, <16 x float> %c, i16 %mask, i32 1)
; test16: negated %c is the third operand of vfmaddsub.ps.512 with a variable
; mask and last i32 operand 1. The assertions expect a masked vfmsubadd132ps
; carrying {rd-sae}, with no separate xor.
273 define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
276 ; SKX-NEXT: kmovd %edi, %k1
277 ; SKX-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
282 ; KNL-NEXT: kmovw %edi, %k1
283 ; KNL-NEXT: vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
285 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
286 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i16 %mask, i32 1)
287 ret <16 x float> %res
289 declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; test17: double-precision case. Negated %c is the third operand of
; vfmaddsub.pd.512 with a variable mask and last i32 operand 4. The assertions
; expect a masked vfmsubadd132pd with no separate xor.
291 define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
294 ; SKX-NEXT: kmovd %edi, %k1
295 ; SKX-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
300 ; KNL-NEXT: kmovw %edi, %k1
301 ; KNL-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
303 %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
304 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i8 %mask, i32 4)
305 ret <8 x double> %res
307 declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; test18: negated %b is the second operand of mask.vfmadd.ss with a variable
; mask. The assertions expect a masked vfnmadd213ss with no separate xor.
309 define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
311 ; SKX: # %bb.0: # %entry
312 ; SKX-NEXT: kmovd %edi, %k1
313 ; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
317 ; KNL: # %bb.0: # %entry
318 ; KNL-NEXT: kmovw %edi, %k1
319 ; KNL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
322 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
323 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
; test19: both %b (second operand) and %c (third operand) are negated before
; mask.vfmadd.ss with a variable mask. The assertions expect a masked
; vfnmsub213ss with no separate xor.
327 define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
329 ; SKX: # %bb.0: # %entry
330 ; SKX-NEXT: kmovd %edi, %k1
331 ; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
335 ; KNL: # %bb.0: # %entry
336 ; KNL-NEXT: kmovw %edi, %k1
337 ; KNL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
340 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
341 %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
342 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
; test20: negated %b is the second operand of mask3.vfmadd.ss with a variable
; mask. The assertions expect a masked vfnmadd231ss that writes %xmm2,
; followed by a copy of %xmm2 to %xmm0.
346 define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
348 ; SKX: # %bb.0: # %entry
349 ; SKX-NEXT: kmovd %edi, %k1
350 ; SKX-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
351 ; SKX-NEXT: vmovaps %xmm2, %xmm0
355 ; KNL: # %bb.0: # %entry
356 ; KNL-NEXT: kmovw %edi, %k1
357 ; KNL-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
358 ; KNL-NEXT: vmovaps %xmm2, %xmm0
361 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
362 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
; test21: negated %b is the second operand of mask.vfmadd.ss with a variable
; mask and last i32 operand 8. The assertions expect a masked vfnmadd213ss
; carrying the {rn-sae} embedded-rounding annotation.
366 define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
368 ; SKX: # %bb.0: # %entry
369 ; SKX-NEXT: kmovd %edi, %k1
370 ; SKX-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
374 ; KNL: # %bb.0: # %entry
375 ; KNL-NEXT: kmovw %edi, %k1
376 ; KNL-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
379 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
380 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
; test22: both %b and %c are negated before mask.vfmadd.ss with a variable
; mask and last i32 operand 8. The assertions expect a masked vfnmsub213ss
; carrying the {rn-sae} embedded-rounding annotation.
384 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
386 ; SKX: # %bb.0: # %entry
387 ; SKX-NEXT: kmovd %edi, %k1
388 ; SKX-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
392 ; KNL: # %bb.0: # %entry
393 ; KNL-NEXT: kmovw %edi, %k1
394 ; KNL-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
397 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
398 %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
399 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
; test23: negated %b is the second operand of mask3.vfmadd.ss with a variable
; mask and last i32 operand 8. The assertions expect a masked vfnmadd231ss
; carrying {rn-sae} that writes %xmm2, followed by a copy of %xmm2 to %xmm0.
403 define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
405 ; SKX: # %bb.0: # %entry
406 ; SKX-NEXT: kmovd %edi, %k1
407 ; SKX-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
408 ; SKX-NEXT: vmovaps %xmm2, %xmm0
412 ; KNL: # %bb.0: # %entry
413 ; KNL-NEXT: kmovw %edi, %k1
414 ; KNL-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
415 ; KNL-NEXT: vmovaps %xmm2, %xmm0
418 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
419 %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
; test24: negated %c is the third operand of mask.vfmadd.ss with a variable
; mask and last i32 operand 8. The assertions expect a masked vfmsub213ss
; carrying the {rn-sae} embedded-rounding annotation.
423 define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
425 ; SKX: # %bb.0: # %entry
426 ; SKX-NEXT: kmovd %edi, %k1
427 ; SKX-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
431 ; KNL: # %bb.0: # %entry
432 ; KNL-NEXT: kmovw %edi, %k1
433 ; KNL-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
436 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
437 %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
; test25: both %b and %c are negated before an unmasked (mask -1)
; vfmadd.ps.512 call with last i32 operand 8. The CHECK line expects a single
; vfnmsub213ps carrying the {rn-sae} embedded-rounding annotation.
441 define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
442 ; CHECK-LABEL: test25:
443 ; CHECK: # %bb.0: # %entry
444 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
447 %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
448 %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
449 %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i16 -1, i32 8) #2