1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze | FileCheck %s --check-prefix=AVX2
3 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze | FileCheck %s --check-prefix=SKL
4 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze | FileCheck %s --check-prefix=KNL
5 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze | FileCheck %s --check-prefix=SKX
8 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
10 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
11 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
12 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
15 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
16 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
17 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
20 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
21 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
22 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
25 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
26 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
27 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
29 %mask = icmp eq <2 x i64> %trigger, zeroinitializer
30 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
34 define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
36 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
37 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
38 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
41 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
42 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
43 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
46 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
47 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
48 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
51 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
52 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
53 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
55 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
56 %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
60 define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
62 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
63 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
67 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
68 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
69 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
72 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
73 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
74 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
77 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
78 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
79 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
81 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
82 call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
86 define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
88 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
89 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
90 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
93 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
94 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
95 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
98 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
99 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
100 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
103 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
104 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
105 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
107 %mask = icmp eq <8 x i32> %trigger, zeroinitializer
108 %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
112 define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
113 ; AVX2-LABEL: 'test5'
114 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
115 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
116 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
119 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
120 ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
121 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
124 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
125 ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
126 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
129 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
130 ; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
131 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
133 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
134 call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
138 define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
139 ; AVX2-LABEL: 'test6'
140 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
141 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
142 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
145 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
146 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
147 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
150 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
151 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
152 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
155 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
156 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
157 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
159 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
160 call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
164 define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
165 ; AVX2-LABEL: 'test7'
166 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
167 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
168 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
171 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
172 ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
173 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
176 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
177 ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
178 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
181 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
182 ; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
183 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
185 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
186 %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
190 define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
191 ; AVX2-LABEL: 'test8'
192 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
193 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
194 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
197 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
198 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
199 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
202 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
203 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
204 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
207 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
208 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
209 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
211 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
212 %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
216 define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
217 ; AVX2-LABEL: 'test_gather_2f64'
218 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
219 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
221 ; SKL-LABEL: 'test_gather_2f64'
222 ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
223 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
225 ; KNL-LABEL: 'test_gather_2f64'
226 ; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
227 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
229 ; SKX-LABEL: 'test_gather_2f64'
230 ; SKX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
231 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
237 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
238 ret <2 x double> %res
240 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
242 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
243 ; AVX2-LABEL: 'test_gather_4i32'
244 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
245 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
247 ; SKL-LABEL: 'test_gather_4i32'
248 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
249 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
251 ; KNL-LABEL: 'test_gather_4i32'
252 ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
253 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
255 ; SKX-LABEL: 'test_gather_4i32'
256 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
257 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
263 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
267 define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {
268 ; AVX2-LABEL: 'test_gather_4i32_const_mask'
269 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
270 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
272 ; SKL-LABEL: 'test_gather_4i32_const_mask'
273 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
274 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
276 ; KNL-LABEL: 'test_gather_4i32_const_mask'
277 ; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
278 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
280 ; SKX-LABEL: 'test_gather_4i32_const_mask'
281 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
282 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
288 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
291 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
293 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
294 ; AVX2-LABEL: 'test_gather_16f32_const_mask'
295 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
296 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
297 ; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
298 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
300 ; SKL-LABEL: 'test_gather_16f32_const_mask'
301 ; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
302 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
303 ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
304 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
306 ; KNL-LABEL: 'test_gather_16f32_const_mask'
307 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
308 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
309 ; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
310 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
312 ; SKX-LABEL: 'test_gather_16f32_const_mask'
313 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
314 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
315 ; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
316 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
318 %sext_ind = sext <16 x i32> %ind to <16 x i64>
319 %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
321 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
325 define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
326 ; AVX2-LABEL: 'test_gather_16f32_var_mask'
327 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
328 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
329 ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
330 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
332 ; SKL-LABEL: 'test_gather_16f32_var_mask'
333 ; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
334 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
335 ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
336 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
338 ; KNL-LABEL: 'test_gather_16f32_var_mask'
339 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
340 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
341 ; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
342 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
344 ; SKX-LABEL: 'test_gather_16f32_var_mask'
345 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
346 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
347 ; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
348 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
350 %sext_ind = sext <16 x i32> %ind to <16 x i64>
351 %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
353 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
357 define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
358 ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
359 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
360 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
361 ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
362 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
364 ; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
365 ; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
366 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
367 ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
368 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
370 ; KNL-LABEL: 'test_gather_16f32_ra_var_mask'
371 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
372 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
373 ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
374 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
376 ; SKX-LABEL: 'test_gather_16f32_ra_var_mask'
377 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
378 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
379 ; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
380 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
382 %sext_ind = sext <16 x i32> %ind to <16 x i64>
383 %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
385 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
389 define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
390 ; AVX2-LABEL: 'test_gather_16f32_const_mask2'
391 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
392 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
393 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
394 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
395 ; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
396 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
398 ; SKL-LABEL: 'test_gather_16f32_const_mask2'
399 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
400 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
401 ; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
402 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
403 ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
404 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
406 ; KNL-LABEL: 'test_gather_16f32_const_mask2'
407 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
408 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
409 ; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
410 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
411 ; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
412 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
414 ; SKX-LABEL: 'test_gather_16f32_const_mask2'
415 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
416 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
417 ; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
418 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
419 ; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
420 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
422 %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
423 %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
425 %sext_ind = sext <16 x i32> %ind to <16 x i64>
426 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
428 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
432 define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
433 ; AVX2-LABEL: 'test_scatter_16i32'
434 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
435 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
436 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
437 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
438 ; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
439 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
441 ; SKL-LABEL: 'test_scatter_16i32'
442 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
443 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
444 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
445 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
446 ; SKL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
447 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
449 ; KNL-LABEL: 'test_scatter_16i32'
450 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
451 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
452 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
453 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
454 ; KNL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
455 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
457 ; SKX-LABEL: 'test_scatter_16i32'
458 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
459 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
460 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
461 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
462 ; SKX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
463 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
465 %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
466 %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
468 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
469 %imask = bitcast i16 %mask to <16 x i1>
470 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
474 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
475 ; AVX2-LABEL: 'test_scatter_8i32'
476 ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
477 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
479 ; SKL-LABEL: 'test_scatter_8i32'
480 ; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
481 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
483 ; KNL-LABEL: 'test_scatter_8i32'
484 ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
485 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
487 ; SKX-LABEL: 'test_scatter_8i32'
488 ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
489 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
492 call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
496 declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
498 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
499 ; AVX2-LABEL: 'test_scatter_4i32'
500 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
501 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
503 ; SKL-LABEL: 'test_scatter_4i32'
504 ; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
505 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
507 ; KNL-LABEL: 'test_scatter_4i32'
508 ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
509 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
511 ; SKX-LABEL: 'test_scatter_4i32'
512 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
513 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
515 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
519 define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
520 ; AVX2-LABEL: 'test_gather_4f32'
521 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
522 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
523 ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
524 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
526 ; SKL-LABEL: 'test_gather_4f32'
527 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
528 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
529 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
530 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
532 ; KNL-LABEL: 'test_gather_4f32'
533 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
534 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
535 ; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
536 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
538 ; SKX-LABEL: 'test_gather_4f32'
539 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
540 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
541 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
542 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
544 %sext_ind = sext <4 x i32> %ind to <4 x i64>
545 %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
547 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
551 define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
552 ; AVX2-LABEL: 'test_gather_4f32_const_mask'
553 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
554 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
555 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
556 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
558 ; SKL-LABEL: 'test_gather_4f32_const_mask'
559 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
560 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
561 ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
562 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
564 ; KNL-LABEL: 'test_gather_4f32_const_mask'
565 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
566 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
567 ; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
568 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
570 ; SKX-LABEL: 'test_gather_4f32_const_mask'
571 ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
572 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
573 ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
574 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
576 %sext_ind = sext <4 x i32> %ind to <4 x i64>
577 %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
579 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
583 declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
584 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
585 declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
586 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
588 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
589 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
590 declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
591 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
592 declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
593 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
594 declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
595 declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
596 declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
597 declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
598 declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
599 declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
600 declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
601 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
602 declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
603 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
604 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
605 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
606 declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)