;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; Test: llvm.SI.gather4.v2i32 (first operand <2 x i32>) must select
; image_gather4 with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_v2:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_v2() {
  %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.v4i32 (first operand <4 x i32>) must select
; image_gather4 with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4:
;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4() {
  %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.cl.v4i32 (first operand <4 x i32>) must select
; image_gather4_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_cl:
;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl() {
  %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.l.v4i32 (first operand <4 x i32>) must select
; image_gather4_l with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_l:
;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l() {
  %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.v4i32 (first operand <4 x i32>) must select
; image_gather4_b with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b:
;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b() {
  %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.cl.v4i32 (first operand <4 x i32>) must select
; image_gather4_b_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b_cl:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl() {
  %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.cl.v8i32 (first operand <8 x i32>) must select
; image_gather4_b_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b_cl_v8:
;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl_v8() {
  %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.lz.v2i32 (first operand <2 x i32>) must select
; image_gather4_lz with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_lz_v2:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz_v2() {
  %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.lz.v4i32 (first operand <4 x i32>) must select
; image_gather4_lz with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_lz:
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz() {
  %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_o:
;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_o() {
  %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.cl.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_cl_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_cl_o:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl_o() {
  %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.cl.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_cl_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_cl_o_v8:
;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_cl_o_v8() {
  %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.l.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_l_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_l_o:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l_o() {
  %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.l.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_l_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_l_o_v8:
;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_l_o_v8() {
  %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_b_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b_o:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_o() {
  %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_b_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b_o_v8:
;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_o_v8() {
  %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.b.cl.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_b_cl_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_b_cl_o:
;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_b_cl_o() {
  %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.lz.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_lz_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_lz_o:
;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_lz_o() {
  %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.v4i32 (first operand <4 x i32>) must select
; image_gather4_c with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c:
;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c() {
  %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.cl.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_cl:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl() {
  %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.cl.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_cl_v8:
;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl_v8() {
  %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.l.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_l with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_l:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l() {
  %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.l.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_l with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_l_v8:
;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l_v8() {
  %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.b.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_b with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_b:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b() {
  %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.b.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_b with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_b_v8:
;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_v8() {
  %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.b.cl.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_b_cl with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_b_cl:
;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_cl() {
  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.lz.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_lz with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_lz:
;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz() {
  %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_o:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_o_v8:
;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_o_v8() {
  %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.cl.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_cl_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_cl_o:
;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_cl_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.l.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_l_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_l_o:
;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_l_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.b.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_b_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_b_o:
;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.b.cl.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_b_cl_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_b_cl_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.lz.o.v4i32 (first operand <4 x i32>) must select
; image_gather4_c_lz_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_lz_o:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz_o() {
  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
; Test: llvm.SI.gather4.c.lz.o.v8i32 (first operand <8 x i32>) must select
; image_gather4_c_lz_o with dmask:0x1 and the da modifier. All four result
; elements are passed to llvm.SI.export so the call stays live.
;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
define amdgpu_ps void @gather4_c_lz_o_v8() {
  %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
  %r0 = extractelement <4 x float> %r, i32 0
  %r1 = extractelement <4 x float> %r, i32 1
  %r2 = extractelement <4 x float> %r, i32 2
  %r3 = extractelement <4 x float> %r, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
  ret void
}
;CHECK-LABEL: {{^}}gather4_sgpr_bug:
;
; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
;
; The <4 x i32> operand of the gather is loaded from memory and then has its
; element 0 replaced with 0. The checks below expect, in order: the
; s_load_dwordx4 into s[LO:HI], a wait on lgkmcnt, an s_mov_b32 writing 0 into
; s[LO], and finally image_gather4_lz consuming that same s[LO:HI] range with
; dmask:0x8.
;
;CHECK: s_load_dwordx4 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
;CHECK: s_waitcnt lgkmcnt(0)
;CHECK: s_mov_b32 s[[LO]], 0
;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
define amdgpu_ps float @gather4_sgpr_bug() {
  %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
  %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp4 = extractelement <4 x float> %tmp2, i32 1
  %tmp9 = fadd float undef, %tmp4
  ; Returning the sum keeps the gather result live; the function is declared
  ; to return float and %tmp9 is the only float computed here.
  ret float %tmp9
}
; Declarations of every intrinsic called by the test functions in this file.
; Each gather4 variant takes three vector operands followed by eight i32
; operands and returns <4 x float>; the suffix (v2i32/v4i32/v8i32) names the
; type of the first operand.

; Basic variants.
declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

; ".o" variants.
declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

; ".c" variants.
declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

; ".c" + ".o" variants.
declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0

; The export intrinsic carries no attribute group, so calls to it are not
; marked readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Attribute group applied to every gather4 declaration above.
attributes #0 = { nounwind readnone }