1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
21 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
22 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
; Scheduling test for the SSE3 addsubpd instruction.
; The body calls @llvm.x86.sse3.addsub.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vaddsubpd; the others expect
; the plain addsubpd.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
24 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
25 ; GENERIC-LABEL: test_addsubpd:
27 ; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
28 ; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
29 ; GENERIC-NEXT: retq # sched: [1:1.00]
31 ; ATOM-LABEL: test_addsubpd:
33 ; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00]
34 ; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50]
35 ; ATOM-NEXT: retq # sched: [79:39.50]
37 ; SLM-LABEL: test_addsubpd:
39 ; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
40 ; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00]
41 ; SLM-NEXT: retq # sched: [4:1.00]
43 ; SANDY-SSE-LABEL: test_addsubpd:
45 ; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
46 ; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
47 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
49 ; SANDY-LABEL: test_addsubpd:
51 ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
52 ; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
53 ; SANDY-NEXT: retq # sched: [1:1.00]
55 ; HASWELL-SSE-LABEL: test_addsubpd:
56 ; HASWELL-SSE: # %bb.0:
57 ; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
58 ; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
59 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
61 ; HASWELL-LABEL: test_addsubpd:
63 ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
64 ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
65 ; HASWELL-NEXT: retq # sched: [7:1.00]
67 ; BROADWELL-SSE-LABEL: test_addsubpd:
68 ; BROADWELL-SSE: # %bb.0:
69 ; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
70 ; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
71 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
73 ; BROADWELL-LABEL: test_addsubpd:
75 ; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
76 ; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
77 ; BROADWELL-NEXT: retq # sched: [7:1.00]
79 ; SKYLAKE-SSE-LABEL: test_addsubpd:
80 ; SKYLAKE-SSE: # %bb.0:
81 ; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
82 ; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
83 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
85 ; SKYLAKE-LABEL: test_addsubpd:
87 ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
88 ; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
89 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
91 ; SKX-SSE-LABEL: test_addsubpd:
93 ; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
94 ; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
95 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
97 ; SKX-LABEL: test_addsubpd:
99 ; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
100 ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
101 ; SKX-NEXT: retq # sched: [7:1.00]
103 ; BDVER2-SSE-LABEL: test_addsubpd:
104 ; BDVER2-SSE: # %bb.0:
105 ; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [5:1.00]
106 ; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
107 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
109 ; BDVER2-LABEL: test_addsubpd:
111 ; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
112 ; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
113 ; BDVER2-NEXT: retq # sched: [5:1.00]
115 ; BTVER2-SSE-LABEL: test_addsubpd:
116 ; BTVER2-SSE: # %bb.0:
117 ; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
118 ; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
119 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
121 ; BTVER2-LABEL: test_addsubpd:
123 ; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
124 ; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
125 ; BTVER2-NEXT: retq # sched: [4:1.00]
127 ; ZNVER1-SSE-LABEL: test_addsubpd:
128 ; ZNVER1-SSE: # %bb.0:
129 ; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
130 ; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
131 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
133 ; ZNVER1-LABEL: test_addsubpd:
135 ; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
136 ; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
137 ; ZNVER1-NEXT: retq # sched: [1:0.50]
138 %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
139 %2 = load <2 x double>, <2 x double> *%a2, align 16
140 %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the SSE3 addsub.pd intrinsic called by test_addsubpd; it takes
; two <2 x double> vectors and is marked nounwind readnone.
143 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Scheduling test for the SSE3 addsubps instruction.
; The body calls @llvm.x86.sse3.addsub.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vaddsubps; the others expect
; the plain addsubps.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
145 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
146 ; GENERIC-LABEL: test_addsubps:
148 ; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
149 ; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
150 ; GENERIC-NEXT: retq # sched: [1:1.00]
152 ; ATOM-LABEL: test_addsubps:
154 ; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00]
155 ; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00]
156 ; ATOM-NEXT: retq # sched: [79:39.50]
158 ; SLM-LABEL: test_addsubps:
160 ; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
161 ; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00]
162 ; SLM-NEXT: retq # sched: [4:1.00]
164 ; SANDY-SSE-LABEL: test_addsubps:
165 ; SANDY-SSE: # %bb.0:
166 ; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
167 ; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
168 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
170 ; SANDY-LABEL: test_addsubps:
172 ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
173 ; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
174 ; SANDY-NEXT: retq # sched: [1:1.00]
176 ; HASWELL-SSE-LABEL: test_addsubps:
177 ; HASWELL-SSE: # %bb.0:
178 ; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
179 ; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
180 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
182 ; HASWELL-LABEL: test_addsubps:
184 ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
185 ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
186 ; HASWELL-NEXT: retq # sched: [7:1.00]
188 ; BROADWELL-SSE-LABEL: test_addsubps:
189 ; BROADWELL-SSE: # %bb.0:
190 ; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
191 ; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
192 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
194 ; BROADWELL-LABEL: test_addsubps:
195 ; BROADWELL: # %bb.0:
196 ; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
197 ; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
198 ; BROADWELL-NEXT: retq # sched: [7:1.00]
200 ; SKYLAKE-SSE-LABEL: test_addsubps:
201 ; SKYLAKE-SSE: # %bb.0:
202 ; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
203 ; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
204 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
206 ; SKYLAKE-LABEL: test_addsubps:
208 ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
209 ; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
210 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
212 ; SKX-SSE-LABEL: test_addsubps:
214 ; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
215 ; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
216 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
218 ; SKX-LABEL: test_addsubps:
220 ; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
221 ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
222 ; SKX-NEXT: retq # sched: [7:1.00]
224 ; BDVER2-SSE-LABEL: test_addsubps:
225 ; BDVER2-SSE: # %bb.0:
226 ; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [5:1.00]
227 ; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
228 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
230 ; BDVER2-LABEL: test_addsubps:
232 ; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
233 ; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
234 ; BDVER2-NEXT: retq # sched: [5:1.00]
236 ; BTVER2-SSE-LABEL: test_addsubps:
237 ; BTVER2-SSE: # %bb.0:
238 ; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
239 ; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
240 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
242 ; BTVER2-LABEL: test_addsubps:
244 ; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
245 ; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
246 ; BTVER2-NEXT: retq # sched: [4:1.00]
248 ; ZNVER1-SSE-LABEL: test_addsubps:
249 ; ZNVER1-SSE: # %bb.0:
250 ; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
251 ; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
252 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
254 ; ZNVER1-LABEL: test_addsubps:
256 ; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
257 ; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
258 ; ZNVER1-NEXT: retq # sched: [1:0.50]
259 %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
260 %2 = load <4 x float>, <4 x float> *%a2, align 16
261 %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the SSE3 addsub.ps intrinsic called by test_addsubps; it takes
; two <4 x float> vectors and is marked nounwind readnone.
264 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for the SSE3 haddpd instruction.
; The body calls @llvm.x86.sse3.hadd.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vhaddpd; the others expect the
; plain haddpd.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
266 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
267 ; GENERIC-LABEL: test_haddpd:
269 ; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
270 ; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
271 ; GENERIC-NEXT: retq # sched: [1:1.00]
273 ; ATOM-LABEL: test_haddpd:
275 ; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00]
276 ; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50]
277 ; ATOM-NEXT: retq # sched: [79:39.50]
279 ; SLM-LABEL: test_haddpd:
281 ; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
282 ; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00]
283 ; SLM-NEXT: retq # sched: [4:1.00]
285 ; SANDY-SSE-LABEL: test_haddpd:
286 ; SANDY-SSE: # %bb.0:
287 ; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
288 ; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
289 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
291 ; SANDY-LABEL: test_haddpd:
293 ; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
294 ; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
295 ; SANDY-NEXT: retq # sched: [1:1.00]
297 ; HASWELL-SSE-LABEL: test_haddpd:
298 ; HASWELL-SSE: # %bb.0:
299 ; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
300 ; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
301 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
303 ; HASWELL-LABEL: test_haddpd:
305 ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
306 ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
307 ; HASWELL-NEXT: retq # sched: [7:1.00]
309 ; BROADWELL-SSE-LABEL: test_haddpd:
310 ; BROADWELL-SSE: # %bb.0:
311 ; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
312 ; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00]
313 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
315 ; BROADWELL-LABEL: test_haddpd:
316 ; BROADWELL: # %bb.0:
317 ; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
318 ; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
319 ; BROADWELL-NEXT: retq # sched: [7:1.00]
321 ; SKYLAKE-SSE-LABEL: test_haddpd:
322 ; SKYLAKE-SSE: # %bb.0:
323 ; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
324 ; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
325 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
327 ; SKYLAKE-LABEL: test_haddpd:
329 ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
330 ; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
331 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
333 ; SKX-SSE-LABEL: test_haddpd:
335 ; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
336 ; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
337 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
339 ; SKX-LABEL: test_haddpd:
341 ; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
342 ; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
343 ; SKX-NEXT: retq # sched: [7:1.00]
345 ; BDVER2-SSE-LABEL: test_haddpd:
346 ; BDVER2-SSE: # %bb.0:
347 ; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [11:1.00]
348 ; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [16:1.00]
349 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
351 ; BDVER2-LABEL: test_haddpd:
353 ; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
354 ; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
355 ; BDVER2-NEXT: retq # sched: [5:1.00]
357 ; BTVER2-SSE-LABEL: test_haddpd:
358 ; BTVER2-SSE: # %bb.0:
359 ; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
360 ; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00]
361 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
363 ; BTVER2-LABEL: test_haddpd:
365 ; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
366 ; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
367 ; BTVER2-NEXT: retq # sched: [4:1.00]
369 ; ZNVER1-SSE-LABEL: test_haddpd:
370 ; ZNVER1-SSE: # %bb.0:
371 ; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:0.25]
372 ; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:0.25]
373 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
375 ; ZNVER1-LABEL: test_haddpd:
377 ; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
378 ; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
379 ; ZNVER1-NEXT: retq # sched: [1:0.50]
380 %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
381 %2 = load <2 x double>, <2 x double> *%a2, align 16
382 %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the SSE3 hadd.pd intrinsic called by test_haddpd; it takes two
; <2 x double> vectors and is marked nounwind readnone.
385 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
; Scheduling test for the SSE3 haddps instruction.
; The body calls @llvm.x86.sse3.hadd.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vhaddps; the others expect the
; plain haddps.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
387 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
388 ; GENERIC-LABEL: test_haddps:
390 ; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
391 ; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
392 ; GENERIC-NEXT: retq # sched: [1:1.00]
394 ; ATOM-LABEL: test_haddps:
396 ; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00]
397 ; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50]
398 ; ATOM-NEXT: retq # sched: [79:39.50]
400 ; SLM-LABEL: test_haddps:
402 ; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
403 ; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00]
404 ; SLM-NEXT: retq # sched: [4:1.00]
406 ; SANDY-SSE-LABEL: test_haddps:
407 ; SANDY-SSE: # %bb.0:
408 ; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
409 ; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
410 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
412 ; SANDY-LABEL: test_haddps:
414 ; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
415 ; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
416 ; SANDY-NEXT: retq # sched: [1:1.00]
418 ; HASWELL-SSE-LABEL: test_haddps:
419 ; HASWELL-SSE: # %bb.0:
420 ; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
421 ; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
422 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
424 ; HASWELL-LABEL: test_haddps:
426 ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
427 ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
428 ; HASWELL-NEXT: retq # sched: [7:1.00]
430 ; BROADWELL-SSE-LABEL: test_haddps:
431 ; BROADWELL-SSE: # %bb.0:
432 ; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
433 ; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00]
434 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
436 ; BROADWELL-LABEL: test_haddps:
437 ; BROADWELL: # %bb.0:
438 ; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
439 ; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
440 ; BROADWELL-NEXT: retq # sched: [7:1.00]
442 ; SKYLAKE-SSE-LABEL: test_haddps:
443 ; SKYLAKE-SSE: # %bb.0:
444 ; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
445 ; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
446 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
448 ; SKYLAKE-LABEL: test_haddps:
450 ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
451 ; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
452 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
454 ; SKX-SSE-LABEL: test_haddps:
456 ; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
457 ; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
458 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
460 ; SKX-LABEL: test_haddps:
462 ; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
463 ; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
464 ; SKX-NEXT: retq # sched: [7:1.00]
466 ; BDVER2-SSE-LABEL: test_haddps:
467 ; BDVER2-SSE: # %bb.0:
468 ; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [11:1.00]
469 ; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [16:1.00]
470 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
472 ; BDVER2-LABEL: test_haddps:
474 ; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
475 ; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
476 ; BDVER2-NEXT: retq # sched: [5:1.00]
478 ; BTVER2-SSE-LABEL: test_haddps:
479 ; BTVER2-SSE: # %bb.0:
480 ; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
481 ; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00]
482 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
484 ; BTVER2-LABEL: test_haddps:
486 ; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
487 ; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
488 ; BTVER2-NEXT: retq # sched: [4:1.00]
490 ; ZNVER1-SSE-LABEL: test_haddps:
491 ; ZNVER1-SSE: # %bb.0:
492 ; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:0.25]
493 ; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:0.25]
494 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
496 ; ZNVER1-LABEL: test_haddps:
498 ; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
499 ; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
500 ; ZNVER1-NEXT: retq # sched: [1:0.50]
501 %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
502 %2 = load <4 x float>, <4 x float> *%a2, align 16
503 %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the SSE3 hadd.ps intrinsic called by test_haddps; it takes two
; <4 x float> vectors and is marked nounwind readnone.
506 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for the SSE3 hsubpd instruction.
; The body calls @llvm.x86.sse3.hsub.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vhsubpd; the others expect the
; plain hsubpd.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
508 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
509 ; GENERIC-LABEL: test_hsubpd:
511 ; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
512 ; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
513 ; GENERIC-NEXT: retq # sched: [1:1.00]
515 ; ATOM-LABEL: test_hsubpd:
517 ; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00]
518 ; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50]
519 ; ATOM-NEXT: retq # sched: [79:39.50]
521 ; SLM-LABEL: test_hsubpd:
523 ; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
524 ; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00]
525 ; SLM-NEXT: retq # sched: [4:1.00]
527 ; SANDY-SSE-LABEL: test_hsubpd:
528 ; SANDY-SSE: # %bb.0:
529 ; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
530 ; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
531 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
533 ; SANDY-LABEL: test_hsubpd:
535 ; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
536 ; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
537 ; SANDY-NEXT: retq # sched: [1:1.00]
539 ; HASWELL-SSE-LABEL: test_hsubpd:
540 ; HASWELL-SSE: # %bb.0:
541 ; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
542 ; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
543 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
545 ; HASWELL-LABEL: test_hsubpd:
547 ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
548 ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
549 ; HASWELL-NEXT: retq # sched: [7:1.00]
551 ; BROADWELL-SSE-LABEL: test_hsubpd:
552 ; BROADWELL-SSE: # %bb.0:
553 ; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
554 ; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00]
555 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
557 ; BROADWELL-LABEL: test_hsubpd:
558 ; BROADWELL: # %bb.0:
559 ; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
560 ; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
561 ; BROADWELL-NEXT: retq # sched: [7:1.00]
563 ; SKYLAKE-SSE-LABEL: test_hsubpd:
564 ; SKYLAKE-SSE: # %bb.0:
565 ; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
566 ; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
567 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
569 ; SKYLAKE-LABEL: test_hsubpd:
571 ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
572 ; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
573 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
575 ; SKX-SSE-LABEL: test_hsubpd:
577 ; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
578 ; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
579 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
581 ; SKX-LABEL: test_hsubpd:
583 ; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
584 ; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
585 ; SKX-NEXT: retq # sched: [7:1.00]
587 ; BDVER2-SSE-LABEL: test_hsubpd:
588 ; BDVER2-SSE: # %bb.0:
589 ; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [11:1.00]
590 ; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [16:1.00]
591 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
593 ; BDVER2-LABEL: test_hsubpd:
595 ; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
596 ; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
597 ; BDVER2-NEXT: retq # sched: [5:1.00]
599 ; BTVER2-SSE-LABEL: test_hsubpd:
600 ; BTVER2-SSE: # %bb.0:
601 ; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
602 ; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00]
603 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
605 ; BTVER2-LABEL: test_hsubpd:
607 ; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
608 ; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
609 ; BTVER2-NEXT: retq # sched: [4:1.00]
611 ; ZNVER1-SSE-LABEL: test_hsubpd:
612 ; ZNVER1-SSE: # %bb.0:
613 ; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:0.25]
614 ; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:0.25]
615 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
617 ; ZNVER1-LABEL: test_hsubpd:
619 ; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
620 ; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
621 ; ZNVER1-NEXT: retq # sched: [1:0.50]
622 %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
623 %2 = load <2 x double>, <2 x double> *%a2, align 16
624 %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the SSE3 hsub.pd intrinsic called by test_hsubpd; it takes two
; <2 x double> vectors and is marked nounwind readnone.
627 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Scheduling test for the SSE3 hsubps instruction.
; The body calls @llvm.x86.sse3.hsub.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned vector loaded from %a2,
; so the expected assembly contains both a register-operand and a
; memory-operand form of the instruction.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vhsubps; the others expect the
; plain hsubps.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
629 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
630 ; GENERIC-LABEL: test_hsubps:
632 ; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
633 ; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
634 ; GENERIC-NEXT: retq # sched: [1:1.00]
636 ; ATOM-LABEL: test_hsubps:
638 ; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00]
639 ; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50]
640 ; ATOM-NEXT: retq # sched: [79:39.50]
642 ; SLM-LABEL: test_hsubps:
644 ; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
645 ; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00]
646 ; SLM-NEXT: retq # sched: [4:1.00]
648 ; SANDY-SSE-LABEL: test_hsubps:
649 ; SANDY-SSE: # %bb.0:
650 ; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
651 ; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
652 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
654 ; SANDY-LABEL: test_hsubps:
656 ; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
657 ; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
658 ; SANDY-NEXT: retq # sched: [1:1.00]
660 ; HASWELL-SSE-LABEL: test_hsubps:
661 ; HASWELL-SSE: # %bb.0:
662 ; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
663 ; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
664 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
666 ; HASWELL-LABEL: test_hsubps:
668 ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
669 ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
670 ; HASWELL-NEXT: retq # sched: [7:1.00]
672 ; BROADWELL-SSE-LABEL: test_hsubps:
673 ; BROADWELL-SSE: # %bb.0:
674 ; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
675 ; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00]
676 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
678 ; BROADWELL-LABEL: test_hsubps:
679 ; BROADWELL: # %bb.0:
680 ; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
681 ; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
682 ; BROADWELL-NEXT: retq # sched: [7:1.00]
684 ; SKYLAKE-SSE-LABEL: test_hsubps:
685 ; SKYLAKE-SSE: # %bb.0:
686 ; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
687 ; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
688 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
690 ; SKYLAKE-LABEL: test_hsubps:
692 ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
693 ; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
694 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
696 ; SKX-SSE-LABEL: test_hsubps:
698 ; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
699 ; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
700 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
702 ; SKX-LABEL: test_hsubps:
704 ; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
705 ; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
706 ; SKX-NEXT: retq # sched: [7:1.00]
708 ; BDVER2-SSE-LABEL: test_hsubps:
709 ; BDVER2-SSE: # %bb.0:
710 ; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [11:1.00]
711 ; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [16:1.00]
712 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
714 ; BDVER2-LABEL: test_hsubps:
716 ; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
717 ; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
718 ; BDVER2-NEXT: retq # sched: [5:1.00]
720 ; BTVER2-SSE-LABEL: test_hsubps:
721 ; BTVER2-SSE: # %bb.0:
722 ; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
723 ; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00]
724 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
726 ; BTVER2-LABEL: test_hsubps:
728 ; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
729 ; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
730 ; BTVER2-NEXT: retq # sched: [4:1.00]
732 ; ZNVER1-SSE-LABEL: test_hsubps:
733 ; ZNVER1-SSE: # %bb.0:
734 ; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:0.25]
735 ; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:0.25]
736 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
738 ; ZNVER1-LABEL: test_hsubps:
740 ; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
741 ; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
742 ; ZNVER1-NEXT: retq # sched: [1:0.50]
743 %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
744 %2 = load <4 x float>, <4 x float> *%a2, align 16
745 %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the SSE3 hsub.ps intrinsic called by test_hsubps; it takes two
; <4 x float> vectors and is marked nounwind readnone.
748 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for the SSE3 lddqu instruction.
; The body makes a single call to @llvm.x86.sse3.ldu.dq on the i8* argument,
; so every check block expects one load from (%rdi) followed by retq. The Atom
; block additionally expects two nop instructions between the load and retq.
; Each check block pins the "# sched" annotation printed by llc -print-schedule
; for one CPU/feature configuration from the RUN lines. The blocks whose RUN
; line only disables AVX2 expect the v-prefixed vlddqu; the others expect the
; plain lddqu.
; The assertions are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
750 define <16 x i8> @test_lddqu(i8* %a0) {
751 ; GENERIC-LABEL: test_lddqu:
753 ; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
754 ; GENERIC-NEXT: retq # sched: [1:1.00]
756 ; ATOM-LABEL: test_lddqu:
758 ; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50]
759 ; ATOM-NEXT: nop # sched: [1:0.50]
760 ; ATOM-NEXT: nop # sched: [1:0.50]
761 ; ATOM-NEXT: retq # sched: [79:39.50]
763 ; SLM-LABEL: test_lddqu:
765 ; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00]
766 ; SLM-NEXT: retq # sched: [4:1.00]
768 ; SANDY-SSE-LABEL: test_lddqu:
769 ; SANDY-SSE: # %bb.0:
770 ; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
771 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
773 ; SANDY-LABEL: test_lddqu:
775 ; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
776 ; SANDY-NEXT: retq # sched: [1:1.00]
778 ; HASWELL-SSE-LABEL: test_lddqu:
779 ; HASWELL-SSE: # %bb.0:
780 ; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
781 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
783 ; HASWELL-LABEL: test_lddqu:
785 ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
786 ; HASWELL-NEXT: retq # sched: [7:1.00]
788 ; BROADWELL-SSE-LABEL: test_lddqu:
789 ; BROADWELL-SSE: # %bb.0:
790 ; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
791 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
793 ; BROADWELL-LABEL: test_lddqu:
794 ; BROADWELL: # %bb.0:
795 ; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
796 ; BROADWELL-NEXT: retq # sched: [7:1.00]
798 ; SKYLAKE-SSE-LABEL: test_lddqu:
799 ; SKYLAKE-SSE: # %bb.0:
800 ; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
801 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
803 ; SKYLAKE-LABEL: test_lddqu:
805 ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
806 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
808 ; SKX-SSE-LABEL: test_lddqu:
810 ; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
811 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
813 ; SKX-LABEL: test_lddqu:
815 ; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
816 ; SKX-NEXT: retq # sched: [7:1.00]
818 ; BDVER2-SSE-LABEL: test_lddqu:
819 ; BDVER2-SSE: # %bb.0:
820 ; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
821 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
823 ; BDVER2-LABEL: test_lddqu:
825 ; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
826 ; BDVER2-NEXT: retq # sched: [5:1.00]
828 ; BTVER2-SSE-LABEL: test_lddqu:
829 ; BTVER2-SSE: # %bb.0:
830 ; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
831 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
833 ; BTVER2-LABEL: test_lddqu:
835 ; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
836 ; BTVER2-NEXT: retq # sched: [4:1.00]
838 ; ZNVER1-SSE-LABEL: test_lddqu:
839 ; ZNVER1-SSE: # %bb.0:
840 ; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50]
841 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
843 ; ZNVER1-LABEL: test_lddqu:
845 ; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
846 ; ZNVER1-NEXT: retq # sched: [1:0.50]
847 %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
; Declaration of the intrinsic @llvm.x86.sse3.ldu.dq used by test_lddqu:
; takes an i8* and yields <16 x i8>; marked nounwind readonly.
850 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
; test_monitor -- scheduling-annotation test for @llvm.x86.sse3.monitor,
; tail-called with all three arguments (i8* %a0, i32 %a1, i32 %a2).
;
; The assertion rows below expect "movl %esi, %ecx" and "leaq (%rdi), %rax"
; to set up the operands, then "monitor" and "retq". No instruction is
; expected for the third argument; presumably it already sits in the register
; the instruction reads (TODO confirm against the calling convention).
; The BDVER2 and BDVER2-SSE rows expect leaq before movl; every other prefix
; expects movl first.
;
; The "# sched: [a:b]" suffixes come from -print-schedule in the run lines at
; the top of the file. The rows are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
852 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
853 ; GENERIC-LABEL: test_monitor:
855 ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
856 ; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
857 ; GENERIC-NEXT: monitor # sched: [100:0.33]
858 ; GENERIC-NEXT: retq # sched: [1:1.00]
860 ; ATOM-LABEL: test_monitor:
862 ; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50]
863 ; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
864 ; ATOM-NEXT: monitor # sched: [45:22.50]
865 ; ATOM-NEXT: retq # sched: [79:39.50]
867 ; SLM-LABEL: test_monitor:
869 ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50]
870 ; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
871 ; SLM-NEXT: monitor # sched: [100:1.00]
872 ; SLM-NEXT: retq # sched: [4:1.00]
874 ; SANDY-SSE-LABEL: test_monitor:
875 ; SANDY-SSE: # %bb.0:
876 ; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
877 ; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
878 ; SANDY-SSE-NEXT: monitor # sched: [100:0.33]
879 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
881 ; SANDY-LABEL: test_monitor:
883 ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33]
884 ; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
885 ; SANDY-NEXT: monitor # sched: [100:0.33]
886 ; SANDY-NEXT: retq # sched: [1:1.00]
888 ; HASWELL-SSE-LABEL: test_monitor:
889 ; HASWELL-SSE: # %bb.0:
890 ; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
891 ; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
892 ; HASWELL-SSE-NEXT: monitor # sched: [100:0.25]
893 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
895 ; HASWELL-LABEL: test_monitor:
897 ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
898 ; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
899 ; HASWELL-NEXT: monitor # sched: [100:0.25]
900 ; HASWELL-NEXT: retq # sched: [7:1.00]
902 ; BROADWELL-SSE-LABEL: test_monitor:
903 ; BROADWELL-SSE: # %bb.0:
904 ; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
905 ; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
906 ; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25]
907 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
909 ; BROADWELL-LABEL: test_monitor:
910 ; BROADWELL: # %bb.0:
911 ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
912 ; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
913 ; BROADWELL-NEXT: monitor # sched: [100:0.25]
914 ; BROADWELL-NEXT: retq # sched: [7:1.00]
916 ; SKYLAKE-SSE-LABEL: test_monitor:
917 ; SKYLAKE-SSE: # %bb.0:
918 ; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
919 ; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
920 ; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25]
921 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
923 ; SKYLAKE-LABEL: test_monitor:
925 ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25]
926 ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
927 ; SKYLAKE-NEXT: monitor # sched: [100:0.25]
928 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
930 ; SKX-SSE-LABEL: test_monitor:
932 ; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
933 ; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
934 ; SKX-SSE-NEXT: monitor # sched: [100:0.25]
935 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
937 ; SKX-LABEL: test_monitor:
939 ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
940 ; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
941 ; SKX-NEXT: monitor # sched: [100:0.25]
942 ; SKX-NEXT: retq # sched: [7:1.00]
944 ; BDVER2-SSE-LABEL: test_monitor:
945 ; BDVER2-SSE: # %bb.0:
946 ; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
947 ; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
948 ; BDVER2-SSE-NEXT: monitor # sched: [100:0.50]
949 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
951 ; BDVER2-LABEL: test_monitor:
953 ; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
954 ; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
955 ; BDVER2-NEXT: monitor # sched: [100:0.50]
956 ; BDVER2-NEXT: retq # sched: [5:1.00]
958 ; BTVER2-SSE-LABEL: test_monitor:
959 ; BTVER2-SSE: # %bb.0:
960 ; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
961 ; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
962 ; BTVER2-SSE-NEXT: monitor # sched: [100:0.50]
963 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
965 ; BTVER2-LABEL: test_monitor:
967 ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
968 ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
969 ; BTVER2-NEXT: monitor # sched: [100:0.50]
970 ; BTVER2-NEXT: retq # sched: [4:1.00]
972 ; ZNVER1-SSE-LABEL: test_monitor:
973 ; ZNVER1-SSE: # %bb.0:
974 ; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
975 ; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
976 ; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25]
977 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
979 ; ZNVER1-LABEL: test_monitor:
981 ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
982 ; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
983 ; ZNVER1-NEXT: monitor # sched: [100:0.25]
984 ; ZNVER1-NEXT: retq # sched: [1:0.50]
985 tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
; Declaration of the intrinsic @llvm.x86.sse3.monitor used by test_monitor:
; takes (i8*, i32, i32) and returns void. It carries no memory attributes.
988 declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
; test_movddup -- scheduling-annotation test for the <2 x double> splat of
; element 0 (shufflevector with a zeroinitializer mask).
;
; The IR applies the shuffle once to the register argument %a0 and once to a
; value loaded from %a1 (align 16), then subtracts the two results. The
; assertion rows therefore expect one register-source and one memory-source
; movddup (vmovddup under the prefixes without the -SSE suffix, other than
; GENERIC, ATOM and SLM), followed by subpd/vsubpd and retq. SLM also expects
; a movapd copying the result into %xmm0.
;
; The "# sched: [a:b]" suffixes come from -print-schedule in the run lines at
; the top of the file. The rows are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
990 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
991 ; GENERIC-LABEL: test_movddup:
993 ; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
994 ; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
995 ; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
996 ; GENERIC-NEXT: retq # sched: [1:1.00]
998 ; ATOM-LABEL: test_movddup:
1000 ; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1001 ; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
1002 ; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
1003 ; ATOM-NEXT: retq # sched: [79:39.50]
1005 ; SLM-LABEL: test_movddup:
1007 ; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
1008 ; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1009 ; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00]
1010 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
1011 ; SLM-NEXT: retq # sched: [4:1.00]
1013 ; SANDY-SSE-LABEL: test_movddup:
1014 ; SANDY-SSE: # %bb.0:
1015 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1016 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
1017 ; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1018 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1020 ; SANDY-LABEL: test_movddup:
1022 ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1023 ; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
1024 ; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1025 ; SANDY-NEXT: retq # sched: [1:1.00]
1027 ; HASWELL-SSE-LABEL: test_movddup:
1028 ; HASWELL-SSE: # %bb.0:
1029 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1030 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1031 ; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1032 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1034 ; HASWELL-LABEL: test_movddup:
1036 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1037 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1038 ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1039 ; HASWELL-NEXT: retq # sched: [7:1.00]
1041 ; BROADWELL-SSE-LABEL: test_movddup:
1042 ; BROADWELL-SSE: # %bb.0:
1043 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1044 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1045 ; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1046 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1048 ; BROADWELL-LABEL: test_movddup:
1049 ; BROADWELL: # %bb.0:
1050 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1051 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1052 ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1053 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1055 ; SKYLAKE-SSE-LABEL: test_movddup:
1056 ; SKYLAKE-SSE: # %bb.0:
1057 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1058 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1059 ; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
1060 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1062 ; SKYLAKE-LABEL: test_movddup:
1064 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1065 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1066 ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
1067 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1069 ; SKX-SSE-LABEL: test_movddup:
1071 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1072 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1073 ; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
1074 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1076 ; SKX-LABEL: test_movddup:
1078 ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1079 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1080 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
1081 ; SKX-NEXT: retq # sched: [7:1.00]
1083 ; BDVER2-SSE-LABEL: test_movddup:
1084 ; BDVER2-SSE: # %bb.0:
1085 ; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50]
1086 ; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50]
1087 ; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00]
1088 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
1090 ; BDVER2-LABEL: test_movddup:
1092 ; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50]
1093 ; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50]
1094 ; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
1095 ; BDVER2-NEXT: retq # sched: [5:1.00]
1097 ; BTVER2-SSE-LABEL: test_movddup:
1098 ; BTVER2-SSE: # %bb.0:
1099 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1100 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
1101 ; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1102 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1104 ; BTVER2-LABEL: test_movddup:
1106 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
1107 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1108 ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1109 ; BTVER2-NEXT: retq # sched: [4:1.00]
1111 ; ZNVER1-SSE-LABEL: test_movddup:
1112 ; ZNVER1-SSE: # %bb.0:
1113 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1114 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
1115 ; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1116 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1118 ; ZNVER1-LABEL: test_movddup:
1120 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
1121 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1122 ; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1123 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1124 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
1125 %2 = load <2 x double>, <2 x double> *%a1, align 16
1126 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
1127 %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
; test_movshdup -- scheduling-annotation test for the <4 x float> shuffle with
; mask <1, 1, 3, 3> (each odd element duplicated into the even slot below it).
;
; The IR applies the shuffle once to the register argument %a0 and once to a
; value loaded from %a1 (align 16), then adds the two results with fadd. The
; assertion rows expect one register-source and one memory-source movshdup
; (vmovshdup under the prefixes without the -SSE suffix, other than GENERIC,
; ATOM and SLM), followed by addps/vaddps and retq. SLM also expects a movaps
; copying the result into %xmm0.
;
; The "# sched: [a:b]" suffixes come from -print-schedule in the run lines at
; the top of the file. The rows are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
1131 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
1132 ; GENERIC-LABEL: test_movshdup:
1134 ; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1135 ; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1136 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1137 ; GENERIC-NEXT: retq # sched: [1:1.00]
1139 ; ATOM-LABEL: test_movshdup:
1141 ; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1142 ; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
1143 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1144 ; ATOM-NEXT: retq # sched: [79:39.50]
1146 ; SLM-LABEL: test_movshdup:
1148 ; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
1149 ; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1150 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1151 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
1152 ; SLM-NEXT: retq # sched: [4:1.00]
1154 ; SANDY-SSE-LABEL: test_movshdup:
1155 ; SANDY-SSE: # %bb.0:
1156 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1157 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1158 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1159 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1161 ; SANDY-LABEL: test_movshdup:
1163 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1164 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1165 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1166 ; SANDY-NEXT: retq # sched: [1:1.00]
1168 ; HASWELL-SSE-LABEL: test_movshdup:
1169 ; HASWELL-SSE: # %bb.0:
1170 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1171 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1172 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1173 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1175 ; HASWELL-LABEL: test_movshdup:
1177 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1178 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1179 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1180 ; HASWELL-NEXT: retq # sched: [7:1.00]
1182 ; BROADWELL-SSE-LABEL: test_movshdup:
1183 ; BROADWELL-SSE: # %bb.0:
1184 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1185 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
1186 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1187 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1189 ; BROADWELL-LABEL: test_movshdup:
1190 ; BROADWELL: # %bb.0:
1191 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1192 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
1193 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1194 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1196 ; SKYLAKE-SSE-LABEL: test_movshdup:
1197 ; SKYLAKE-SSE: # %bb.0:
1198 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1199 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1200 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1201 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1203 ; SKYLAKE-LABEL: test_movshdup:
1205 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1206 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1207 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1208 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1210 ; SKX-SSE-LABEL: test_movshdup:
1212 ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1213 ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1214 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1215 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1217 ; SKX-LABEL: test_movshdup:
1219 ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1220 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1221 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1222 ; SKX-NEXT: retq # sched: [7:1.00]
1224 ; BDVER2-SSE-LABEL: test_movshdup:
1225 ; BDVER2-SSE: # %bb.0:
1226 ; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50]
1227 ; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50]
1228 ; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
1229 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
1231 ; BDVER2-LABEL: test_movshdup:
1233 ; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50]
1234 ; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50]
1235 ; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1236 ; BDVER2-NEXT: retq # sched: [5:1.00]
1238 ; BTVER2-SSE-LABEL: test_movshdup:
1239 ; BTVER2-SSE: # %bb.0:
1240 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1241 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
1242 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1243 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1245 ; BTVER2-LABEL: test_movshdup:
1247 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
1248 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1249 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1250 ; BTVER2-NEXT: retq # sched: [4:1.00]
1252 ; ZNVER1-SSE-LABEL: test_movshdup:
1253 ; ZNVER1-SSE: # %bb.0:
1254 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1255 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
1256 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1257 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1259 ; ZNVER1-LABEL: test_movshdup:
1261 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
1262 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1263 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1264 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1265 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1266 %2 = load <4 x float>, <4 x float> *%a1, align 16
1267 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1268 %4 = fadd <4 x float> %1, %3
; test_movsldup -- scheduling-annotation test for the <4 x float> shuffle with
; mask <0, 0, 2, 2> (each even element duplicated into the odd slot above it).
;
; The IR applies the shuffle once to the register argument %a0 and once to a
; value loaded from %a1 (align 16), then adds the two results with fadd. The
; assertion rows expect one register-source and one memory-source movsldup
; (vmovsldup under the prefixes without the -SSE suffix, other than GENERIC,
; ATOM and SLM), followed by addps/vaddps and retq. SLM also expects a movaps
; copying the result into %xmm0.
;
; NOTE(review): the ZNVER1-SSE rows give [100:0.25] for both movsldup forms,
; whereas the ZNVER1 rows here give [8:0.50]/[1:0.50] and the ZNVER1-SSE rows
; of test_movshdup give [1:0.50]/[8:0.50]. The values are autogenerated from
; the scheduling model, so if this is unintended the fix presumably belongs
; in the znver1 model rather than in this file -- confirm.
;
; The "# sched: [a:b]" suffixes come from -print-schedule in the run lines at
; the top of the file. The rows are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
1272 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
1273 ; GENERIC-LABEL: test_movsldup:
1275 ; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1276 ; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1277 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1278 ; GENERIC-NEXT: retq # sched: [1:1.00]
1280 ; ATOM-LABEL: test_movsldup:
1282 ; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1283 ; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
1284 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1285 ; ATOM-NEXT: retq # sched: [79:39.50]
1287 ; SLM-LABEL: test_movsldup:
1289 ; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
1290 ; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1291 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1292 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
1293 ; SLM-NEXT: retq # sched: [4:1.00]
1295 ; SANDY-SSE-LABEL: test_movsldup:
1296 ; SANDY-SSE: # %bb.0:
1297 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1298 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1299 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1300 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1302 ; SANDY-LABEL: test_movsldup:
1304 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1305 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1306 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1307 ; SANDY-NEXT: retq # sched: [1:1.00]
1309 ; HASWELL-SSE-LABEL: test_movsldup:
1310 ; HASWELL-SSE: # %bb.0:
1311 ; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1312 ; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1313 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1314 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1316 ; HASWELL-LABEL: test_movsldup:
1318 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1319 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1320 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1321 ; HASWELL-NEXT: retq # sched: [7:1.00]
1323 ; BROADWELL-SSE-LABEL: test_movsldup:
1324 ; BROADWELL-SSE: # %bb.0:
1325 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1326 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
1327 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1328 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1330 ; BROADWELL-LABEL: test_movsldup:
1331 ; BROADWELL: # %bb.0:
1332 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1333 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
1334 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1335 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1337 ; SKYLAKE-SSE-LABEL: test_movsldup:
1338 ; SKYLAKE-SSE: # %bb.0:
1339 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1340 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1341 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1342 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1344 ; SKYLAKE-LABEL: test_movsldup:
1346 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1347 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1348 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1349 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1351 ; SKX-SSE-LABEL: test_movsldup:
1353 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1354 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1355 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1356 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1358 ; SKX-LABEL: test_movsldup:
1360 ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1361 ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1362 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1363 ; SKX-NEXT: retq # sched: [7:1.00]
1365 ; BDVER2-SSE-LABEL: test_movsldup:
1366 ; BDVER2-SSE: # %bb.0:
1367 ; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50]
1368 ; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50]
1369 ; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
1370 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
1372 ; BDVER2-LABEL: test_movsldup:
1374 ; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50]
1375 ; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50]
1376 ; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1377 ; BDVER2-NEXT: retq # sched: [5:1.00]
1379 ; BTVER2-SSE-LABEL: test_movsldup:
1380 ; BTVER2-SSE: # %bb.0:
1381 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
1382 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
1383 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1384 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1386 ; BTVER2-LABEL: test_movsldup:
1388 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
1389 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1390 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1391 ; BTVER2-NEXT: retq # sched: [4:1.00]
1393 ; ZNVER1-SSE-LABEL: test_movsldup:
1394 ; ZNVER1-SSE: # %bb.0:
1395 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
1396 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
1397 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1398 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1400 ; ZNVER1-LABEL: test_movsldup:
1402 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
1403 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1404 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1405 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1406 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1407 %2 = load <4 x float>, <4 x float> *%a1, align 16
1408 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1409 %4 = fadd <4 x float> %1, %3
; test_mwait -- scheduling-annotation test for @llvm.x86.sse3.mwait,
; tail-called with both i32 arguments (%a0, %a1).
;
; The assertion rows below expect "movl %esi, %eax" and "movl %edi, %ecx" to
; set up the operands, then "mwait" and "retq". Presumably %a0 arrives in
; %edi and %a1 in %esi under the x86-64 calling convention, so %a0 ends up in
; %ecx and %a1 in %eax (TODO confirm).
;
; The "# sched: [a:b]" suffixes come from -print-schedule in the run lines at
; the top of the file. The rows are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
1413 define void @test_mwait(i32 %a0, i32 %a1) {
1414 ; GENERIC-LABEL: test_mwait:
1416 ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
1417 ; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
1418 ; GENERIC-NEXT: mwait # sched: [100:0.33]
1419 ; GENERIC-NEXT: retq # sched: [1:1.00]
1421 ; ATOM-LABEL: test_mwait:
1423 ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50]
1424 ; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50]
1425 ; ATOM-NEXT: mwait # sched: [46:23.00]
1426 ; ATOM-NEXT: retq # sched: [79:39.50]
1428 ; SLM-LABEL: test_mwait:
1430 ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
1431 ; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50]
1432 ; SLM-NEXT: mwait # sched: [100:1.00]
1433 ; SLM-NEXT: retq # sched: [4:1.00]
1435 ; SANDY-SSE-LABEL: test_mwait:
1436 ; SANDY-SSE: # %bb.0:
1437 ; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
1438 ; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
1439 ; SANDY-SSE-NEXT: mwait # sched: [100:0.33]
1440 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1442 ; SANDY-LABEL: test_mwait:
1444 ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33]
1445 ; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
1446 ; SANDY-NEXT: mwait # sched: [100:0.33]
1447 ; SANDY-NEXT: retq # sched: [1:1.00]
1449 ; HASWELL-SSE-LABEL: test_mwait:
1450 ; HASWELL-SSE: # %bb.0:
1451 ; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1452 ; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1453 ; HASWELL-SSE-NEXT: mwait # sched: [20:2.50]
1454 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1456 ; HASWELL-LABEL: test_mwait:
1458 ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
1459 ; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
1460 ; HASWELL-NEXT: mwait # sched: [20:2.50]
1461 ; HASWELL-NEXT: retq # sched: [7:1.00]
1463 ; BROADWELL-SSE-LABEL: test_mwait:
1464 ; BROADWELL-SSE: # %bb.0:
1465 ; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1466 ; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1467 ; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25]
1468 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1470 ; BROADWELL-LABEL: test_mwait:
1471 ; BROADWELL: # %bb.0:
1472 ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
1473 ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
1474 ; BROADWELL-NEXT: mwait # sched: [100:0.25]
1475 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1477 ; SKYLAKE-SSE-LABEL: test_mwait:
1478 ; SKYLAKE-SSE: # %bb.0:
1479 ; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1480 ; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1481 ; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50]
1482 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1484 ; SKYLAKE-LABEL: test_mwait:
1486 ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25]
1487 ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1488 ; SKYLAKE-NEXT: mwait # sched: [20:2.50]
1489 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1491 ; SKX-SSE-LABEL: test_mwait:
1493 ; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1494 ; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1495 ; SKX-SSE-NEXT: mwait # sched: [20:2.50]
1496 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1498 ; SKX-LABEL: test_mwait:
1500 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
1501 ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
1502 ; SKX-NEXT: mwait # sched: [20:2.50]
1503 ; SKX-NEXT: retq # sched: [7:1.00]
1505 ; BDVER2-SSE-LABEL: test_mwait:
1506 ; BDVER2-SSE: # %bb.0:
1507 ; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
1508 ; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
1509 ; BDVER2-SSE-NEXT: mwait # sched: [100:0.50]
1510 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
1512 ; BDVER2-LABEL: test_mwait:
1514 ; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
1515 ; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
1516 ; BDVER2-NEXT: mwait # sched: [100:0.50]
1517 ; BDVER2-NEXT: retq # sched: [5:1.00]
1519 ; BTVER2-SSE-LABEL: test_mwait:
1520 ; BTVER2-SSE: # %bb.0:
1521 ; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
1522 ; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
1523 ; BTVER2-SSE-NEXT: mwait # sched: [100:0.50]
1524 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1526 ; BTVER2-LABEL: test_mwait:
1528 ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
1529 ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
1530 ; BTVER2-NEXT: mwait # sched: [100:0.50]
1531 ; BTVER2-NEXT: retq # sched: [4:1.00]
1533 ; ZNVER1-SSE-LABEL: test_mwait:
1534 ; ZNVER1-SSE: # %bb.0:
1535 ; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1536 ; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1537 ; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25]
1538 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1540 ; ZNVER1-LABEL: test_mwait:
1542 ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
1543 ; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
1544 ; ZNVER1-NEXT: mwait # sched: [100:0.25]
1545 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1546 tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
; Declaration of the intrinsic @llvm.x86.sse3.mwait used by test_mwait:
; takes (i32, i32) and returns void. It carries no memory attributes.
1549 declare void @llvm.x86.sse3.mwait(i32, i32)