1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
; test_addsubpd calls llvm.x86.sse3.addsub.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)addsubpd, each with its "# sched" annotation.
22 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
23 ; GENERIC-LABEL: test_addsubpd:
25 ; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
26 ; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
27 ; GENERIC-NEXT: retq # sched: [1:1.00]
29 ; ATOM-LABEL: test_addsubpd:
31 ; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00]
32 ; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50]
33 ; ATOM-NEXT: retq # sched: [79:39.50]
35 ; SLM-LABEL: test_addsubpd:
37 ; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
38 ; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00]
39 ; SLM-NEXT: retq # sched: [4:1.00]
41 ; SANDY-SSE-LABEL: test_addsubpd:
43 ; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
44 ; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
45 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
47 ; SANDY-LABEL: test_addsubpd:
49 ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
50 ; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
51 ; SANDY-NEXT: retq # sched: [1:1.00]
53 ; HASWELL-SSE-LABEL: test_addsubpd:
54 ; HASWELL-SSE: # %bb.0:
55 ; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
56 ; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
57 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
59 ; HASWELL-LABEL: test_addsubpd:
61 ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
62 ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
63 ; HASWELL-NEXT: retq # sched: [7:1.00]
65 ; BROADWELL-SSE-LABEL: test_addsubpd:
66 ; BROADWELL-SSE: # %bb.0:
67 ; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
68 ; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
69 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
71 ; BROADWELL-LABEL: test_addsubpd:
73 ; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
74 ; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
75 ; BROADWELL-NEXT: retq # sched: [7:1.00]
77 ; SKYLAKE-SSE-LABEL: test_addsubpd:
78 ; SKYLAKE-SSE: # %bb.0:
79 ; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
80 ; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
81 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
83 ; SKYLAKE-LABEL: test_addsubpd:
85 ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
86 ; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
87 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
89 ; SKX-SSE-LABEL: test_addsubpd:
91 ; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
92 ; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
93 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
95 ; SKX-LABEL: test_addsubpd:
97 ; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
98 ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
99 ; SKX-NEXT: retq # sched: [7:1.00]
101 ; BTVER2-SSE-LABEL: test_addsubpd:
102 ; BTVER2-SSE: # %bb.0:
103 ; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
104 ; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
105 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
107 ; BTVER2-LABEL: test_addsubpd:
109 ; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
110 ; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
111 ; BTVER2-NEXT: retq # sched: [4:1.00]
113 ; ZNVER1-SSE-LABEL: test_addsubpd:
114 ; ZNVER1-SSE: # %bb.0:
115 ; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
116 ; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
117 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
119 ; ZNVER1-LABEL: test_addsubpd:
121 ; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
122 ; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
123 ; ZNVER1-NEXT: retq # sched: [1:0.50]
124 %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
125 %2 = load <2 x double>, <2 x double> *%a2, align 16
126 %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
; Intrinsic called by test_addsubpd; takes two <2 x double> operands and
; returns a <2 x double>.
129 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
; test_addsubps calls llvm.x86.sse3.addsub.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)addsubps, each with its "# sched" annotation.
131 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
132 ; GENERIC-LABEL: test_addsubps:
134 ; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
135 ; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
136 ; GENERIC-NEXT: retq # sched: [1:1.00]
138 ; ATOM-LABEL: test_addsubps:
140 ; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00]
141 ; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00]
142 ; ATOM-NEXT: retq # sched: [79:39.50]
144 ; SLM-LABEL: test_addsubps:
146 ; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
147 ; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00]
148 ; SLM-NEXT: retq # sched: [4:1.00]
150 ; SANDY-SSE-LABEL: test_addsubps:
151 ; SANDY-SSE: # %bb.0:
152 ; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
153 ; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
154 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
156 ; SANDY-LABEL: test_addsubps:
158 ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
159 ; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
160 ; SANDY-NEXT: retq # sched: [1:1.00]
162 ; HASWELL-SSE-LABEL: test_addsubps:
163 ; HASWELL-SSE: # %bb.0:
164 ; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
165 ; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
166 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
168 ; HASWELL-LABEL: test_addsubps:
170 ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
171 ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
172 ; HASWELL-NEXT: retq # sched: [7:1.00]
174 ; BROADWELL-SSE-LABEL: test_addsubps:
175 ; BROADWELL-SSE: # %bb.0:
176 ; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
177 ; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
178 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
180 ; BROADWELL-LABEL: test_addsubps:
181 ; BROADWELL: # %bb.0:
182 ; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
183 ; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
184 ; BROADWELL-NEXT: retq # sched: [7:1.00]
186 ; SKYLAKE-SSE-LABEL: test_addsubps:
187 ; SKYLAKE-SSE: # %bb.0:
188 ; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
189 ; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
190 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
192 ; SKYLAKE-LABEL: test_addsubps:
194 ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
195 ; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
196 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
198 ; SKX-SSE-LABEL: test_addsubps:
200 ; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
201 ; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
202 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
204 ; SKX-LABEL: test_addsubps:
206 ; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
207 ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
208 ; SKX-NEXT: retq # sched: [7:1.00]
210 ; BTVER2-SSE-LABEL: test_addsubps:
211 ; BTVER2-SSE: # %bb.0:
212 ; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
213 ; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
214 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
216 ; BTVER2-LABEL: test_addsubps:
218 ; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
219 ; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
220 ; BTVER2-NEXT: retq # sched: [4:1.00]
222 ; ZNVER1-SSE-LABEL: test_addsubps:
223 ; ZNVER1-SSE: # %bb.0:
224 ; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
225 ; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
226 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
228 ; ZNVER1-LABEL: test_addsubps:
230 ; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
231 ; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
232 ; ZNVER1-NEXT: retq # sched: [1:0.50]
233 %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
234 %2 = load <4 x float>, <4 x float> *%a2, align 16
235 %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
; Intrinsic called by test_addsubps; takes two <4 x float> operands and
; returns a <4 x float>.
238 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
; test_haddpd calls llvm.x86.sse3.hadd.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)haddpd, each with its "# sched" annotation.
240 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
241 ; GENERIC-LABEL: test_haddpd:
243 ; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
244 ; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
245 ; GENERIC-NEXT: retq # sched: [1:1.00]
247 ; ATOM-LABEL: test_haddpd:
249 ; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00]
250 ; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50]
251 ; ATOM-NEXT: retq # sched: [79:39.50]
253 ; SLM-LABEL: test_haddpd:
255 ; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
256 ; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00]
257 ; SLM-NEXT: retq # sched: [4:1.00]
259 ; SANDY-SSE-LABEL: test_haddpd:
260 ; SANDY-SSE: # %bb.0:
261 ; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
262 ; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
263 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
265 ; SANDY-LABEL: test_haddpd:
267 ; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
268 ; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
269 ; SANDY-NEXT: retq # sched: [1:1.00]
271 ; HASWELL-SSE-LABEL: test_haddpd:
272 ; HASWELL-SSE: # %bb.0:
273 ; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
274 ; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
275 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
277 ; HASWELL-LABEL: test_haddpd:
279 ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
280 ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
281 ; HASWELL-NEXT: retq # sched: [7:1.00]
283 ; BROADWELL-SSE-LABEL: test_haddpd:
284 ; BROADWELL-SSE: # %bb.0:
285 ; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
286 ; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00]
287 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
289 ; BROADWELL-LABEL: test_haddpd:
290 ; BROADWELL: # %bb.0:
291 ; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
292 ; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
293 ; BROADWELL-NEXT: retq # sched: [7:1.00]
295 ; SKYLAKE-SSE-LABEL: test_haddpd:
296 ; SKYLAKE-SSE: # %bb.0:
297 ; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
298 ; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
299 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
301 ; SKYLAKE-LABEL: test_haddpd:
303 ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
304 ; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
305 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
307 ; SKX-SSE-LABEL: test_haddpd:
309 ; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
310 ; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
311 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
313 ; SKX-LABEL: test_haddpd:
315 ; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
316 ; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
317 ; SKX-NEXT: retq # sched: [7:1.00]
319 ; BTVER2-SSE-LABEL: test_haddpd:
320 ; BTVER2-SSE: # %bb.0:
321 ; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
322 ; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00]
323 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
325 ; BTVER2-LABEL: test_haddpd:
327 ; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
328 ; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
329 ; BTVER2-NEXT: retq # sched: [4:1.00]
331 ; ZNVER1-SSE-LABEL: test_haddpd:
332 ; ZNVER1-SSE: # %bb.0:
333 ; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:0.25]
334 ; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:0.25]
335 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
337 ; ZNVER1-LABEL: test_haddpd:
339 ; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
340 ; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
341 ; ZNVER1-NEXT: retq # sched: [1:0.50]
342 %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
343 %2 = load <2 x double>, <2 x double> *%a2, align 16
344 %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
; Intrinsic called by test_haddpd; takes two <2 x double> operands and
; returns a <2 x double>.
347 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
; test_haddps calls llvm.x86.sse3.hadd.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)haddps, each with its "# sched" annotation.
349 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
350 ; GENERIC-LABEL: test_haddps:
352 ; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
353 ; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
354 ; GENERIC-NEXT: retq # sched: [1:1.00]
356 ; ATOM-LABEL: test_haddps:
358 ; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00]
359 ; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50]
360 ; ATOM-NEXT: retq # sched: [79:39.50]
362 ; SLM-LABEL: test_haddps:
364 ; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
365 ; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00]
366 ; SLM-NEXT: retq # sched: [4:1.00]
368 ; SANDY-SSE-LABEL: test_haddps:
369 ; SANDY-SSE: # %bb.0:
370 ; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
371 ; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
372 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
374 ; SANDY-LABEL: test_haddps:
376 ; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
377 ; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
378 ; SANDY-NEXT: retq # sched: [1:1.00]
380 ; HASWELL-SSE-LABEL: test_haddps:
381 ; HASWELL-SSE: # %bb.0:
382 ; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
383 ; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
384 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
386 ; HASWELL-LABEL: test_haddps:
388 ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
389 ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
390 ; HASWELL-NEXT: retq # sched: [7:1.00]
392 ; BROADWELL-SSE-LABEL: test_haddps:
393 ; BROADWELL-SSE: # %bb.0:
394 ; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
395 ; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00]
396 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
398 ; BROADWELL-LABEL: test_haddps:
399 ; BROADWELL: # %bb.0:
400 ; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
401 ; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
402 ; BROADWELL-NEXT: retq # sched: [7:1.00]
404 ; SKYLAKE-SSE-LABEL: test_haddps:
405 ; SKYLAKE-SSE: # %bb.0:
406 ; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
407 ; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
408 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
410 ; SKYLAKE-LABEL: test_haddps:
412 ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
413 ; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
414 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
416 ; SKX-SSE-LABEL: test_haddps:
418 ; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
419 ; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
420 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
422 ; SKX-LABEL: test_haddps:
424 ; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
425 ; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
426 ; SKX-NEXT: retq # sched: [7:1.00]
428 ; BTVER2-SSE-LABEL: test_haddps:
429 ; BTVER2-SSE: # %bb.0:
430 ; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
431 ; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00]
432 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
434 ; BTVER2-LABEL: test_haddps:
436 ; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
437 ; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
438 ; BTVER2-NEXT: retq # sched: [4:1.00]
440 ; ZNVER1-SSE-LABEL: test_haddps:
441 ; ZNVER1-SSE: # %bb.0:
442 ; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:0.25]
443 ; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:0.25]
444 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
446 ; ZNVER1-LABEL: test_haddps:
448 ; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
449 ; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
450 ; ZNVER1-NEXT: retq # sched: [1:0.50]
451 %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
452 %2 = load <4 x float>, <4 x float> *%a2, align 16
453 %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
; Intrinsic called by test_haddps; takes two <4 x float> operands and
; returns a <4 x float>.
456 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
; test_hsubpd calls llvm.x86.sse3.hsub.pd twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)hsubpd, each with its "# sched" annotation.
458 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
459 ; GENERIC-LABEL: test_hsubpd:
461 ; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
462 ; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
463 ; GENERIC-NEXT: retq # sched: [1:1.00]
465 ; ATOM-LABEL: test_hsubpd:
467 ; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00]
468 ; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50]
469 ; ATOM-NEXT: retq # sched: [79:39.50]
471 ; SLM-LABEL: test_hsubpd:
473 ; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
474 ; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00]
475 ; SLM-NEXT: retq # sched: [4:1.00]
477 ; SANDY-SSE-LABEL: test_hsubpd:
478 ; SANDY-SSE: # %bb.0:
479 ; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
480 ; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
481 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
483 ; SANDY-LABEL: test_hsubpd:
485 ; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
486 ; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
487 ; SANDY-NEXT: retq # sched: [1:1.00]
489 ; HASWELL-SSE-LABEL: test_hsubpd:
490 ; HASWELL-SSE: # %bb.0:
491 ; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
492 ; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
493 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
495 ; HASWELL-LABEL: test_hsubpd:
497 ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
498 ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
499 ; HASWELL-NEXT: retq # sched: [7:1.00]
501 ; BROADWELL-SSE-LABEL: test_hsubpd:
502 ; BROADWELL-SSE: # %bb.0:
503 ; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
504 ; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00]
505 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
507 ; BROADWELL-LABEL: test_hsubpd:
508 ; BROADWELL: # %bb.0:
509 ; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
510 ; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
511 ; BROADWELL-NEXT: retq # sched: [7:1.00]
513 ; SKYLAKE-SSE-LABEL: test_hsubpd:
514 ; SKYLAKE-SSE: # %bb.0:
515 ; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
516 ; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
517 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
519 ; SKYLAKE-LABEL: test_hsubpd:
521 ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
522 ; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
523 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
525 ; SKX-SSE-LABEL: test_hsubpd:
527 ; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
528 ; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
529 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
531 ; SKX-LABEL: test_hsubpd:
533 ; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
534 ; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
535 ; SKX-NEXT: retq # sched: [7:1.00]
537 ; BTVER2-SSE-LABEL: test_hsubpd:
538 ; BTVER2-SSE: # %bb.0:
539 ; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
540 ; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00]
541 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
543 ; BTVER2-LABEL: test_hsubpd:
545 ; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
546 ; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
547 ; BTVER2-NEXT: retq # sched: [4:1.00]
549 ; ZNVER1-SSE-LABEL: test_hsubpd:
550 ; ZNVER1-SSE: # %bb.0:
551 ; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:0.25]
552 ; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:0.25]
553 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
555 ; ZNVER1-LABEL: test_hsubpd:
557 ; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
558 ; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
559 ; ZNVER1-NEXT: retq # sched: [1:0.50]
560 %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
561 %2 = load <2 x double>, <2 x double> *%a2, align 16
562 %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
; Intrinsic called by test_hsubpd; takes two <2 x double> operands and
; returns a <2 x double>.
565 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
; test_hsubps calls llvm.x86.sse3.hsub.ps twice: first on the two vector
; arguments, then on that result and a 16-byte-aligned load from %a2. The
; per-CPU assertions below therefore expect both the register-register and the
; memory-operand form of (v)hsubps, each with its "# sched" annotation.
567 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
568 ; GENERIC-LABEL: test_hsubps:
570 ; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
571 ; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
572 ; GENERIC-NEXT: retq # sched: [1:1.00]
574 ; ATOM-LABEL: test_hsubps:
576 ; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00]
577 ; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50]
578 ; ATOM-NEXT: retq # sched: [79:39.50]
580 ; SLM-LABEL: test_hsubps:
582 ; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
583 ; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00]
584 ; SLM-NEXT: retq # sched: [4:1.00]
586 ; SANDY-SSE-LABEL: test_hsubps:
587 ; SANDY-SSE: # %bb.0:
588 ; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
589 ; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
590 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
592 ; SANDY-LABEL: test_hsubps:
594 ; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
595 ; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
596 ; SANDY-NEXT: retq # sched: [1:1.00]
598 ; HASWELL-SSE-LABEL: test_hsubps:
599 ; HASWELL-SSE: # %bb.0:
600 ; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
601 ; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
602 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
604 ; HASWELL-LABEL: test_hsubps:
606 ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
607 ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
608 ; HASWELL-NEXT: retq # sched: [7:1.00]
610 ; BROADWELL-SSE-LABEL: test_hsubps:
611 ; BROADWELL-SSE: # %bb.0:
612 ; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
613 ; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00]
614 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
616 ; BROADWELL-LABEL: test_hsubps:
617 ; BROADWELL: # %bb.0:
618 ; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
619 ; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
620 ; BROADWELL-NEXT: retq # sched: [7:1.00]
622 ; SKYLAKE-SSE-LABEL: test_hsubps:
623 ; SKYLAKE-SSE: # %bb.0:
624 ; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
625 ; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
626 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
628 ; SKYLAKE-LABEL: test_hsubps:
630 ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
631 ; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
632 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
634 ; SKX-SSE-LABEL: test_hsubps:
636 ; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
637 ; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
638 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
640 ; SKX-LABEL: test_hsubps:
642 ; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
643 ; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
644 ; SKX-NEXT: retq # sched: [7:1.00]
646 ; BTVER2-SSE-LABEL: test_hsubps:
647 ; BTVER2-SSE: # %bb.0:
648 ; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
649 ; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00]
650 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
652 ; BTVER2-LABEL: test_hsubps:
654 ; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
655 ; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
656 ; BTVER2-NEXT: retq # sched: [4:1.00]
658 ; ZNVER1-SSE-LABEL: test_hsubps:
659 ; ZNVER1-SSE: # %bb.0:
660 ; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:0.25]
661 ; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:0.25]
662 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
664 ; ZNVER1-LABEL: test_hsubps:
666 ; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
667 ; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
668 ; ZNVER1-NEXT: retq # sched: [1:0.50]
669 %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
670 %2 = load <4 x float>, <4 x float> *%a2, align 16
671 %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
; Intrinsic called by test_hsubps; takes two <4 x float> operands and
; returns a <4 x float>.
674 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; test_lddqu calls llvm.x86.sse3.ldu.dq once on the pointer argument. The
; per-CPU assertions below expect a single (v)lddqu from (%rdi) with its
; "# sched" annotation; the Atom expectations additionally list two nops
; before the return.
676 define <16 x i8> @test_lddqu(i8* %a0) {
677 ; GENERIC-LABEL: test_lddqu:
679 ; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
680 ; GENERIC-NEXT: retq # sched: [1:1.00]
682 ; ATOM-LABEL: test_lddqu:
684 ; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50]
685 ; ATOM-NEXT: nop # sched: [1:0.50]
686 ; ATOM-NEXT: nop # sched: [1:0.50]
687 ; ATOM-NEXT: retq # sched: [79:39.50]
689 ; SLM-LABEL: test_lddqu:
691 ; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00]
692 ; SLM-NEXT: retq # sched: [4:1.00]
694 ; SANDY-SSE-LABEL: test_lddqu:
695 ; SANDY-SSE: # %bb.0:
696 ; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
697 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
699 ; SANDY-LABEL: test_lddqu:
701 ; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
702 ; SANDY-NEXT: retq # sched: [1:1.00]
704 ; HASWELL-SSE-LABEL: test_lddqu:
705 ; HASWELL-SSE: # %bb.0:
706 ; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
707 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
709 ; HASWELL-LABEL: test_lddqu:
711 ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
712 ; HASWELL-NEXT: retq # sched: [7:1.00]
714 ; BROADWELL-SSE-LABEL: test_lddqu:
715 ; BROADWELL-SSE: # %bb.0:
716 ; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
717 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
719 ; BROADWELL-LABEL: test_lddqu:
720 ; BROADWELL: # %bb.0:
721 ; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
722 ; BROADWELL-NEXT: retq # sched: [7:1.00]
724 ; SKYLAKE-SSE-LABEL: test_lddqu:
725 ; SKYLAKE-SSE: # %bb.0:
726 ; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
727 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
729 ; SKYLAKE-LABEL: test_lddqu:
731 ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
732 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
734 ; SKX-SSE-LABEL: test_lddqu:
736 ; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
737 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
739 ; SKX-LABEL: test_lddqu:
741 ; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
742 ; SKX-NEXT: retq # sched: [7:1.00]
744 ; BTVER2-SSE-LABEL: test_lddqu:
745 ; BTVER2-SSE: # %bb.0:
746 ; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
747 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
749 ; BTVER2-LABEL: test_lddqu:
751 ; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
752 ; BTVER2-NEXT: retq # sched: [4:1.00]
754 ; ZNVER1-SSE-LABEL: test_lddqu:
755 ; ZNVER1-SSE: # %bb.0:
756 ; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50]
757 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
759 ; ZNVER1-LABEL: test_lddqu:
761 ; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
762 ; ZNVER1-NEXT: retq # sched: [1:0.50]
763 %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
; Intrinsic called by test_lddqu; takes an i8* and returns a <16 x i8>.
; Declared readonly, unlike the readnone arithmetic intrinsics in this file.
766 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
; test_monitor: forwards %a0, %a1 and %a2 unchanged to the llvm.x86.sse3.monitor
; intrinsic. For each check prefix declared on the RUN lines, the autogenerated
; assertions below expect a leaq of (%rdi) into %rax, a movl of %esi into %ecx,
; then `monitor`, each carrying the `sched:` annotation printed under
; -print-schedule. Regenerate with utils/update_llc_test_checks.py rather than
; editing the assertion lines by hand.
768 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
769 ; GENERIC-LABEL: test_monitor:
771 ; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
772 ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
773 ; GENERIC-NEXT: monitor # sched: [100:0.33]
774 ; GENERIC-NEXT: retq # sched: [1:1.00]
776 ; ATOM-LABEL: test_monitor:
778 ; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
779 ; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50]
780 ; ATOM-NEXT: monitor # sched: [45:22.50]
781 ; ATOM-NEXT: retq # sched: [79:39.50]
783 ; SLM-LABEL: test_monitor:
785 ; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
786 ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50]
787 ; SLM-NEXT: monitor # sched: [100:1.00]
788 ; SLM-NEXT: retq # sched: [4:1.00]
790 ; SANDY-SSE-LABEL: test_monitor:
791 ; SANDY-SSE: # %bb.0:
792 ; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
793 ; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
794 ; SANDY-SSE-NEXT: monitor # sched: [100:0.33]
795 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
797 ; SANDY-LABEL: test_monitor:
799 ; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
800 ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33]
801 ; SANDY-NEXT: monitor # sched: [100:0.33]
802 ; SANDY-NEXT: retq # sched: [1:1.00]
804 ; HASWELL-SSE-LABEL: test_monitor:
805 ; HASWELL-SSE: # %bb.0:
806 ; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
807 ; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
808 ; HASWELL-SSE-NEXT: monitor # sched: [100:0.25]
809 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
811 ; HASWELL-LABEL: test_monitor:
813 ; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
814 ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
815 ; HASWELL-NEXT: monitor # sched: [100:0.25]
816 ; HASWELL-NEXT: retq # sched: [7:1.00]
818 ; BROADWELL-SSE-LABEL: test_monitor:
819 ; BROADWELL-SSE: # %bb.0:
820 ; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
821 ; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
822 ; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25]
823 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
825 ; BROADWELL-LABEL: test_monitor:
826 ; BROADWELL: # %bb.0:
827 ; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
828 ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
829 ; BROADWELL-NEXT: monitor # sched: [100:0.25]
830 ; BROADWELL-NEXT: retq # sched: [7:1.00]
832 ; SKYLAKE-SSE-LABEL: test_monitor:
833 ; SKYLAKE-SSE: # %bb.0:
834 ; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
835 ; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
836 ; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25]
837 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
839 ; SKYLAKE-LABEL: test_monitor:
841 ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
842 ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25]
843 ; SKYLAKE-NEXT: monitor # sched: [100:0.25]
844 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
846 ; SKX-SSE-LABEL: test_monitor:
848 ; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
849 ; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
850 ; SKX-SSE-NEXT: monitor # sched: [100:0.25]
851 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
853 ; SKX-LABEL: test_monitor:
855 ; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
856 ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
857 ; SKX-NEXT: monitor # sched: [100:0.25]
858 ; SKX-NEXT: retq # sched: [7:1.00]
860 ; BTVER2-SSE-LABEL: test_monitor:
861 ; BTVER2-SSE: # %bb.0:
862 ; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
863 ; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
864 ; BTVER2-SSE-NEXT: monitor # sched: [100:0.50]
865 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
867 ; BTVER2-LABEL: test_monitor:
869 ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
870 ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
871 ; BTVER2-NEXT: monitor # sched: [100:0.50]
872 ; BTVER2-NEXT: retq # sched: [4:1.00]
874 ; ZNVER1-SSE-LABEL: test_monitor:
875 ; ZNVER1-SSE: # %bb.0:
876 ; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
877 ; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
878 ; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25]
879 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
881 ; ZNVER1-LABEL: test_monitor:
883 ; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
884 ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
885 ; ZNVER1-NEXT: monitor # sched: [100:0.25]
886 ; ZNVER1-NEXT: retq # sched: [1:0.50]
887 tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
890 declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
; test_movddup: splats element 0 of %a0 (shufflevector with a zeroinitializer
; mask), does the same to a 16-byte-aligned <2 x double> load from %a1, and
; subtracts the register splat from the memory splat. The autogenerated
; assertions below expect one register-form and one memory-form movddup (or
; vmovddup) followed by subpd (or vsubpd) for each check prefix, each with its
; `sched:` annotation. Instruction order and register assignment differ between
; prefixes, so regenerate with utils/update_llc_test_checks.py instead of
; hand-editing.
892 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
893 ; GENERIC-LABEL: test_movddup:
895 ; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
896 ; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
897 ; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
898 ; GENERIC-NEXT: retq # sched: [1:1.00]
900 ; ATOM-LABEL: test_movddup:
902 ; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
903 ; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
904 ; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
905 ; ATOM-NEXT: retq # sched: [79:39.50]
907 ; SLM-LABEL: test_movddup:
909 ; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
910 ; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
911 ; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00]
912 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
913 ; SLM-NEXT: retq # sched: [4:1.00]
915 ; SANDY-SSE-LABEL: test_movddup:
916 ; SANDY-SSE: # %bb.0:
917 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
918 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
919 ; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
920 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
922 ; SANDY-LABEL: test_movddup:
924 ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
925 ; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
926 ; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
927 ; SANDY-NEXT: retq # sched: [1:1.00]
929 ; HASWELL-SSE-LABEL: test_movddup:
930 ; HASWELL-SSE: # %bb.0:
931 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
932 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
933 ; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
934 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
936 ; HASWELL-LABEL: test_movddup:
938 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
939 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
940 ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
941 ; HASWELL-NEXT: retq # sched: [7:1.00]
943 ; BROADWELL-SSE-LABEL: test_movddup:
944 ; BROADWELL-SSE: # %bb.0:
945 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
946 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
947 ; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
948 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
950 ; BROADWELL-LABEL: test_movddup:
951 ; BROADWELL: # %bb.0:
952 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
953 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
954 ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
955 ; BROADWELL-NEXT: retq # sched: [7:1.00]
957 ; SKYLAKE-SSE-LABEL: test_movddup:
958 ; SKYLAKE-SSE: # %bb.0:
959 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
960 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
961 ; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
962 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
964 ; SKYLAKE-LABEL: test_movddup:
966 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
967 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
968 ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
969 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
971 ; SKX-SSE-LABEL: test_movddup:
973 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
974 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
975 ; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
976 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
978 ; SKX-LABEL: test_movddup:
980 ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
981 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
982 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
983 ; SKX-NEXT: retq # sched: [7:1.00]
985 ; BTVER2-SSE-LABEL: test_movddup:
986 ; BTVER2-SSE: # %bb.0:
987 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
988 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
989 ; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
990 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
992 ; BTVER2-LABEL: test_movddup:
994 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
995 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
996 ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
997 ; BTVER2-NEXT: retq # sched: [4:1.00]
999 ; ZNVER1-SSE-LABEL: test_movddup:
1000 ; ZNVER1-SSE: # %bb.0:
1001 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1002 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
1003 ; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
1004 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1006 ; ZNVER1-LABEL: test_movddup:
1008 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
1009 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1010 ; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1011 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1012 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
1013 %2 = load <2 x double>, <2 x double> *%a1, align 16
1014 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
1015 %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
; test_movshdup: duplicates the odd lanes of %a0 with the shuffle mask
; <1, 1, 3, 3>, applies the same shuffle to a 16-byte-aligned <4 x float> load
; from %a1, and adds the two results. The autogenerated assertions below expect
; one register-form and one memory-form movshdup (or vmovshdup) followed by
; addps (or vaddps) for each check prefix, each with its `sched:` annotation.
; Regenerate with utils/update_llc_test_checks.py instead of hand-editing.
1019 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
1020 ; GENERIC-LABEL: test_movshdup:
1022 ; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1023 ; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1024 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1025 ; GENERIC-NEXT: retq # sched: [1:1.00]
1027 ; ATOM-LABEL: test_movshdup:
1029 ; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1030 ; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
1031 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1032 ; ATOM-NEXT: retq # sched: [79:39.50]
1034 ; SLM-LABEL: test_movshdup:
1036 ; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
1037 ; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1038 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1039 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
1040 ; SLM-NEXT: retq # sched: [4:1.00]
1042 ; SANDY-SSE-LABEL: test_movshdup:
1043 ; SANDY-SSE: # %bb.0:
1044 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1045 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1046 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1047 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1049 ; SANDY-LABEL: test_movshdup:
1051 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1052 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1053 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1054 ; SANDY-NEXT: retq # sched: [1:1.00]
1056 ; HASWELL-SSE-LABEL: test_movshdup:
1057 ; HASWELL-SSE: # %bb.0:
1058 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1059 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1060 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1061 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1063 ; HASWELL-LABEL: test_movshdup:
1065 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1066 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1067 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1068 ; HASWELL-NEXT: retq # sched: [7:1.00]
1070 ; BROADWELL-SSE-LABEL: test_movshdup:
1071 ; BROADWELL-SSE: # %bb.0:
1072 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1073 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
1074 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1075 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1077 ; BROADWELL-LABEL: test_movshdup:
1078 ; BROADWELL: # %bb.0:
1079 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1080 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
1081 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1082 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1084 ; SKYLAKE-SSE-LABEL: test_movshdup:
1085 ; SKYLAKE-SSE: # %bb.0:
1086 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1087 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1088 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1089 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1091 ; SKYLAKE-LABEL: test_movshdup:
1093 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1094 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1095 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1096 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1098 ; SKX-SSE-LABEL: test_movshdup:
1100 ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1101 ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1102 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1103 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1105 ; SKX-LABEL: test_movshdup:
1107 ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1108 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1109 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1110 ; SKX-NEXT: retq # sched: [7:1.00]
1112 ; BTVER2-SSE-LABEL: test_movshdup:
1113 ; BTVER2-SSE: # %bb.0:
1114 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1115 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
1116 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1117 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1119 ; BTVER2-LABEL: test_movshdup:
1121 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
1122 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1123 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1124 ; BTVER2-NEXT: retq # sched: [4:1.00]
1126 ; ZNVER1-SSE-LABEL: test_movshdup:
1127 ; ZNVER1-SSE: # %bb.0:
1128 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1129 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
1130 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1131 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1133 ; ZNVER1-LABEL: test_movshdup:
1135 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
1136 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1137 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1138 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1139 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1140 %2 = load <4 x float>, <4 x float> *%a1, align 16
1141 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1142 %4 = fadd <4 x float> %1, %3
; test_movsldup: duplicates the even lanes of %a0 with the shuffle mask
; <0, 0, 2, 2>, applies the same shuffle to a 16-byte-aligned <4 x float> load
; from %a1, and adds the two results. The autogenerated assertions below expect
; one register-form and one memory-form movsldup (or vmovsldup) followed by
; addps (or vaddps) for each check prefix, each with its `sched:` annotation.
; Regenerate with utils/update_llc_test_checks.py instead of hand-editing.
;
; NOTE(review): under the ZNVER1-SSE prefix both movsldup forms are annotated
; [100:0.25], whereas the ZNVER1 (AVX) lines in this function and the ZNVER1-SSE
; movshdup lines in test_movshdup show [1:0.50] for the register form and
; [8:0.50] for the memory form. This may reflect the znver1 scheduling model
; rather than a test error; confirm against the model before changing anything.
1146 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
1147 ; GENERIC-LABEL: test_movsldup:
1149 ; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1150 ; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1151 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1152 ; GENERIC-NEXT: retq # sched: [1:1.00]
1154 ; ATOM-LABEL: test_movsldup:
1156 ; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1157 ; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
1158 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1159 ; ATOM-NEXT: retq # sched: [79:39.50]
1161 ; SLM-LABEL: test_movsldup:
1163 ; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
1164 ; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1165 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1166 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
1167 ; SLM-NEXT: retq # sched: [4:1.00]
1169 ; SANDY-SSE-LABEL: test_movsldup:
1170 ; SANDY-SSE: # %bb.0:
1171 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1172 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1173 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1174 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1176 ; SANDY-LABEL: test_movsldup:
1178 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1179 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1180 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1181 ; SANDY-NEXT: retq # sched: [1:1.00]
1183 ; HASWELL-SSE-LABEL: test_movsldup:
1184 ; HASWELL-SSE: # %bb.0:
1185 ; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1186 ; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1187 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1188 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1190 ; HASWELL-LABEL: test_movsldup:
1192 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1193 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1194 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1195 ; HASWELL-NEXT: retq # sched: [7:1.00]
1197 ; BROADWELL-SSE-LABEL: test_movsldup:
1198 ; BROADWELL-SSE: # %bb.0:
1199 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1200 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
1201 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1202 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1204 ; BROADWELL-LABEL: test_movsldup:
1205 ; BROADWELL: # %bb.0:
1206 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1207 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
1208 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1209 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1211 ; SKYLAKE-SSE-LABEL: test_movsldup:
1212 ; SKYLAKE-SSE: # %bb.0:
1213 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1214 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1215 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1216 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1218 ; SKYLAKE-LABEL: test_movsldup:
1220 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1221 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1222 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1223 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1225 ; SKX-SSE-LABEL: test_movsldup:
1227 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1228 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1229 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
1230 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1232 ; SKX-LABEL: test_movsldup:
1234 ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1235 ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1236 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1237 ; SKX-NEXT: retq # sched: [7:1.00]
1239 ; BTVER2-SSE-LABEL: test_movsldup:
1240 ; BTVER2-SSE: # %bb.0:
1241 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
1242 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
1243 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1244 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1246 ; BTVER2-LABEL: test_movsldup:
1248 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
1249 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1250 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1251 ; BTVER2-NEXT: retq # sched: [4:1.00]
1253 ; ZNVER1-SSE-LABEL: test_movsldup:
1254 ; ZNVER1-SSE: # %bb.0:
1255 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
1256 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
1257 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1258 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1260 ; ZNVER1-LABEL: test_movsldup:
1262 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
1263 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1264 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1265 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1266 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1267 %2 = load <4 x float>, <4 x float> *%a1, align 16
1268 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1269 %4 = fadd <4 x float> %1, %3
; test_mwait: forwards %a0 and %a1 unchanged to the llvm.x86.sse3.mwait
; intrinsic. For each check prefix declared on the RUN lines, the autogenerated
; assertions below expect a movl of %edi into %ecx, a movl of %esi into %eax,
; then `mwait`, each carrying the `sched:` annotation printed under
; -print-schedule. Regenerate with utils/update_llc_test_checks.py rather than
; editing the assertion lines by hand.
1273 define void @test_mwait(i32 %a0, i32 %a1) {
1274 ; GENERIC-LABEL: test_mwait:
1276 ; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
1277 ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
1278 ; GENERIC-NEXT: mwait # sched: [100:0.33]
1279 ; GENERIC-NEXT: retq # sched: [1:1.00]
1281 ; ATOM-LABEL: test_mwait:
1283 ; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50]
1284 ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50]
1285 ; ATOM-NEXT: mwait # sched: [46:23.00]
1286 ; ATOM-NEXT: retq # sched: [79:39.50]
1288 ; SLM-LABEL: test_mwait:
1290 ; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50]
1291 ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
1292 ; SLM-NEXT: mwait # sched: [100:1.00]
1293 ; SLM-NEXT: retq # sched: [4:1.00]
1295 ; SANDY-SSE-LABEL: test_mwait:
1296 ; SANDY-SSE: # %bb.0:
1297 ; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
1298 ; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
1299 ; SANDY-SSE-NEXT: mwait # sched: [100:0.33]
1300 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
1302 ; SANDY-LABEL: test_mwait:
1304 ; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
1305 ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33]
1306 ; SANDY-NEXT: mwait # sched: [100:0.33]
1307 ; SANDY-NEXT: retq # sched: [1:1.00]
1309 ; HASWELL-SSE-LABEL: test_mwait:
1310 ; HASWELL-SSE: # %bb.0:
1311 ; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1312 ; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1313 ; HASWELL-SSE-NEXT: mwait # sched: [20:2.50]
1314 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
1316 ; HASWELL-LABEL: test_mwait:
1318 ; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
1319 ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
1320 ; HASWELL-NEXT: mwait # sched: [20:2.50]
1321 ; HASWELL-NEXT: retq # sched: [7:1.00]
1323 ; BROADWELL-SSE-LABEL: test_mwait:
1324 ; BROADWELL-SSE: # %bb.0:
1325 ; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1326 ; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1327 ; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25]
1328 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
1330 ; BROADWELL-LABEL: test_mwait:
1331 ; BROADWELL: # %bb.0:
1332 ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
1333 ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
1334 ; BROADWELL-NEXT: mwait # sched: [100:0.25]
1335 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1337 ; SKYLAKE-SSE-LABEL: test_mwait:
1338 ; SKYLAKE-SSE: # %bb.0:
1339 ; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1340 ; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1341 ; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50]
1342 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
1344 ; SKYLAKE-LABEL: test_mwait:
1346 ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1347 ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25]
1348 ; SKYLAKE-NEXT: mwait # sched: [20:2.50]
1349 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1351 ; SKX-SSE-LABEL: test_mwait:
1353 ; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1354 ; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1355 ; SKX-SSE-NEXT: mwait # sched: [20:2.50]
1356 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
1358 ; SKX-LABEL: test_mwait:
1360 ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
1361 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
1362 ; SKX-NEXT: mwait # sched: [20:2.50]
1363 ; SKX-NEXT: retq # sched: [7:1.00]
1365 ; BTVER2-SSE-LABEL: test_mwait:
1366 ; BTVER2-SSE: # %bb.0:
1367 ; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
1368 ; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
1369 ; BTVER2-SSE-NEXT: mwait # sched: [100:0.50]
1370 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
1372 ; BTVER2-LABEL: test_mwait:
1374 ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
1375 ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
1376 ; BTVER2-NEXT: mwait # sched: [100:0.50]
1377 ; BTVER2-NEXT: retq # sched: [4:1.00]
1379 ; ZNVER1-SSE-LABEL: test_mwait:
1380 ; ZNVER1-SSE: # %bb.0:
1381 ; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
1382 ; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
1383 ; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25]
1384 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
1386 ; ZNVER1-LABEL: test_mwait:
1388 ; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
1389 ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
1390 ; ZNVER1-NEXT: mwait # sched: [100:0.25]
1391 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1392 tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
1395 declare void @llvm.x86.sse3.mwait(i32, i32)