CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - test/CodeGen/X86/sse3-schedule.ll
Vendor import of llvm trunk r351319 (just before the release_80 branch)
[FreeBSD/FreeBSD.git] / test / CodeGen / X86 / sse3-schedule.ll
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,HASWELL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BROADWELL
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKYLAKE
15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKX
17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BDVER2
19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BTVER2
21 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
22 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,ZNVER1
23
24 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
25 ; GENERIC-LABEL: test_addsubpd:
26 ; GENERIC:       # %bb.0:
27 ; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
28 ; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
29 ; GENERIC-NEXT:    retq # sched: [1:1.00]
30 ;
31 ; ATOM-LABEL: test_addsubpd:
32 ; ATOM:       # %bb.0:
33 ; ATOM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [6:3.00]
34 ; ATOM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [7:3.50]
35 ; ATOM-NEXT:    retq # sched: [79:39.50]
36 ;
37 ; SLM-LABEL: test_addsubpd:
38 ; SLM:       # %bb.0:
39 ; SLM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
40 ; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
41 ; SLM-NEXT:    retq # sched: [4:1.00]
42 ;
43 ; SANDY-SSE-LABEL: test_addsubpd:
44 ; SANDY-SSE:       # %bb.0:
45 ; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
46 ; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
47 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
48 ;
49 ; SANDY-LABEL: test_addsubpd:
50 ; SANDY:       # %bb.0:
51 ; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
52 ; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
53 ; SANDY-NEXT:    retq # sched: [1:1.00]
54 ;
55 ; HASWELL-SSE-LABEL: test_addsubpd:
56 ; HASWELL-SSE:       # %bb.0:
57 ; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
58 ; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
59 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
60 ;
61 ; HASWELL-LABEL: test_addsubpd:
62 ; HASWELL:       # %bb.0:
63 ; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
64 ; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
65 ; HASWELL-NEXT:    retq # sched: [7:1.00]
66 ;
67 ; BROADWELL-SSE-LABEL: test_addsubpd:
68 ; BROADWELL-SSE:       # %bb.0:
69 ; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
70 ; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
71 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
72 ;
73 ; BROADWELL-LABEL: test_addsubpd:
74 ; BROADWELL:       # %bb.0:
75 ; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
76 ; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
77 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
78 ;
79 ; SKYLAKE-SSE-LABEL: test_addsubpd:
80 ; SKYLAKE-SSE:       # %bb.0:
81 ; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
82 ; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
83 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
84 ;
85 ; SKYLAKE-LABEL: test_addsubpd:
86 ; SKYLAKE:       # %bb.0:
87 ; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
88 ; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
89 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
90 ;
91 ; SKX-SSE-LABEL: test_addsubpd:
92 ; SKX-SSE:       # %bb.0:
93 ; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
94 ; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
95 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
96 ;
97 ; SKX-LABEL: test_addsubpd:
98 ; SKX:       # %bb.0:
99 ; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
100 ; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
101 ; SKX-NEXT:    retq # sched: [7:1.00]
102 ;
103 ; BDVER2-SSE-LABEL: test_addsubpd:
104 ; BDVER2-SSE:       # %bb.0:
105 ; BDVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [5:1.00]
106 ; BDVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
107 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
108 ;
109 ; BDVER2-LABEL: test_addsubpd:
110 ; BDVER2:       # %bb.0:
111 ; BDVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
112 ; BDVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
113 ; BDVER2-NEXT:    retq # sched: [5:1.00]
114 ;
115 ; BTVER2-SSE-LABEL: test_addsubpd:
116 ; BTVER2-SSE:       # %bb.0:
117 ; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
118 ; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
119 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
120 ;
121 ; BTVER2-LABEL: test_addsubpd:
122 ; BTVER2:       # %bb.0:
123 ; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
124 ; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
125 ; BTVER2-NEXT:    retq # sched: [4:1.00]
126 ;
127 ; ZNVER1-SSE-LABEL: test_addsubpd:
128 ; ZNVER1-SSE:       # %bb.0:
129 ; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
130 ; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
131 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
132 ;
133 ; ZNVER1-LABEL: test_addsubpd:
134 ; ZNVER1:       # %bb.0:
135 ; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
136 ; ZNVER1-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
137 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
138   %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
139   %2 = load <2 x double>, <2 x double> *%a2, align 16
140   %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
141   ret <2 x double> %3
142 }
143 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
144
; Scheduling-annotation test for the SSE3 addsub.ps intrinsic.
;
; The body calls @llvm.x86.sse3.addsub.ps twice: first on the two by-value
; vector arguments, then on that result and a <4 x float> loaded from %a2.
; The expected output for every RUN configuration therefore holds one
; register-register and one register-memory instruction, each followed by
; the "# sched: [..]" annotation printed because of -print-schedule.
;
; The x86-64, atom and slm runs and every -mattr=-ssse3 run expect the
; two-operand addsubps; the -mattr=-avx2 runs expect the three-operand
; vaddsubps.
;
; The assertion comments are autogenerated (see the note on the first line of
; the test); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
145 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
146 ; GENERIC-LABEL: test_addsubps:
147 ; GENERIC:       # %bb.0:
148 ; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
149 ; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
150 ; GENERIC-NEXT:    retq # sched: [1:1.00]
151 ;
152 ; ATOM-LABEL: test_addsubps:
153 ; ATOM:       # %bb.0:
154 ; ATOM-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:5.00]
155 ; ATOM-NEXT:    addsubps (%rdi), %xmm0 # sched: [5:5.00]
156 ; ATOM-NEXT:    retq # sched: [79:39.50]
157 ;
158 ; SLM-LABEL: test_addsubps:
159 ; SLM:       # %bb.0:
160 ; SLM-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
161 ; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
162 ; SLM-NEXT:    retq # sched: [4:1.00]
163 ;
164 ; SANDY-SSE-LABEL: test_addsubps:
165 ; SANDY-SSE:       # %bb.0:
166 ; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
167 ; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
168 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
169 ;
170 ; SANDY-LABEL: test_addsubps:
171 ; SANDY:       # %bb.0:
172 ; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
173 ; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
174 ; SANDY-NEXT:    retq # sched: [1:1.00]
175 ;
176 ; HASWELL-SSE-LABEL: test_addsubps:
177 ; HASWELL-SSE:       # %bb.0:
178 ; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
179 ; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
180 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
181 ;
182 ; HASWELL-LABEL: test_addsubps:
183 ; HASWELL:       # %bb.0:
184 ; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
185 ; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
186 ; HASWELL-NEXT:    retq # sched: [7:1.00]
187 ;
188 ; BROADWELL-SSE-LABEL: test_addsubps:
189 ; BROADWELL-SSE:       # %bb.0:
190 ; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
191 ; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
192 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
193 ;
194 ; BROADWELL-LABEL: test_addsubps:
195 ; BROADWELL:       # %bb.0:
196 ; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
197 ; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
198 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
199 ;
200 ; SKYLAKE-SSE-LABEL: test_addsubps:
201 ; SKYLAKE-SSE:       # %bb.0:
202 ; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
203 ; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
204 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
205 ;
206 ; SKYLAKE-LABEL: test_addsubps:
207 ; SKYLAKE:       # %bb.0:
208 ; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
209 ; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
210 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
211 ;
212 ; SKX-SSE-LABEL: test_addsubps:
213 ; SKX-SSE:       # %bb.0:
214 ; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
215 ; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
216 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
217 ;
218 ; SKX-LABEL: test_addsubps:
219 ; SKX:       # %bb.0:
220 ; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
221 ; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
222 ; SKX-NEXT:    retq # sched: [7:1.00]
223 ;
224 ; BDVER2-SSE-LABEL: test_addsubps:
225 ; BDVER2-SSE:       # %bb.0:
226 ; BDVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:1.00]
227 ; BDVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
228 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
229 ;
230 ; BDVER2-LABEL: test_addsubps:
231 ; BDVER2:       # %bb.0:
232 ; BDVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
233 ; BDVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
234 ; BDVER2-NEXT:    retq # sched: [5:1.00]
235 ;
236 ; BTVER2-SSE-LABEL: test_addsubps:
237 ; BTVER2-SSE:       # %bb.0:
238 ; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
239 ; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
240 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
241 ;
242 ; BTVER2-LABEL: test_addsubps:
243 ; BTVER2:       # %bb.0:
244 ; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
245 ; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
246 ; BTVER2-NEXT:    retq # sched: [4:1.00]
247 ;
248 ; ZNVER1-SSE-LABEL: test_addsubps:
249 ; ZNVER1-SSE:       # %bb.0:
250 ; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
251 ; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
252 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
253 ;
254 ; ZNVER1-LABEL: test_addsubps:
255 ; ZNVER1:       # %bb.0:
256 ; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
257 ; ZNVER1-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
258 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First call: both operands are the by-value arguments (register form).
259   %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
; 16-byte-aligned load whose result is the second operand of the next call;
; the expected output shows it as the (%rdi) memory operand.
260   %2 = load <4 x float>, <4 x float> *%a2, align 16
261   %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
262   ret <4 x float> %3
263 }
; Declaration of the intrinsic called by the function above.
264 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
265
; Scheduling-annotation test for the SSE3 hadd.pd intrinsic.
;
; The body calls @llvm.x86.sse3.hadd.pd twice: first on the two by-value
; vector arguments, then on that result and a <2 x double> loaded from %a2.
; The expected output for every RUN configuration therefore holds one
; register-register and one register-memory instruction, each followed by
; the "# sched: [..]" annotation printed because of -print-schedule.
;
; The x86-64, atom and slm runs and every -mattr=-ssse3 run expect the
; two-operand haddpd; the -mattr=-avx2 runs expect the three-operand
; vhaddpd.
;
; The assertion comments are autogenerated (see the note on the first line of
; the test); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
266 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
267 ; GENERIC-LABEL: test_haddpd:
268 ; GENERIC:       # %bb.0:
269 ; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
270 ; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
271 ; GENERIC-NEXT:    retq # sched: [1:1.00]
272 ;
273 ; ATOM-LABEL: test_haddpd:
274 ; ATOM:       # %bb.0:
275 ; ATOM-NEXT:    haddpd %xmm1, %xmm0 # sched: [8:4.00]
276 ; ATOM-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:4.50]
277 ; ATOM-NEXT:    retq # sched: [79:39.50]
278 ;
279 ; SLM-LABEL: test_haddpd:
280 ; SLM:       # %bb.0:
281 ; SLM-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
282 ; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
283 ; SLM-NEXT:    retq # sched: [4:1.00]
284 ;
285 ; SANDY-SSE-LABEL: test_haddpd:
286 ; SANDY-SSE:       # %bb.0:
287 ; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
288 ; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
289 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
290 ;
291 ; SANDY-LABEL: test_haddpd:
292 ; SANDY:       # %bb.0:
293 ; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
294 ; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
295 ; SANDY-NEXT:    retq # sched: [1:1.00]
296 ;
297 ; HASWELL-SSE-LABEL: test_haddpd:
298 ; HASWELL-SSE:       # %bb.0:
299 ; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
300 ; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
301 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
302 ;
303 ; HASWELL-LABEL: test_haddpd:
304 ; HASWELL:       # %bb.0:
305 ; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
306 ; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
307 ; HASWELL-NEXT:    retq # sched: [7:1.00]
308 ;
309 ; BROADWELL-SSE-LABEL: test_haddpd:
310 ; BROADWELL-SSE:       # %bb.0:
311 ; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
312 ; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
313 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
314 ;
315 ; BROADWELL-LABEL: test_haddpd:
316 ; BROADWELL:       # %bb.0:
317 ; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
318 ; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
319 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
320 ;
321 ; SKYLAKE-SSE-LABEL: test_haddpd:
322 ; SKYLAKE-SSE:       # %bb.0:
323 ; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
324 ; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
325 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
326 ;
327 ; SKYLAKE-LABEL: test_haddpd:
328 ; SKYLAKE:       # %bb.0:
329 ; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
330 ; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
331 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
332 ;
333 ; SKX-SSE-LABEL: test_haddpd:
334 ; SKX-SSE:       # %bb.0:
335 ; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
336 ; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
337 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
338 ;
339 ; SKX-LABEL: test_haddpd:
340 ; SKX:       # %bb.0:
341 ; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
342 ; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
343 ; SKX-NEXT:    retq # sched: [7:1.00]
344 ;
345 ; BDVER2-SSE-LABEL: test_haddpd:
346 ; BDVER2-SSE:       # %bb.0:
347 ; BDVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [11:1.00]
348 ; BDVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [16:1.00]
349 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
350 ;
351 ; BDVER2-LABEL: test_haddpd:
352 ; BDVER2:       # %bb.0:
353 ; BDVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
354 ; BDVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
355 ; BDVER2-NEXT:    retq # sched: [5:1.00]
356 ;
357 ; BTVER2-SSE-LABEL: test_haddpd:
358 ; BTVER2-SSE:       # %bb.0:
359 ; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
360 ; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [8:1.00]
361 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
362 ;
363 ; BTVER2-LABEL: test_haddpd:
364 ; BTVER2:       # %bb.0:
365 ; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
366 ; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
367 ; BTVER2-NEXT:    retq # sched: [4:1.00]
368 ;
369 ; ZNVER1-SSE-LABEL: test_haddpd:
370 ; ZNVER1-SSE:       # %bb.0:
371 ; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:0.25]
372 ; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:0.25]
373 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
374 ;
375 ; ZNVER1-LABEL: test_haddpd:
376 ; ZNVER1:       # %bb.0:
377 ; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
378 ; ZNVER1-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
379 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First call: both operands are the by-value arguments (register form).
380   %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
; 16-byte-aligned load whose result is the second operand of the next call;
; the expected output shows it as the (%rdi) memory operand.
381   %2 = load <2 x double>, <2 x double> *%a2, align 16
382   %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
383   ret <2 x double> %3
384 }
; Declaration of the intrinsic called by the function above.
385 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
386
; Scheduling-annotation test for the SSE3 hadd.ps intrinsic.
;
; The body calls @llvm.x86.sse3.hadd.ps twice: first on the two by-value
; vector arguments, then on that result and a <4 x float> loaded from %a2.
; The expected output for every RUN configuration therefore holds one
; register-register and one register-memory instruction, each followed by
; the "# sched: [..]" annotation printed because of -print-schedule.
;
; The x86-64, atom and slm runs and every -mattr=-ssse3 run expect the
; two-operand haddps; the -mattr=-avx2 runs expect the three-operand
; vhaddps.
;
; The assertion comments are autogenerated (see the note on the first line of
; the test); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
387 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
388 ; GENERIC-LABEL: test_haddps:
389 ; GENERIC:       # %bb.0:
390 ; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
391 ; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
392 ; GENERIC-NEXT:    retq # sched: [1:1.00]
393 ;
394 ; ATOM-LABEL: test_haddps:
395 ; ATOM:       # %bb.0:
396 ; ATOM-NEXT:    haddps %xmm1, %xmm0 # sched: [8:4.00]
397 ; ATOM-NEXT:    haddps (%rdi), %xmm0 # sched: [9:4.50]
398 ; ATOM-NEXT:    retq # sched: [79:39.50]
399 ;
400 ; SLM-LABEL: test_haddps:
401 ; SLM:       # %bb.0:
402 ; SLM-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
403 ; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
404 ; SLM-NEXT:    retq # sched: [4:1.00]
405 ;
406 ; SANDY-SSE-LABEL: test_haddps:
407 ; SANDY-SSE:       # %bb.0:
408 ; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
409 ; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
410 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
411 ;
412 ; SANDY-LABEL: test_haddps:
413 ; SANDY:       # %bb.0:
414 ; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
415 ; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
416 ; SANDY-NEXT:    retq # sched: [1:1.00]
417 ;
418 ; HASWELL-SSE-LABEL: test_haddps:
419 ; HASWELL-SSE:       # %bb.0:
420 ; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
421 ; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
422 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
423 ;
424 ; HASWELL-LABEL: test_haddps:
425 ; HASWELL:       # %bb.0:
426 ; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
427 ; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
428 ; HASWELL-NEXT:    retq # sched: [7:1.00]
429 ;
430 ; BROADWELL-SSE-LABEL: test_haddps:
431 ; BROADWELL-SSE:       # %bb.0:
432 ; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
433 ; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
434 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
435 ;
436 ; BROADWELL-LABEL: test_haddps:
437 ; BROADWELL:       # %bb.0:
438 ; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
439 ; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
440 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
441 ;
442 ; SKYLAKE-SSE-LABEL: test_haddps:
443 ; SKYLAKE-SSE:       # %bb.0:
444 ; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
445 ; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
446 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
447 ;
448 ; SKYLAKE-LABEL: test_haddps:
449 ; SKYLAKE:       # %bb.0:
450 ; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
451 ; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
452 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
453 ;
454 ; SKX-SSE-LABEL: test_haddps:
455 ; SKX-SSE:       # %bb.0:
456 ; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
457 ; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
458 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
459 ;
460 ; SKX-LABEL: test_haddps:
461 ; SKX:       # %bb.0:
462 ; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
463 ; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
464 ; SKX-NEXT:    retq # sched: [7:1.00]
465 ;
466 ; BDVER2-SSE-LABEL: test_haddps:
467 ; BDVER2-SSE:       # %bb.0:
468 ; BDVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [11:1.00]
469 ; BDVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [16:1.00]
470 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
471 ;
472 ; BDVER2-LABEL: test_haddps:
473 ; BDVER2:       # %bb.0:
474 ; BDVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
475 ; BDVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
476 ; BDVER2-NEXT:    retq # sched: [5:1.00]
477 ;
478 ; BTVER2-SSE-LABEL: test_haddps:
479 ; BTVER2-SSE:       # %bb.0:
480 ; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
481 ; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [8:1.00]
482 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
483 ;
484 ; BTVER2-LABEL: test_haddps:
485 ; BTVER2:       # %bb.0:
486 ; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
487 ; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
488 ; BTVER2-NEXT:    retq # sched: [4:1.00]
489 ;
490 ; ZNVER1-SSE-LABEL: test_haddps:
491 ; ZNVER1-SSE:       # %bb.0:
492 ; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:0.25]
493 ; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:0.25]
494 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
495 ;
496 ; ZNVER1-LABEL: test_haddps:
497 ; ZNVER1:       # %bb.0:
498 ; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
499 ; ZNVER1-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
500 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First call: both operands are the by-value arguments (register form).
501   %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
; 16-byte-aligned load whose result is the second operand of the next call;
; the expected output shows it as the (%rdi) memory operand.
502   %2 = load <4 x float>, <4 x float> *%a2, align 16
503   %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
504   ret <4 x float> %3
505 }
; Declaration of the intrinsic called by the function above.
506 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
507
; Scheduling-annotation test for the SSE3 hsub.pd intrinsic.
;
; The body calls @llvm.x86.sse3.hsub.pd twice: first on the two by-value
; vector arguments, then on that result and a <2 x double> loaded from %a2.
; The expected output for every RUN configuration therefore holds one
; register-register and one register-memory instruction, each followed by
; the "# sched: [..]" annotation printed because of -print-schedule.
;
; The x86-64, atom and slm runs and every -mattr=-ssse3 run expect the
; two-operand hsubpd; the -mattr=-avx2 runs expect the three-operand
; vhsubpd.
;
; The assertion comments are autogenerated (see the note on the first line of
; the test); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
508 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
509 ; GENERIC-LABEL: test_hsubpd:
510 ; GENERIC:       # %bb.0:
511 ; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
512 ; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
513 ; GENERIC-NEXT:    retq # sched: [1:1.00]
514 ;
515 ; ATOM-LABEL: test_hsubpd:
516 ; ATOM:       # %bb.0:
517 ; ATOM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [8:4.00]
518 ; ATOM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:4.50]
519 ; ATOM-NEXT:    retq # sched: [79:39.50]
520 ;
521 ; SLM-LABEL: test_hsubpd:
522 ; SLM:       # %bb.0:
523 ; SLM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
524 ; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
525 ; SLM-NEXT:    retq # sched: [4:1.00]
526 ;
527 ; SANDY-SSE-LABEL: test_hsubpd:
528 ; SANDY-SSE:       # %bb.0:
529 ; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
530 ; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
531 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
532 ;
533 ; SANDY-LABEL: test_hsubpd:
534 ; SANDY:       # %bb.0:
535 ; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
536 ; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
537 ; SANDY-NEXT:    retq # sched: [1:1.00]
538 ;
539 ; HASWELL-SSE-LABEL: test_hsubpd:
540 ; HASWELL-SSE:       # %bb.0:
541 ; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
542 ; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
543 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
544 ;
545 ; HASWELL-LABEL: test_hsubpd:
546 ; HASWELL:       # %bb.0:
547 ; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
548 ; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
549 ; HASWELL-NEXT:    retq # sched: [7:1.00]
550 ;
551 ; BROADWELL-SSE-LABEL: test_hsubpd:
552 ; BROADWELL-SSE:       # %bb.0:
553 ; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
554 ; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
555 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
556 ;
557 ; BROADWELL-LABEL: test_hsubpd:
558 ; BROADWELL:       # %bb.0:
559 ; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
560 ; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
561 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
562 ;
563 ; SKYLAKE-SSE-LABEL: test_hsubpd:
564 ; SKYLAKE-SSE:       # %bb.0:
565 ; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
566 ; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
567 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
568 ;
569 ; SKYLAKE-LABEL: test_hsubpd:
570 ; SKYLAKE:       # %bb.0:
571 ; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
572 ; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
573 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
574 ;
575 ; SKX-SSE-LABEL: test_hsubpd:
576 ; SKX-SSE:       # %bb.0:
577 ; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
578 ; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
579 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
580 ;
581 ; SKX-LABEL: test_hsubpd:
582 ; SKX:       # %bb.0:
583 ; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
584 ; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
585 ; SKX-NEXT:    retq # sched: [7:1.00]
586 ;
587 ; BDVER2-SSE-LABEL: test_hsubpd:
588 ; BDVER2-SSE:       # %bb.0:
589 ; BDVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [11:1.00]
590 ; BDVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [16:1.00]
591 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
592 ;
593 ; BDVER2-LABEL: test_hsubpd:
594 ; BDVER2:       # %bb.0:
595 ; BDVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
596 ; BDVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
597 ; BDVER2-NEXT:    retq # sched: [5:1.00]
598 ;
599 ; BTVER2-SSE-LABEL: test_hsubpd:
600 ; BTVER2-SSE:       # %bb.0:
601 ; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
602 ; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [8:1.00]
603 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
604 ;
605 ; BTVER2-LABEL: test_hsubpd:
606 ; BTVER2:       # %bb.0:
607 ; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
608 ; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
609 ; BTVER2-NEXT:    retq # sched: [4:1.00]
610 ;
611 ; ZNVER1-SSE-LABEL: test_hsubpd:
612 ; ZNVER1-SSE:       # %bb.0:
613 ; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:0.25]
614 ; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:0.25]
615 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
616 ;
617 ; ZNVER1-LABEL: test_hsubpd:
618 ; ZNVER1:       # %bb.0:
619 ; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
620 ; ZNVER1-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
621 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First call: both operands are the by-value arguments (register form).
622   %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
; 16-byte-aligned load whose result is the second operand of the next call;
; the expected output shows it as the (%rdi) memory operand.
623   %2 = load <2 x double>, <2 x double> *%a2, align 16
624   %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
625   ret <2 x double> %3
626 }
; Declaration of the intrinsic called by the function above.
627 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
628
629 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
630 ; GENERIC-LABEL: test_hsubps:
631 ; GENERIC:       # %bb.0:
632 ; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
633 ; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
634 ; GENERIC-NEXT:    retq # sched: [1:1.00]
635 ;
636 ; ATOM-LABEL: test_hsubps:
637 ; ATOM:       # %bb.0:
638 ; ATOM-NEXT:    hsubps %xmm1, %xmm0 # sched: [8:4.00]
639 ; ATOM-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:4.50]
640 ; ATOM-NEXT:    retq # sched: [79:39.50]
641 ;
642 ; SLM-LABEL: test_hsubps:
643 ; SLM:       # %bb.0:
644 ; SLM-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
645 ; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
646 ; SLM-NEXT:    retq # sched: [4:1.00]
647 ;
648 ; SANDY-SSE-LABEL: test_hsubps:
649 ; SANDY-SSE:       # %bb.0:
650 ; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
651 ; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
652 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
653 ;
654 ; SANDY-LABEL: test_hsubps:
655 ; SANDY:       # %bb.0:
656 ; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
657 ; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
658 ; SANDY-NEXT:    retq # sched: [1:1.00]
659 ;
660 ; HASWELL-SSE-LABEL: test_hsubps:
661 ; HASWELL-SSE:       # %bb.0:
662 ; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
663 ; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
664 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
665 ;
666 ; HASWELL-LABEL: test_hsubps:
667 ; HASWELL:       # %bb.0:
668 ; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
669 ; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
670 ; HASWELL-NEXT:    retq # sched: [7:1.00]
671 ;
672 ; BROADWELL-SSE-LABEL: test_hsubps:
673 ; BROADWELL-SSE:       # %bb.0:
674 ; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
675 ; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
676 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
677 ;
678 ; BROADWELL-LABEL: test_hsubps:
679 ; BROADWELL:       # %bb.0:
680 ; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
681 ; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
682 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
683 ;
684 ; SKYLAKE-SSE-LABEL: test_hsubps:
685 ; SKYLAKE-SSE:       # %bb.0:
686 ; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
687 ; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
688 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
689 ;
690 ; SKYLAKE-LABEL: test_hsubps:
691 ; SKYLAKE:       # %bb.0:
692 ; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
693 ; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
694 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
695 ;
696 ; SKX-SSE-LABEL: test_hsubps:
697 ; SKX-SSE:       # %bb.0:
698 ; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
699 ; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
700 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
701 ;
702 ; SKX-LABEL: test_hsubps:
703 ; SKX:       # %bb.0:
704 ; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
705 ; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
706 ; SKX-NEXT:    retq # sched: [7:1.00]
707 ;
708 ; BDVER2-SSE-LABEL: test_hsubps:
709 ; BDVER2-SSE:       # %bb.0:
710 ; BDVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [11:1.00]
711 ; BDVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [16:1.00]
712 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
713 ;
714 ; BDVER2-LABEL: test_hsubps:
715 ; BDVER2:       # %bb.0:
716 ; BDVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
717 ; BDVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
718 ; BDVER2-NEXT:    retq # sched: [5:1.00]
719 ;
720 ; BTVER2-SSE-LABEL: test_hsubps:
721 ; BTVER2-SSE:       # %bb.0:
722 ; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
723 ; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [8:1.00]
724 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
725 ;
726 ; BTVER2-LABEL: test_hsubps:
727 ; BTVER2:       # %bb.0:
728 ; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
729 ; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
730 ; BTVER2-NEXT:    retq # sched: [4:1.00]
731 ;
732 ; ZNVER1-SSE-LABEL: test_hsubps:
733 ; ZNVER1-SSE:       # %bb.0:
734 ; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:0.25]
735 ; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:0.25]
736 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
737 ;
738 ; ZNVER1-LABEL: test_hsubps:
739 ; ZNVER1:       # %bb.0:
740 ; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
741 ; ZNVER1-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
742 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
743   %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
744   %2 = load <4 x float>, <4 x float> *%a2, align 16
745   %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
746   ret <4 x float> %3
747 }
; Intrinsic called twice by the test_hsubps body directly above (once on the
; two register arguments, once on that result and the loaded vector).
748 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
749
; test_lddqu: scheduling checks for the SSE3 ldu.dq load intrinsic.
; The body is a single call to @llvm.x86.sse3.ldu.dq on %a0 whose result is
; returned. Every run is expected to emit exactly one load from (%rdi) into
; %xmm0 followed by retq: spelled lddqu in the generic, Atom, SLM and "-SSE"
; runs, and vlddqu in the remaining runs. Only the Atom expectations contain
; extra instructions (two nops between the load and retq).
; The bracketed pair after "sched" is emitted because the RUN lines pass
; -print-schedule; presumably [latency:reciprocal throughput] -- TODO confirm.
750 define <16 x i8> @test_lddqu(i8* %a0) {
751 ; GENERIC-LABEL: test_lddqu:
752 ; GENERIC:       # %bb.0:
753 ; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
754 ; GENERIC-NEXT:    retq # sched: [1:1.00]
755 ;
756 ; ATOM-LABEL: test_lddqu:
757 ; ATOM:       # %bb.0:
758 ; ATOM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.50]
759 ; ATOM-NEXT:    nop # sched: [1:0.50]
760 ; ATOM-NEXT:    nop # sched: [1:0.50]
761 ; ATOM-NEXT:    retq # sched: [79:39.50]
762 ;
763 ; SLM-LABEL: test_lddqu:
764 ; SLM:       # %bb.0:
765 ; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
766 ; SLM-NEXT:    retq # sched: [4:1.00]
767 ;
768 ; SANDY-SSE-LABEL: test_lddqu:
769 ; SANDY-SSE:       # %bb.0:
770 ; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
771 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
772 ;
773 ; SANDY-LABEL: test_lddqu:
774 ; SANDY:       # %bb.0:
775 ; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
776 ; SANDY-NEXT:    retq # sched: [1:1.00]
777 ;
778 ; HASWELL-SSE-LABEL: test_lddqu:
779 ; HASWELL-SSE:       # %bb.0:
780 ; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
781 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
782 ;
783 ; HASWELL-LABEL: test_lddqu:
784 ; HASWELL:       # %bb.0:
785 ; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
786 ; HASWELL-NEXT:    retq # sched: [7:1.00]
787 ;
788 ; BROADWELL-SSE-LABEL: test_lddqu:
789 ; BROADWELL-SSE:       # %bb.0:
790 ; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
791 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
792 ;
793 ; BROADWELL-LABEL: test_lddqu:
794 ; BROADWELL:       # %bb.0:
795 ; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
796 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
797 ;
798 ; SKYLAKE-SSE-LABEL: test_lddqu:
799 ; SKYLAKE-SSE:       # %bb.0:
800 ; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
801 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
802 ;
803 ; SKYLAKE-LABEL: test_lddqu:
804 ; SKYLAKE:       # %bb.0:
805 ; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
806 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
807 ;
808 ; SKX-SSE-LABEL: test_lddqu:
809 ; SKX-SSE:       # %bb.0:
810 ; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
811 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
812 ;
813 ; SKX-LABEL: test_lddqu:
814 ; SKX:       # %bb.0:
815 ; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
816 ; SKX-NEXT:    retq # sched: [7:1.00]
817 ;
818 ; BDVER2-SSE-LABEL: test_lddqu:
819 ; BDVER2-SSE:       # %bb.0:
820 ; BDVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
821 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
822 ;
823 ; BDVER2-LABEL: test_lddqu:
824 ; BDVER2:       # %bb.0:
825 ; BDVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
826 ; BDVER2-NEXT:    retq # sched: [5:1.00]
827 ;
828 ; BTVER2-SSE-LABEL: test_lddqu:
829 ; BTVER2-SSE:       # %bb.0:
830 ; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
831 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
832 ;
833 ; BTVER2-LABEL: test_lddqu:
834 ; BTVER2:       # %bb.0:
835 ; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
836 ; BTVER2-NEXT:    retq # sched: [4:1.00]
837 ;
838 ; ZNVER1-SSE-LABEL: test_lddqu:
839 ; ZNVER1-SSE:       # %bb.0:
840 ; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
841 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
842 ;
843 ; ZNVER1-LABEL: test_lddqu:
844 ; ZNVER1:       # %bb.0:
845 ; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
846 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one intrinsic call, result returned unchanged.
847   %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
848   ret <16 x i8> %1
849 }
; Intrinsic exercised by test_lddqu; declared nounwind readonly here.
850 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
851
; test_monitor: scheduling checks for the SSE3 monitor intrinsic.
; The body tail-calls @llvm.x86.sse3.monitor(%a0, %a1, %a2) and returns void.
; All runs expect the same three instructions before retq: %esi copied into
; %ecx, an leaq of (%rdi) into %rax, and monitor itself. The "-SSE" and
; non-"-SSE" runs of each CPU expect identical text (there is no v-prefixed
; form in these expectations). The bdver2 runs list the leaq before the movl;
; every other run lists the movl first.
; No instruction for %a2 appears in the expectations -- presumably it already
; arrives in the register monitor reads; TODO confirm.
852 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
853 ; GENERIC-LABEL: test_monitor:
854 ; GENERIC:       # %bb.0:
855 ; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
856 ; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
857 ; GENERIC-NEXT:    monitor # sched: [100:0.33]
858 ; GENERIC-NEXT:    retq # sched: [1:1.00]
859 ;
860 ; ATOM-LABEL: test_monitor:
861 ; ATOM:       # %bb.0:
862 ; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
863 ; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
864 ; ATOM-NEXT:    monitor # sched: [45:22.50]
865 ; ATOM-NEXT:    retq # sched: [79:39.50]
866 ;
867 ; SLM-LABEL: test_monitor:
868 ; SLM:       # %bb.0:
869 ; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
870 ; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
871 ; SLM-NEXT:    monitor # sched: [100:1.00]
872 ; SLM-NEXT:    retq # sched: [4:1.00]
873 ;
874 ; SANDY-SSE-LABEL: test_monitor:
875 ; SANDY-SSE:       # %bb.0:
876 ; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
877 ; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
878 ; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
879 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
880 ;
881 ; SANDY-LABEL: test_monitor:
882 ; SANDY:       # %bb.0:
883 ; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
884 ; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
885 ; SANDY-NEXT:    monitor # sched: [100:0.33]
886 ; SANDY-NEXT:    retq # sched: [1:1.00]
887 ;
888 ; HASWELL-SSE-LABEL: test_monitor:
889 ; HASWELL-SSE:       # %bb.0:
890 ; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
891 ; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
892 ; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
893 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
894 ;
895 ; HASWELL-LABEL: test_monitor:
896 ; HASWELL:       # %bb.0:
897 ; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
898 ; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
899 ; HASWELL-NEXT:    monitor # sched: [100:0.25]
900 ; HASWELL-NEXT:    retq # sched: [7:1.00]
901 ;
902 ; BROADWELL-SSE-LABEL: test_monitor:
903 ; BROADWELL-SSE:       # %bb.0:
904 ; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
905 ; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
906 ; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
907 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
908 ;
909 ; BROADWELL-LABEL: test_monitor:
910 ; BROADWELL:       # %bb.0:
911 ; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
912 ; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
913 ; BROADWELL-NEXT:    monitor # sched: [100:0.25]
914 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
915 ;
916 ; SKYLAKE-SSE-LABEL: test_monitor:
917 ; SKYLAKE-SSE:       # %bb.0:
918 ; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
919 ; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
920 ; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
921 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
922 ;
923 ; SKYLAKE-LABEL: test_monitor:
924 ; SKYLAKE:       # %bb.0:
925 ; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
926 ; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
927 ; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
928 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
929 ;
930 ; SKX-SSE-LABEL: test_monitor:
931 ; SKX-SSE:       # %bb.0:
932 ; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
933 ; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
934 ; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
935 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
936 ;
937 ; SKX-LABEL: test_monitor:
938 ; SKX:       # %bb.0:
939 ; SKX-NEXT:    movl %esi, %ecx # sched: [1:0.25]
940 ; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
941 ; SKX-NEXT:    monitor # sched: [100:0.25]
942 ; SKX-NEXT:    retq # sched: [7:1.00]
943 ;
944 ; BDVER2-SSE-LABEL: test_monitor:
945 ; BDVER2-SSE:       # %bb.0:
946 ; BDVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
947 ; BDVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
948 ; BDVER2-SSE-NEXT:    monitor # sched: [100:0.50]
949 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
950 ;
951 ; BDVER2-LABEL: test_monitor:
952 ; BDVER2:       # %bb.0:
953 ; BDVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
954 ; BDVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
955 ; BDVER2-NEXT:    monitor # sched: [100:0.50]
956 ; BDVER2-NEXT:    retq # sched: [5:1.00]
957 ;
958 ; BTVER2-SSE-LABEL: test_monitor:
959 ; BTVER2-SSE:       # %bb.0:
960 ; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
961 ; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
962 ; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
963 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
964 ;
965 ; BTVER2-LABEL: test_monitor:
966 ; BTVER2:       # %bb.0:
967 ; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
968 ; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
969 ; BTVER2-NEXT:    monitor # sched: [100:0.50]
970 ; BTVER2-NEXT:    retq # sched: [4:1.00]
971 ;
972 ; ZNVER1-SSE-LABEL: test_monitor:
973 ; ZNVER1-SSE:       # %bb.0:
974 ; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
975 ; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
976 ; ZNVER1-SSE-NEXT:    monitor # sched: [100:0.25]
977 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
978 ;
979 ; ZNVER1-LABEL: test_monitor:
980 ; ZNVER1:       # %bb.0:
981 ; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
982 ; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
983 ; ZNVER1-NEXT:    monitor # sched: [100:0.25]
984 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: the intrinsic is invoked as a tail call with all three
; arguments forwarded in order.
985   tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
986   ret void
987 }
; Intrinsic exercised by test_monitor; declared without attributes here.
988 declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
989
; test_movddup: scheduling checks for movddup in register and memory form.
; %a0 and the <2 x double> loaded from %a1 are each shuffled with a
; zeroinitializer mask (shown as [0,0] in the expectations); the register
; splat is then subtracted from the memory splat and the difference returned.
; Each run expects one movddup of xmm0, one movddup of mem and one subpd
; before retq; the non-"-SSE" AVX-capable runs expect the v-prefixed,
; three-operand spellings. The SLM expectations add a movapd that copies the
; result into xmm0. Which movddup comes first, and which of xmm0/xmm1
; receives each splat, differs from run to run.
990 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
991 ; GENERIC-LABEL: test_movddup:
992 ; GENERIC:       # %bb.0:
993 ; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
994 ; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
995 ; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
996 ; GENERIC-NEXT:    retq # sched: [1:1.00]
997 ;
998 ; ATOM-LABEL: test_movddup:
999 ; ATOM:       # %bb.0:
1000 ; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1001 ; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
1002 ; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
1003 ; ATOM-NEXT:    retq # sched: [79:39.50]
1004 ;
1005 ; SLM-LABEL: test_movddup:
1006 ; SLM:       # %bb.0:
1007 ; SLM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
1008 ; SLM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1009 ; SLM-NEXT:    subpd %xmm0, %xmm1 # sched: [3:1.00]
1010 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
1011 ; SLM-NEXT:    retq # sched: [4:1.00]
1012 ;
1013 ; SANDY-SSE-LABEL: test_movddup:
1014 ; SANDY-SSE:       # %bb.0:
1015 ; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1016 ; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
1017 ; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1018 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1019 ;
1020 ; SANDY-LABEL: test_movddup:
1021 ; SANDY:       # %bb.0:
1022 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1023 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
1024 ; SANDY-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1025 ; SANDY-NEXT:    retq # sched: [1:1.00]
1026 ;
1027 ; HASWELL-SSE-LABEL: test_movddup:
1028 ; HASWELL-SSE:       # %bb.0:
1029 ; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1030 ; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1031 ; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1032 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1033 ;
1034 ; HASWELL-LABEL: test_movddup:
1035 ; HASWELL:       # %bb.0:
1036 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1037 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1038 ; HASWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1039 ; HASWELL-NEXT:    retq # sched: [7:1.00]
1040 ;
1041 ; BROADWELL-SSE-LABEL: test_movddup:
1042 ; BROADWELL-SSE:       # %bb.0:
1043 ; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1044 ; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1045 ; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1046 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1047 ;
1048 ; BROADWELL-LABEL: test_movddup:
1049 ; BROADWELL:       # %bb.0:
1050 ; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1051 ; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1052 ; BROADWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1053 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
1054 ;
1055 ; SKYLAKE-SSE-LABEL: test_movddup:
1056 ; SKYLAKE-SSE:       # %bb.0:
1057 ; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1058 ; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1059 ; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
1060 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1061 ;
1062 ; SKYLAKE-LABEL: test_movddup:
1063 ; SKYLAKE:       # %bb.0:
1064 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1065 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1066 ; SKYLAKE-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
1067 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1068 ;
1069 ; SKX-SSE-LABEL: test_movddup:
1070 ; SKX-SSE:       # %bb.0:
1071 ; SKX-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
1072 ; SKX-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
1073 ; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
1074 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1075 ;
1076 ; SKX-LABEL: test_movddup:
1077 ; SKX:       # %bb.0:
1078 ; SKX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
1079 ; SKX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
1080 ; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
1081 ; SKX-NEXT:    retq # sched: [7:1.00]
1082 ;
1083 ; BDVER2-SSE-LABEL: test_movddup:
1084 ; BDVER2-SSE:       # %bb.0:
1085 ; BDVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50]
1086 ; BDVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50]
1087 ; BDVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [5:1.00]
1088 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
1089 ;
1090 ; BDVER2-LABEL: test_movddup:
1091 ; BDVER2:       # %bb.0:
1092 ; BDVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50]
1093 ; BDVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50]
1094 ; BDVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
1095 ; BDVER2-NEXT:    retq # sched: [5:1.00]
1096 ;
1097 ; BTVER2-SSE-LABEL: test_movddup:
1098 ; BTVER2-SSE:       # %bb.0:
1099 ; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1100 ; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
1101 ; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1102 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1103 ;
1104 ; BTVER2-LABEL: test_movddup:
1105 ; BTVER2:       # %bb.0:
1106 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
1107 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1108 ; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1109 ; BTVER2-NEXT:    retq # sched: [4:1.00]
1110 ;
1111 ; ZNVER1-SSE-LABEL: test_movddup:
1112 ; ZNVER1-SSE:       # %bb.0:
1113 ; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1114 ; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
1115 ; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1116 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1117 ;
1118 ; ZNVER1-LABEL: test_movddup:
1119 ; ZNVER1:       # %bb.0:
1120 ; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
1121 ; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1122 ; ZNVER1-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1123 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: %1 splats element 0 of the register argument, %3 splats
; element 0 of the loaded vector, and %4 computes %3 - %1.
1124   %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
1125   %2 = load <2 x double>, <2 x double> *%a1, align 16
1126   %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
1127   %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
1128   ret <2 x double> %4
1129 }
1130
; test_movshdup: scheduling checks for movshdup in register and memory form.
; %a0 and the <4 x float> loaded from %a1 are each shuffled with the mask
; <1, 1, 3, 3>, and the two shuffled vectors are added and returned.
; Each run expects one movshdup of xmm0, one movshdup of mem and one addps
; before retq; the non-"-SSE" AVX-capable runs expect the v-prefixed,
; three-operand spellings. The SLM expectations add a movaps that copies the
; sum into xmm0. Which movshdup comes first, and which of xmm0/xmm1 receives
; each shuffle, differs from run to run.
1131 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
1132 ; GENERIC-LABEL: test_movshdup:
1133 ; GENERIC:       # %bb.0:
1134 ; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1135 ; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1136 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1137 ; GENERIC-NEXT:    retq # sched: [1:1.00]
1138 ;
1139 ; ATOM-LABEL: test_movshdup:
1140 ; ATOM:       # %bb.0:
1141 ; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1142 ; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
1143 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
1144 ; ATOM-NEXT:    retq # sched: [79:39.50]
1145 ;
1146 ; SLM-LABEL: test_movshdup:
1147 ; SLM:       # %bb.0:
1148 ; SLM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
1149 ; SLM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1150 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
1151 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
1152 ; SLM-NEXT:    retq # sched: [4:1.00]
1153 ;
1154 ; SANDY-SSE-LABEL: test_movshdup:
1155 ; SANDY-SSE:       # %bb.0:
1156 ; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1157 ; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1158 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1159 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1160 ;
1161 ; SANDY-LABEL: test_movshdup:
1162 ; SANDY:       # %bb.0:
1163 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1164 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1165 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1166 ; SANDY-NEXT:    retq # sched: [1:1.00]
1167 ;
1168 ; HASWELL-SSE-LABEL: test_movshdup:
1169 ; HASWELL-SSE:       # %bb.0:
1170 ; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1171 ; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1172 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1173 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1174 ;
1175 ; HASWELL-LABEL: test_movshdup:
1176 ; HASWELL:       # %bb.0:
1177 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1178 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1179 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1180 ; HASWELL-NEXT:    retq # sched: [7:1.00]
1181 ;
1182 ; BROADWELL-SSE-LABEL: test_movshdup:
1183 ; BROADWELL-SSE:       # %bb.0:
1184 ; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1185 ; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
1186 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1187 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1188 ;
1189 ; BROADWELL-LABEL: test_movshdup:
1190 ; BROADWELL:       # %bb.0:
1191 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1192 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
1193 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1194 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
1195 ;
1196 ; SKYLAKE-SSE-LABEL: test_movshdup:
1197 ; SKYLAKE-SSE:       # %bb.0:
1198 ; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1199 ; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1200 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1201 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1202 ;
1203 ; SKYLAKE-LABEL: test_movshdup:
1204 ; SKYLAKE:       # %bb.0:
1205 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1206 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1207 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1208 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1209 ;
1210 ; SKX-SSE-LABEL: test_movshdup:
1211 ; SKX-SSE:       # %bb.0:
1212 ; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1213 ; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1214 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1215 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1216 ;
1217 ; SKX-LABEL: test_movshdup:
1218 ; SKX:       # %bb.0:
1219 ; SKX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1220 ; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1221 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1222 ; SKX-NEXT:    retq # sched: [7:1.00]
1223 ;
1224 ; BDVER2-SSE-LABEL: test_movshdup:
1225 ; BDVER2-SSE:       # %bb.0:
1226 ; BDVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50]
1227 ; BDVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50]
1228 ; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
1229 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
1230 ;
1231 ; BDVER2-LABEL: test_movshdup:
1232 ; BDVER2:       # %bb.0:
1233 ; BDVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50]
1234 ; BDVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50]
1235 ; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1236 ; BDVER2-NEXT:    retq # sched: [5:1.00]
1237 ;
1238 ; BTVER2-SSE-LABEL: test_movshdup:
1239 ; BTVER2-SSE:       # %bb.0:
1240 ; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1241 ; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
1242 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1243 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1244 ;
1245 ; BTVER2-LABEL: test_movshdup:
1246 ; BTVER2:       # %bb.0:
1247 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
1248 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1249 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1250 ; BTVER2-NEXT:    retq # sched: [4:1.00]
1251 ;
1252 ; ZNVER1-SSE-LABEL: test_movshdup:
1253 ; ZNVER1-SSE:       # %bb.0:
1254 ; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1255 ; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
1256 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1257 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1258 ;
1259 ; ZNVER1-LABEL: test_movshdup:
1260 ; ZNVER1:       # %bb.0:
1261 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
1262 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1263 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1264 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: %1 shuffles the register argument, %3 applies the same mask
; to the loaded vector, and %4 adds the two.
1265   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1266   %2 = load <4 x float>, <4 x float> *%a1, align 16
1267   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1268   %4 = fadd <4 x float> %1, %3
1269   ret <4 x float> %4
1270 }
1271
; test_movsldup: scheduling checks for movsldup in register and memory form.
; %a0 and the <4 x float> loaded from %a1 are each shuffled with the mask
; <0, 0, 2, 2>, and the two shuffled vectors are added and returned.
; Each run expects one movsldup of xmm0, one movsldup of mem and one addps
; before retq; the non-"-SSE" AVX-capable runs expect the v-prefixed,
; three-operand spellings. The SLM expectations add a movaps that copies the
; sum into xmm0. Which movsldup comes first, and which of xmm0/xmm1 receives
; each shuffle, differs from run to run.
; NOTE(review): the znver1 legacy-SSE expectations below give [100:0.25] for
; both movsldup forms, whereas the znver1 AVX expectations in this function
; and the znver1 expectations in test_movshdup give [1:0.50] (register) and
; [8:0.50] (memory). Presumably this mirrors the scheduler model at the time
; the assertions were generated -- regenerate and confirm before relying on
; these numbers.
1272 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
1273 ; GENERIC-LABEL: test_movsldup:
1274 ; GENERIC:       # %bb.0:
1275 ; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1276 ; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1277 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1278 ; GENERIC-NEXT:    retq # sched: [1:1.00]
1279 ;
1280 ; ATOM-LABEL: test_movsldup:
1281 ; ATOM:       # %bb.0:
1282 ; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1283 ; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
1284 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
1285 ; ATOM-NEXT:    retq # sched: [79:39.50]
1286 ;
1287 ; SLM-LABEL: test_movsldup:
1288 ; SLM:       # %bb.0:
1289 ; SLM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
1290 ; SLM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1291 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
1292 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
1293 ; SLM-NEXT:    retq # sched: [4:1.00]
1294 ;
1295 ; SANDY-SSE-LABEL: test_movsldup:
1296 ; SANDY-SSE:       # %bb.0:
1297 ; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1298 ; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1299 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1300 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1301 ;
1302 ; SANDY-LABEL: test_movsldup:
1303 ; SANDY:       # %bb.0:
1304 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1305 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1306 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1307 ; SANDY-NEXT:    retq # sched: [1:1.00]
1308 ;
1309 ; HASWELL-SSE-LABEL: test_movsldup:
1310 ; HASWELL-SSE:       # %bb.0:
1311 ; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1312 ; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1313 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1314 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1315 ;
1316 ; HASWELL-LABEL: test_movsldup:
1317 ; HASWELL:       # %bb.0:
1318 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1319 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1320 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1321 ; HASWELL-NEXT:    retq # sched: [7:1.00]
1322 ;
1323 ; BROADWELL-SSE-LABEL: test_movsldup:
1324 ; BROADWELL-SSE:       # %bb.0:
1325 ; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1326 ; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
1327 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1328 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1329 ;
1330 ; BROADWELL-LABEL: test_movsldup:
1331 ; BROADWELL:       # %bb.0:
1332 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1333 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
1334 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1335 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
1336 ;
1337 ; SKYLAKE-SSE-LABEL: test_movsldup:
1338 ; SKYLAKE-SSE:       # %bb.0:
1339 ; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1340 ; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1341 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1342 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1343 ;
1344 ; SKYLAKE-LABEL: test_movsldup:
1345 ; SKYLAKE:       # %bb.0:
1346 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1347 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1348 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1349 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1350 ;
1351 ; SKX-SSE-LABEL: test_movsldup:
1352 ; SKX-SSE:       # %bb.0:
1353 ; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1354 ; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1355 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1356 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1357 ;
1358 ; SKX-LABEL: test_movsldup:
1359 ; SKX:       # %bb.0:
1360 ; SKX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1361 ; SKX-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1362 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1363 ; SKX-NEXT:    retq # sched: [7:1.00]
1364 ;
1365 ; BDVER2-SSE-LABEL: test_movsldup:
1366 ; BDVER2-SSE:       # %bb.0:
1367 ; BDVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50]
1368 ; BDVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50]
1369 ; BDVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [5:1.00]
1370 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
1371 ;
1372 ; BDVER2-LABEL: test_movsldup:
1373 ; BDVER2:       # %bb.0:
1374 ; BDVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50]
1375 ; BDVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50]
1376 ; BDVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1377 ; BDVER2-NEXT:    retq # sched: [5:1.00]
1378 ;
1379 ; BTVER2-SSE-LABEL: test_movsldup:
1380 ; BTVER2-SSE:       # %bb.0:
1381 ; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
1382 ; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
1383 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1384 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1385 ;
1386 ; BTVER2-LABEL: test_movsldup:
1387 ; BTVER2:       # %bb.0:
1388 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
1389 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1390 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1391 ; BTVER2-NEXT:    retq # sched: [4:1.00]
1392 ;
1393 ; ZNVER1-SSE-LABEL: test_movsldup:
1394 ; ZNVER1-SSE:       # %bb.0:
1395 ; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
1396 ; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
1397 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1398 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1399 ;
1400 ; ZNVER1-LABEL: test_movsldup:
1401 ; ZNVER1:       # %bb.0:
1402 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
1403 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1404 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1405 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: %1 shuffles the register argument, %3 applies the same mask
; to the loaded vector, and %4 adds the two.
1406   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1407   %2 = load <4 x float>, <4 x float> *%a1, align 16
1408   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1409   %4 = fadd <4 x float> %1, %3
1410   ret <4 x float> %4
1411 }
1412
; test_mwait -- forwards both i32 arguments unchanged to the
; @llvm.x86.sse3.mwait intrinsic (as a tail call) and returns void.
;
; Every check prefix below expects the same four instructions in the same
; order -- copy %esi into %eax, copy %edi into %ecx, mwait, retq. Only the
; trailing "sched" annotation differs from one CPU model to the next.
; NOTE(review) -- under the SysV AMD64 convention %edi/%esi would carry
; %a0/%a1, meaning %a0 lands in %ecx and %a1 in %eax; confirm against the ABI
; the triple selects.
;
; These check lines are autogenerated by utils/update_llc_test_checks.py (see
; the note at the top of the file); regenerate them instead of editing by hand.
1413 define void @test_mwait(i32 %a0, i32 %a1) {
1414 ; GENERIC-LABEL: test_mwait:
1415 ; GENERIC:       # %bb.0:
1416 ; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
1417 ; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1418 ; GENERIC-NEXT:    mwait # sched: [100:0.33]
1419 ; GENERIC-NEXT:    retq # sched: [1:1.00]
1420 ;
1421 ; ATOM-LABEL: test_mwait:
1422 ; ATOM:       # %bb.0:
1423 ; ATOM-NEXT:    movl %esi, %eax # sched: [1:0.50]
1424 ; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1425 ; ATOM-NEXT:    mwait # sched: [46:23.00]
1426 ; ATOM-NEXT:    retq # sched: [79:39.50]
1427 ;
1428 ; SLM-LABEL: test_mwait:
1429 ; SLM:       # %bb.0:
1430 ; SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
1431 ; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1432 ; SLM-NEXT:    mwait # sched: [100:1.00]
1433 ; SLM-NEXT:    retq # sched: [4:1.00]
1434 ;
1435 ; SANDY-SSE-LABEL: test_mwait:
1436 ; SANDY-SSE:       # %bb.0:
1437 ; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
1438 ; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1439 ; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
1440 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1441 ;
1442 ; SANDY-LABEL: test_mwait:
1443 ; SANDY:       # %bb.0:
1444 ; SANDY-NEXT:    movl %esi, %eax # sched: [1:0.33]
1445 ; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1446 ; SANDY-NEXT:    mwait # sched: [100:0.33]
1447 ; SANDY-NEXT:    retq # sched: [1:1.00]
1448 ;
1449 ; HASWELL-SSE-LABEL: test_mwait:
1450 ; HASWELL-SSE:       # %bb.0:
1451 ; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1452 ; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1453 ; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
1454 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1455 ;
1456 ; HASWELL-LABEL: test_mwait:
1457 ; HASWELL:       # %bb.0:
1458 ; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
1459 ; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1460 ; HASWELL-NEXT:    mwait # sched: [20:2.50]
1461 ; HASWELL-NEXT:    retq # sched: [7:1.00]
1462 ;
1463 ; BROADWELL-SSE-LABEL: test_mwait:
1464 ; BROADWELL-SSE:       # %bb.0:
1465 ; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1466 ; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1467 ; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
1468 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1469 ;
1470 ; BROADWELL-LABEL: test_mwait:
1471 ; BROADWELL:       # %bb.0:
1472 ; BROADWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
1473 ; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1474 ; BROADWELL-NEXT:    mwait # sched: [100:0.25]
1475 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
1476 ;
1477 ; SKYLAKE-SSE-LABEL: test_mwait:
1478 ; SKYLAKE-SSE:       # %bb.0:
1479 ; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1480 ; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1481 ; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
1482 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1483 ;
1484 ; SKYLAKE-LABEL: test_mwait:
1485 ; SKYLAKE:       # %bb.0:
1486 ; SKYLAKE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1487 ; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1488 ; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
1489 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1490 ;
1491 ; SKX-SSE-LABEL: test_mwait:
1492 ; SKX-SSE:       # %bb.0:
1493 ; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1494 ; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1495 ; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
1496 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1497 ;
1498 ; SKX-LABEL: test_mwait:
1499 ; SKX:       # %bb.0:
1500 ; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
1501 ; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1502 ; SKX-NEXT:    mwait # sched: [20:2.50]
1503 ; SKX-NEXT:    retq # sched: [7:1.00]
1504 ;
1505 ; BDVER2-SSE-LABEL: test_mwait:
1506 ; BDVER2-SSE:       # %bb.0:
1507 ; BDVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
1508 ; BDVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1509 ; BDVER2-SSE-NEXT:    mwait # sched: [100:0.50]
1510 ; BDVER2-SSE-NEXT:    retq # sched: [5:1.00]
1511 ;
1512 ; BDVER2-LABEL: test_mwait:
1513 ; BDVER2:       # %bb.0:
1514 ; BDVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
1515 ; BDVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1516 ; BDVER2-NEXT:    mwait # sched: [100:0.50]
1517 ; BDVER2-NEXT:    retq # sched: [5:1.00]
1518 ;
1519 ; BTVER2-SSE-LABEL: test_mwait:
1520 ; BTVER2-SSE:       # %bb.0:
1521 ; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
1522 ; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1523 ; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
1524 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1525 ;
1526 ; BTVER2-LABEL: test_mwait:
1527 ; BTVER2:       # %bb.0:
1528 ; BTVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
1529 ; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1530 ; BTVER2-NEXT:    mwait # sched: [100:0.50]
1531 ; BTVER2-NEXT:    retq # sched: [4:1.00]
1532 ;
1533 ; ZNVER1-SSE-LABEL: test_mwait:
1534 ; ZNVER1-SSE:       # %bb.0:
1535 ; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1536 ; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1537 ; ZNVER1-SSE-NEXT:    mwait # sched: [100:0.25]
1538 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1539 ;
1540 ; ZNVER1-LABEL: test_mwait:
1541 ; ZNVER1:       # %bb.0:
1542 ; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
1543 ; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1544 ; ZNVER1-NEXT:    mwait # sched: [100:0.25]
1545 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
1546   tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
1547   ret void
1548 }
; External declaration of the intrinsic called by test_mwait; it takes two i32
; operands and returns void.
1549 declare void @llvm.x86.sse3.mwait(i32, i32)