//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Verifier.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
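// Helper used throughout this file: move an out-of-date declaration aside (by
// appending ".old" to its name) so that a replacement declaration can be
// created under the original name.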
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
     F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
68 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
77 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
78 Name.startswith("fma.vfmadd.") || // Added in 7.0
79 Name.startswith("fma.vfmsub.") || // Added in 7.0
80 Name.startswith("fma.vfmaddsub.") || // Added in 7.0
81 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
82 Name.startswith("fma.vfnmadd.") || // Added in 7.0
83 Name.startswith("fma.vfnmsub.") || // Added in 7.0
84 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
85 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
86 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
87 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
88 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
89 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
90 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
91 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
92 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
93 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
94 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
95 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
96 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
97 Name.startswith("avx512.kunpck") || //added in 6.0
98 Name.startswith("avx2.pabs.") || // Added in 6.0
99 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
100 Name.startswith("avx512.broadcastm") || // Added in 6.0
101 Name == "sse.sqrt.ss" || // Added in 7.0
102 Name == "sse2.sqrt.sd" || // Added in 7.0
103 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
104 Name.startswith("avx.sqrt.p") || // Added in 7.0
105 Name.startswith("sse2.sqrt.p") || // Added in 7.0
106 Name.startswith("sse.sqrt.p") || // Added in 7.0
107 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
108 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
109 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
110 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
111 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
112 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
113 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
114 Name.startswith("avx.vperm2f128.") || // Added in 6.0
115 Name == "avx2.vperm2i128" || // Added in 6.0
116 Name == "sse.add.ss" || // Added in 4.0
117 Name == "sse2.add.sd" || // Added in 4.0
118 Name == "sse.sub.ss" || // Added in 4.0
119 Name == "sse2.sub.sd" || // Added in 4.0
120 Name == "sse.mul.ss" || // Added in 4.0
121 Name == "sse2.mul.sd" || // Added in 4.0
122 Name == "sse.div.ss" || // Added in 4.0
123 Name == "sse2.div.sd" || // Added in 4.0
124 Name == "sse41.pmaxsb" || // Added in 3.9
125 Name == "sse2.pmaxs.w" || // Added in 3.9
126 Name == "sse41.pmaxsd" || // Added in 3.9
127 Name == "sse2.pmaxu.b" || // Added in 3.9
128 Name == "sse41.pmaxuw" || // Added in 3.9
129 Name == "sse41.pmaxud" || // Added in 3.9
130 Name == "sse41.pminsb" || // Added in 3.9
131 Name == "sse2.pmins.w" || // Added in 3.9
132 Name == "sse41.pminsd" || // Added in 3.9
133 Name == "sse2.pminu.b" || // Added in 3.9
134 Name == "sse41.pminuw" || // Added in 3.9
135 Name == "sse41.pminud" || // Added in 3.9
136 Name == "avx512.kand.w" || // Added in 7.0
137 Name == "avx512.kandn.w" || // Added in 7.0
138 Name == "avx512.knot.w" || // Added in 7.0
139 Name == "avx512.kor.w" || // Added in 7.0
140 Name == "avx512.kxor.w" || // Added in 7.0
141 Name == "avx512.kxnor.w" || // Added in 7.0
142 Name == "avx512.kortestc.w" || // Added in 7.0
143 Name == "avx512.kortestz.w" || // Added in 7.0
144 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
145 Name.startswith("avx2.pmax") || // Added in 3.9
146 Name.startswith("avx2.pmin") || // Added in 3.9
147 Name.startswith("avx512.mask.pmax") || // Added in 4.0
148 Name.startswith("avx512.mask.pmin") || // Added in 4.0
149 Name.startswith("avx2.vbroadcast") || // Added in 3.8
150 Name.startswith("avx2.pbroadcast") || // Added in 3.8
151 Name.startswith("avx.vpermil.") || // Added in 3.1
152 Name.startswith("sse2.pshuf") || // Added in 3.9
153 Name.startswith("avx512.pbroadcast") || // Added in 3.9
154 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
155 Name.startswith("avx512.mask.movddup") || // Added in 3.9
156 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
157 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
158 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
159 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
160 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
161 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
162 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
163 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
164 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
165 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
166 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
167 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
168 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
169 Name.startswith("avx512.mask.pand.") || // Added in 3.9
170 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
171 Name.startswith("avx512.mask.por.") || // Added in 3.9
172 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
173 Name.startswith("avx512.mask.and.") || // Added in 3.9
174 Name.startswith("avx512.mask.andn.") || // Added in 3.9
175 Name.startswith("avx512.mask.or.") || // Added in 3.9
176 Name.startswith("avx512.mask.xor.") || // Added in 3.9
177 Name.startswith("avx512.mask.padd.") || // Added in 4.0
178 Name.startswith("avx512.mask.psub.") || // Added in 4.0
179 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
180 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
181 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
182 Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
183 Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
184 Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
185 Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
186 Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
187 Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
188 Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
189 Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
190 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
191 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
192 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
193 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
194 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
195 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
196 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
197 Name == "avx512.cvtusi2sd" || // Added in 7.0
198 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
199 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
200 Name == "sse2.pmulu.dq" || // Added in 7.0
201 Name == "sse41.pmuldq" || // Added in 7.0
202 Name == "avx2.pmulu.dq" || // Added in 7.0
203 Name == "avx2.pmul.dq" || // Added in 7.0
204 Name == "avx512.pmulu.dq.512" || // Added in 7.0
205 Name == "avx512.pmul.dq.512" || // Added in 7.0
206 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
207 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
208 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
209 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
210 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
211 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
212 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
213 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
214 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
215 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
216 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
217 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
218 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
219 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
220 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
221 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
222 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
223 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
224 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
225 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
226 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
227 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
228 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
229 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
230 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
231 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
232 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
233 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
234 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
235 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
236 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
237 Name.startswith("avx512.mask.pslli") || // Added in 4.0
238 Name.startswith("avx512.mask.psrai") || // Added in 4.0
239 Name.startswith("avx512.mask.psrli") || // Added in 4.0
240 Name.startswith("avx512.mask.psllv") || // Added in 4.0
241 Name.startswith("avx512.mask.psrav") || // Added in 4.0
242 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
243 Name.startswith("sse41.pmovsx") || // Added in 3.8
244 Name.startswith("sse41.pmovzx") || // Added in 3.9
245 Name.startswith("avx2.pmovsx") || // Added in 3.9
246 Name.startswith("avx2.pmovzx") || // Added in 3.9
247 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
248 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
249 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
250 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
251 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
252 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
253 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
254 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
255 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
256 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
257 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
258 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
259 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
260 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
261 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
262 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
263 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
264 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
265 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
266 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
267 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
268 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
269 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
270 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
271 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
272 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
273 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
274 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
275 Name.startswith("avx512.mask.prorv.") || // Added in 7.0
276 Name.startswith("avx512.mask.pror.") || // Added in 7.0
277 Name.startswith("avx512.mask.prolv.") || // Added in 7.0
278 Name.startswith("avx512.mask.prol.") || // Added in 7.0
279 Name == "sse.cvtsi2ss" || // Added in 7.0
280 Name == "sse.cvtsi642ss" || // Added in 7.0
281 Name == "sse2.cvtsi2sd" || // Added in 7.0
282 Name == "sse2.cvtsi642sd" || // Added in 7.0
283 Name == "sse2.cvtss2sd" || // Added in 7.0
284 Name == "sse2.cvtdq2pd" || // Added in 3.9
285 Name == "sse2.cvtdq2ps" || // Added in 7.0
286 Name == "sse2.cvtps2pd" || // Added in 3.9
287 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
288 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
289 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
290 Name.startswith("avx.vinsertf128.") || // Added in 3.7
291 Name == "avx2.vinserti128" || // Added in 3.7
292 Name.startswith("avx512.mask.insert") || // Added in 4.0
293 Name.startswith("avx.vextractf128.") || // Added in 3.7
294 Name == "avx2.vextracti128" || // Added in 3.7
295 Name.startswith("avx512.mask.vextract") || // Added in 4.0
296 Name.startswith("sse4a.movnt.") || // Added in 3.9
297 Name.startswith("avx.movnt.") || // Added in 3.2
298 Name.startswith("avx512.storent.") || // Added in 3.9
299 Name == "sse41.movntdqa" || // Added in 5.0
300 Name == "avx2.movntdqa" || // Added in 5.0
301 Name == "avx512.movntdqa" || // Added in 5.0
302 Name == "sse2.storel.dq" || // Added in 3.9
303 Name.startswith("sse.storeu.") || // Added in 3.9
304 Name.startswith("sse2.storeu.") || // Added in 3.9
305 Name.startswith("avx.storeu.") || // Added in 3.9
306 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
307 Name.startswith("avx512.mask.store.p") || // Added in 3.9
308 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
309 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
310 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
311 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
312 Name == "avx512.mask.store.ss" || // Added in 7.0
313 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
314 Name.startswith("avx512.mask.load.") || // Added in 3.9
315 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
316 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
317 Name == "sse42.crc32.64.8" || // Added in 3.4
318 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
319 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
320 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
321 Name.startswith("avx512.mask.valign.") || // Added in 4.0
322 Name.startswith("sse2.psll.dq") || // Added in 3.7
323 Name.startswith("sse2.psrl.dq") || // Added in 3.7
324 Name.startswith("avx2.psll.dq") || // Added in 3.7
325 Name.startswith("avx2.psrl.dq") || // Added in 3.7
326 Name.startswith("avx512.psll.dq") || // Added in 3.9
327 Name.startswith("avx512.psrl.dq") || // Added in 3.9
328 Name == "sse41.pblendw" || // Added in 3.7
329 Name.startswith("sse41.blendp") || // Added in 3.7
330 Name.startswith("avx.blend.p") || // Added in 3.7
331 Name == "avx2.pblendw" || // Added in 3.7
332 Name.startswith("avx2.pblendd.") || // Added in 3.7
333 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
334 Name == "avx2.vbroadcasti128" || // Added in 3.7
335 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
336 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
337 Name == "xop.vpcmov" || // Added in 3.8
338 Name == "xop.vpcmov.256" || // Added in 5.0
339 Name.startswith("avx512.mask.move.s") || // Added in 4.0
340 Name.startswith("avx512.cvtmask2") || // Added in 5.0
341 (Name.startswith("xop.vpcom") && // Added in 3.2
342 F->arg_size() == 2) ||
343 Name.startswith("avx512.ptestm") || //Added in 6.0
344 Name.startswith("avx512.ptestnm") || //Added in 6.0
345 Name.startswith("sse2.pavg") || // Added in 6.0
346 Name.startswith("avx2.pavg") || // Added in 6.0
347 Name.startswith("avx512.mask.pavg")) // Added in 6.0
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }
366 // SSE4.1 ptest functions may have an old signature.
367 if (Name.startswith("sse41.ptest")) { // Added in 3.2
368 if (Name.substr(11) == "c")
369 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
370 if (Name.substr(11) == "z")
371 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
372 if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // operands.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}
433 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
434 assert(F && "Illegal to upgrade a non-existent Function.");
  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."
445 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
446 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
447 F->arg_begin()->getType());
450 if (Name.startswith("arm.neon.vclz")) {
452 F->arg_begin()->getType(),
453 Type::getInt1Ty(F->getContext())
455 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
456 // the end of the name. Change name from llvm.arm.neon.vclz.* to
458 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
459 NewFn = Function::Create(fType, F->getLinkage(),
460 "llvm.ctlz." + Name.substr(14), F->getParent());
463 if (Name.startswith("arm.neon.vcnt")) {
464 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
465 F->arg_begin()->getType());
468 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
469 if (vldRegex.match(Name)) {
470 auto fArgs = F->getFunctionType()->params();
471 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
472 // Can't use Intrinsic::getDeclaration here as the return types might
473 // then only be structurally equal.
    FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm." + Name + ".p0i8", F->getParent());
    return true;
  }
479 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
480 if (vstRegex.match(Name)) {
481 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
482 Intrinsic::arm_neon_vst2,
483 Intrinsic::arm_neon_vst3,
484 Intrinsic::arm_neon_vst4};
    static const Intrinsic::ID StoreLaneInts[] = {
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane
    };

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
    else
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);
    return true;
  }
501 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
502 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
509 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
511 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
512 F->arg_begin()->getType());
515 if (Name.startswith("cttz.") && F->arg_size() == 1) {
517 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
518 F->arg_begin()->getType());
524 if (Name == "dbg.value" && F->arg_size() == 4) {
526 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
533 bool IsLifetimeStart = Name.startswith("lifetime.start");
534 if (IsLifetimeStart || Name.startswith("invariant.start")) {
535 Intrinsic::ID ID = IsLifetimeStart ?
536 Intrinsic::lifetime_start : Intrinsic::invariant_start;
537 auto Args = F->getFunctionType()->params();
538 Type* ObjectPtr[1] = {Args[1]};
539 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
541 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
546 bool IsLifetimeEnd = Name.startswith("lifetime.end");
547 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
548 Intrinsic::ID ID = IsLifetimeEnd ?
549 Intrinsic::lifetime_end : Intrinsic::invariant_end;
551 auto Args = F->getFunctionType()->params();
552 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
553 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
555 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
559 if (Name.startswith("invariant.group.barrier")) {
560 // Rename invariant.group.barrier to launder.invariant.group
561 auto Args = F->getFunctionType()->params();
562 Type* ObjectPtr[1] = {Args[0]};
564 NewFn = Intrinsic::getDeclaration(F->getParent(),
565 Intrinsic::launder_invariant_group, ObjectPtr);
573 if (Name.startswith("masked.load.")) {
574 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
575 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
577 NewFn = Intrinsic::getDeclaration(F->getParent(),
578 Intrinsic::masked_load,
583 if (Name.startswith("masked.store.")) {
584 auto Args = F->getFunctionType()->params();
585 Type *Tys[] = { Args[0], Args[1] };
586 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
588 NewFn = Intrinsic::getDeclaration(F->getParent(),
589 Intrinsic::masked_store,
594 // Renaming gather/scatter intrinsics with no address space overloading
595 // to the new overload which includes an address space
596 if (Name.startswith("masked.gather.")) {
597 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
598 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
600 NewFn = Intrinsic::getDeclaration(F->getParent(),
601 Intrinsic::masked_gather, Tys);
605 if (Name.startswith("masked.scatter.")) {
606 auto Args = F->getFunctionType()->params();
607 Type *Tys[] = {Args[0], Args[1]};
608 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
610 NewFn = Intrinsic::getDeclaration(F->getParent(),
611 Intrinsic::masked_scatter, Tys);
  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
  // alignment parameter to embedding the alignment as an attribute of
  // the pointer args.
618 if (Name.startswith("memcpy.") && F->arg_size() == 5) {
620 // Get the types of dest, src, and len
621 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
622 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
626 if (Name.startswith("memmove.") && F->arg_size() == 5) {
628 // Get the types of dest, src, and len
629 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
630 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
634 if (Name.startswith("memset.") && F->arg_size() == 5) {
    // Get the types of dest and len
637 const auto *FT = F->getFunctionType();
638 Type *ParamTypes[2] = {
639 FT->getParamType(0), // Dest
640 FT->getParamType(2) // len
642 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
649 if (Name.startswith("nvvm.")) {
650 Name = Name.substr(5);
652 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
653 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
654 .Cases("brev32", "brev64", Intrinsic::bitreverse)
655 .Case("clz.i", Intrinsic::ctlz)
656 .Case("popc.i", Intrinsic::ctpop)
657 .Default(Intrinsic::not_intrinsic);
658 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
659 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
660 {F->getReturnType()});
664 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
665 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
667 // TODO: We could add lohi.i2d.
668 bool Expand = StringSwitch<bool>(Name)
669 .Cases("abs.i", "abs.ll", true)
670 .Cases("clz.ll", "popc.ll", "h2f", true)
671 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
672 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
  // We only need to change the name to match the mangling including the
  // address space.
684 if (Name.startswith("objectsize.")) {
685 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
686 if (F->arg_size() == 2 ||
687 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
689 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
697 if (Name == "stackprotectorcheck") {
704 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
707 // Remangle our intrinsic since we upgrade the mangling
708 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
709 if (Result != None) {
710 NewFn = Result.getValue();
  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
741 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
742 Value *Op, unsigned Shift) {
743 Type *ResultTy = Op->getType();
744 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
746 // Bitcast from a 64-bit element type to a byte element type.
747 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
748 Op = Builder.CreateBitCast(Op, VecTy, "cast");
750 // We'll be shuffling in zeroes.
751 Value *Res = Constant::getNullValue(VecTy);
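  // Worked example: for a 128-bit vector (16 bytes) and Shift == 4, the shuffle
  // built below selects four zero bytes from Res followed by Op bytes 0..11,
  // which is exactly PSLLDQ: the vector shifted left by four bytes with zeroes
  // shifted in at the low end.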
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
777 Type *ResultTy = Op->getType();
778 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
780 // Bitcast from a 64-bit element type to a byte element type.
781 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
782 Op = Builder.CreateBitCast(Op, VecTy, "cast");
784 // We'll be shuffling in zeroes.
785 Value *Res = Constant::getNullValue(VecTy);
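  // Worked example: for a 128-bit vector (16 bytes) and Shift == 4, the shuffle
  // built below selects Op bytes 4..15 followed by four zero bytes from Res,
  // which is exactly PSRLDQ: the vector shifted right by four bytes with zeroes
  // shifted in at the high end.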
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
827 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
828 Value *Op0, Value *Op1) {
829 // If the mask is all ones just emit the first operation.
830 if (const auto *C = dyn_cast<Constant>(Mask))
831 if (C->isAllOnesValue())
834 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
835 return Builder.CreateSelect(Mask, Op0, Op1);
838 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
839 Value *Op0, Value *Op1) {
840 // If the mask is all ones just emit the first operation.
841 if (const auto *C = dyn_cast<Constant>(Mask))
842 if (C->isAllOnesValue())
845 llvm::VectorType *MaskTy =
846 llvm::VectorType::get(Builder.getInt1Ty(),
847 Mask->getType()->getIntegerBitWidth());
848 Mask = Builder.CreateBitCast(Mask, MaskTy);
849 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
850 return Builder.CreateSelect(Mask, Op0, Op1);
853 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
854 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
855 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
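// For example, a 128-bit PALIGNR with a shift of 4 produces Op1 bytes 4..15
// followed by Op0 bytes 0..3, i.e. the 32-byte concatenation Op0:Op1 shifted
// right by four bytes and truncated to 16 bytes.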
856 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
857 Value *Op1, Value *Shift,
858 Value *Passthru, Value *Mask,
860 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
862 unsigned NumElts = Op0->getType()->getVectorNumElements();
863 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
864 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
865 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
867 // Mask the immediate for VALIGN.
869 ShiftVal &= (NumElts - 1);
871 // If palignr is shifting the pair of vectors more than the size of two
874 return llvm::Constant::getNullValue(Op0->getType());
876 // If palignr is shifting the pair of input vectors more than one lane,
877 // but less than two lanes, convert to shifting in zeroes.
881 Op0 = llvm::Constant::getNullValue(Op0->getType());
884 uint32_t Indices[64];
885 // 256-bit palignr operates on 128-bit lanes so we need to handle that
886 for (unsigned l = 0; l < NumElts; l += 16) {
887 for (unsigned i = 0; i != 16; ++i) {
888 unsigned Idx = ShiftVal + i;
889 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
890 Idx += NumElts - 16; // End of lane, switch operand.
891 Indices[l + i] = Idx + l;
895 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
896 makeArrayRef(Indices, NumElts),
899 return EmitX86Select(Builder, Mask, Align, Passthru);
902 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
903 Value *Ptr, Value *Data, Value *Mask,
905 // Cast the pointer to the right type.
906 Ptr = Builder.CreateBitCast(Ptr,
907 llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
911 // If the mask is all ones just emit a regular store.
912 if (const auto *C = dyn_cast<Constant>(Mask))
913 if (C->isAllOnesValue())
914 return Builder.CreateAlignedStore(Data, Ptr, Align);
916 // Convert the mask from an integer type to a vector of i1.
917 unsigned NumElts = Data->getType()->getVectorNumElements();
918 Mask = getX86MaskVec(Builder, Mask, NumElts);
919 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
922 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
923 Value *Ptr, Value *Passthru, Value *Mask,
925 // Cast the pointer to the right type.
926 Ptr = Builder.CreateBitCast(Ptr,
927 llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);
936 // Convert the mask from an integer type to a vector of i1.
937 unsigned NumElts = Passthru->getType()->getVectorNumElements();
938 Mask = getX86MaskVec(Builder, Mask, NumElts);
939 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
942 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
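  // Lower the old pabs intrinsics to the plain IR idiom x > 0 ? x : -x; the
  // AVX-512 forms additionally carry a passthru (operand 1) and mask (operand
  // 2) that are applied with a select below.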
943 Value *Op0 = CI.getArgOperand(0);
944 llvm::Type *Ty = Op0->getType();
945 Value *Zero = llvm::Constant::getNullValue(Ty);
946 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
947 Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}
956 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
957 ICmpInst::Predicate Pred) {
958 Value *Op0 = CI.getArgOperand(0);
959 Value *Op1 = CI.getArgOperand(1);
960 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
961 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
963 if (CI.getNumArgOperands() == 4)
964 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
969 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
970 Type *Ty = CI.getType();
972 // Arguments have a vXi32 type so cast to vXi64.
973 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
974 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
977 // Shift left then arithmetic shift right.
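    // Shifting left by 32 and then arithmetic-shifting right by 32 sign-extends
    // the low 32 bits of each 64-bit element, matching PMULDQ's signed
    // interpretation of its inputs.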
978 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
979 LHS = Builder.CreateShl(LHS, ShiftAmt);
980 LHS = Builder.CreateAShr(LHS, ShiftAmt);
981 RHS = Builder.CreateShl(RHS, ShiftAmt);
982 RHS = Builder.CreateAShr(RHS, ShiftAmt);
984 // Clear the upper bits.
985 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
986 LHS = Builder.CreateAnd(LHS, Mask);
987 RHS = Builder.CreateAnd(RHS, Mask);
  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
// Apply a mask to a vector of i1s, making sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
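  // E.g. a <4 x i1> compare result is ANDed with the mask bits (unless the mask
  // is known all-ones), widened to <8 x i1> by shuffling in false elements, and
  // finally bitcast to an i8.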
1001 unsigned NumElts = Vec->getType()->getVectorNumElements();
1003 const auto *C = dyn_cast<Constant>(Mask);
1004 if (!C || !C->isAllOnesValue())
1005 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1009 uint32_t Indices[8];
1010 for (unsigned i = 0; i != NumElts; ++i)
1012 for (unsigned i = NumElts; i != 8; ++i)
1013 Indices[i] = NumElts + i % NumElts;
1014 Vec = Builder.CreateShuffleVector(Vec,
1015 Constant::getNullValue(Vec->getType()),
1018 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1021 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1022 unsigned CC, bool Signed) {
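  // CC is the AVX-512 integer comparison immediate: 0=eq, 1=lt, 2=le, 3=false,
  // 4=ne, 5=ge, 6=gt, 7=true. The always-false/always-true cases fold directly
  // to constant vectors below.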
1023 Value *Op0 = CI.getArgOperand(0);
1024 unsigned NumElts = Op0->getType()->getVectorNumElements();
1028 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1029 } else if (CC == 7) {
1030 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1032 ICmpInst::Predicate Pred;
1034 default: llvm_unreachable("Unknown condition code");
1035 case 0: Pred = ICmpInst::ICMP_EQ; break;
1036 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1037 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1038 case 4: Pred = ICmpInst::ICMP_NE; break;
1039 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1040 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1042 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1045 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1047 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1050 // Replace a masked intrinsic with an older unmasked intrinsic.
1051 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1052 Intrinsic::ID IID) {
1053 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1054 Value *Rep = Builder.CreateCall(Intrin,
1055 { CI.getArgOperand(0), CI.getArgOperand(1) });
1056 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1059 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
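  // Scalar masked move: if bit 0 of the mask is set, take element 0 of B,
  // otherwise element 0 of Src, and insert the chosen value into element 0 of A.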
1060 Value* A = CI.getArgOperand(0);
1061 Value* B = CI.getArgOperand(1);
1062 Value* Src = CI.getArgOperand(2);
1063 Value* Mask = CI.getArgOperand(3);
1065 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1066 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1067 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1068 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1069 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1070 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1074 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1075 Value* Op = CI.getArgOperand(0);
1076 Type* ReturnOp = CI.getType();
1077 unsigned NumElts = CI.getType()->getVectorNumElements();
1078 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1079 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1082 // Replace intrinsic with unmasked version and a select.
1083 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1084 CallInst &CI, Value *&Rep) {
1085 Name = Name.substr(12); // Remove avx512.mask.
1087 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1088 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
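  // Map the masked AVX-512 name plus the vector/element width onto the
  // equivalent older unmasked intrinsic, call it, and then apply the mask with
  // a select (the mask and passthru are the trailing operands of the old
  // intrinsic).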
1090 if (Name.startswith("max.p")) {
1091 if (VecWidth == 128 && EltWidth == 32)
1092 IID = Intrinsic::x86_sse_max_ps;
1093 else if (VecWidth == 128 && EltWidth == 64)
1094 IID = Intrinsic::x86_sse2_max_pd;
1095 else if (VecWidth == 256 && EltWidth == 32)
1096 IID = Intrinsic::x86_avx_max_ps_256;
1097 else if (VecWidth == 256 && EltWidth == 64)
1098 IID = Intrinsic::x86_avx_max_pd_256;
1100 llvm_unreachable("Unexpected intrinsic");
1101 } else if (Name.startswith("min.p")) {
1102 if (VecWidth == 128 && EltWidth == 32)
1103 IID = Intrinsic::x86_sse_min_ps;
1104 else if (VecWidth == 128 && EltWidth == 64)
1105 IID = Intrinsic::x86_sse2_min_pd;
1106 else if (VecWidth == 256 && EltWidth == 32)
1107 IID = Intrinsic::x86_avx_min_ps_256;
1108 else if (VecWidth == 256 && EltWidth == 64)
1109 IID = Intrinsic::x86_avx_min_pd_256;
1111 llvm_unreachable("Unexpected intrinsic");
1112 } else if (Name.startswith("pshuf.b.")) {
1113 if (VecWidth == 128)
1114 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1115 else if (VecWidth == 256)
1116 IID = Intrinsic::x86_avx2_pshuf_b;
1117 else if (VecWidth == 512)
1118 IID = Intrinsic::x86_avx512_pshuf_b_512;
1120 llvm_unreachable("Unexpected intrinsic");
1121 } else if (Name.startswith("pmul.hr.sw.")) {
1122 if (VecWidth == 128)
1123 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1124 else if (VecWidth == 256)
1125 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1126 else if (VecWidth == 512)
1127 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1129 llvm_unreachable("Unexpected intrinsic");
1130 } else if (Name.startswith("pmulh.w.")) {
1131 if (VecWidth == 128)
1132 IID = Intrinsic::x86_sse2_pmulh_w;
1133 else if (VecWidth == 256)
1134 IID = Intrinsic::x86_avx2_pmulh_w;
1135 else if (VecWidth == 512)
1136 IID = Intrinsic::x86_avx512_pmulh_w_512;
1138 llvm_unreachable("Unexpected intrinsic");
1139 } else if (Name.startswith("pmulhu.w.")) {
1140 if (VecWidth == 128)
1141 IID = Intrinsic::x86_sse2_pmulhu_w;
1142 else if (VecWidth == 256)
1143 IID = Intrinsic::x86_avx2_pmulhu_w;
1144 else if (VecWidth == 512)
1145 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1147 llvm_unreachable("Unexpected intrinsic");
1148 } else if (Name.startswith("pmaddw.d.")) {
1149 if (VecWidth == 128)
1150 IID = Intrinsic::x86_sse2_pmadd_wd;
1151 else if (VecWidth == 256)
1152 IID = Intrinsic::x86_avx2_pmadd_wd;
1153 else if (VecWidth == 512)
1154 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1156 llvm_unreachable("Unexpected intrinsic");
1157 } else if (Name.startswith("pmaddubs.w.")) {
1158 if (VecWidth == 128)
1159 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1160 else if (VecWidth == 256)
1161 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1162 else if (VecWidth == 512)
1163 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1165 llvm_unreachable("Unexpected intrinsic");
1166 } else if (Name.startswith("packsswb.")) {
1167 if (VecWidth == 128)
1168 IID = Intrinsic::x86_sse2_packsswb_128;
1169 else if (VecWidth == 256)
1170 IID = Intrinsic::x86_avx2_packsswb;
1171 else if (VecWidth == 512)
1172 IID = Intrinsic::x86_avx512_packsswb_512;
1174 llvm_unreachable("Unexpected intrinsic");
1175 } else if (Name.startswith("packssdw.")) {
1176 if (VecWidth == 128)
1177 IID = Intrinsic::x86_sse2_packssdw_128;
1178 else if (VecWidth == 256)
1179 IID = Intrinsic::x86_avx2_packssdw;
1180 else if (VecWidth == 512)
1181 IID = Intrinsic::x86_avx512_packssdw_512;
1183 llvm_unreachable("Unexpected intrinsic");
1184 } else if (Name.startswith("packuswb.")) {
1185 if (VecWidth == 128)
1186 IID = Intrinsic::x86_sse2_packuswb_128;
1187 else if (VecWidth == 256)
1188 IID = Intrinsic::x86_avx2_packuswb;
1189 else if (VecWidth == 512)
1190 IID = Intrinsic::x86_avx512_packuswb_512;
1192 llvm_unreachable("Unexpected intrinsic");
1193 } else if (Name.startswith("packusdw.")) {
1194 if (VecWidth == 128)
1195 IID = Intrinsic::x86_sse41_packusdw;
1196 else if (VecWidth == 256)
1197 IID = Intrinsic::x86_avx2_packusdw;
1198 else if (VecWidth == 512)
1199 IID = Intrinsic::x86_avx512_packusdw_512;
1201 llvm_unreachable("Unexpected intrinsic");
1202 } else if (Name.startswith("vpermilvar.")) {
1203 if (VecWidth == 128 && EltWidth == 32)
1204 IID = Intrinsic::x86_avx_vpermilvar_ps;
1205 else if (VecWidth == 128 && EltWidth == 64)
1206 IID = Intrinsic::x86_avx_vpermilvar_pd;
1207 else if (VecWidth == 256 && EltWidth == 32)
1208 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1209 else if (VecWidth == 256 && EltWidth == 64)
1210 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1211 else if (VecWidth == 512 && EltWidth == 32)
1212 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1213 else if (VecWidth == 512 && EltWidth == 64)
1214 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1216 llvm_unreachable("Unexpected intrinsic");
1217 } else if (Name == "cvtpd2dq.256") {
1218 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1219 } else if (Name == "cvtpd2ps.256") {
1220 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1221 } else if (Name == "cvttpd2dq.256") {
1222 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1223 } else if (Name == "cvttps2dq.128") {
1224 IID = Intrinsic::x86_sse2_cvttps2dq;
1225 } else if (Name == "cvttps2dq.256") {
1226 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1227 } else if (Name.startswith("permvar.")) {
1228 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1229 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1230 IID = Intrinsic::x86_avx2_permps;
1231 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1232 IID = Intrinsic::x86_avx2_permd;
1233 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1234 IID = Intrinsic::x86_avx512_permvar_df_256;
1235 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1236 IID = Intrinsic::x86_avx512_permvar_di_256;
1237 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1238 IID = Intrinsic::x86_avx512_permvar_sf_512;
1239 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1240 IID = Intrinsic::x86_avx512_permvar_si_512;
1241 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1242 IID = Intrinsic::x86_avx512_permvar_df_512;
1243 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1244 IID = Intrinsic::x86_avx512_permvar_di_512;
1245 else if (VecWidth == 128 && EltWidth == 16)
1246 IID = Intrinsic::x86_avx512_permvar_hi_128;
1247 else if (VecWidth == 256 && EltWidth == 16)
1248 IID = Intrinsic::x86_avx512_permvar_hi_256;
1249 else if (VecWidth == 512 && EltWidth == 16)
1250 IID = Intrinsic::x86_avx512_permvar_hi_512;
1251 else if (VecWidth == 128 && EltWidth == 8)
1252 IID = Intrinsic::x86_avx512_permvar_qi_128;
1253 else if (VecWidth == 256 && EltWidth == 8)
1254 IID = Intrinsic::x86_avx512_permvar_qi_256;
1255 else if (VecWidth == 512 && EltWidth == 8)
1256 IID = Intrinsic::x86_avx512_permvar_qi_512;
1258 llvm_unreachable("Unexpected intrinsic");
1259 } else if (Name.startswith("dbpsadbw.")) {
1260 if (VecWidth == 128)
1261 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1262 else if (VecWidth == 256)
1263 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1264 else if (VecWidth == 512)
1265 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1267 llvm_unreachable("Unexpected intrinsic");
1268 } else if (Name.startswith("vpshld.")) {
1269 if (VecWidth == 128 && Name[7] == 'q')
1270 IID = Intrinsic::x86_avx512_vpshld_q_128;
1271 else if (VecWidth == 128 && Name[7] == 'd')
1272 IID = Intrinsic::x86_avx512_vpshld_d_128;
1273 else if (VecWidth == 128 && Name[7] == 'w')
1274 IID = Intrinsic::x86_avx512_vpshld_w_128;
1275 else if (VecWidth == 256 && Name[7] == 'q')
1276 IID = Intrinsic::x86_avx512_vpshld_q_256;
1277 else if (VecWidth == 256 && Name[7] == 'd')
1278 IID = Intrinsic::x86_avx512_vpshld_d_256;
1279 else if (VecWidth == 256 && Name[7] == 'w')
1280 IID = Intrinsic::x86_avx512_vpshld_w_256;
1281 else if (VecWidth == 512 && Name[7] == 'q')
1282 IID = Intrinsic::x86_avx512_vpshld_q_512;
1283 else if (VecWidth == 512 && Name[7] == 'd')
1284 IID = Intrinsic::x86_avx512_vpshld_d_512;
1285 else if (VecWidth == 512 && Name[7] == 'w')
1286 IID = Intrinsic::x86_avx512_vpshld_w_512;
1288 llvm_unreachable("Unexpected intrinsic");
1289 } else if (Name.startswith("vpshrd.")) {
1290 if (VecWidth == 128 && Name[7] == 'q')
1291 IID = Intrinsic::x86_avx512_vpshrd_q_128;
1292 else if (VecWidth == 128 && Name[7] == 'd')
1293 IID = Intrinsic::x86_avx512_vpshrd_d_128;
1294 else if (VecWidth == 128 && Name[7] == 'w')
1295 IID = Intrinsic::x86_avx512_vpshrd_w_128;
1296 else if (VecWidth == 256 && Name[7] == 'q')
1297 IID = Intrinsic::x86_avx512_vpshrd_q_256;
1298 else if (VecWidth == 256 && Name[7] == 'd')
1299 IID = Intrinsic::x86_avx512_vpshrd_d_256;
1300 else if (VecWidth == 256 && Name[7] == 'w')
1301 IID = Intrinsic::x86_avx512_vpshrd_w_256;
1302 else if (VecWidth == 512 && Name[7] == 'q')
1303 IID = Intrinsic::x86_avx512_vpshrd_q_512;
1304 else if (VecWidth == 512 && Name[7] == 'd')
1305 IID = Intrinsic::x86_avx512_vpshrd_d_512;
1306 else if (VecWidth == 512 && Name[7] == 'w')
1307 IID = Intrinsic::x86_avx512_vpshrd_w_512;
1309 llvm_unreachable("Unexpected intrinsic");
1310 } else if (Name.startswith("prorv.")) {
1311 if (VecWidth == 128 && EltWidth == 32)
1312 IID = Intrinsic::x86_avx512_prorv_d_128;
1313 else if (VecWidth == 256 && EltWidth == 32)
1314 IID = Intrinsic::x86_avx512_prorv_d_256;
1315 else if (VecWidth == 512 && EltWidth == 32)
1316 IID = Intrinsic::x86_avx512_prorv_d_512;
1317 else if (VecWidth == 128 && EltWidth == 64)
1318 IID = Intrinsic::x86_avx512_prorv_q_128;
1319 else if (VecWidth == 256 && EltWidth == 64)
1320 IID = Intrinsic::x86_avx512_prorv_q_256;
1321 else if (VecWidth == 512 && EltWidth == 64)
1322 IID = Intrinsic::x86_avx512_prorv_q_512;
1324 llvm_unreachable("Unexpected intrinsic");
1325 } else if (Name.startswith("prolv.")) {
1326 if (VecWidth == 128 && EltWidth == 32)
1327 IID = Intrinsic::x86_avx512_prolv_d_128;
1328 else if (VecWidth == 256 && EltWidth == 32)
1329 IID = Intrinsic::x86_avx512_prolv_d_256;
1330 else if (VecWidth == 512 && EltWidth == 32)
1331 IID = Intrinsic::x86_avx512_prolv_d_512;
1332 else if (VecWidth == 128 && EltWidth == 64)
1333 IID = Intrinsic::x86_avx512_prolv_q_128;
1334 else if (VecWidth == 256 && EltWidth == 64)
1335 IID = Intrinsic::x86_avx512_prolv_q_256;
1336 else if (VecWidth == 512 && EltWidth == 64)
1337 IID = Intrinsic::x86_avx512_prolv_q_512;
1339 llvm_unreachable("Unexpected intrinsic");
1340 } else if (Name.startswith("pror.")) {
1341 if (VecWidth == 128 && EltWidth == 32)
1342 IID = Intrinsic::x86_avx512_pror_d_128;
1343 else if (VecWidth == 256 && EltWidth == 32)
1344 IID = Intrinsic::x86_avx512_pror_d_256;
1345 else if (VecWidth == 512 && EltWidth == 32)
1346 IID = Intrinsic::x86_avx512_pror_d_512;
1347 else if (VecWidth == 128 && EltWidth == 64)
1348 IID = Intrinsic::x86_avx512_pror_q_128;
1349 else if (VecWidth == 256 && EltWidth == 64)
1350 IID = Intrinsic::x86_avx512_pror_q_256;
1351 else if (VecWidth == 512 && EltWidth == 64)
1352 IID = Intrinsic::x86_avx512_pror_q_512;
1354 llvm_unreachable("Unexpected intrinsic");
1355 } else if (Name.startswith("prol.")) {
1356 if (VecWidth == 128 && EltWidth == 32)
1357 IID = Intrinsic::x86_avx512_prol_d_128;
1358 else if (VecWidth == 256 && EltWidth == 32)
1359 IID = Intrinsic::x86_avx512_prol_d_256;
1360 else if (VecWidth == 512 && EltWidth == 32)
1361 IID = Intrinsic::x86_avx512_prol_d_512;
1362 else if (VecWidth == 128 && EltWidth == 64)
1363 IID = Intrinsic::x86_avx512_prol_q_128;
1364 else if (VecWidth == 256 && EltWidth == 64)
1365 IID = Intrinsic::x86_avx512_prol_q_256;
1366 else if (VecWidth == 512 && EltWidth == 64)
1367 IID = Intrinsic::x86_avx512_prol_q_512;
1369 llvm_unreachable("Unexpected intrinsic");
1373 SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1374 CI.arg_operands().end());
1377 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1379 unsigned NumArgs = CI.getNumArgOperands();
1380 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1381 CI.getArgOperand(NumArgs - 2));
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
1397 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1398 /// provided to seamlessly integrate with existing context.
1399 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1400 Function *F = CI->getCalledFunction();
1401 LLVMContext &C = CI->getContext();
1402 IRBuilder<> Builder(C);
1403 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1405 assert(F && "Intrinsic call is not direct?");
1408 // Get the Function's name.
1409 StringRef Name = F->getName();
1411 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1412 Name = Name.substr(5);
  bool IsX86 = Name.startswith("x86.");
  if (IsX86)
    Name = Name.substr(4);
  bool IsNVVM = Name.startswith("nvvm.");
  if (IsNVVM)
    Name = Name.substr(5);
1421 if (IsX86 && Name.startswith("sse4a.movnt.")) {
1422 Module *M = F->getParent();
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);
1428 Value *Arg0 = CI->getArgOperand(0);
1429 Value *Arg1 = CI->getArgOperand(1);
    // Nontemporal (unaligned) store of the 0'th element of the float/double
    // vector.
1433 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1434 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1435 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
    Value *Extract =
        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1439 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1440 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1442 // Remove intrinsic.
1443 CI->eraseFromParent();
1447 if (IsX86 && (Name.startswith("avx.movnt.") ||
1448 Name.startswith("avx512.storent."))) {
1449 Module *M = F->getParent();
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);
1455 Value *Arg0 = CI->getArgOperand(0);
1456 Value *Arg1 = CI->getArgOperand(1);
1458 // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Arg0,
                                      PointerType::getUnqual(Arg1->getType()),
                                      "cast");
1462 VectorType *VTy = cast<VectorType>(Arg1->getType());
1463 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1464 VTy->getBitWidth() / 8);
1465 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1467 // Remove intrinsic.
1468 CI->eraseFromParent();
1472 if (IsX86 && Name == "sse2.storel.dq") {
1473 Value *Arg0 = CI->getArgOperand(0);
1474 Value *Arg1 = CI->getArgOperand(1);
1476 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1477 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1478 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Value *BC = Builder.CreateBitCast(Arg0,
                                      PointerType::getUnqual(Elt->getType()),
                                      "cast");
1482 Builder.CreateAlignedStore(Elt, BC, 1);
1484 // Remove intrinsic.
1485 CI->eraseFromParent();
1489 if (IsX86 && (Name.startswith("sse.storeu.") ||
1490 Name.startswith("sse2.storeu.") ||
1491 Name.startswith("avx.storeu."))) {
1492 Value *Arg0 = CI->getArgOperand(0);
1493 Value *Arg1 = CI->getArgOperand(1);
1495 Arg0 = Builder.CreateBitCast(Arg0,
1496                              PointerType::getUnqual(Arg1->getType()),
1497                              "cast");
1498 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1500 // Remove intrinsic.
1501 CI->eraseFromParent();
1505 if (IsX86 && Name == "avx512.mask.store.ss") {
1506 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1507   UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1508                      Mask, false);
1510 // Remove intrinsic.
1511 CI->eraseFromParent();
1515 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1516 // "avx512.mask.storeu." or "avx512.mask.store."
1517 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
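    // e.g. Name == "avx512.mask.storeu.b.128": index 17 is the 'u' after
    // "store", so unaligned stores are detected by that single character.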
1518 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1519 CI->getArgOperand(2), Aligned);
1521 // Remove intrinsic.
1522 CI->eraseFromParent();
1527 // Upgrade packed integer vector compare intrinsics to compare instructions.
1528 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1529 Name.startswith("avx2.pcmp"))) {
1530     // "sse2.pcmpeq." or "sse2.pcmpgt." or "avx2.pcmpeq." or "avx2.pcmpgt."
1531 bool CmpEq = Name[9] == 'e';
1532 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1533 CI->getArgOperand(0), CI->getArgOperand(1));
1534 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
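    // The icmp above yields <N x i1>; the sext widens it back to the original
    // element type so each lane is all-ones (true) or all-zeros (false),
    // matching the pcmpeq/pcmpgt result vectors.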
1535 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1536 Type *ExtTy = Type::getInt32Ty(C);
1537 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1538 ExtTy = Type::getInt64Ty(C);
1539 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1540 ExtTy->getPrimitiveSizeInBits();
1541 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1542 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1543 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1544 Name == "sse2.sqrt.sd")) {
1545 Value *Vec = CI->getArgOperand(0);
1546 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1547 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1548 Intrinsic::sqrt, Elt0->getType());
1549 Elt0 = Builder.CreateCall(Intr, Elt0);
1550 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1551 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1552 Name.startswith("sse2.sqrt.p") ||
1553 Name.startswith("sse.sqrt.p"))) {
1554     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1555                                                        Intrinsic::sqrt,
1556                                                        CI->getType()),
1557                              {CI->getArgOperand(0)});
1558 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1559 if (CI->getNumArgOperands() == 4 &&
1560 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1561 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1562 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1563 : Intrinsic::x86_avx512_sqrt_pd_512;
1565 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1566       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1567                                                          IID), Args);
1568     } else {
1569       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1570                                                          Intrinsic::sqrt,
1571                                                          CI->getType()),
1572                                {CI->getArgOperand(0)});
1573     }
1574 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1575 CI->getArgOperand(1));
1576 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1577 Name.startswith("avx512.ptestnm"))) {
1578 Value *Op0 = CI->getArgOperand(0);
1579 Value *Op1 = CI->getArgOperand(1);
1580 Value *Mask = CI->getArgOperand(2);
1581 Rep = Builder.CreateAnd(Op0, Op1);
1582 llvm::Type *Ty = Op0->getType();
1583 Value *Zero = llvm::Constant::getNullValue(Ty);
1584 ICmpInst::Predicate Pred =
1585 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1586 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1587 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1588 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1589     unsigned NumElts =
1590         CI->getArgOperand(1)->getType()->getVectorNumElements();
1591 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1592 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1593 CI->getArgOperand(1));
1594 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1595 unsigned NumElts = CI->getType()->getScalarSizeInBits();
1596 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1597 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1598 uint32_t Indices[64];
1599     for (unsigned i = 0; i != NumElts; ++i)
1600       Indices[i] = i;
1602 // First extract half of each vector. This gives better codegen than
1603 // doing it in a single shuffle.
1604 LHS = Builder.CreateShuffleVector(LHS, LHS,
1605 makeArrayRef(Indices, NumElts / 2));
1606 RHS = Builder.CreateShuffleVector(RHS, RHS,
1607 makeArrayRef(Indices, NumElts / 2));
1608 // Concat the vectors.
1609 // NOTE: Operands have to be swapped to match intrinsic definition.
1610 Rep = Builder.CreateShuffleVector(RHS, LHS,
1611 makeArrayRef(Indices, NumElts));
1612 Rep = Builder.CreateBitCast(Rep, CI->getType());
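    // Worked example (kunpck.bw, NumElts == 16): the low 8 bits of each mask
    // are kept, and the result is <16 x i1> = [ arg1[0..7], arg0[0..7] ], so
    // after the bitcast arg0's low byte ends up in bits 15:8 and arg1's low
    // byte in bits 7:0.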
1613 } else if (IsX86 && Name == "avx512.kand.w") {
1614 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1615 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1616 Rep = Builder.CreateAnd(LHS, RHS);
1617 Rep = Builder.CreateBitCast(Rep, CI->getType());
1618 } else if (IsX86 && Name == "avx512.kandn.w") {
1619 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1620 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1621 LHS = Builder.CreateNot(LHS);
1622 Rep = Builder.CreateAnd(LHS, RHS);
1623 Rep = Builder.CreateBitCast(Rep, CI->getType());
1624 } else if (IsX86 && Name == "avx512.kor.w") {
1625 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1626 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1627 Rep = Builder.CreateOr(LHS, RHS);
1628 Rep = Builder.CreateBitCast(Rep, CI->getType());
1629 } else if (IsX86 && Name == "avx512.kxor.w") {
1630 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1631 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1632 Rep = Builder.CreateXor(LHS, RHS);
1633 Rep = Builder.CreateBitCast(Rep, CI->getType());
1634 } else if (IsX86 && Name == "avx512.kxnor.w") {
1635 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1636 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1637 LHS = Builder.CreateNot(LHS);
1638 Rep = Builder.CreateXor(LHS, RHS);
1639 Rep = Builder.CreateBitCast(Rep, CI->getType());
1640 } else if (IsX86 && Name == "avx512.knot.w") {
1641 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1642 Rep = Builder.CreateNot(Rep);
1643 Rep = Builder.CreateBitCast(Rep, CI->getType());
1644   } else if (IsX86 &&
1645              (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1646 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1647 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1648 Rep = Builder.CreateOr(LHS, RHS);
1649 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1650     Value *C;
1651     if (Name[14] == 'c')
1652       C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1653     else
1654       C = ConstantInt::getNullValue(Builder.getInt16Ty());
1655 Rep = Builder.CreateICmpEQ(Rep, C);
1656 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
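    // Name[14] distinguishes the two forms: kortestc compares the OR of the
    // masks against all-ones, kortestz against all-zeros; the i1 result is
    // then zero-extended to the i32 return value.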
1657 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
1658 Type *I32Ty = Type::getInt32Ty(C);
1659 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1660 ConstantInt::get(I32Ty, 0));
1661 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1662 ConstantInt::get(I32Ty, 0));
1663 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1664 Builder.CreateFAdd(Elt0, Elt1),
1665 ConstantInt::get(I32Ty, 0));
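    // The sse*.add/sub/mul/div.ss/sd intrinsics operate only on element 0; the
    // scalar result is inserted back into operand 0 so the upper elements pass
    // through unchanged, matching the hardware semantics.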
1666 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1667 Type *I32Ty = Type::getInt32Ty(C);
1668 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1669 ConstantInt::get(I32Ty, 0));
1670 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1671 ConstantInt::get(I32Ty, 0));
1672 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1673 Builder.CreateFSub(Elt0, Elt1),
1674 ConstantInt::get(I32Ty, 0));
1675 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1676 Type *I32Ty = Type::getInt32Ty(C);
1677 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1678 ConstantInt::get(I32Ty, 0));
1679 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1680 ConstantInt::get(I32Ty, 0));
1681 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1682 Builder.CreateFMul(Elt0, Elt1),
1683 ConstantInt::get(I32Ty, 0));
1684 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1685 Type *I32Ty = Type::getInt32Ty(C);
1686 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1687 ConstantInt::get(I32Ty, 0));
1688 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1689 ConstantInt::get(I32Ty, 0));
1690 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1691 Builder.CreateFDiv(Elt0, Elt1),
1692 ConstantInt::get(I32Ty, 0));
1693 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1694 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1695 bool CmpEq = Name[16] == 'e';
1696 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1697 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1698 Type *OpTy = CI->getArgOperand(0)->getType();
1699 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1700 unsigned EltWidth = OpTy->getScalarSizeInBits();
1701     Intrinsic::ID IID;
1702     if (VecWidth == 128 && EltWidth == 32)
1703 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1704 else if (VecWidth == 256 && EltWidth == 32)
1705 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1706 else if (VecWidth == 512 && EltWidth == 32)
1707 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1708 else if (VecWidth == 128 && EltWidth == 64)
1709 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1710 else if (VecWidth == 256 && EltWidth == 64)
1711 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1712 else if (VecWidth == 512 && EltWidth == 64)
1713 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1714     else
1715       llvm_unreachable("Unexpected intrinsic");
1717 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1718 { CI->getOperand(0), CI->getArgOperand(1) });
1719 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1720 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1721 Type *OpTy = CI->getArgOperand(0)->getType();
1722 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1723 unsigned EltWidth = OpTy->getScalarSizeInBits();
1724     Intrinsic::ID IID;
1725     if (VecWidth == 128 && EltWidth == 32)
1726 IID = Intrinsic::x86_avx512_cmp_ps_128;
1727 else if (VecWidth == 256 && EltWidth == 32)
1728 IID = Intrinsic::x86_avx512_cmp_ps_256;
1729 else if (VecWidth == 512 && EltWidth == 32)
1730 IID = Intrinsic::x86_avx512_cmp_ps_512;
1731 else if (VecWidth == 128 && EltWidth == 64)
1732 IID = Intrinsic::x86_avx512_cmp_pd_128;
1733 else if (VecWidth == 256 && EltWidth == 64)
1734 IID = Intrinsic::x86_avx512_cmp_pd_256;
1735 else if (VecWidth == 512 && EltWidth == 64)
1736 IID = Intrinsic::x86_avx512_cmp_pd_512;
1737     else
1738       llvm_unreachable("Unexpected intrinsic");
1740 SmallVector<Value *, 4> Args;
1741 Args.push_back(CI->getArgOperand(0));
1742 Args.push_back(CI->getArgOperand(1));
1743 Args.push_back(CI->getArgOperand(2));
1744 if (CI->getNumArgOperands() == 5)
1745 Args.push_back(CI->getArgOperand(4));
1747     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1748                              Args);
1749 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1750 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1752 // Integer compare intrinsics.
1753 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1754 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1755 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1756 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1757 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1758 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1759 Name.startswith("avx512.cvtw2mask.") ||
1760 Name.startswith("avx512.cvtd2mask.") ||
1761 Name.startswith("avx512.cvtq2mask."))) {
1762 Value *Op = CI->getArgOperand(0);
1763 Value *Zero = llvm::Constant::getNullValue(Op->getType());
1764 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1765 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1766 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1767 Name == "ssse3.pabs.w.128" ||
1768 Name == "ssse3.pabs.d.128" ||
1769 Name.startswith("avx2.pabs") ||
1770 Name.startswith("avx512.mask.pabs"))) {
1771 Rep = upgradeAbs(Builder, *CI);
1772 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1773 Name == "sse2.pmaxs.w" ||
1774 Name == "sse41.pmaxsd" ||
1775 Name.startswith("avx2.pmaxs") ||
1776 Name.startswith("avx512.mask.pmaxs"))) {
1777 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1778 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1779 Name == "sse41.pmaxuw" ||
1780 Name == "sse41.pmaxud" ||
1781 Name.startswith("avx2.pmaxu") ||
1782 Name.startswith("avx512.mask.pmaxu"))) {
1783 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1784 } else if (IsX86 && (Name == "sse41.pminsb" ||
1785 Name == "sse2.pmins.w" ||
1786 Name == "sse41.pminsd" ||
1787 Name.startswith("avx2.pmins") ||
1788 Name.startswith("avx512.mask.pmins"))) {
1789 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1790 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1791 Name == "sse41.pminuw" ||
1792 Name == "sse41.pminud" ||
1793 Name.startswith("avx2.pminu") ||
1794 Name.startswith("avx512.mask.pminu"))) {
1795 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1796 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1797 Name == "avx2.pmulu.dq" ||
1798 Name == "avx512.pmulu.dq.512" ||
1799 Name.startswith("avx512.mask.pmulu.dq."))) {
1800 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1801 } else if (IsX86 && (Name == "sse41.pmuldq" ||
1802 Name == "avx2.pmul.dq" ||
1803 Name == "avx512.pmul.dq.512" ||
1804 Name.startswith("avx512.mask.pmul.dq."))) {
1805 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1806 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1807 Name == "sse2.cvtsi2sd" ||
1808 Name == "sse.cvtsi642ss" ||
1809 Name == "sse2.cvtsi642sd")) {
1810 Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1811 CI->getType()->getVectorElementType());
1812 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1813 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1814 Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1815 CI->getType()->getVectorElementType());
1816 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1817 } else if (IsX86 && Name == "sse2.cvtss2sd") {
1818 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1819 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1820 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1821 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1822 Name == "sse2.cvtdq2ps" ||
1823 Name == "avx.cvtdq2.pd.256" ||
1824 Name == "avx.cvtdq2.ps.256" ||
1825 Name.startswith("avx512.mask.cvtdq2pd.") ||
1826 Name.startswith("avx512.mask.cvtudq2pd.") ||
1827 Name == "avx512.mask.cvtdq2ps.128" ||
1828 Name == "avx512.mask.cvtdq2ps.256" ||
1829 Name == "avx512.mask.cvtudq2ps.128" ||
1830 Name == "avx512.mask.cvtudq2ps.256" ||
1831 Name == "avx512.mask.cvtqq2pd.128" ||
1832 Name == "avx512.mask.cvtqq2pd.256" ||
1833 Name == "avx512.mask.cvtuqq2pd.128" ||
1834 Name == "avx512.mask.cvtuqq2pd.256" ||
1835 Name == "sse2.cvtps2pd" ||
1836 Name == "avx.cvt.ps2.pd.256" ||
1837 Name == "avx512.mask.cvtps2pd.128" ||
1838 Name == "avx512.mask.cvtps2pd.256")) {
1839 Type *DstTy = CI->getType();
1840 Rep = CI->getArgOperand(0);
1842 unsigned NumDstElts = DstTy->getVectorNumElements();
1843 if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1844 assert(NumDstElts == 2 && "Unexpected vector size");
1845 uint32_t ShuffleMask[2] = { 0, 1 };
1846       Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1847     }
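    // e.g. sse2.cvtdq2pd takes a <4 x i32> source but produces <2 x double>,
    // so only the low two source lanes are kept before the conversion below.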
1849 bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1850 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1851     if (IsPS2PD)
1852       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1853 else if (IsUnsigned)
1854 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1855     else
1856       Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1858 if (CI->getNumArgOperands() == 3)
1859 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1860 CI->getArgOperand(1));
1861 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1862 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1863                             CI->getArgOperand(1), CI->getArgOperand(2),
1864                             /*Aligned*/false);
1865 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1866 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1867                             CI->getArgOperand(1), CI->getArgOperand(2),
1868                             /*Aligned*/true);
1869 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1870 Type *ResultTy = CI->getType();
1871 Type *PtrTy = ResultTy->getVectorElementType();
1873 // Cast the pointer to element type.
1874 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1875 llvm::PointerType::getUnqual(PtrTy));
1877 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1878 ResultTy->getVectorNumElements());
1880 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
1881                                               Intrinsic::masked_expandload,
1882                                               ResultTy);
1883 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1884 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1885 Type *ResultTy = CI->getArgOperand(1)->getType();
1886 Type *PtrTy = ResultTy->getVectorElementType();
1888 // Cast the pointer to element type.
1889 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1890 llvm::PointerType::getUnqual(PtrTy));
1892 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1893 ResultTy->getVectorNumElements());
1895 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
1896                                               Intrinsic::masked_compressstore,
1897                                               ResultTy);
1898 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1899 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1900 Intrinsic::ID intID;
1901 if (Name.endswith("ub"))
1902 intID = Intrinsic::x86_xop_vpcomub;
1903 else if (Name.endswith("uw"))
1904 intID = Intrinsic::x86_xop_vpcomuw;
1905 else if (Name.endswith("ud"))
1906 intID = Intrinsic::x86_xop_vpcomud;
1907 else if (Name.endswith("uq"))
1908 intID = Intrinsic::x86_xop_vpcomuq;
1909 else if (Name.endswith("b"))
1910 intID = Intrinsic::x86_xop_vpcomb;
1911 else if (Name.endswith("w"))
1912 intID = Intrinsic::x86_xop_vpcomw;
1913 else if (Name.endswith("d"))
1914 intID = Intrinsic::x86_xop_vpcomd;
1915 else if (Name.endswith("q"))
1916 intID = Intrinsic::x86_xop_vpcomq;
1918 llvm_unreachable("Unknown suffix");
1920 Name = Name.substr(9); // strip off "xop.vpcom"
1922 if (Name.startswith("lt"))
1924 else if (Name.startswith("le"))
1926 else if (Name.startswith("gt"))
1928 else if (Name.startswith("ge"))
1930 else if (Name.startswith("eq"))
1932 else if (Name.startswith("ne"))
1934 else if (Name.startswith("false"))
1936 else if (Name.startswith("true"))
1939 llvm_unreachable("Unknown condition");
1941 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1942     Rep =
1943         Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1944 Builder.getInt8(Imm)});
1945 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1946 Value *Sel = CI->getArgOperand(2);
1947 Value *NotSel = Builder.CreateNot(Sel);
1948 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1949 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1950 Rep = Builder.CreateOr(Sel0, Sel1);
1951 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1952 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1953 Intrinsic::x86_sse42_crc32_32_8);
1954 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1955 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1956 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1957 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
1958 Name.startswith("avx512.vbroadcast.s"))) {
1959 // Replace broadcasts with a series of insertelements.
1960 Type *VecTy = CI->getType();
1961 Type *EltTy = VecTy->getVectorElementType();
1962 unsigned EltNum = VecTy->getVectorNumElements();
1963 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1964 EltTy->getPointerTo());
1965 Value *Load = Builder.CreateLoad(EltTy, Cast);
1966 Type *I32Ty = Type::getInt32Ty(C);
1967 Rep = UndefValue::get(VecTy);
1968 for (unsigned I = 0; I < EltNum; ++I)
1969 Rep = Builder.CreateInsertElement(Rep, Load,
1970 ConstantInt::get(I32Ty, I));
1971 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1972 Name.startswith("sse41.pmovzx") ||
1973 Name.startswith("avx2.pmovsx") ||
1974 Name.startswith("avx2.pmovzx") ||
1975 Name.startswith("avx512.mask.pmovsx") ||
1976 Name.startswith("avx512.mask.pmovzx"))) {
1977 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1978 VectorType *DstTy = cast<VectorType>(CI->getType());
1979 unsigned NumDstElts = DstTy->getNumElements();
1981 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1982 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1983     for (unsigned i = 0; i != NumDstElts; ++i)
1984       ShuffleMask[i] = i;
1986 Value *SV = Builder.CreateShuffleVector(
1987 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1989 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1990 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1991 : Builder.CreateZExt(SV, DstTy);
1992 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1993 if (CI->getNumArgOperands() == 3)
1994 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1995 CI->getArgOperand(1));
1996 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1997 Name == "avx2.vbroadcasti128")) {
1998 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1999 Type *EltTy = CI->getType()->getVectorElementType();
2000 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2001 Type *VT = VectorType::get(EltTy, NumSrcElts);
2002 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2003 PointerType::getUnqual(VT));
2004 Value *Load = Builder.CreateAlignedLoad(Op, 1);
2005 if (NumSrcElts == 2)
2006       Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2007                                         { 0, 1, 0, 1 });
2008     else
2009 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2010 { 0, 1, 2, 3, 0, 1, 2, 3 });
2011 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2012 Name.startswith("avx512.mask.shuf.f"))) {
2013 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2014 Type *VT = CI->getType();
2015 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2016 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2017 unsigned ControlBitsMask = NumLanes - 1;
2018 unsigned NumControlBits = NumLanes / 2;
2019 SmallVector<uint32_t, 8> ShuffleMask(0);
2021 for (unsigned l = 0; l != NumLanes; ++l) {
2022 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2023 // We actually need the other source.
2024 if (l >= NumLanes / 2)
2025 LaneMask += NumLanes;
2026 for (unsigned i = 0; i != NumElementsInLane; ++i)
2027 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2029 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2030 CI->getArgOperand(1), ShuffleMask);
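    // Worked example (shuf.i32x4 on 512-bit vectors): NumLanes == 4 and
    // NumElementsInLane == 4, so Imm == 0xE4 selects lanes 0 and 1 from
    // operand 0 and lanes 2 and 3 from operand 1, i.e. ShuffleMask ==
    // { 0..3, 4..7, 24..27, 28..31 }.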
2031 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2032 CI->getArgOperand(3));
2033   } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2034 Name.startswith("avx512.mask.broadcasti"))) {
2035 unsigned NumSrcElts =
2036 CI->getArgOperand(0)->getType()->getVectorNumElements();
2037 unsigned NumDstElts = CI->getType()->getVectorNumElements();
2039 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2040 for (unsigned i = 0; i != NumDstElts; ++i)
2041 ShuffleMask[i] = i % NumSrcElts;
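    // e.g. broadcastf32x4 into a 512-bit result: NumSrcElts == 4 and
    // NumDstElts == 16, so the mask repeats {0,1,2,3} four times.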
2043 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2044                                       CI->getArgOperand(0),
2045                                       ShuffleMask);
2046 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2047 CI->getArgOperand(1));
2048 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2049 Name.startswith("avx2.vbroadcast") ||
2050 Name.startswith("avx512.pbroadcast") ||
2051 Name.startswith("avx512.mask.broadcast.s"))) {
2052 // Replace vp?broadcasts with a vector shuffle.
2053 Value *Op = CI->getArgOperand(0);
2054 unsigned NumElts = CI->getType()->getVectorNumElements();
2055 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2056 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2057 Constant::getNullValue(MaskTy));
2059 if (CI->getNumArgOperands() == 3)
2060 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2061 CI->getArgOperand(1));
2062 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2063 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2064 CI->getArgOperand(1),
2065 CI->getArgOperand(2),
2066 CI->getArgOperand(3),
2067 CI->getArgOperand(4),
2069 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2070 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2071 CI->getArgOperand(1),
2072 CI->getArgOperand(2),
2073 CI->getArgOperand(3),
2074 CI->getArgOperand(4),
2076 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2077 Name == "avx2.psll.dq")) {
2078 // 128/256-bit shift left specified in bits.
2079 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2080 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2081 Shift / 8); // Shift is in bits.
2082 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2083 Name == "avx2.psrl.dq")) {
2084 // 128/256-bit shift right specified in bits.
2085 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2086 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2087 Shift / 8); // Shift is in bits.
2088 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2089 Name == "avx2.psll.dq.bs" ||
2090 Name == "avx512.psll.dq.512")) {
2091 // 128/256/512-bit shift left specified in bytes.
2092 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2093 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2094 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2095 Name == "avx2.psrl.dq.bs" ||
2096 Name == "avx512.psrl.dq.512")) {
2097 // 128/256/512-bit shift right specified in bytes.
2098 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2099 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2100 } else if (IsX86 && (Name == "sse41.pblendw" ||
2101 Name.startswith("sse41.blendp") ||
2102 Name.startswith("avx.blend.p") ||
2103 Name == "avx2.pblendw" ||
2104 Name.startswith("avx2.pblendd."))) {
2105 Value *Op0 = CI->getArgOperand(0);
2106 Value *Op1 = CI->getArgOperand(1);
2107 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2108 VectorType *VecTy = cast<VectorType>(CI->getType());
2109 unsigned NumElts = VecTy->getNumElements();
2111 SmallVector<uint32_t, 16> Idxs(NumElts);
2112 for (unsigned i = 0; i != NumElts; ++i)
2113 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
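      // Bit i (modulo 8) of the immediate picks element i from Op1 when set
      // and from Op0 when clear; e.g. sse41.pblendw with Imm == 0xF0 keeps
      // words 0-3 from Op0 and takes words 4-7 from Op1.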
2115 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2116 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2117 Name == "avx2.vinserti128" ||
2118 Name.startswith("avx512.mask.insert"))) {
2119 Value *Op0 = CI->getArgOperand(0);
2120 Value *Op1 = CI->getArgOperand(1);
2121 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2122 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2123 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2124 unsigned Scale = DstNumElts / SrcNumElts;
2126 // Mask off the high bits of the immediate value; hardware ignores those.
2129 // Extend the second operand into a vector the size of the destination.
2130 Value *UndefV = UndefValue::get(Op1->getType());
2131 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2132     for (unsigned i = 0; i != SrcNumElts; ++i)
2133       Idxs[i] = i;
2134 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2135 Idxs[i] = SrcNumElts;
2136 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2138 // Insert the second operand into the first operand.
2140 // Note that there is no guarantee that instruction lowering will actually
2141 // produce a vinsertf128 instruction for the created shuffles. In
2142 // particular, the 0 immediate case involves no lane changes, so it can
2143 // be handled as a blend.
2145 // Example of shuffle mask for 32-bit elements:
2146 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2147 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2149     // First fill with identity mask.
2150     for (unsigned i = 0; i != DstNumElts; ++i)
2151       Idxs[i] = i;
2152 // Then replace the elements where we need to insert.
2153 for (unsigned i = 0; i != SrcNumElts; ++i)
2154 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2155 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2157 // If the intrinsic has a mask operand, handle that.
2158 if (CI->getNumArgOperands() == 5)
2159 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2160 CI->getArgOperand(3));
2161 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2162 Name == "avx2.vextracti128" ||
2163 Name.startswith("avx512.mask.vextract"))) {
2164 Value *Op0 = CI->getArgOperand(0);
2165 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2166 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2167 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2168 unsigned Scale = SrcNumElts / DstNumElts;
2170 // Mask off the high bits of the immediate value; hardware ignores those.
2173 // Get indexes for the subvector of the input vector.
2174 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2175 for (unsigned i = 0; i != DstNumElts; ++i) {
2176       Idxs[i] = i + (Imm * DstNumElts);
2177     }
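    // e.g. avx.vextractf128 with Imm == 1 on a <8 x float> source extracts
    // elements 4..7 into the <4 x float> result.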
2178 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2180 // If the intrinsic has a mask operand, handle that.
2181 if (CI->getNumArgOperands() == 4)
2182 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2183 CI->getArgOperand(2));
2184 } else if (!IsX86 && Name == "stackprotectorcheck") {
2186 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2187 Name.startswith("avx512.mask.perm.di."))) {
2188 Value *Op0 = CI->getArgOperand(0);
2189 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2190 VectorType *VecTy = cast<VectorType>(CI->getType());
2191 unsigned NumElts = VecTy->getNumElements();
2193 SmallVector<uint32_t, 8> Idxs(NumElts);
2194 for (unsigned i = 0; i != NumElts; ++i)
2195 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
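      // Each 2-bit immediate field selects an element within its 4-element
      // group; e.g. Imm == 0x1B reverses each group, giving indices
      // {3, 2, 1, 0, ...}.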
2197 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2199 if (CI->getNumArgOperands() == 4)
2200 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2201 CI->getArgOperand(2));
2202 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2203 Name == "avx2.vperm2i128")) {
2204 // The immediate permute control byte looks like this:
2205 // [1:0] - select 128 bits from sources for low half of destination
2207 // [3] - zero low half of destination
2208 // [5:4] - select 128 bits from sources for high half of destination
2210 // [7] - zero high half of destination
2212 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2214 unsigned NumElts = CI->getType()->getVectorNumElements();
2215 unsigned HalfSize = NumElts / 2;
2216 SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2218 // Determine which operand(s) are actually in use for this instruction.
2219 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2220 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2222 // If needed, replace operands based on zero mask.
2223 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2224 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2226 // Permute low half of result.
2227 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2228 for (unsigned i = 0; i < HalfSize; ++i)
2229 ShuffleMask[i] = StartIndex + i;
2231 // Permute high half of result.
2232 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2233 for (unsigned i = 0; i < HalfSize; ++i)
2234 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
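    // e.g. Imm == 0x20 builds the common "concatenate low halves" pattern: the
    // low half of the result comes from operand 0's low half and the high half
    // from operand 1's low half.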
2236 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2238 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2239 Name == "sse2.pshuf.d" ||
2240 Name.startswith("avx512.mask.vpermil.p") ||
2241 Name.startswith("avx512.mask.pshuf.d."))) {
2242 Value *Op0 = CI->getArgOperand(0);
2243 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2244 VectorType *VecTy = cast<VectorType>(CI->getType());
2245 unsigned NumElts = VecTy->getNumElements();
2246 // Calculate the size of each index in the immediate.
2247 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2248 unsigned IdxMask = ((1 << IdxSize) - 1);
2250 SmallVector<uint32_t, 8> Idxs(NumElts);
2251 // Lookup the bits for this element, wrapping around the immediate every
2252 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2253 // to offset by the first index of each group.
2254 for (unsigned i = 0; i != NumElts; ++i)
2255 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
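      // e.g. sse2.pshuf.d with Imm == 0x1B: IdxSize == 2, so the four 2-bit
      // fields give indices {3, 2, 1, 0}, reversing the vector.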
2257 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2259 if (CI->getNumArgOperands() == 4)
2260 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2261 CI->getArgOperand(2));
2262 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2263 Name.startswith("avx512.mask.pshufl.w."))) {
2264 Value *Op0 = CI->getArgOperand(0);
2265 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2266 unsigned NumElts = CI->getType()->getVectorNumElements();
2268 SmallVector<uint32_t, 16> Idxs(NumElts);
2269 for (unsigned l = 0; l != NumElts; l += 8) {
2270 for (unsigned i = 0; i != 4; ++i)
2271 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2272 for (unsigned i = 4; i != 8; ++i)
2273 Idxs[i + l] = i + l;
2276 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2278 if (CI->getNumArgOperands() == 4)
2279 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2280 CI->getArgOperand(2));
2281 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2282 Name.startswith("avx512.mask.pshufh.w."))) {
2283 Value *Op0 = CI->getArgOperand(0);
2284 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2285 unsigned NumElts = CI->getType()->getVectorNumElements();
2287 SmallVector<uint32_t, 16> Idxs(NumElts);
2288 for (unsigned l = 0; l != NumElts; l += 8) {
2289 for (unsigned i = 0; i != 4; ++i)
2290 Idxs[i + l] = i + l;
2291 for (unsigned i = 0; i != 4; ++i)
2292 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2295 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2297 if (CI->getNumArgOperands() == 4)
2298 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2299 CI->getArgOperand(2));
2300 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2301 Value *Op0 = CI->getArgOperand(0);
2302 Value *Op1 = CI->getArgOperand(1);
2303 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2304 unsigned NumElts = CI->getType()->getVectorNumElements();
2306 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2307 unsigned HalfLaneElts = NumLaneElts / 2;
2309 SmallVector<uint32_t, 16> Idxs(NumElts);
2310 for (unsigned i = 0; i != NumElts; ++i) {
2311 // Base index is the starting element of the lane.
2312 Idxs[i] = i - (i % NumLaneElts);
2313       // If we are halfway through the lane, switch to the other source.
2314       if ((i % NumLaneElts) >= HalfLaneElts)
2315         Idxs[i] += NumElts;
2316       // Now select the specific element by adding HalfLaneElts bits from the
2317       // immediate, wrapping around the immediate every 8 bits.
2318 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
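      // e.g. shufps on <4 x float>: result elements 0-1 come from Op0 at
      // positions Imm[1:0] and Imm[3:2], elements 2-3 from Op1 at positions
      // Imm[5:4] and Imm[7:6].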
2321 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2323 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2324 CI->getArgOperand(3));
2325 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2326 Name.startswith("avx512.mask.movshdup") ||
2327 Name.startswith("avx512.mask.movsldup"))) {
2328 Value *Op0 = CI->getArgOperand(0);
2329 unsigned NumElts = CI->getType()->getVectorNumElements();
2330 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2332 unsigned Offset = 0;
2333     if (Name.startswith("avx512.mask.movshdup."))
2334       Offset = 1;
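    // movsldup and movddup duplicate the even-numbered elements of each lane,
    // movshdup (Offset == 1) duplicates the odd-numbered ones.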
2336 SmallVector<uint32_t, 16> Idxs(NumElts);
2337 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2338 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2339 Idxs[i + l + 0] = i + l + Offset;
2340 Idxs[i + l + 1] = i + l + Offset;
2343 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2345 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2346 CI->getArgOperand(1));
2347 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2348 Name.startswith("avx512.mask.unpckl."))) {
2349 Value *Op0 = CI->getArgOperand(0);
2350 Value *Op1 = CI->getArgOperand(1);
2351 int NumElts = CI->getType()->getVectorNumElements();
2352 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2354 SmallVector<uint32_t, 64> Idxs(NumElts);
2355 for (int l = 0; l != NumElts; l += NumLaneElts)
2356 for (int i = 0; i != NumLaneElts; ++i)
2357 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
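      // e.g. punpcklbw on 128-bit vectors interleaves the low 8 bytes:
      // { a0, b0, a1, b1, ..., a7, b7 }.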
2359 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2361 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2362 CI->getArgOperand(2));
2363 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2364 Name.startswith("avx512.mask.unpckh."))) {
2365 Value *Op0 = CI->getArgOperand(0);
2366 Value *Op1 = CI->getArgOperand(1);
2367 int NumElts = CI->getType()->getVectorNumElements();
2368 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2370 SmallVector<uint32_t, 64> Idxs(NumElts);
2371 for (int l = 0; l != NumElts; l += NumLaneElts)
2372 for (int i = 0; i != NumLaneElts; ++i)
2373 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2375 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2377 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2378 CI->getArgOperand(2));
2379 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
2380 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
2381 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2382 CI->getArgOperand(2));
2383 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
2384 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
2385 CI->getArgOperand(1));
2386 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2387 CI->getArgOperand(2));
2388 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
2389 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
2390 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2391 CI->getArgOperand(2));
2392 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
2393 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
2394 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2395 CI->getArgOperand(2));
2396 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
2397 VectorType *FTy = cast<VectorType>(CI->getType());
2398 VectorType *ITy = VectorType::getInteger(FTy);
2399 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2400 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2401 Rep = Builder.CreateBitCast(Rep, FTy);
2402 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2403 CI->getArgOperand(2));
2404 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
2405 VectorType *FTy = cast<VectorType>(CI->getType());
2406 VectorType *ITy = VectorType::getInteger(FTy);
2407 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2408 Rep = Builder.CreateAnd(Rep,
2409 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2410 Rep = Builder.CreateBitCast(Rep, FTy);
2411 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2412 CI->getArgOperand(2));
2413 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
2414 VectorType *FTy = cast<VectorType>(CI->getType());
2415 VectorType *ITy = VectorType::getInteger(FTy);
2416 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2417 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2418 Rep = Builder.CreateBitCast(Rep, FTy);
2419 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2420 CI->getArgOperand(2));
2421 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
2422 VectorType *FTy = cast<VectorType>(CI->getType());
2423 VectorType *ITy = VectorType::getInteger(FTy);
2424 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2425 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2426 Rep = Builder.CreateBitCast(Rep, FTy);
2427 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2428 CI->getArgOperand(2));
2429 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2430 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2431 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2432 CI->getArgOperand(2));
2433 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2434 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2435 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2436 CI->getArgOperand(2));
2437 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2438 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2439 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2440 CI->getArgOperand(2));
2441 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2442 if (Name.endswith(".512")) {
2444 if (Name[17] == 's')
2445 IID = Intrinsic::x86_avx512_add_ps_512;
2447 IID = Intrinsic::x86_avx512_add_pd_512;
2449 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2450 { CI->getArgOperand(0), CI->getArgOperand(1),
2451 CI->getArgOperand(4) });
2453 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2455 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2456 CI->getArgOperand(2));
2457 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2458 if (Name.endswith(".512")) {
2460 if (Name[17] == 's')
2461 IID = Intrinsic::x86_avx512_div_ps_512;
2463 IID = Intrinsic::x86_avx512_div_pd_512;
2465 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2466 { CI->getArgOperand(0), CI->getArgOperand(1),
2467 CI->getArgOperand(4) });
2469 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2471 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2472 CI->getArgOperand(2));
2473 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2474 if (Name.endswith(".512")) {
2476 if (Name[17] == 's')
2477 IID = Intrinsic::x86_avx512_mul_ps_512;
2479 IID = Intrinsic::x86_avx512_mul_pd_512;
2481 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2482 { CI->getArgOperand(0), CI->getArgOperand(1),
2483 CI->getArgOperand(4) });
2485 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2487 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2488 CI->getArgOperand(2));
2489 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2490 if (Name.endswith(".512")) {
2492 if (Name[17] == 's')
2493 IID = Intrinsic::x86_avx512_sub_ps_512;
2495 IID = Intrinsic::x86_avx512_sub_pd_512;
2497 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2498 { CI->getArgOperand(0), CI->getArgOperand(1),
2499 CI->getArgOperand(4) });
2501 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2503 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2504 CI->getArgOperand(2));
2505 } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
2506 Name.drop_front(18) == ".512") {
2508 if (Name[17] == 's')
2509 IID = Intrinsic::x86_avx512_max_ps_512;
2511 IID = Intrinsic::x86_avx512_max_pd_512;
2513 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2514 { CI->getArgOperand(0), CI->getArgOperand(1),
2515 CI->getArgOperand(4) });
2516 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2517 CI->getArgOperand(2));
2518 } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
2519 Name.drop_front(18) == ".512") {
2521 if (Name[17] == 's')
2522 IID = Intrinsic::x86_avx512_min_ps_512;
2524 IID = Intrinsic::x86_avx512_min_pd_512;
2526 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2527 { CI->getArgOperand(0), CI->getArgOperand(1),
2528 CI->getArgOperand(4) });
2529 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2530 CI->getArgOperand(2));
2531 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2532     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2533                                                        Intrinsic::ctlz,
2534                                                        CI->getType()),
2535 { CI->getArgOperand(0), Builder.getInt1(false) });
2536 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2537 CI->getArgOperand(1));
2538 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2539 bool IsImmediate = Name[16] == 'i' ||
2540 (Name.size() > 18 && Name[18] == 'i');
2541 bool IsVariable = Name[16] == 'v';
2542 char Size = Name[16] == '.' ? Name[17] :
2543 Name[17] == '.' ? Name[18] :
2544                 Name[18] == '.' ? Name[19] :
2545                                   Name[20];
2547     Intrinsic::ID IID;
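    // Examples of the name decoding: "avx512.mask.psll.d.128" has '.' at index
    // 16, so Size == 'd'; "avx512.mask.pslli.q" has 'i' at index 16 (an
    // immediate shift); "avx512.mask.psllv8.si" has 'v' at index 16 (a
    // variable shift) with the element type read further along the name.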
2548 if (IsVariable && Name[17] != '.') {
2549 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2550 IID = Intrinsic::x86_avx2_psllv_q;
2551 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2552 IID = Intrinsic::x86_avx2_psllv_q_256;
2553 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2554 IID = Intrinsic::x86_avx2_psllv_d;
2555 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2556 IID = Intrinsic::x86_avx2_psllv_d_256;
2557 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2558 IID = Intrinsic::x86_avx512_psllv_w_128;
2559 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2560 IID = Intrinsic::x86_avx512_psllv_w_256;
2561 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2562 IID = Intrinsic::x86_avx512_psllv_w_512;
2564 llvm_unreachable("Unexpected size");
2565 } else if (Name.endswith(".128")) {
2566 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2567 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2568 : Intrinsic::x86_sse2_psll_d;
2569 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2570 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2571 : Intrinsic::x86_sse2_psll_q;
2572 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2573 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2574 : Intrinsic::x86_sse2_psll_w;
2576 llvm_unreachable("Unexpected size");
2577 } else if (Name.endswith(".256")) {
2578 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2579 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2580 : Intrinsic::x86_avx2_psll_d;
2581 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2582 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2583 : Intrinsic::x86_avx2_psll_q;
2584 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2585 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2586 : Intrinsic::x86_avx2_psll_w;
2588 llvm_unreachable("Unexpected size");
2590 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2591 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2592 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2593 Intrinsic::x86_avx512_psll_d_512;
2594 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2595 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2596 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2597 Intrinsic::x86_avx512_psll_q_512;
2598 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2599 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2600 : Intrinsic::x86_avx512_psll_w_512;
2602 llvm_unreachable("Unexpected size");
2605 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2606 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2607 bool IsImmediate = Name[16] == 'i' ||
2608 (Name.size() > 18 && Name[18] == 'i');
2609 bool IsVariable = Name[16] == 'v';
2610 char Size = Name[16] == '.' ? Name[17] :
2611 Name[17] == '.' ? Name[18] :
2612                 Name[18] == '.' ? Name[19] :
2613                                   Name[20];
2615     Intrinsic::ID IID;
2616 if (IsVariable && Name[17] != '.') {
2617 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2618 IID = Intrinsic::x86_avx2_psrlv_q;
2619 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2620 IID = Intrinsic::x86_avx2_psrlv_q_256;
2621 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2622 IID = Intrinsic::x86_avx2_psrlv_d;
2623 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2624 IID = Intrinsic::x86_avx2_psrlv_d_256;
2625 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2626 IID = Intrinsic::x86_avx512_psrlv_w_128;
2627 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2628 IID = Intrinsic::x86_avx512_psrlv_w_256;
2629 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2630 IID = Intrinsic::x86_avx512_psrlv_w_512;
2632 llvm_unreachable("Unexpected size");
2633 } else if (Name.endswith(".128")) {
2634 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2635 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2636 : Intrinsic::x86_sse2_psrl_d;
2637 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2638 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2639 : Intrinsic::x86_sse2_psrl_q;
2640 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2641 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2642 : Intrinsic::x86_sse2_psrl_w;
2644 llvm_unreachable("Unexpected size");
2645 } else if (Name.endswith(".256")) {
2646 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2647 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2648 : Intrinsic::x86_avx2_psrl_d;
2649 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2650 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2651 : Intrinsic::x86_avx2_psrl_q;
2652 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2653 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2654 : Intrinsic::x86_avx2_psrl_w;
2656 llvm_unreachable("Unexpected size");
2658 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2659 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2660 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2661 Intrinsic::x86_avx512_psrl_d_512;
2662 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2663 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2664 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2665 Intrinsic::x86_avx512_psrl_q_512;
2666         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
2667 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2668 : Intrinsic::x86_avx512_psrl_w_512;
2670 llvm_unreachable("Unexpected size");
2673 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2674 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2675 bool IsImmediate = Name[16] == 'i' ||
2676 (Name.size() > 18 && Name[18] == 'i');
2677 bool IsVariable = Name[16] == 'v';
2678 char Size = Name[16] == '.' ? Name[17] :
2679 Name[17] == '.' ? Name[18] :
2680                 Name[18] == '.' ? Name[19] :
2681                                   Name[20];
2683     Intrinsic::ID IID;
2684 if (IsVariable && Name[17] != '.') {
2685 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2686 IID = Intrinsic::x86_avx2_psrav_d;
2687 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2688 IID = Intrinsic::x86_avx2_psrav_d_256;
2689 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2690 IID = Intrinsic::x86_avx512_psrav_w_128;
2691 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2692 IID = Intrinsic::x86_avx512_psrav_w_256;
2693 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2694 IID = Intrinsic::x86_avx512_psrav_w_512;
2696 llvm_unreachable("Unexpected size");
2697 } else if (Name.endswith(".128")) {
2698 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2699 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2700 : Intrinsic::x86_sse2_psra_d;
2701 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2702 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2703 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2704 Intrinsic::x86_avx512_psra_q_128;
2705 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2706 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2707 : Intrinsic::x86_sse2_psra_w;
2709 llvm_unreachable("Unexpected size");
2710 } else if (Name.endswith(".256")) {
2711 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2712 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2713 : Intrinsic::x86_avx2_psra_d;
2714 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2715 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2716 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2717 Intrinsic::x86_avx512_psra_q_256;
2718 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2719 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2720 : Intrinsic::x86_avx2_psra_w;
2722 llvm_unreachable("Unexpected size");
2724 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2725 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2726 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2727 Intrinsic::x86_avx512_psra_d_512;
2728 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2729 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2730 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2731 Intrinsic::x86_avx512_psra_q_512;
2732 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2733 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2734 : Intrinsic::x86_avx512_psra_w_512;
2736 llvm_unreachable("Unexpected size");
2739 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2740 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2741 Rep = upgradeMaskedMove(Builder, *CI);
2742 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2743 Rep = UpgradeMaskToInt(Builder, *CI);
2744 } else if (IsX86 && Name.endswith(".movntdqa")) {
2745 Module *M = F->getParent();
2746 MDNode *Node = MDNode::get(
2747 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2749 Value *Ptr = CI->getArgOperand(0);
2750 VectorType *VTy = cast<VectorType>(CI->getType());
2752     // Convert the type of the pointer to a pointer to the loaded type.
2753     Value *BC =
2754         Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2755 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2756 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2759 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2760 Name.startswith("avx512.mask.pavg"))) {
2761 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2762 // llvm.x86.avx512.mask.pavg.b/w
2763 Value *A = CI->getArgOperand(0);
2764 Value *B = CI->getArgOperand(1);
2765 VectorType *ZextType = VectorType::getExtendedElementVectorType(
2766 cast<VectorType>(A->getType()));
2767 Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2768 Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2769 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2770 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2771 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2772 Rep = Builder.CreateTrunc(ShiftR, A->getType());
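    // This computes the unsigned rounding average (a + b + 1) >> 1 in a
    // widened element type so the intermediate add cannot wrap.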
2773 if (CI->getNumArgOperands() > 2) {
2774 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2775 CI->getArgOperand(2));
2777 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2778 Name.startswith("fma.vfmsub.") ||
2779 Name.startswith("fma.vfnmadd.") ||
2780 Name.startswith("fma.vfnmsub."))) {
2781 bool NegMul = Name[6] == 'n';
2782 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2783 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
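    // e.g. "fma.vfnmsub.sd": the 'n' at index 6 negates the product, the 's'
    // at index 8 negates the addend, and the 's' at index 12 marks the scalar
    // (.ss/.sd) form.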
2785 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2786 CI->getArgOperand(2) };
2789 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2790 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2791 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2794 if (NegMul && !IsScalar)
2795 Ops[0] = Builder.CreateFNeg(Ops[0]);
2796 if (NegMul && IsScalar)
2797 Ops[1] = Builder.CreateFNeg(Ops[1]);
2799 Ops[2] = Builder.CreateFNeg(Ops[2]);
2801     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2802                                                        Intrinsic::fma,
2803                                                        Ops[0]->getType()),
2804                              Ops);
2806     if (IsScalar)
2807       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2808                                         (uint64_t)0);
2809 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2810 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2811 CI->getArgOperand(2) };
2813 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2814 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2815 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);
      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
2824 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2825 Name.startswith("avx512.maskz.vfmadd.s") ||
2826 Name.startswith("avx512.mask3.vfmadd.s") ||
2827 Name.startswith("avx512.mask3.vfmsub.s") ||
2828 Name.startswith("avx512.mask3.vfnmsub.s"))) {
2829 bool IsMask3 = Name[11] == '3';
2830 bool IsMaskZ = Name[11] == 'z';
2831 // Drop the "avx512.mask." to make it easier.
2832 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2833 bool NegMul = Name[2] == 'n';
2834 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2836 Value *A = CI->getArgOperand(0);
2837 Value *B = CI->getArgOperand(1);
2838 Value *C = CI->getArgOperand(2);
2840 if (NegMul && (IsMask3 || IsMaskZ))
2841 A = Builder.CreateFNeg(A);
2842 if (NegMul && !(IsMask3 || IsMaskZ))
2843 B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);
2847 A = Builder.CreateExtractElement(A, (uint64_t)0);
2848 B = Builder.CreateExtractElement(B, (uint64_t)0);
2849 C = Builder.CreateExtractElement(C, (uint64_t)0);
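      // A rounding-mode operand other than 4 (CUR_DIRECTION) cannot be
      // expressed with llvm.fma, so fall back to the rounding-aware AVX-512
      // scalar FMA intrinsics below.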
2851 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2852 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2853 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
2860 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
2861 Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }
      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;
2872 // For Mask3 with NegAcc, we need to create a new extractelement that
2873 // avoids the negation above.
2874 if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);
      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
2882 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2883 Name.startswith("avx512.mask.vfnmadd.p") ||
2884 Name.startswith("avx512.mask.vfnmsub.p") ||
2885 Name.startswith("avx512.mask3.vfmadd.p") ||
2886 Name.startswith("avx512.mask3.vfmsub.p") ||
2887 Name.startswith("avx512.mask3.vfnmsub.p") ||
2888 Name.startswith("avx512.maskz.vfmadd.p"))) {
2889 bool IsMask3 = Name[11] == '3';
2890 bool IsMaskZ = Name[11] == 'z';
2891 // Drop the "avx512.mask." to make it easier.
2892 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2893 bool NegMul = Name[2] == 'n';
2894 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2896 Value *A = CI->getArgOperand(0);
2897 Value *B = CI->getArgOperand(1);
2898 Value *C = CI->getArgOperand(2);
2900 if (NegMul && (IsMask3 || IsMaskZ))
2901 A = Builder.CreateFNeg(A);
2902 if (NegMul && !(IsMask3 || IsMaskZ))
2903 B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);
2907 if (CI->getNumArgOperands() == 5 &&
2908 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2909 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in the string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;
2917 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2918 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }
2926 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
2927 IsMask3 ? CI->getArgOperand(2) :
2928 CI->getArgOperand(0);
2930 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
2931 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
2932 Name.startswith("fma.vfmsubadd.p"))) {
2933 bool IsSubAdd = Name[7] == 's';
2934 int NumElts = CI->getType()->getVectorNumElements();
2936 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2937 CI->getArgOperand(2) };
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
2941 Value *Odd = Builder.CreateCall(FMA, Ops);
2942 Ops[2] = Builder.CreateFNeg(Ops[2]);
2943 Value *Even = Builder.CreateCall(FMA, Ops);
      if (IsSubAdd)
        std::swap(Even, Odd);
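      // Interleave the two results: even result lanes come from Even, odd
      // lanes from Odd (mask indices >= NumElts select from the second
      // shuffle operand).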
2948 SmallVector<uint32_t, 32> Idxs(NumElts);
2949 for (int i = 0; i != NumElts; ++i)
2950 Idxs[i] = i + (i % 2) * NumElts;
2952 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
2953 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
2954 Name.startswith("avx512.mask3.vfmaddsub.p") ||
2955 Name.startswith("avx512.maskz.vfmaddsub.p") ||
2956 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
2957 bool IsMask3 = Name[11] == '3';
2958 bool IsMaskZ = Name[11] == 'z';
2959 // Drop the "avx512.mask." to make it easier.
2960 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2961 bool IsSubAdd = Name[3] == 's';
2962 if (CI->getNumArgOperands() == 5 &&
2963 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2964 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the string.
      if (Name[Name.size()-5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
2972 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2973 CI->getArgOperand(2), CI->getArgOperand(4) };
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Ops);
    } else {
      int NumElts = CI->getType()->getVectorNumElements();
2983 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2984 CI->getArgOperand(2) };
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
2988 Value *Odd = Builder.CreateCall(FMA, Ops);
2989 Ops[2] = Builder.CreateFNeg(Ops[2]);
2990 Value *Even = Builder.CreateCall(FMA, Ops);
      if (IsSubAdd)
        std::swap(Even, Odd);
2995 SmallVector<uint32_t, 32> Idxs(NumElts);
2996 for (int i = 0; i != NumElts; ++i)
2997 Idxs[i] = i + (i % 2) * NumElts;
      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }
3002 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3003 IsMask3 ? CI->getArgOperand(2) :
3004 CI->getArgOperand(0);
3006 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3007 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3008 Name.startswith("avx512.maskz.pternlog."))) {
3009 bool ZeroMask = Name[11] == 'z';
3010 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3011 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
3014 IID = Intrinsic::x86_avx512_pternlog_d_128;
3015 else if (VecWidth == 256 && EltWidth == 32)
3016 IID = Intrinsic::x86_avx512_pternlog_d_256;
3017 else if (VecWidth == 512 && EltWidth == 32)
3018 IID = Intrinsic::x86_avx512_pternlog_d_512;
3019 else if (VecWidth == 128 && EltWidth == 64)
3020 IID = Intrinsic::x86_avx512_pternlog_q_128;
3021 else if (VecWidth == 256 && EltWidth == 64)
3022 IID = Intrinsic::x86_avx512_pternlog_q_256;
3023 else if (VecWidth == 512 && EltWidth == 64)
3024 IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
3028 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3029 CI->getArgOperand(2), CI->getArgOperand(3) };
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
3032 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3033 : CI->getArgOperand(0);
3034 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3035 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3036 Name.startswith("avx512.maskz.vpmadd52"))) {
3037 bool ZeroMask = Name[11] == 'z';
3038 bool High = Name[20] == 'h' || Name[21] == 'h';
3039 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
3042 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3043 else if (VecWidth == 256 && !High)
3044 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3045 else if (VecWidth == 512 && !High)
3046 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3047 else if (VecWidth == 128 && High)
3048 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3049 else if (VecWidth == 256 && High)
3050 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3051 else if (VecWidth == 512 && High)
3052 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");
3056 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3057 CI->getArgOperand(2) };
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
3060 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3061 : CI->getArgOperand(0);
3062 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3063 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3064 Name.startswith("avx512.mask.vpermt2var.") ||
3065 Name.startswith("avx512.maskz.vpermt2var."))) {
3066 bool ZeroMask = Name[11] == 'z';
3067 bool IndexForm = Name[17] == 'i';
3068 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3069 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3070 bool IsFloat = CI->getType()->isFPOrFPVectorTy();
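    // Both the index form (vpermi2var) and the table form (vpermt2var) are
    // canonicalized onto the unmasked vpermi2var intrinsics; the t2var form
    // needs its first two operands swapped, and in both cases the masked
    // pass-through value is original operand 1.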
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32 && IsFloat)
3073 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
3074 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
3075 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
3076 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
3077 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
3078 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
3079 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
3080 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
3081 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
3082 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
3083 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
3084 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
3085 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
3086 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
3087 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
3088 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
3089 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
3090 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
3091 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
3092 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
3093 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
3094 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
3095 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
3096 else if (VecWidth == 128 && EltWidth == 16)
3097 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
3098 else if (VecWidth == 256 && EltWidth == 16)
3099 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
3100 else if (VecWidth == 512 && EltWidth == 16)
3101 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
3102 else if (VecWidth == 128 && EltWidth == 8)
3103 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
3104 else if (VecWidth == 256 && EltWidth == 8)
3105 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
3106 else if (VecWidth == 512 && EltWidth == 8)
3107 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
3111 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3112 CI->getArgOperand(2) };
    // If this isn't index form we need to swap operands 0 and 1.
    if (!IndexForm)
      std::swap(Args[0], Args[1]);
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
3120 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : Builder.CreateBitCast(CI->getArgOperand(1),
                                                         CI->getType());
3123 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3124 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3125 Name.startswith("avx512.maskz.vpdpbusd.") ||
3126 Name.startswith("avx512.mask.vpdpbusds.") ||
3127 Name.startswith("avx512.maskz.vpdpbusds."))) {
3128 bool ZeroMask = Name[11] == 'z';
3129 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3130 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
3133 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3134 else if (VecWidth == 256 && !IsSaturating)
3135 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3136 else if (VecWidth == 512 && !IsSaturating)
3137 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3138 else if (VecWidth == 128 && IsSaturating)
3139 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3140 else if (VecWidth == 256 && IsSaturating)
3141 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3142 else if (VecWidth == 512 && IsSaturating)
3143 IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");
3147 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3148 CI->getArgOperand(2) };
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
3151 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3152 : CI->getArgOperand(0);
3153 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3154 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3155 Name.startswith("avx512.maskz.vpdpwssd.") ||
3156 Name.startswith("avx512.mask.vpdpwssds.") ||
3157 Name.startswith("avx512.maskz.vpdpwssds."))) {
3158 bool ZeroMask = Name[11] == 'z';
3159 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3160 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
3163 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3164 else if (VecWidth == 256 && !IsSaturating)
3165 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3166 else if (VecWidth == 512 && !IsSaturating)
3167 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3168 else if (VecWidth == 128 && IsSaturating)
3169 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3170 else if (VecWidth == 256 && IsSaturating)
3171 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3172 else if (VecWidth == 512 && IsSaturating)
3173 IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");
3177 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3178 CI->getArgOperand(2) };
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
3181 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3182 : CI->getArgOperand(0);
3183 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3184 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3185 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3186 // Rep will be updated by the call in the condition.
3187 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
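    // Expand the NVVM integer abs intrinsics to a compare-and-select in
    // generic IR.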
3188 Value *Arg = CI->getArgOperand(0);
3189 Value *Neg = Builder.CreateNeg(Arg, "neg");
3190 Value *Cmp = Builder.CreateICmpSGE(
3191 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3192 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3193 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3194 Name == "max.ui" || Name == "max.ull")) {
3195 Value *Arg0 = CI->getArgOperand(0);
3196 Value *Arg1 = CI->getArgOperand(1);
3197 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3198 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3199 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3200 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3201 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3202 Name == "min.ui" || Name == "min.ull")) {
3203 Value *Arg0 = CI->getArgOperand(0);
3204 Value *Arg1 = CI->getArgOperand(1);
3205 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3206 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3207 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3208 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3209 } else if (IsNVVM && Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3211 Value *Arg = CI->getArgOperand(0);
3212 Value *Ctlz = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                  {Arg->getType()}),
3215 {Arg, Builder.getFalse()}, "ctlz");
3216 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3217 } else if (IsNVVM && Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
3220 Value *Arg = CI->getArgOperand(0);
3221 Value *Popc = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                  {Arg->getType()}),
        Arg, "ctpop");
3225 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3226 } else if (IsNVVM && Name == "h2f") {
3227 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3228 F->getParent(), Intrinsic::convert_from_fp16,
3229 {Builder.getFloatTy()}),
3230 CI->getArgOperand(0), "h2f");
  } else {
    llvm_unreachable("Unknown function for CallInst upgrade.");
  }

  if (Rep)
    CI->replaceAllUsesWith(Rep);
  CI->eraseFromParent();
  return;
  }
3241 const auto &DefaultCase = [&NewFn, &CI]() -> void {
3242 // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3245 "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
  };
3248 CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
3255 case Intrinsic::arm_neon_vld1:
3256 case Intrinsic::arm_neon_vld2:
3257 case Intrinsic::arm_neon_vld3:
3258 case Intrinsic::arm_neon_vld4:
3259 case Intrinsic::arm_neon_vld2lane:
3260 case Intrinsic::arm_neon_vld3lane:
3261 case Intrinsic::arm_neon_vld4lane:
3262 case Intrinsic::arm_neon_vst1:
3263 case Intrinsic::arm_neon_vst2:
3264 case Intrinsic::arm_neon_vst3:
3265 case Intrinsic::arm_neon_vst4:
3266 case Intrinsic::arm_neon_vst2lane:
3267 case Intrinsic::arm_neon_vst3lane:
3268 case Intrinsic::arm_neon_vst4lane: {
3269 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3270 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
3275 case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
3279 case Intrinsic::ctlz:
3280 case Intrinsic::cttz:
3281 assert(CI->getNumArgOperands() == 1 &&
3282 "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
3287 case Intrinsic::objectsize: {
3288 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3289 ? Builder.getFalse()
3290 : CI->getArgOperand(2);
3291 NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
    break;
  }
3296 case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
3300 case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
3304 case Intrinsic::dbg_value:
3305 // Upgrade from the old version that had an extra offset argument.
3306 assert(CI->getNumArgOperands() == 4);
3307 // Drop nonzero offsets instead of attempting to upgrade them.
3308 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3309 if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
3318 case Intrinsic::x86_xop_vfrcz_ss:
3319 case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;
3323 case Intrinsic::x86_xop_vpermil2pd:
3324 case Intrinsic::x86_xop_vpermil2ps:
3325 case Intrinsic::x86_xop_vpermil2pd_256:
3326 case Intrinsic::x86_xop_vpermil2ps_256: {
3327 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3328 CI->arg_operands().end());
3329 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3330 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3331 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
3336 case Intrinsic::x86_sse41_ptestc:
3337 case Intrinsic::x86_sse41_ptestz:
3338 case Intrinsic::x86_sse41_ptestnzc: {
3339 // The arguments for these intrinsics used to be v4f32, and changed
3340 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3341 // So, the only thing required is a bitcast for both arguments.
3342 // First, check the arguments have the old type.
3343 Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;
3347 // Old intrinsic, add bitcasts
3348 Value *Arg1 = CI->getArgOperand(1);
3350 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3352 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3353 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }
3359 case Intrinsic::x86_sse41_insertps:
3360 case Intrinsic::x86_sse41_dppd:
3361 case Intrinsic::x86_sse41_dpps:
3362 case Intrinsic::x86_sse41_mpsadbw:
3363 case Intrinsic::x86_avx_dp_ps_256:
3364 case Intrinsic::x86_avx2_mpsadbw: {
3365 // Need to truncate the last argument from i32 to i8 -- this argument models
3366 // an inherently 8-bit immediate operand to these x86 instructions.
3367 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3368 CI->arg_operands().end());
3370 // Replace the last argument with a trunc.
3371 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
3376 case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }
3381 case Intrinsic::invariant_start:
3382 case Intrinsic::invariant_end:
3383 case Intrinsic::masked_load:
3384 case Intrinsic::masked_store:
3385 case Intrinsic::masked_gather:
3386 case Intrinsic::masked_scatter: {
3387 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3388 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
3393 case Intrinsic::memcpy:
3394 case Intrinsic::memmove:
3395 case Intrinsic::memset: {
3396 // We have to make sure that the call signature is what we're expecting.
3397 // We only want to change the old signatures by removing the alignment arg:
3398 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3399 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3400 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3401 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3402 // Note: i8*'s in the above can be any pointer type
    if (CI->getNumArgOperands() != 5) {
      DefaultCase();
      return;
    }
3407 // Remove alignment argument (3), and add alignment attributes to the
3408 // dest/src pointers.
3409 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3410 CI->getArgOperand(2), CI->getArgOperand(4)};
3411 NewCall = Builder.CreateCall(NewFn, Args);
3412 auto *MemCI = cast<MemIntrinsic>(NewCall);
3413 // All mem intrinsics support dest alignment.
3414 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3415 MemCI->setDestAlignment(Align->getZExtValue());
3416 // Memcpy/Memmove also support source alignment.
3417 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getZExtValue());
    break;
  }
  }
3422 assert(NewCall && "Should have either set this variable or returned through "
3423 "the default case");
3424 std::string Name = CI->getName();
3425 if (!Name.empty()) {
3426 CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
3429 CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
3433 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3434 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
3439 if (UpgradeIntrinsicFunction(F, NewFn)) {
3440 // Replace all users of the old function with the new function or new
3441 // instructions. This is not a range loop because the call is deleted.
3442 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3443 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3444 UpgradeIntrinsicCall(CI, NewFn);
3446 // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
3451 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3452 // Check if the tag uses struct-path aware TBAA format.
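  // Scalar-format tags are rewritten into the struct-path form
  // !{base, access, offset 0[, const flag]}, reusing the old node (or a node
  // built from its first two operands) as both the base and the access type.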
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;
3456 auto &Context = MD.getContext();
3457 if (MD.getNumOperands() == 3) {
3458 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3459 MDNode *ScalarType = MDNode::get(Context, Elts);
3460 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3461 Metadata *Elts2[] = {ScalarType, ScalarType,
3462 ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
3467 // Create a MDNode <MD, MD, offset 0>
3468 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3469 Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}
3473 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3474 Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
3479 Type *SrcTy = V->getType();
3480 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3481 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3482 LLVMContext &Context = V->getContext();
3484 // We have no information about target data layout, so we assume that
3485 // the maximum pointer size is 64bit.
3486 Type *MidTy = Type::getInt64Ty(Context);
3487 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}
3495 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;
3499 Type *SrcTy = C->getType();
3500 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3501 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3502 LLVMContext &Context = C->getContext();
3504 // We have no information about target data layout, so we assume that
3505 // the maximum pointer size is 64bit.
3506 Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
/// Check the debug info version number; if it is out of date, drop the debug
/// info. Return true if the module is modified.
3517 bool llvm::UpgradeDebugInfo(Module &M) {
3518 unsigned Version = getDebugMetadataVersionFromModule(M);
3519 if (Version == DEBUG_METADATA_VERSION) {
3520 bool BrokenDebugInfo = false;
3521 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3522 report_fatal_error("Broken module found, compilation aborted!");
3523 if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
3528 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
3532 bool Modified = StripDebugInfo(M);
3533 if (Modified && Version != DEBUG_METADATA_VERSION) {
3534 // Diagnose a version mismatch.
3535 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
3541 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3542 bool Changed = false;
3543 NamedMDNode *ModRetainReleaseMarker =
3544 M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3545 if (ModRetainReleaseMarker) {
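    // Earlier releases emitted the marker string with a '#' separating its two
    // components; rejoin them with ';' so the marker matches the current
    // format.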
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
3550 SmallVector<StringRef, 4> ValueComp;
3551 ID->getString().split(ValueComp, "#");
3552 if (ValueComp.size() == 2) {
3553 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3554 Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3555 ModRetainReleaseMarker->setOperand(0,
                                             MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  return Changed;
}
3565 bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;
3570 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3571 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3572 MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
3580 if (ID->getString() == "Objective-C Class Properties")
3581 HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // fields was Error and is now Max.
3584 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3585 if (auto *Behavior =
3586 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3587 if (Behavior->getLimitedValue() == Module::Error) {
3588 Type *Int32Ty = Type::getInt32Ty(M.getContext());
3589 Metadata *Ops[3] = {
3590 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
3601 if (ID->getString() == "Objective-C Image Info Section") {
3602 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3603 SmallVector<StringRef, 4> ValueComp;
3604 Value->getString().split(ValueComp, " ");
3605 if (ValueComp.size() != 1) {
3606 std::string NewValue;
3607 for (auto &S : ValueComp)
3608 NewValue += S.str();
3609 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3610 MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }
  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
3623 if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
3632 void llvm::UpgradeSectionAttributes(Module &M) {
3633 auto TrimSpaces = [](StringRef Section) -> std::string {
3634 SmallVector<StringRef, 5> Components;
3635 Section.split(Components, ',');
3637 SmallString<32> Buffer;
3638 raw_svector_ostream OS(Buffer);
3640 for (auto Component : Components)
3641 OS << ',' << Component.trim();
    return OS.str().substr(1);
  };
3646 for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;
3650 StringRef Section = GV.getSection();
    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;
3655 // __DATA, __objc_catlist, regular, no_dead_strip
3656 // __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
3661 static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
3673 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3674 StringRef OldPrefix = "llvm.vectorizer.";
3675 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
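  // Map the legacy tag onto the current one, e.g. llvm.vectorizer.width
  // becomes llvm.loop.vectorize.width and llvm.vectorizer.unroll becomes
  // llvm.loop.interleave.count.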
3677 if (OldTag == "llvm.vectorizer.unroll")
3678 return MDString::get(C, "llvm.loop.interleave.count");
3680 return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
3685 static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;
3697 // This has an old tag. Upgrade it.
3698 SmallVector<Metadata *, 8> Ops;
3699 Ops.reserve(T->getNumOperands());
3700 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3701 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3702 Ops.push_back(T->getOperand(I));
  return MDTuple::get(T->getContext(), Ops);
}
3707 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;
3715 SmallVector<Metadata *, 8> Ops;
3716 Ops.reserve(T->getNumOperands());
3717 for (Metadata *MD : T->operands())
3718 Ops.push_back(upgradeLoopArgument(MD));
  return MDTuple::get(T->getContext(), Ops);
}