1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/CFG.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DIBuilder.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/Regex.h"
// Rename GV in place by appending the suffix ".old" to its current name.
35 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
37 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
38 // changed their type from v4f32 to v2i64.
// F is the declaration being inspected; IID selects the intrinsic whose
// declaration is requested from F's parent module and stored into NewFn.
// NOTE(review): the embedded numbering skips 40, 42, 45-46, 48 and 50-51, so
// the end of the signature, the return statements and the closing brace are
// not visible in this listing.
39 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
41 // Check whether this is an old version of the function, which received
// (the sentence above continues on a line that is absent from this listing)
// The test below compares parameter 0 against a 4-element float vector type.
43 Type *Arg0Type = F->getFunctionType()->getParamType(0);
44 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
47 // Yes, it's old, replace it with new version.
49 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
54 // arguments have changed their type from i32 to i8.
// Only the LAST parameter of F is inspected. IID names the intrinsic whose
// declaration is fetched from F's parent module into NewFn.
// NOTE(review): the embedded numbering skips 56, 61-62, 64 and 66-68, so the
// end of the signature, the return statements and the closing brace are not
// visible in this listing.
55 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
57 // Check that the last argument is an i32.
58 Type *LastArgType = F->getFunctionType()->getParamType(
59 F->getFunctionType()->getNumParams() - 1);
60 if (!LastArgType->isIntegerTy(32))
63 // Move this function aside and map down.
65 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Name-based predicate over x86 intrinsic names. The literals below carry no
// "llvm." or "x86." prefix, so Name is expected with those already stripped
// (the visible caller, UpgradeX86IntrinsicFunction, applies Name.substr(4)
// before calling). startswith() entries match a whole family by prefix, while
// == entries match exactly one name.
// NOTE(review): the listing skips original line 74 and stops mid-expression
// at 241 (242-247 are missing), so the tail of the condition and the return
// statements are not visible here.
69 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
70 // All of the intrinsics matches below should be marked with which llvm
71 // version started autoupgrading them. At some point in the future we would
72 // like to use this information to remove upgrade code for some older
73 // intrinsics. It is currently undecided how we will determine that future
75 if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
76 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
77 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
78 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
79 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
80 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
81 Name == "sse.add.ss" || // Added in 4.0
82 Name == "sse2.add.sd" || // Added in 4.0
83 Name == "sse.sub.ss" || // Added in 4.0
84 Name == "sse2.sub.sd" || // Added in 4.0
85 Name == "sse.mul.ss" || // Added in 4.0
86 Name == "sse2.mul.sd" || // Added in 4.0
87 Name == "sse.div.ss" || // Added in 4.0
88 Name == "sse2.div.sd" || // Added in 4.0
89 Name == "sse41.pmaxsb" || // Added in 3.9
90 Name == "sse2.pmaxs.w" || // Added in 3.9
91 Name == "sse41.pmaxsd" || // Added in 3.9
92 Name == "sse2.pmaxu.b" || // Added in 3.9
93 Name == "sse41.pmaxuw" || // Added in 3.9
94 Name == "sse41.pmaxud" || // Added in 3.9
95 Name == "sse41.pminsb" || // Added in 3.9
96 Name == "sse2.pmins.w" || // Added in 3.9
97 Name == "sse41.pminsd" || // Added in 3.9
98 Name == "sse2.pminu.b" || // Added in 3.9
99 Name == "sse41.pminuw" || // Added in 3.9
100 Name == "sse41.pminud" || // Added in 3.9
101 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
102 Name.startswith("avx2.pmax") || // Added in 3.9
103 Name.startswith("avx2.pmin") || // Added in 3.9
104 Name.startswith("avx512.mask.pmax") || // Added in 4.0
105 Name.startswith("avx512.mask.pmin") || // Added in 4.0
106 Name.startswith("avx2.vbroadcast") || // Added in 3.8
107 Name.startswith("avx2.pbroadcast") || // Added in 3.8
108 Name.startswith("avx.vpermil.") || // Added in 3.1
109 Name.startswith("sse2.pshuf") || // Added in 3.9
110 Name.startswith("avx512.pbroadcast") || // Added in 3.9
111 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
112 Name.startswith("avx512.mask.movddup") || // Added in 3.9
113 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
114 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
115 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
116 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
117 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
118 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
119 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
120 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
121 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
122 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
123 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
124 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
125 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
126 Name.startswith("avx512.mask.pand.") || // Added in 3.9
127 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
128 Name.startswith("avx512.mask.por.") || // Added in 3.9
129 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
130 Name.startswith("avx512.mask.and.") || // Added in 3.9
131 Name.startswith("avx512.mask.andn.") || // Added in 3.9
132 Name.startswith("avx512.mask.or.") || // Added in 3.9
133 Name.startswith("avx512.mask.xor.") || // Added in 3.9
134 Name.startswith("avx512.mask.padd.") || // Added in 4.0
135 Name.startswith("avx512.mask.psub.") || // Added in 4.0
136 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
137 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
138 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
139 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
140 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
141 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
142 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
143 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
144 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
145 Name == "avx512.mask.add.pd.128" || // Added in 4.0
146 Name == "avx512.mask.add.pd.256" || // Added in 4.0
147 Name == "avx512.mask.add.ps.128" || // Added in 4.0
148 Name == "avx512.mask.add.ps.256" || // Added in 4.0
149 Name == "avx512.mask.div.pd.128" || // Added in 4.0
150 Name == "avx512.mask.div.pd.256" || // Added in 4.0
151 Name == "avx512.mask.div.ps.128" || // Added in 4.0
152 Name == "avx512.mask.div.ps.256" || // Added in 4.0
153 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
154 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
155 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
156 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
157 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
158 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
159 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
160 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
161 Name == "avx512.mask.max.pd.128" || // Added in 5.0
162 Name == "avx512.mask.max.pd.256" || // Added in 5.0
163 Name == "avx512.mask.max.ps.128" || // Added in 5.0
164 Name == "avx512.mask.max.ps.256" || // Added in 5.0
165 Name == "avx512.mask.min.pd.128" || // Added in 5.0
166 Name == "avx512.mask.min.pd.256" || // Added in 5.0
167 Name == "avx512.mask.min.ps.128" || // Added in 5.0
168 Name == "avx512.mask.min.ps.256" || // Added in 5.0
169 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
170 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
171 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
172 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
173 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
174 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
175 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
176 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
177 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
178 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
179 Name.startswith("avx512.mask.pslli") || // Added in 4.0
180 Name.startswith("avx512.mask.psrai") || // Added in 4.0
181 Name.startswith("avx512.mask.psrli") || // Added in 4.0
182 Name.startswith("avx512.mask.psllv") || // Added in 4.0
183 Name.startswith("avx512.mask.psrav") || // Added in 4.0
184 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
185 Name.startswith("sse41.pmovsx") || // Added in 3.8
186 Name.startswith("sse41.pmovzx") || // Added in 3.9
187 Name.startswith("avx2.pmovsx") || // Added in 3.9
188 Name.startswith("avx2.pmovzx") || // Added in 3.9
189 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
190 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
191 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
192 Name == "sse2.cvtdq2pd" || // Added in 3.9
193 Name == "sse2.cvtps2pd" || // Added in 3.9
194 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
195 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
196 Name.startswith("avx.vinsertf128.") || // Added in 3.7
197 Name == "avx2.vinserti128" || // Added in 3.7
198 Name.startswith("avx512.mask.insert") || // Added in 4.0
199 Name.startswith("avx.vextractf128.") || // Added in 3.7
200 Name == "avx2.vextracti128" || // Added in 3.7
201 Name.startswith("avx512.mask.vextract") || // Added in 4.0
202 Name.startswith("sse4a.movnt.") || // Added in 3.9
203 Name.startswith("avx.movnt.") || // Added in 3.2
204 Name.startswith("avx512.storent.") || // Added in 3.9
205 Name == "sse41.movntdqa" || // Added in 5.0
206 Name == "avx2.movntdqa" || // Added in 5.0
207 Name == "avx512.movntdqa" || // Added in 5.0
208 Name == "sse2.storel.dq" || // Added in 3.9
209 Name.startswith("sse.storeu.") || // Added in 3.9
210 Name.startswith("sse2.storeu.") || // Added in 3.9
211 Name.startswith("avx.storeu.") || // Added in 3.9
212 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
213 Name.startswith("avx512.mask.store.p") || // Added in 3.9
214 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
215 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
216 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
217 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
218 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
219 Name.startswith("avx512.mask.load.") || // Added in 3.9
220 Name == "sse42.crc32.64.8" || // Added in 3.4
221 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
222 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
223 Name.startswith("avx512.mask.valign.") || // Added in 4.0
224 Name.startswith("sse2.psll.dq") || // Added in 3.7
225 Name.startswith("sse2.psrl.dq") || // Added in 3.7
226 Name.startswith("avx2.psll.dq") || // Added in 3.7
227 Name.startswith("avx2.psrl.dq") || // Added in 3.7
228 Name.startswith("avx512.psll.dq") || // Added in 3.9
229 Name.startswith("avx512.psrl.dq") || // Added in 3.9
230 Name == "sse41.pblendw" || // Added in 3.7
231 Name.startswith("sse41.blendp") || // Added in 3.7
232 Name.startswith("avx.blend.p") || // Added in 3.7
233 Name == "avx2.pblendw" || // Added in 3.7
234 Name.startswith("avx2.pblendd.") || // Added in 3.7
235 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
236 Name == "avx2.vbroadcasti128" || // Added in 3.7
237 Name == "xop.vpcmov" || // Added in 3.8
238 Name == "xop.vpcmov.256" || // Added in 5.0
239 Name.startswith("avx512.mask.move.s") || // Added in 4.0
240 Name.startswith("avx512.cvtmask2") || // Added in 5.0
// The "xop.vpcom" alternative is a conjunction; its right-hand side is on
// lines that are absent from this listing.
241 (Name.startswith("xop.vpcom") && // Added in 3.2
// Decide whether the x86 intrinsic declaration F (named Name) needs
// upgrading and, where a replacement declaration exists, store it in NewFn.
// Name is expected to start with "x86."; that prefix is stripped before the
// individual names are examined.
// NOTE(review): many original lines (249, 252, 255, 257-260, 269, 271, 274,
// 277, ... 321-327) are absent from this listing, including the end of the
// signature, most return statements, closing braces and the final arguments
// of the UpgradeX86IntrinsicsWith8BitMask calls.
248 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
250 // Only handle intrinsics that start with "x86.".
251 if (!Name.startswith("x86."))
253 // Remove "x86." prefix.
254 Name = Name.substr(4);
// Names accepted by ShouldUpgradeX86Intrinsic take this branch; its body is
// not visible in this listing.
256 if (ShouldUpgradeX86Intrinsic(F, Name)) {
261 // SSE4.1 ptest functions may have an old signature.
262 if (Name.startswith("sse41.ptest")) { // Added in 3.2
// substr(11) skips the 11 characters of "sse41.ptest", leaving the variant
// suffix ("c", "z" or "nzc").
263 if (Name.substr(11) == "c")
264 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
265 if (Name.substr(11) == "z")
266 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
267 if (Name.substr(11) == "nzc")
268 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
270 // Several blend and other instructions with masks used the wrong number of
// (the sentence above continues on a line that is absent from this listing)
272 if (Name == "sse41.insertps") // Added in 3.6
273 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
275 if (Name == "sse41.dppd") // Added in 3.6
276 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
278 if (Name == "sse41.dpps") // Added in 3.6
279 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
281 if (Name == "sse41.mpsadbw") // Added in 3.6
282 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
284 if (Name == "avx.dp.ps.256") // Added in 3.6
285 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
287 if (Name == "avx2.mpsadbw") // Added in 3.6
288 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
291 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
// Only the two-argument declarations are treated as old here.
292 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
294 NewFn = Intrinsic::getDeclaration(F->getParent(),
295 Intrinsic::x86_xop_vfrcz_ss);
298 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
300 NewFn = Intrinsic::getDeclaration(F->getParent(),
301 Intrinsic::x86_xop_vfrcz_sd);
304 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
305 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
// Parameter 2 is the index operand that is checked for a floating-point type.
306 auto Idx = F->getFunctionType()->getParamType(2);
307 if (Idx->isFPOrFPVectorTy()) {
// Pick the replacement intrinsic from the element width (64 or 32 bits) and
// the total vector width (128 or 256 bits) of the index operand.
309 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
310 unsigned EltSize = Idx->getScalarSizeInBits();
311 Intrinsic::ID Permil2ID;
312 if (EltSize == 64 && IdxSize == 128)
313 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
314 else if (EltSize == 32 && IdxSize == 128)
315 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
316 else if (EltSize == 64 && IdxSize == 256)
317 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
// NOTE(review): original line 318 (presumably the final `else`) is missing.
319 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
320 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
// Inspect the declaration F and, when its name identifies an outdated
// intrinsic, store the replacement declaration in NewFn. Names are examined
// with the leading "llvm." removed.
// NOTE(review): the embedded numbering shows many missing lines (for example
// 334, 336-339, 343-344, 346, 349, 352, 356-357, ...), so the dispatch
// scaffolding, the rename/return statements and closing braces of the
// individual cases are not visible in this listing.
328 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
329 assert(F && "Illegal to upgrade a non-existent Function.");
331 // Quickly eliminate it, if it's not a candidate.
332 StringRef Name = F->getName();
// Names of 8 characters or fewer, or without the "llvm." prefix, are rejected.
333 if (Name.size() <= 8 || !Name.startswith("llvm."))
335 Name = Name.substr(5); // Strip off "llvm."
// ARM / AArch64 rbit maps to the generic bitreverse intrinsic, overloaded on
// the type of F's first argument.
340 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
341 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
342 F->arg_begin()->getType());
345 if (Name.startswith("arm.neon.vclz")) {
// The new parameter list is F's first argument type followed by an i1.
347 F->arg_begin()->getType(),
348 Type::getInt1Ty(F->getContext())
350 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
351 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// (the sentence above continues on a line that is absent from this listing)
353 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
// substr(14) skips the 14 characters of "arm.neon.vclz.", keeping the type
// suffix for the new "llvm.ctlz." name.
354 NewFn = Function::Create(fType, F->getLinkage(),
355 "llvm.ctlz." + Name.substr(14), F->getParent());
358 if (Name.startswith("arm.neon.vcnt")) {
359 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
360 F->arg_begin()->getType());
// Matches arm.neon.vld1..vld4 and vld2lane..vld4lane followed by ".v" and a
// lowercase-alphanumeric suffix, with nothing after it.
363 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
364 if (vldRegex.match(Name)) {
365 auto fArgs = F->getFunctionType()->params();
366 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
367 // Can't use Intrinsic::getDeclaration here as the return types might
368 // then only be structurally equal.
369 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
// The new function keeps the old name and appends ".p0i8".
370 NewFn = Function::Create(fType, F->getLinkage(),
371 "llvm." + Name + ".p0i8", F->getParent());
// Same pattern as the vld regex above, for the vst store family.
374 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
375 if (vstRegex.match(Name)) {
376 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
377 Intrinsic::arm_neon_vst2,
378 Intrinsic::arm_neon_vst3,
379 Intrinsic::arm_neon_vst4};
381 static const Intrinsic::ID StoreLaneInts[] = {
382 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
383 Intrinsic::arm_neon_vst4lane
386 auto fArgs = F->getFunctionType()->params();
// The overload types are F's first two parameter types.
387 Type *Tys[] = {fArgs[0], fArgs[1]};
// Names without "lane" index StoreInts by (parameter count - 3); the
// remaining names index StoreLaneInts by (parameter count - 5).
388 if (Name.find("lane") == StringRef::npos)
389 NewFn = Intrinsic::getDeclaration(F->getParent(),
390 StoreInts[fArgs.size() - 3], Tys);
// NOTE(review): original line 391 (presumably `else`) is missing here.
392 NewFn = Intrinsic::getDeclaration(F->getParent(),
393 StoreLaneInts[fArgs.size() - 5], Tys);
396 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
397 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
// One-argument ctlz/cttz declarations are treated as the old form.
404 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
406 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
407 F->arg_begin()->getType());
410 if (Name.startswith("cttz.") && F->arg_size() == 1) {
412 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
413 F->arg_begin()->getType());
// lifetime.start / invariant.start: the overload type is parameter 1. The
// declaration is replaced only when its current name differs from the name
// Intrinsic::getName computes for that overload.
420 bool IsLifetimeStart = Name.startswith("lifetime.start");
421 if (IsLifetimeStart || Name.startswith("invariant.start")) {
422 Intrinsic::ID ID = IsLifetimeStart ?
423 Intrinsic::lifetime_start : Intrinsic::invariant_start;
424 auto Args = F->getFunctionType()->params();
425 Type* ObjectPtr[1] = {Args[1]};
426 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
428 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// lifetime.end / invariant.end: the overload type is parameter 1 for
// lifetime.end and parameter 2 for invariant.end.
433 bool IsLifetimeEnd = Name.startswith("lifetime.end");
434 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
435 Intrinsic::ID ID = IsLifetimeEnd ?
436 Intrinsic::lifetime_end : Intrinsic::invariant_end;
438 auto Args = F->getFunctionType()->params();
439 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
440 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
442 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// masked.load: overloaded on the return type and the first argument type.
449 if (Name.startswith("masked.load.")) {
450 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
451 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
453 NewFn = Intrinsic::getDeclaration(F->getParent(),
454 Intrinsic::masked_load,
// masked.store: overloaded on the first two parameter types.
459 if (Name.startswith("masked.store.")) {
460 auto Args = F->getFunctionType()->params();
461 Type *Tys[] = { Args[0], Args[1] };
462 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
464 NewFn = Intrinsic::getDeclaration(F->getParent(),
465 Intrinsic::masked_store,
470 // Renaming gather/scatter intrinsics with no address space overloading
471 // to the new overload which includes an address space
472 if (Name.startswith("masked.gather.")) {
473 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
474 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
476 NewFn = Intrinsic::getDeclaration(F->getParent(),
477 Intrinsic::masked_gather, Tys);
481 if (Name.startswith("masked.scatter.")) {
482 auto Args = F->getFunctionType()->params();
483 Type *Tys[] = {Args[0], Args[1]};
484 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
486 NewFn = Intrinsic::getDeclaration(F->getParent(),
487 Intrinsic::masked_scatter, Tys);
494 if (Name.startswith("nvvm.")) {
// Drop the 5-character "nvvm." prefix before matching the names below.
495 Name = Name.substr(5);
497 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
498 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
499 .Cases("brev32", "brev64", Intrinsic::bitreverse)
500 .Case("clz.i", Intrinsic::ctlz)
501 .Case("popc.i", Intrinsic::ctpop)
502 .Default(Intrinsic::not_intrinsic);
// Only one-argument declarations are mapped; the replacement is overloaded
// on F's return type.
503 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
504 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
505 {F->getReturnType()});
509 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
510 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
512 // TODO: We could add lohi.i2d.
// NOTE(review): the end of this StringSwitch (its default case) and the code
// that consumes Expand are on lines absent from this listing.
513 bool Expand = StringSwitch<bool>(Name)
514 .Cases("abs.i", "abs.ll", true)
515 .Cases("clz.ll", "popc.ll", "h2f", true)
516 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
517 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
526 // We only need to change the name to match the mangling including the
// (the sentence above continues on a line that is absent from this listing)
528 if (Name.startswith("objectsize.")) {
529 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
// Upgrade when the declaration has exactly two arguments or when its name
// differs from the name computed for these overload types.
530 if (F->arg_size() == 2 ||
531 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
533 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
// NOTE(review): the body of the stackprotectorcheck case is not visible.
541 if (Name == "stackprotectorcheck") {
// Hand the name (still without "llvm.") to the x86-specific upgrader.
548 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
551 // Remangle our intrinsic since we upgrade the mangling
552 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
553 if (Result != None) {
554 NewFn = Result.getValue();
558 // This may not belong here. This function is effectively being overloaded
559 // to both detect an intrinsic which needs upgrading, and to provide the
560 // upgraded form of the intrinsic. We should perhaps have two separate
561 // functions for this.
// Public entry point: delegates the upgrade decision to
// UpgradeIntrinsicFunction1, asserts that the replacement is a different
// function, and then resets the attributes of an intrinsic function to the
// ones Intrinsic::getAttributes reports for its ID.
// NOTE(review): original lines 566, 569, 571-572 and 575-577 are absent from
// this listing, so the initialisation of NewFn, any reassignment of F and the
// return statement are not visible.
565 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
567 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
568 assert(F != NewFn && "Intrinsic function upgraded to the same function");
570 // Upgrade intrinsic attributes. This does not change the function.
// Attributes are only rewritten when F has a non-zero intrinsic ID.
573 if (Intrinsic::ID id = F->getIntrinsicID())
574 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Upgrade hook for global variables; per the comment below it currently has
// no work to do.
// NOTE(review): the return statement and closing brace (original lines
// 580-581) are absent from this listing.
578 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
579 // Nothing to do yet.
583 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// (the sentence above continues on a line that is absent from this listing)
// Op is bitcast to a byte vector, combined with a zero vector through a
// shuffle built from Shift, and the result is bitcast back to Op's type.
// NOTE(review): original lines 584, 589, 593, 596, 599-600, 605, 608-609,
// 611-612 and 615-616 are missing, including the `Shift < 16` guard described
// by the comment below and the declaration of Idxs.
585 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
586 Value *Op, unsigned Shift) {
587 Type *ResultTy = Op->getType();
// Eight bytes per element of the incoming vector.
588 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
590 // Bitcast from a 64-bit element type to a byte element type.
591 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
592 Op = Builder.CreateBitCast(Op, VecTy, "cast");
594 // We'll be shuffling in zeroes.
595 Value *Res = Constant::getNullValue(VecTy);
597 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
598 // we'll just return the zero vector.
601 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes inside one lane.
602 for (unsigned l = 0; l != NumElts; l += 16)
603 for (unsigned i = 0; i != 16; ++i) {
604 unsigned Idx = NumElts + i - Shift;
// NOTE(review): the condition guarding the adjustment below (original line
// 605) is missing from this listing.
606 Idx -= NumElts - 16; // end of lane, switch operand.
607 Idxs[l + i] = Idx + l;
// The zero vector is the first shuffle operand and the data the second.
610 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
613 // Bitcast back to a 64-bit element type.
614 return Builder.CreateBitCast(Res, ResultTy, "cast");
617 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// (the sentence above continues on a line that is absent from this listing)
// Op is bitcast to a byte vector, combined with a zero vector through a
// shuffle built from the shift amount, and bitcast back to Op's type.
// NOTE(review): original lines 618, 620, 623, 627, 630, 633-634, 639,
// 642-643, 645-646 and 649-650 are missing, including the end of the
// signature, the guard described by the comment below and the declaration of
// Idxs.
619 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
621 Type *ResultTy = Op->getType();
// Eight bytes per element of the incoming vector.
622 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
624 // Bitcast from a 64-bit element type to a byte element type.
625 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
626 Op = Builder.CreateBitCast(Op, VecTy, "cast");
628 // We'll be shuffling in zeroes.
629 Value *Res = Constant::getNullValue(VecTy);
631 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
632 // we'll just return the zero vector.
635 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes inside one lane.
636 for (unsigned l = 0; l != NumElts; l += 16)
637 for (unsigned i = 0; i != 16; ++i) {
638 unsigned Idx = i + Shift;
// NOTE(review): the condition guarding the adjustment below (original line
// 639) is missing from this listing.
640 Idx += NumElts - 16; // end of lane, switch operand.
641 Idxs[l + i] = Idx + l;
// Here the data is the first shuffle operand and the zero vector the second.
644 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
647 // Bitcast back to a 64-bit element type.
648 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer mask into a vector of i1. Mask must have an integer
// type (it is cast<IntegerType>); it is bitcast to an i1 vector with one
// element per mask bit and then narrowed with a shuffle to NumElts elements.
// NOTE(review): original lines 652, 656, 659-660, 662 and 665-670 are absent,
// so the end of the signature, the condition guarding the narrowing, the
// Indices declaration/fill and the return statement are not visible.
651 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
653 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
654 cast<IntegerType>(Mask->getType())->getBitWidth());
655 Mask = Builder.CreateBitCast(Mask, MaskTy);
657 // If we have less than 8 elements, then the starting mask was an i8 and
658 // we need to extract down to the right number of elements.
661 for (unsigned i = 0; i != NumElts; ++i)
// Shuffle the mask with itself, keeping the NumElts entries listed in Indices.
663 Mask = Builder.CreateShuffleVector(Mask, Mask,
664 makeArrayRef(Indices, NumElts),
// Emit a per-element select between Op0 and Op1 controlled by the integer
// Mask: the mask is converted to an i1 vector with as many elements as Op0
// and used as the select condition (Op0 where set, Op1 otherwise).
// NOTE(review): the statement executed for an all-ones constant mask
// (original line 676) is missing from this listing; presumably it returns
// Op0 without emitting a select - confirm against the full file.
671 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
672 Value *Op0, Value *Op1) {
673 // If the mask is all ones just emit the align operation.
// NOTE(review): "align operation" looks like wording left over from the
// PALIGNR/VALIGN upgrade; this helper has other visible callers as well.
674 if (const auto *C = dyn_cast<Constant>(Mask))
675 if (C->isAllOnesValue())
678 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
679 return Builder.CreateSelect(Mask, Op0, Op1);
682 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
683 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
684 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Shift must be a ConstantInt (it is cast<> below). The aligned value is
// built as a shuffle of Op1 and Op0 and then blended with Passthru under
// Mask via EmitX86Select.
// NOTE(review): original lines 688, 690, 695, 697, 699, 701-702, 704,
// 707-709, 711-712, 721-723 and 726-727 are absent, including the end of the
// signature (where IsVALIGN is declared) and the conditions guarding the
// visible statements.
685 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
686 Value *Op1, Value *Shift,
687 Value *Passthru, Value *Mask,
689 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
691 unsigned NumElts = Op0->getType()->getVectorNumElements();
692 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
693 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
694 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
696 // Mask the immediate for VALIGN.
// NumElts is a power of two (asserted above), so this is ShiftVal % NumElts.
698 ShiftVal &= (NumElts - 1);
700 // If palignr is shifting the pair of vectors more than the size of two
// (the sentence above continues on a line that is absent from this listing)
703 return llvm::Constant::getNullValue(Op0->getType());
705 // If palignr is shifting the pair of input vectors more than one lane,
706 // but less than two lanes, convert to shifting in zeroes.
710 Op0 = llvm::Constant::getNullValue(Op0->getType());
// Room for at most 64 shuffle indices.
713 uint32_t Indices[64];
714 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// l walks the 16-element lanes and i the positions inside one lane.
715 for (unsigned l = 0; l < NumElts; l += 16) {
716 for (unsigned i = 0; i != 16; ++i) {
717 unsigned Idx = ShiftVal + i;
718 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
719 Idx += NumElts - 16; // End of lane, switch operand.
720 Indices[l + i] = Idx + l;
// Op1 is the first shuffle operand and Op0 the second.
724 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
725 makeArrayRef(Indices, NumElts),
728 return EmitX86Select(Builder, Mask, Align, Passthru);
// Replace an x86 masked-store intrinsic: Ptr is bitcast to a pointer to
// Data's type, then either a plain aligned store (constant all-ones mask) or
// a masked store is emitted.
// NOTE(review): original lines 733, 737, 739, 744 and 749-750 are absent, so
// the end of the signature (where Aligned is declared) and the declaration
// that receives the alignment expression below are not visible.
731 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
732 Value *Ptr, Value *Data, Value *Mask,
734 // Cast the pointer to the right type.
735 Ptr = Builder.CreateBitCast(Ptr,
736 llvm::PointerType::getUnqual(Data->getType()));
// Alignment: the vector's size in bytes when Aligned is set, otherwise 1.
738 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
740 // If the mask is all ones just emit a regular store.
741 if (const auto *C = dyn_cast<Constant>(Mask))
742 if (C->isAllOnesValue())
743 return Builder.CreateAlignedStore(Data, Ptr, Align);
745 // Convert the mask from an integer type to a vector of i1.
746 unsigned NumElts = Data->getType()->getVectorNumElements();
747 Mask = getX86MaskVec(Builder, Mask, NumElts);
748 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Replace an x86 masked-load intrinsic: Ptr is bitcast to a pointer to
// Passthru's type, then either a plain aligned load (constant all-ones mask)
// or a masked load with Passthru as the pass-through value is emitted.
// NOTE(review): original lines 753, 757, 759, 764 and 769-770 are absent, so
// the end of the signature (where Aligned is declared) and the declaration
// that receives the alignment expression below are not visible.
751 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
752 Value *Ptr, Value *Passthru, Value *Mask,
754 // Cast the pointer to the right type.
755 Ptr = Builder.CreateBitCast(Ptr,
756 llvm::PointerType::getUnqual(Passthru->getType()));
// Alignment: the vector's size in bytes when Aligned is set, otherwise 1.
758 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
760 // If the mask is all ones just emit a regular load.
761 if (const auto *C = dyn_cast<Constant>(Mask))
762 if (C->isAllOnesValue())
763 return Builder.CreateAlignedLoad(Ptr, Align);
765 // Convert the mask from an integer type to a vector of i1.
766 unsigned NumElts = Passthru->getType()->getVectorNumElements();
767 Mask = getX86MaskVec(Builder, Mask, NumElts);
768 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Expand an integer min/max intrinsic call into an icmp with predicate Pred
// on operands 0 and 1 followed by a select of operand 0 when the comparison
// holds and operand 1 otherwise.
// NOTE(review): the return statement and closing brace (original lines
// 780-782) are absent from this listing.
771 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
772 ICmpInst::Predicate Pred) {
773 Value *Op0 = CI.getArgOperand(0);
774 Value *Op1 = CI.getArgOperand(1);
775 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
776 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
// Four-operand (masked) form: operand 3 is the mask and operand 2 the value
// used where the mask is clear.
778 if (CI.getNumArgOperands() == 4)
779 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Expand a masked integer compare intrinsic: compare operands 0 and 1 with
// Pred, AND the result with the mask in operand 2 (unless it is a constant
// all-ones), and return the comparison bits as an integer of
// max(NumElts, 8) bits.
// NOTE(review): original lines 789, 794-796, 798 and 803-804 are absent, so
// the condition guarding the widening shuffle, the Indices declaration and
// the first loop's body are not visible.
784 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
785 ICmpInst::Predicate Pred) {
786 Value *Op0 = CI.getArgOperand(0);
787 unsigned NumElts = Op0->getType()->getVectorNumElements();
788 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
790 Value *Mask = CI.getArgOperand(2);
791 const auto *C = dyn_cast<Constant>(Mask);
// Skip the AND when the mask is a constant with every bit set.
792 if (!C || !C->isAllOnesValue())
793 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
797 for (unsigned i = 0; i != NumElts; ++i)
// Positions NumElts..7 use indices >= NumElts, i.e. lanes of the second
// shuffle operand, which is the null vector passed below.
799 for (unsigned i = NumElts; i != 8; ++i)
800 Indices[i] = NumElts + i % NumElts;
801 Cmp = Builder.CreateShuffleVector(Cmp,
802 Constant::getNullValue(Cmp->getType()),
805 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
806 std::max(NumElts, 8U)));
809 // Replace a masked intrinsic with an older unmasked intrinsic.
// The unmasked intrinsic IID is called with operands 0 and 1 of CI; its
// result is then blended with operand 2 under the mask in operand 3.
// NOTE(review): the end of the signature (original line 811, where IID is
// declared) and the closing brace are absent from this listing.
810 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
812 Function *F = CI.getCalledFunction();
813 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
814 Value *Rep = Builder.CreateCall(Intrin,
815 { CI.getArgOperand(0), CI.getArgOperand(1) });
816 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
// Expand a masked scalar move: bit 0 of the mask (operand 3) chooses between
// element 0 of operand 1 and element 0 of operand 2, and the chosen scalar is
// inserted into element 0 of operand 0.
// NOTE(review): the closing brace (original lines 831-832) is absent from
// this listing.
819 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
820 Value* A = CI.getArgOperand(0);
821 Value* B = CI.getArgOperand(1);
822 Value* Src = CI.getArgOperand(2);
823 Value* Mask = CI.getArgOperand(3);
// Isolate the lowest mask bit using an 8-bit constant 1 and test it for
// non-zero.
825 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
826 Value* Cmp = Builder.CreateIsNotNull(AndNode);
827 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
828 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
829 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
830 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
// Expand a mask-to-vector conversion: operand 0 is turned into an i1 vector
// with as many elements as the call's result type and then sign-extended to
// that result type.
// NOTE(review): the closing brace (original lines 840-841) is absent from
// this listing.
834 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
835 Value* Op = CI.getArgOperand(0);
836 Type* ReturnOp = CI.getType();
837 unsigned NumElts = CI.getType()->getVectorNumElements();
838 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
839 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
842 /// Upgrade a call to an old intrinsic. All argument and return casting must be
843 /// provided to seamlessly integrate with existing context.
844 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
845 Function *F = CI->getCalledFunction();
846 LLVMContext &C = CI->getContext();
847 IRBuilder<> Builder(C);
848 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
850 assert(F && "Intrinsic call is not direct?");
853 // Get the Function's name.
854 StringRef Name = F->getName();
856 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
857 Name = Name.substr(5);
859 bool IsX86 = Name.startswith("x86.");
861 Name = Name.substr(4);
862 bool IsNVVM = Name.startswith("nvvm.");
864 Name = Name.substr(5);
866 if (IsX86 && Name.startswith("sse4a.movnt.")) {
867 Module *M = F->getParent();
868 SmallVector<Metadata *, 1> Elts;
870 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
871 MDNode *Node = MDNode::get(C, Elts);
873 Value *Arg0 = CI->getArgOperand(0);
874 Value *Arg1 = CI->getArgOperand(1);
876 // Nontemporal (unaligned) store of the 0'th element of the float/double
878 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
879 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
880 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
882 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
884 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
885 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
888 CI->eraseFromParent();
892 if (IsX86 && (Name.startswith("avx.movnt.") ||
893 Name.startswith("avx512.storent."))) {
894 Module *M = F->getParent();
895 SmallVector<Metadata *, 1> Elts;
897 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
898 MDNode *Node = MDNode::get(C, Elts);
900 Value *Arg0 = CI->getArgOperand(0);
901 Value *Arg1 = CI->getArgOperand(1);
903 // Convert the type of the pointer to a pointer to the stored type.
904 Value *BC = Builder.CreateBitCast(Arg0,
905 PointerType::getUnqual(Arg1->getType()),
907 VectorType *VTy = cast<VectorType>(Arg1->getType());
908 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
909 VTy->getBitWidth() / 8);
910 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
913 CI->eraseFromParent();
917 if (IsX86 && Name == "sse2.storel.dq") {
918 Value *Arg0 = CI->getArgOperand(0);
919 Value *Arg1 = CI->getArgOperand(1);
921 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
922 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
923 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
924 Value *BC = Builder.CreateBitCast(Arg0,
925 PointerType::getUnqual(Elt->getType()),
927 Builder.CreateAlignedStore(Elt, BC, 1);
930 CI->eraseFromParent();
934 if (IsX86 && (Name.startswith("sse.storeu.") ||
935 Name.startswith("sse2.storeu.") ||
936 Name.startswith("avx.storeu."))) {
937 Value *Arg0 = CI->getArgOperand(0);
938 Value *Arg1 = CI->getArgOperand(1);
940 Arg0 = Builder.CreateBitCast(Arg0,
941 PointerType::getUnqual(Arg1->getType()),
943 Builder.CreateAlignedStore(Arg1, Arg0, 1);
946 CI->eraseFromParent();
950 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
951 // "avx512.mask.storeu." or "avx512.mask.store."
952 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
953 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
954 CI->getArgOperand(2), Aligned);
957 CI->eraseFromParent();
962 // Upgrade packed integer vector compare intrinsics to compare instructions.
963 if (IsX86 && (Name.startswith("sse2.pcmp") ||
964 Name.startswith("avx2.pcmp"))) {
965 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
966 bool CmpEq = Name[9] == 'e';
967 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
968 CI->getArgOperand(0), CI->getArgOperand(1));
969 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
970 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
971 Type *I32Ty = Type::getInt32Ty(C);
972 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
973 ConstantInt::get(I32Ty, 0));
974 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
975 ConstantInt::get(I32Ty, 0));
976 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
977 Builder.CreateFAdd(Elt0, Elt1),
978 ConstantInt::get(I32Ty, 0));
979 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
980 Type *I32Ty = Type::getInt32Ty(C);
981 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
982 ConstantInt::get(I32Ty, 0));
983 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
984 ConstantInt::get(I32Ty, 0));
985 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
986 Builder.CreateFSub(Elt0, Elt1),
987 ConstantInt::get(I32Ty, 0));
988 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
989 Type *I32Ty = Type::getInt32Ty(C);
990 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
991 ConstantInt::get(I32Ty, 0));
992 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
993 ConstantInt::get(I32Ty, 0));
994 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
995 Builder.CreateFMul(Elt0, Elt1),
996 ConstantInt::get(I32Ty, 0));
997 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
998 Type *I32Ty = Type::getInt32Ty(C);
999 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1000 ConstantInt::get(I32Ty, 0));
1001 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1002 ConstantInt::get(I32Ty, 0));
1003 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1004 Builder.CreateFDiv(Elt0, Elt1),
1005 ConstantInt::get(I32Ty, 0));
1006 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1007 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1008 bool CmpEq = Name[16] == 'e';
1009 Rep = upgradeMaskedCompare(Builder, *CI,
1010 CmpEq ? ICmpInst::ICMP_EQ
1011 : ICmpInst::ICMP_SGT);
1012 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1013 Name == "sse2.pmaxs.w" ||
1014 Name == "sse41.pmaxsd" ||
1015 Name.startswith("avx2.pmaxs") ||
1016 Name.startswith("avx512.mask.pmaxs"))) {
1017 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1018 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1019 Name == "sse41.pmaxuw" ||
1020 Name == "sse41.pmaxud" ||
1021 Name.startswith("avx2.pmaxu") ||
1022 Name.startswith("avx512.mask.pmaxu"))) {
1023 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1024 } else if (IsX86 && (Name == "sse41.pminsb" ||
1025 Name == "sse2.pmins.w" ||
1026 Name == "sse41.pminsd" ||
1027 Name.startswith("avx2.pmins") ||
1028 Name.startswith("avx512.mask.pmins"))) {
1029 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1030 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1031 Name == "sse41.pminuw" ||
1032 Name == "sse41.pminud" ||
1033 Name.startswith("avx2.pminu") ||
1034 Name.startswith("avx512.mask.pminu"))) {
1035 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1036 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1037 Name == "sse2.cvtps2pd" ||
1038 Name == "avx.cvtdq2.pd.256" ||
1039 Name == "avx.cvt.ps2.pd.256" ||
1040 Name.startswith("avx512.mask.cvtdq2pd.") ||
1041 Name.startswith("avx512.mask.cvtudq2pd."))) {
1042 // Lossless i32/float to double conversion.
1043 // Extract the bottom elements if necessary and convert to double vector.
1044 Value *Src = CI->getArgOperand(0);
1045 VectorType *SrcTy = cast<VectorType>(Src->getType());
1046 VectorType *DstTy = cast<VectorType>(CI->getType());
1047 Rep = CI->getArgOperand(0);
1049 unsigned NumDstElts = DstTy->getNumElements();
1050 if (NumDstElts < SrcTy->getNumElements()) {
1051 assert(NumDstElts == 2 && "Unexpected vector size");
1052 uint32_t ShuffleMask[2] = { 0, 1 };
1053 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1057 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1058 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1060 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1061 else if (UInt2Double)
1062 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1064 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1066 if (CI->getNumArgOperands() == 3)
1067 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1068 CI->getArgOperand(1));
1069 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1070 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1071 CI->getArgOperand(1), CI->getArgOperand(2),
1073 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1074 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1075 CI->getArgOperand(1),CI->getArgOperand(2),
1077 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1078 Intrinsic::ID intID;
1079 if (Name.endswith("ub"))
1080 intID = Intrinsic::x86_xop_vpcomub;
1081 else if (Name.endswith("uw"))
1082 intID = Intrinsic::x86_xop_vpcomuw;
1083 else if (Name.endswith("ud"))
1084 intID = Intrinsic::x86_xop_vpcomud;
1085 else if (Name.endswith("uq"))
1086 intID = Intrinsic::x86_xop_vpcomuq;
1087 else if (Name.endswith("b"))
1088 intID = Intrinsic::x86_xop_vpcomb;
1089 else if (Name.endswith("w"))
1090 intID = Intrinsic::x86_xop_vpcomw;
1091 else if (Name.endswith("d"))
1092 intID = Intrinsic::x86_xop_vpcomd;
1093 else if (Name.endswith("q"))
1094 intID = Intrinsic::x86_xop_vpcomq;
1096 llvm_unreachable("Unknown suffix");
1098 Name = Name.substr(9); // strip off "xop.vpcom"
1100 if (Name.startswith("lt"))
1102 else if (Name.startswith("le"))
1104 else if (Name.startswith("gt"))
1106 else if (Name.startswith("ge"))
1108 else if (Name.startswith("eq"))
1110 else if (Name.startswith("ne"))
1112 else if (Name.startswith("false"))
1114 else if (Name.startswith("true"))
1117 llvm_unreachable("Unknown condition");
1119 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1121 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1122 Builder.getInt8(Imm)});
1123 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1124 Value *Sel = CI->getArgOperand(2);
1125 Value *NotSel = Builder.CreateNot(Sel);
1126 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1127 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1128 Rep = Builder.CreateOr(Sel0, Sel1);
1129 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1130 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1131 Intrinsic::x86_sse42_crc32_32_8);
1132 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1133 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1134 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1135 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1136 // Replace broadcasts with a series of insertelements.
1137 Type *VecTy = CI->getType();
1138 Type *EltTy = VecTy->getVectorElementType();
1139 unsigned EltNum = VecTy->getVectorNumElements();
1140 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1141 EltTy->getPointerTo());
1142 Value *Load = Builder.CreateLoad(EltTy, Cast);
1143 Type *I32Ty = Type::getInt32Ty(C);
1144 Rep = UndefValue::get(VecTy);
1145 for (unsigned I = 0; I < EltNum; ++I)
1146 Rep = Builder.CreateInsertElement(Rep, Load,
1147 ConstantInt::get(I32Ty, I));
1148 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1149 Name.startswith("sse41.pmovzx") ||
1150 Name.startswith("avx2.pmovsx") ||
1151 Name.startswith("avx2.pmovzx") ||
1152 Name.startswith("avx512.mask.pmovsx") ||
1153 Name.startswith("avx512.mask.pmovzx"))) {
1154 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1155 VectorType *DstTy = cast<VectorType>(CI->getType());
1156 unsigned NumDstElts = DstTy->getNumElements();
1158 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1159 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1160 for (unsigned i = 0; i != NumDstElts; ++i)
1163 Value *SV = Builder.CreateShuffleVector(
1164 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1166 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1167 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1168 : Builder.CreateZExt(SV, DstTy);
1169 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1170 if (CI->getNumArgOperands() == 3)
1171 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1172 CI->getArgOperand(1));
1173 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1174 Name == "avx2.vbroadcasti128")) {
1175 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1176 Type *EltTy = CI->getType()->getVectorElementType();
1177 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1178 Type *VT = VectorType::get(EltTy, NumSrcElts);
1179 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1180 PointerType::getUnqual(VT));
1181 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1182 if (NumSrcElts == 2)
1183 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1186 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1187 { 0, 1, 2, 3, 0, 1, 2, 3 });
1188 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1189 Name.startswith("avx2.vbroadcast") ||
1190 Name.startswith("avx512.pbroadcast") ||
1191 Name.startswith("avx512.mask.broadcast.s"))) {
1192 // Replace vp?broadcasts with a vector shuffle.
1193 Value *Op = CI->getArgOperand(0);
1194 unsigned NumElts = CI->getType()->getVectorNumElements();
1195 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1196 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1197 Constant::getNullValue(MaskTy));
1199 if (CI->getNumArgOperands() == 3)
1200 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1201 CI->getArgOperand(1));
1202 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1203 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1204 CI->getArgOperand(1),
1205 CI->getArgOperand(2),
1206 CI->getArgOperand(3),
1207 CI->getArgOperand(4),
1209 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1210 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1211 CI->getArgOperand(1),
1212 CI->getArgOperand(2),
1213 CI->getArgOperand(3),
1214 CI->getArgOperand(4),
1216 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1217 Name == "avx2.psll.dq")) {
1218 // 128/256-bit shift left specified in bits.
1219 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1220 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1221 Shift / 8); // Shift is in bits.
1222 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1223 Name == "avx2.psrl.dq")) {
1224 // 128/256-bit shift right specified in bits.
1225 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1226 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1227 Shift / 8); // Shift is in bits.
1228 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1229 Name == "avx2.psll.dq.bs" ||
1230 Name == "avx512.psll.dq.512")) {
1231 // 128/256/512-bit shift left specified in bytes.
1232 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1233 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1234 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1235 Name == "avx2.psrl.dq.bs" ||
1236 Name == "avx512.psrl.dq.512")) {
1237 // 128/256/512-bit shift right specified in bytes.
1238 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1239 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1240 } else if (IsX86 && (Name == "sse41.pblendw" ||
1241 Name.startswith("sse41.blendp") ||
1242 Name.startswith("avx.blend.p") ||
1243 Name == "avx2.pblendw" ||
1244 Name.startswith("avx2.pblendd."))) {
1245 Value *Op0 = CI->getArgOperand(0);
1246 Value *Op1 = CI->getArgOperand(1);
1247 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1248 VectorType *VecTy = cast<VectorType>(CI->getType());
1249 unsigned NumElts = VecTy->getNumElements();
1251 SmallVector<uint32_t, 16> Idxs(NumElts);
1252 for (unsigned i = 0; i != NumElts; ++i)
1253 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1255 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1256 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1257 Name == "avx2.vinserti128" ||
1258 Name.startswith("avx512.mask.insert"))) {
1259 Value *Op0 = CI->getArgOperand(0);
1260 Value *Op1 = CI->getArgOperand(1);
1261 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1262 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1263 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1264 unsigned Scale = DstNumElts / SrcNumElts;
1266 // Mask off the high bits of the immediate value; hardware ignores those.
1269 // Extend the second operand into a vector the size of the destination.
1270 Value *UndefV = UndefValue::get(Op1->getType());
1271 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1272 for (unsigned i = 0; i != SrcNumElts; ++i)
1274 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1275 Idxs[i] = SrcNumElts;
1276 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1278 // Insert the second operand into the first operand.
1280 // Note that there is no guarantee that instruction lowering will actually
1281 // produce a vinsertf128 instruction for the created shuffles. In
1282 // particular, the 0 immediate case involves no lane changes, so it can
1283 // be handled as a blend.
1285 // Example of shuffle mask for 32-bit elements:
1286 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1287 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1289 // First fill with identity mask.
1290 for (unsigned i = 0; i != DstNumElts; ++i)
1292 // Then replace the elements where we need to insert.
1293 for (unsigned i = 0; i != SrcNumElts; ++i)
1294 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1295 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1297 // If the intrinsic has a mask operand, handle that.
1298 if (CI->getNumArgOperands() == 5)
1299 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1300 CI->getArgOperand(3));
1301 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1302 Name == "avx2.vextracti128" ||
1303 Name.startswith("avx512.mask.vextract"))) {
1304 Value *Op0 = CI->getArgOperand(0);
1305 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1306 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1307 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1308 unsigned Scale = SrcNumElts / DstNumElts;
1310 // Mask off the high bits of the immediate value; hardware ignores those.
1313 // Get indexes for the subvector of the input vector.
1314 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1315 for (unsigned i = 0; i != DstNumElts; ++i) {
1316 Idxs[i] = i + (Imm * DstNumElts);
1318 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1320 // If the intrinsic has a mask operand, handle that.
1321 if (CI->getNumArgOperands() == 4)
1322 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1323 CI->getArgOperand(2));
1324 } else if (!IsX86 && Name == "stackprotectorcheck") {
1326 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1327 Name.startswith("avx512.mask.perm.di."))) {
1328 Value *Op0 = CI->getArgOperand(0);
1329 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1330 VectorType *VecTy = cast<VectorType>(CI->getType());
1331 unsigned NumElts = VecTy->getNumElements();
1333 SmallVector<uint32_t, 8> Idxs(NumElts);
1334 for (unsigned i = 0; i != NumElts; ++i)
1335 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1337 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1339 if (CI->getNumArgOperands() == 4)
1340 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1341 CI->getArgOperand(2));
1342 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1343 Name == "sse2.pshuf.d" ||
1344 Name.startswith("avx512.mask.vpermil.p") ||
1345 Name.startswith("avx512.mask.pshuf.d."))) {
1346 Value *Op0 = CI->getArgOperand(0);
1347 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1348 VectorType *VecTy = cast<VectorType>(CI->getType());
1349 unsigned NumElts = VecTy->getNumElements();
1350 // Calculate the size of each index in the immediate.
1351 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1352 unsigned IdxMask = ((1 << IdxSize) - 1);
1354 SmallVector<uint32_t, 8> Idxs(NumElts);
1355 // Lookup the bits for this element, wrapping around the immediate every
1356 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1357 // to offset by the first index of each group.
1358 for (unsigned i = 0; i != NumElts; ++i)
1359 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1361 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1363 if (CI->getNumArgOperands() == 4)
1364 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1365 CI->getArgOperand(2));
1366 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1367 Name.startswith("avx512.mask.pshufl.w."))) {
1368 Value *Op0 = CI->getArgOperand(0);
1369 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1370 unsigned NumElts = CI->getType()->getVectorNumElements();
1372 SmallVector<uint32_t, 16> Idxs(NumElts);
1373 for (unsigned l = 0; l != NumElts; l += 8) {
1374 for (unsigned i = 0; i != 4; ++i)
1375 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1376 for (unsigned i = 4; i != 8; ++i)
1377 Idxs[i + l] = i + l;
1380 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1382 if (CI->getNumArgOperands() == 4)
1383 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1384 CI->getArgOperand(2));
1385 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1386 Name.startswith("avx512.mask.pshufh.w."))) {
1387 Value *Op0 = CI->getArgOperand(0);
1388 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1389 unsigned NumElts = CI->getType()->getVectorNumElements();
1391 SmallVector<uint32_t, 16> Idxs(NumElts);
1392 for (unsigned l = 0; l != NumElts; l += 8) {
1393 for (unsigned i = 0; i != 4; ++i)
1394 Idxs[i + l] = i + l;
1395 for (unsigned i = 0; i != 4; ++i)
1396 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1399 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1401 if (CI->getNumArgOperands() == 4)
1402 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1403 CI->getArgOperand(2));
1404 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1405 Value *Op0 = CI->getArgOperand(0);
1406 Value *Op1 = CI->getArgOperand(1);
1407 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1408 unsigned NumElts = CI->getType()->getVectorNumElements();
1410 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1411 unsigned HalfLaneElts = NumLaneElts / 2;
1413 SmallVector<uint32_t, 16> Idxs(NumElts);
1414 for (unsigned i = 0; i != NumElts; ++i) {
1415 // Base index is the starting element of the lane.
1416 Idxs[i] = i - (i % NumLaneElts);
1417 // If we are half way through the lane switch to the other source.
1418 if ((i % NumLaneElts) >= HalfLaneElts)
1420 // Now select the specific element. By adding HalfLaneElts bits from
1421 // the immediate. Wrapping around the immediate every 8-bits.
1422 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1425 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1427 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1428 CI->getArgOperand(3));
1429 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1430 Name.startswith("avx512.mask.movshdup") ||
1431 Name.startswith("avx512.mask.movsldup"))) {
1432 Value *Op0 = CI->getArgOperand(0);
1433 unsigned NumElts = CI->getType()->getVectorNumElements();
1434 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1436 unsigned Offset = 0;
1437 if (Name.startswith("avx512.mask.movshdup."))
1440 SmallVector<uint32_t, 16> Idxs(NumElts);
1441 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1442 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1443 Idxs[i + l + 0] = i + l + Offset;
1444 Idxs[i + l + 1] = i + l + Offset;
1447 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1449 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1450 CI->getArgOperand(1));
1451 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1452 Name.startswith("avx512.mask.unpckl."))) {
1453 Value *Op0 = CI->getArgOperand(0);
1454 Value *Op1 = CI->getArgOperand(1);
1455 int NumElts = CI->getType()->getVectorNumElements();
1456 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1458 SmallVector<uint32_t, 64> Idxs(NumElts);
1459 for (int l = 0; l != NumElts; l += NumLaneElts)
1460 for (int i = 0; i != NumLaneElts; ++i)
1461 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1463 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1465 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1466 CI->getArgOperand(2));
1467 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1468 Name.startswith("avx512.mask.unpckh."))) {
1469 Value *Op0 = CI->getArgOperand(0);
1470 Value *Op1 = CI->getArgOperand(1);
1471 int NumElts = CI->getType()->getVectorNumElements();
1472 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1474 SmallVector<uint32_t, 64> Idxs(NumElts);
1475 for (int l = 0; l != NumElts; l += NumLaneElts)
1476 for (int i = 0; i != NumLaneElts; ++i)
1477 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1479 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1481 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1482 CI->getArgOperand(2));
1483 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1484 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1485 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1486 CI->getArgOperand(2));
1487 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1488 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1489 CI->getArgOperand(1));
1490 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1491 CI->getArgOperand(2));
1492 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1493 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1494 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1495 CI->getArgOperand(2));
1496 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1497 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1498 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1499 CI->getArgOperand(2));
1500 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1501 VectorType *FTy = cast<VectorType>(CI->getType());
1502 VectorType *ITy = VectorType::getInteger(FTy);
1503 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1504 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1505 Rep = Builder.CreateBitCast(Rep, FTy);
1506 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1507 CI->getArgOperand(2));
1508 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1509 VectorType *FTy = cast<VectorType>(CI->getType());
1510 VectorType *ITy = VectorType::getInteger(FTy);
1511 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1512 Rep = Builder.CreateAnd(Rep,
1513 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1514 Rep = Builder.CreateBitCast(Rep, FTy);
1515 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1516 CI->getArgOperand(2));
1517 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1518 VectorType *FTy = cast<VectorType>(CI->getType());
1519 VectorType *ITy = VectorType::getInteger(FTy);
1520 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1521 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1522 Rep = Builder.CreateBitCast(Rep, FTy);
1523 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1524 CI->getArgOperand(2));
1525 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1526 VectorType *FTy = cast<VectorType>(CI->getType());
1527 VectorType *ITy = VectorType::getInteger(FTy);
1528 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1529 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1530 Rep = Builder.CreateBitCast(Rep, FTy);
1531 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1532 CI->getArgOperand(2));
1533 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1534 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1535 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1536 CI->getArgOperand(2));
1537 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1538 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1539 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1540 CI->getArgOperand(2));
1541 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1542 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1543 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1544 CI->getArgOperand(2));
1545 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1546 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1547 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1548 CI->getArgOperand(2));
1549 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1550 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1551 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1552 CI->getArgOperand(2));
1553 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1554 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1555 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1556 CI->getArgOperand(2));
1557 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1558 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1559 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1560 CI->getArgOperand(2));
1561 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1562 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1565 { CI->getArgOperand(0), Builder.getInt1(false) });
1566 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1567 CI->getArgOperand(1));
1568 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1569 Name.startswith("avx512.mask.min.p"))) {
1570 bool IsMin = Name[13] == 'i';
1571 VectorType *VecTy = cast<VectorType>(CI->getType());
1572 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1573 unsigned EltWidth = VecTy->getScalarSizeInBits();
1575 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1576 IID = Intrinsic::x86_sse_max_ps;
1577 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1578 IID = Intrinsic::x86_sse2_max_pd;
1579 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1580 IID = Intrinsic::x86_avx_max_ps_256;
1581 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1582 IID = Intrinsic::x86_avx_max_pd_256;
1583 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1584 IID = Intrinsic::x86_sse_min_ps;
1585 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1586 IID = Intrinsic::x86_sse2_min_pd;
1587 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1588 IID = Intrinsic::x86_avx_min_ps_256;
1589 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1590 IID = Intrinsic::x86_avx_min_pd_256;
1592 llvm_unreachable("Unexpected intrinsic");
1594 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1595 { CI->getArgOperand(0), CI->getArgOperand(1) });
1596 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1597 CI->getArgOperand(2));
1598 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1599 VectorType *VecTy = cast<VectorType>(CI->getType());
1601 if (VecTy->getPrimitiveSizeInBits() == 128)
1602 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1603 else if (VecTy->getPrimitiveSizeInBits() == 256)
1604 IID = Intrinsic::x86_avx2_pshuf_b;
1605 else if (VecTy->getPrimitiveSizeInBits() == 512)
1606 IID = Intrinsic::x86_avx512_pshuf_b_512;
1608 llvm_unreachable("Unexpected intrinsic");
1610 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1611 { CI->getArgOperand(0), CI->getArgOperand(1) });
1612 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1613 CI->getArgOperand(2));
1614 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1615 Name.startswith("avx512.mask.pmulu.dq."))) {
1616 bool IsUnsigned = Name[16] == 'u';
1617 VectorType *VecTy = cast<VectorType>(CI->getType());
1619 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1620 IID = Intrinsic::x86_sse41_pmuldq;
1621 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1622 IID = Intrinsic::x86_avx2_pmul_dq;
1623 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1624 IID = Intrinsic::x86_avx512_pmul_dq_512;
1625 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1626 IID = Intrinsic::x86_sse2_pmulu_dq;
1627 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1628 IID = Intrinsic::x86_avx2_pmulu_dq;
1629 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1630 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1632 llvm_unreachable("Unexpected intrinsic");
1634 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1635 { CI->getArgOperand(0), CI->getArgOperand(1) });
1636 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1637 CI->getArgOperand(2));
1638 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1639 bool IsUnsigned = Name[16] == 'u';
1640 bool IsDW = Name[18] == 'd';
1641 VectorType *VecTy = cast<VectorType>(CI->getType());
1643 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1644 IID = Intrinsic::x86_sse2_packsswb_128;
1645 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1646 IID = Intrinsic::x86_avx2_packsswb;
1647 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1648 IID = Intrinsic::x86_avx512_packsswb_512;
1649 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1650 IID = Intrinsic::x86_sse2_packssdw_128;
1651 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1652 IID = Intrinsic::x86_avx2_packssdw;
1653 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1654 IID = Intrinsic::x86_avx512_packssdw_512;
1655 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1656 IID = Intrinsic::x86_sse2_packuswb_128;
1657 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1658 IID = Intrinsic::x86_avx2_packuswb;
1659 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1660 IID = Intrinsic::x86_avx512_packuswb_512;
1661 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1662 IID = Intrinsic::x86_sse41_packusdw;
1663 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1664 IID = Intrinsic::x86_avx2_packusdw;
1665 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1666 IID = Intrinsic::x86_avx512_packusdw_512;
1668 llvm_unreachable("Unexpected intrinsic");
1670 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1671 { CI->getArgOperand(0), CI->getArgOperand(1) });
1672 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1673 CI->getArgOperand(2));
1674 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1675 bool IsImmediate = Name[16] == 'i' ||
1676 (Name.size() > 18 && Name[18] == 'i');
1677 bool IsVariable = Name[16] == 'v';
1678 char Size = Name[16] == '.' ? Name[17] :
1679 Name[17] == '.' ? Name[18] :
1680 Name[18] == '.' ? Name[19] :
1684 if (IsVariable && Name[17] != '.') {
1685 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1686 IID = Intrinsic::x86_avx2_psllv_q;
1687 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1688 IID = Intrinsic::x86_avx2_psllv_q_256;
1689 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1690 IID = Intrinsic::x86_avx2_psllv_d;
1691 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1692 IID = Intrinsic::x86_avx2_psllv_d_256;
1693 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1694 IID = Intrinsic::x86_avx512_psllv_w_128;
1695 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1696 IID = Intrinsic::x86_avx512_psllv_w_256;
1697 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1698 IID = Intrinsic::x86_avx512_psllv_w_512;
1700 llvm_unreachable("Unexpected size");
1701 } else if (Name.endswith(".128")) {
1702 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1703 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1704 : Intrinsic::x86_sse2_psll_d;
1705 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1706 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1707 : Intrinsic::x86_sse2_psll_q;
1708 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1709 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1710 : Intrinsic::x86_sse2_psll_w;
1712 llvm_unreachable("Unexpected size");
1713 } else if (Name.endswith(".256")) {
1714 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1715 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1716 : Intrinsic::x86_avx2_psll_d;
1717 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1718 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1719 : Intrinsic::x86_avx2_psll_q;
1720 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1721 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1722 : Intrinsic::x86_avx2_psll_w;
1724 llvm_unreachable("Unexpected size");
1726 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1727 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1728 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1729 Intrinsic::x86_avx512_psll_d_512;
1730 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1731 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1732 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1733 Intrinsic::x86_avx512_psll_q_512;
1734 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1735 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1736 : Intrinsic::x86_avx512_psll_w_512;
1738 llvm_unreachable("Unexpected size");
1741 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1742 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1743 bool IsImmediate = Name[16] == 'i' ||
1744 (Name.size() > 18 && Name[18] == 'i');
1745 bool IsVariable = Name[16] == 'v';
1746 char Size = Name[16] == '.' ? Name[17] :
1747 Name[17] == '.' ? Name[18] :
1748 Name[18] == '.' ? Name[19] :
1752 if (IsVariable && Name[17] != '.') {
1753 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1754 IID = Intrinsic::x86_avx2_psrlv_q;
1755 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1756 IID = Intrinsic::x86_avx2_psrlv_q_256;
1757 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1758 IID = Intrinsic::x86_avx2_psrlv_d;
1759 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1760 IID = Intrinsic::x86_avx2_psrlv_d_256;
1761 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1762 IID = Intrinsic::x86_avx512_psrlv_w_128;
1763 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1764 IID = Intrinsic::x86_avx512_psrlv_w_256;
1765 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1766 IID = Intrinsic::x86_avx512_psrlv_w_512;
1768 llvm_unreachable("Unexpected size");
1769 } else if (Name.endswith(".128")) {
1770 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1771 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1772 : Intrinsic::x86_sse2_psrl_d;
1773 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1774 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1775 : Intrinsic::x86_sse2_psrl_q;
1776 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1777 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1778 : Intrinsic::x86_sse2_psrl_w;
1780 llvm_unreachable("Unexpected size");
1781 } else if (Name.endswith(".256")) {
1782 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1783 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1784 : Intrinsic::x86_avx2_psrl_d;
1785 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1786 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1787 : Intrinsic::x86_avx2_psrl_q;
1788 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1789 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1790 : Intrinsic::x86_avx2_psrl_w;
1792 llvm_unreachable("Unexpected size");
1794 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1795 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1796 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1797 Intrinsic::x86_avx512_psrl_d_512;
1798 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1799 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1800 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1801 Intrinsic::x86_avx512_psrl_q_512;
1802 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1803 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1804 : Intrinsic::x86_avx512_psrl_w_512;
1806 llvm_unreachable("Unexpected size");
1809 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1810 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1811 bool IsImmediate = Name[16] == 'i' ||
1812 (Name.size() > 18 && Name[18] == 'i');
1813 bool IsVariable = Name[16] == 'v';
1814 char Size = Name[16] == '.' ? Name[17] :
1815 Name[17] == '.' ? Name[18] :
1816 Name[18] == '.' ? Name[19] :
1820 if (IsVariable && Name[17] != '.') {
1821 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1822 IID = Intrinsic::x86_avx2_psrav_d;
1823 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1824 IID = Intrinsic::x86_avx2_psrav_d_256;
1825 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1826 IID = Intrinsic::x86_avx512_psrav_w_128;
1827 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1828 IID = Intrinsic::x86_avx512_psrav_w_256;
1829 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1830 IID = Intrinsic::x86_avx512_psrav_w_512;
1832 llvm_unreachable("Unexpected size");
1833 } else if (Name.endswith(".128")) {
1834 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1835 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1836 : Intrinsic::x86_sse2_psra_d;
1837 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1838 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1839 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1840 Intrinsic::x86_avx512_psra_q_128;
1841 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1842 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1843 : Intrinsic::x86_sse2_psra_w;
1845 llvm_unreachable("Unexpected size");
1846 } else if (Name.endswith(".256")) {
1847 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1848 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1849 : Intrinsic::x86_avx2_psra_d;
1850 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1851 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1852 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1853 Intrinsic::x86_avx512_psra_q_256;
1854 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1855 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1856 : Intrinsic::x86_avx2_psra_w;
1858 llvm_unreachable("Unexpected size");
1860 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1861 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1862 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1863 Intrinsic::x86_avx512_psra_d_512;
1864 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1865 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1866 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1867 Intrinsic::x86_avx512_psra_q_512;
1868 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1869 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1870 : Intrinsic::x86_avx512_psra_w_512;
1872 llvm_unreachable("Unexpected size");
1875 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1876 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1877 Rep = upgradeMaskedMove(Builder, *CI);
1878 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
1879 Rep = UpgradeMaskToInt(Builder, *CI);
1880 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1882 if (Name.endswith("ps.128"))
1883 IID = Intrinsic::x86_avx_vpermilvar_ps;
1884 else if (Name.endswith("pd.128"))
1885 IID = Intrinsic::x86_avx_vpermilvar_pd;
1886 else if (Name.endswith("ps.256"))
1887 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1888 else if (Name.endswith("pd.256"))
1889 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1890 else if (Name.endswith("ps.512"))
1891 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1892 else if (Name.endswith("pd.512"))
1893 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1895 llvm_unreachable("Unexpected vpermilvar intrinsic");
1897 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1898 Rep = Builder.CreateCall(Intrin,
1899 { CI->getArgOperand(0), CI->getArgOperand(1) });
1900 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1901 CI->getArgOperand(2));
1902 } else if (IsX86 && Name.endswith(".movntdqa")) {
1903 Module *M = F->getParent();
1904 MDNode *Node = MDNode::get(
1905 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1907 Value *Ptr = CI->getArgOperand(0);
1908 VectorType *VTy = cast<VectorType>(CI->getType());
1910 // Convert the type of the pointer to a pointer to the stored type.
1912 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
1913 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
1914 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
1916 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
1917 Value *Arg = CI->getArgOperand(0);
1918 Value *Neg = Builder.CreateNeg(Arg, "neg");
1919 Value *Cmp = Builder.CreateICmpSGE(
1920 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
1921 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
1922 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
1923 Name == "max.ui" || Name == "max.ull")) {
1924 Value *Arg0 = CI->getArgOperand(0);
1925 Value *Arg1 = CI->getArgOperand(1);
1926 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1927 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
1928 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
1929 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
1930 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
1931 Name == "min.ui" || Name == "min.ull")) {
1932 Value *Arg0 = CI->getArgOperand(0);
1933 Value *Arg1 = CI->getArgOperand(1);
1934 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1935 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
1936 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
1937 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
1938 } else if (IsNVVM && Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
1940 Value *Arg = CI->getArgOperand(0);
1941 Value *Ctlz = Builder.CreateCall(
1942 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
1944 {Arg, Builder.getFalse()}, "ctlz");
1945 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
1946 } else if (IsNVVM && Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
1949 Value *Arg = CI->getArgOperand(0);
1950 Value *Popc = Builder.CreateCall(
1951 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
1954 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
1955 } else if (IsNVVM && Name == "h2f") {
1956 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
1957 F->getParent(), Intrinsic::convert_from_fp16,
1958 {Builder.getFloatTy()}),
1959 CI->getArgOperand(0), "h2f");
1961 llvm_unreachable("Unknown function for CallInst upgrade.");
1965 CI->replaceAllUsesWith(Rep);
1966 CI->eraseFromParent();
1970 CallInst *NewCall = nullptr;
1971 switch (NewFn->getIntrinsicID()) {
1973 // Handle generic mangling change, but nothing else
1975 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
1976 "Unknown function for CallInst upgrade and isn't just a name change");
1977 CI->setCalledFunction(NewFn);
1981 case Intrinsic::arm_neon_vld1:
1982 case Intrinsic::arm_neon_vld2:
1983 case Intrinsic::arm_neon_vld3:
1984 case Intrinsic::arm_neon_vld4:
1985 case Intrinsic::arm_neon_vld2lane:
1986 case Intrinsic::arm_neon_vld3lane:
1987 case Intrinsic::arm_neon_vld4lane:
1988 case Intrinsic::arm_neon_vst1:
1989 case Intrinsic::arm_neon_vst2:
1990 case Intrinsic::arm_neon_vst3:
1991 case Intrinsic::arm_neon_vst4:
1992 case Intrinsic::arm_neon_vst2lane:
1993 case Intrinsic::arm_neon_vst3lane:
1994 case Intrinsic::arm_neon_vst4lane: {
1995 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1996 CI->arg_operands().end());
1997 NewCall = Builder.CreateCall(NewFn, Args);
2001 case Intrinsic::bitreverse:
2002 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2005 case Intrinsic::ctlz:
2006 case Intrinsic::cttz:
2007 assert(CI->getNumArgOperands() == 1 &&
2008 "Mismatch between function args and call args");
2010 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
2013 case Intrinsic::objectsize: {
2014 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2015 ? Builder.getFalse()
2016 : CI->getArgOperand(2);
2017 NewCall = Builder.CreateCall(
2018 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2022 case Intrinsic::ctpop:
2023 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2026 case Intrinsic::convert_from_fp16:
2027 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2030 case Intrinsic::x86_xop_vfrcz_ss:
2031 case Intrinsic::x86_xop_vfrcz_sd:
2032 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2035 case Intrinsic::x86_xop_vpermil2pd:
2036 case Intrinsic::x86_xop_vpermil2ps:
2037 case Intrinsic::x86_xop_vpermil2pd_256:
2038 case Intrinsic::x86_xop_vpermil2ps_256: {
2039 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2040 CI->arg_operands().end());
2041 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2042 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2043 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2044 NewCall = Builder.CreateCall(NewFn, Args);
2048 case Intrinsic::x86_sse41_ptestc:
2049 case Intrinsic::x86_sse41_ptestz:
2050 case Intrinsic::x86_sse41_ptestnzc: {
2051 // The arguments for these intrinsics used to be v4f32, and changed
2052 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
2053 // So, the only thing required is a bitcast for both arguments.
2054 // First, check the arguments have the old type.
2055 Value *Arg0 = CI->getArgOperand(0);
2056 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2059 // Old intrinsic, add bitcasts
2060 Value *Arg1 = CI->getArgOperand(1);
2062 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2064 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2065 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2067 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2071 case Intrinsic::x86_sse41_insertps:
2072 case Intrinsic::x86_sse41_dppd:
2073 case Intrinsic::x86_sse41_dpps:
2074 case Intrinsic::x86_sse41_mpsadbw:
2075 case Intrinsic::x86_avx_dp_ps_256:
2076 case Intrinsic::x86_avx2_mpsadbw: {
2077 // Need to truncate the last argument from i32 to i8 -- this argument models
2078 // an inherently 8-bit immediate operand to these x86 instructions.
2079 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2080 CI->arg_operands().end());
2082 // Replace the last argument with a trunc.
2083 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2084 NewCall = Builder.CreateCall(NewFn, Args);
2088 case Intrinsic::thread_pointer: {
2089 NewCall = Builder.CreateCall(NewFn, {});
2093 case Intrinsic::invariant_start:
2094 case Intrinsic::invariant_end:
2095 case Intrinsic::masked_load:
2096 case Intrinsic::masked_store:
2097 case Intrinsic::masked_gather:
2098 case Intrinsic::masked_scatter: {
2099 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2100 CI->arg_operands().end());
2101 NewCall = Builder.CreateCall(NewFn, Args);
2105 assert(NewCall && "Should have either set this variable or returned through "
2106 "the default case");
2107 std::string Name = CI->getName();
2108 if (!Name.empty()) {
2109 CI->setName(Name + ".old");
2110 NewCall->setName(Name);
2112 CI->replaceAllUsesWith(NewCall);
2113 CI->eraseFromParent();
2116 void llvm::UpgradeCallsToIntrinsic(Function *F) {
2117 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
2119 // Check if this function should be upgraded and get the replacement function
2122 if (UpgradeIntrinsicFunction(F, NewFn)) {
2123 // Replace all users of the old function with the new function or new
2124 // instructions. This is not a range loop because the call is deleted.
2125 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
2126 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
2127 UpgradeIntrinsicCall(CI, NewFn);
2129 // Remove old function, no longer used, from the module.
2130 F->eraseFromParent();
2134 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
2135 // Check if the tag uses struct-path aware TBAA format.
2136 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
2139 auto &Context = MD.getContext();
2140 if (MD.getNumOperands() == 3) {
2141 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
2142 MDNode *ScalarType = MDNode::get(Context, Elts);
2143 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
2144 Metadata *Elts2[] = {ScalarType, ScalarType,
2145 ConstantAsMetadata::get(
2146 Constant::getNullValue(Type::getInt64Ty(Context))),
2148 return MDNode::get(Context, Elts2);
2150 // Create a MDNode <MD, MD, offset 0>
2151 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
2152 Type::getInt64Ty(Context)))};
2153 return MDNode::get(Context, Elts);
2156 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
2157 Instruction *&Temp) {
2158 if (Opc != Instruction::BitCast)
2162 Type *SrcTy = V->getType();
2163 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2164 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2165 LLVMContext &Context = V->getContext();
2167 // We have no information about target data layout, so we assume that
2168 // the maximum pointer size is 64bit.
2169 Type *MidTy = Type::getInt64Ty(Context);
2170 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
2172 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
2178 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
2179 if (Opc != Instruction::BitCast)
2182 Type *SrcTy = C->getType();
2183 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2184 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2185 LLVMContext &Context = C->getContext();
2187 // We have no information about target data layout, so we assume that
2188 // the maximum pointer size is 64bit.
2189 Type *MidTy = Type::getInt64Ty(Context);
2191 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2198 /// Check the debug info version number, if it is out-dated, drop the debug
2199 /// info. Return true if module is modified.
2200 bool llvm::UpgradeDebugInfo(Module &M) {
2201 unsigned Version = getDebugMetadataVersionFromModule(M);
2202 if (Version == DEBUG_METADATA_VERSION)
2205 bool RetCode = StripDebugInfo(M);
2207 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2208 M.getContext().diagnose(DiagVersion);
2213 bool llvm::UpgradeModuleFlags(Module &M) {
2214 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2218 bool HasObjCFlag = false, HasClassProperties = false;
2219 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2220 MDNode *Op = ModFlags->getOperand(I);
2221 if (Op->getNumOperands() < 2)
2223 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2226 if (ID->getString() == "Objective-C Image Info Version")
2228 if (ID->getString() == "Objective-C Class Properties")
2229 HasClassProperties = true;
2231 // "Objective-C Class Properties" is recently added for Objective-C. We
2232 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2233 // flag of value 0, so we can correclty downgrade this flag when trying to
2234 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2235 // this module flag.
2236 if (HasObjCFlag && !HasClassProperties) {
2237 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
2244 static bool isOldLoopArgument(Metadata *MD) {
2245 auto *T = dyn_cast_or_null<MDTuple>(MD);
2248 if (T->getNumOperands() < 1)
2250 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2253 return S->getString().startswith("llvm.vectorizer.");
2256 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2257 StringRef OldPrefix = "llvm.vectorizer.";
2258 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2260 if (OldTag == "llvm.vectorizer.unroll")
2261 return MDString::get(C, "llvm.loop.interleave.count");
2263 return MDString::get(
2264 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2268 static Metadata *upgradeLoopArgument(Metadata *MD) {
2269 auto *T = dyn_cast_or_null<MDTuple>(MD);
2272 if (T->getNumOperands() < 1)
2274 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2277 if (!OldTag->getString().startswith("llvm.vectorizer."))
2280 // This has an old tag. Upgrade it.
2281 SmallVector<Metadata *, 8> Ops;
2282 Ops.reserve(T->getNumOperands());
2283 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2284 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2285 Ops.push_back(T->getOperand(I));
2287 return MDTuple::get(T->getContext(), Ops);
2290 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2291 auto *T = dyn_cast<MDTuple>(&N);
2295 if (none_of(T->operands(), isOldLoopArgument))
2298 SmallVector<Metadata *, 8> Ops;
2299 Ops.reserve(T->getNumOperands());
2300 for (Metadata *MD : T->operands())
2301 Ops.push_back(upgradeLoopArgument(MD));
2303 return MDTuple::get(T->getContext(), Ops);