1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/CFG.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DIBuilder.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/Regex.h"
// Rename GV by appending the suffix ".old" to its current name.
35 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
37 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
38 // changed their type from v4f32 to v2i64.
// F is the declaration being inspected and IID identifies the intrinsic whose
// current declaration is stored into NewFn when F is found to be outdated.
39 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
41 // Check whether this is an old version of the function, which received
// v4f32 arguments: only the type of the first parameter is inspected.
43 Type *Arg0Type = F->getFunctionType()->getParamType(0);
44 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
47 // Yes, it's old, replace it with new version.
49 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
54 // arguments have changed their type from i32 to i8.
// F is the declaration being inspected and IID identifies the intrinsic whose
// current declaration is stored into NewFn when F still uses the i32 mask.
55 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
57 // Check that the last argument is an i32.
58 Type *LastArgType = F->getFunctionType()->getParamType(
59 F->getFunctionType()->getNumParams() - 1);
60 if (!LastArgType->isIntegerTy(32))
63 // Move this function aside and map down.
65 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Match Name against the x86 intrinsic names and name prefixes listed below.
// The caller in this file strips the "llvm." and "x86." prefixes before
// calling, which is why none of the entries carry them. Exact names are
// compared with ==, whole families with startswith().
69 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
70 // All of the intrinsic matches below should be marked with the llvm
71 // version that started autoupgrading them. At some point in the future we
72 // would like to use this information to remove upgrade code for some older
73 // intrinsics. It is currently undecided how we will determine that future
// point.
75 if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
76 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
77 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
78 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
79 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
80 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
81 Name == "sse.add.ss" || // Added in 4.0
82 Name == "sse2.add.sd" || // Added in 4.0
83 Name == "sse.sub.ss" || // Added in 4.0
84 Name == "sse2.sub.sd" || // Added in 4.0
85 Name == "sse.mul.ss" || // Added in 4.0
86 Name == "sse2.mul.sd" || // Added in 4.0
87 Name == "sse.div.ss" || // Added in 4.0
88 Name == "sse2.div.sd" || // Added in 4.0
89 Name == "sse41.pmaxsb" || // Added in 3.9
90 Name == "sse2.pmaxs.w" || // Added in 3.9
91 Name == "sse41.pmaxsd" || // Added in 3.9
92 Name == "sse2.pmaxu.b" || // Added in 3.9
93 Name == "sse41.pmaxuw" || // Added in 3.9
94 Name == "sse41.pmaxud" || // Added in 3.9
95 Name == "sse41.pminsb" || // Added in 3.9
96 Name == "sse2.pmins.w" || // Added in 3.9
97 Name == "sse41.pminsd" || // Added in 3.9
98 Name == "sse2.pminu.b" || // Added in 3.9
99 Name == "sse41.pminuw" || // Added in 3.9
100 Name == "sse41.pminud" || // Added in 3.9
101 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
102 Name.startswith("avx2.pmax") || // Added in 3.9
103 Name.startswith("avx2.pmin") || // Added in 3.9
104 Name.startswith("avx512.mask.pmax") || // Added in 4.0
105 Name.startswith("avx512.mask.pmin") || // Added in 4.0
106 Name.startswith("avx2.vbroadcast") || // Added in 3.8
107 Name.startswith("avx2.pbroadcast") || // Added in 3.8
108 Name.startswith("avx.vpermil.") || // Added in 3.1
109 Name.startswith("sse2.pshuf") || // Added in 3.9
110 Name.startswith("avx512.pbroadcast") || // Added in 3.9
111 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
112 Name.startswith("avx512.mask.movddup") || // Added in 3.9
113 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
114 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
115 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
116 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
117 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
118 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
119 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
120 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
121 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
122 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
123 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
124 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
125 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
126 Name.startswith("avx512.mask.pand.") || // Added in 3.9
127 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
128 Name.startswith("avx512.mask.por.") || // Added in 3.9
129 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
130 Name.startswith("avx512.mask.and.") || // Added in 3.9
131 Name.startswith("avx512.mask.andn.") || // Added in 3.9
132 Name.startswith("avx512.mask.or.") || // Added in 3.9
133 Name.startswith("avx512.mask.xor.") || // Added in 3.9
134 Name.startswith("avx512.mask.padd.") || // Added in 4.0
135 Name.startswith("avx512.mask.psub.") || // Added in 4.0
136 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
137 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
138 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
139 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
140 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
141 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
142 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
143 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
144 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
145 Name == "avx512.mask.add.pd.128" || // Added in 4.0
146 Name == "avx512.mask.add.pd.256" || // Added in 4.0
147 Name == "avx512.mask.add.ps.128" || // Added in 4.0
148 Name == "avx512.mask.add.ps.256" || // Added in 4.0
149 Name == "avx512.mask.div.pd.128" || // Added in 4.0
150 Name == "avx512.mask.div.pd.256" || // Added in 4.0
151 Name == "avx512.mask.div.ps.128" || // Added in 4.0
152 Name == "avx512.mask.div.ps.256" || // Added in 4.0
153 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
154 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
155 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
156 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
157 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
158 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
159 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
160 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
161 Name == "avx512.mask.max.pd.128" || // Added in 5.0
162 Name == "avx512.mask.max.pd.256" || // Added in 5.0
163 Name == "avx512.mask.max.ps.128" || // Added in 5.0
164 Name == "avx512.mask.max.ps.256" || // Added in 5.0
165 Name == "avx512.mask.min.pd.128" || // Added in 5.0
166 Name == "avx512.mask.min.pd.256" || // Added in 5.0
167 Name == "avx512.mask.min.ps.128" || // Added in 5.0
168 Name == "avx512.mask.min.ps.256" || // Added in 5.0
169 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
170 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
171 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
172 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
173 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
174 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
175 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
176 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
177 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
178 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
179 Name.startswith("avx512.mask.pslli") || // Added in 4.0
180 Name.startswith("avx512.mask.psrai") || // Added in 4.0
181 Name.startswith("avx512.mask.psrli") || // Added in 4.0
182 Name.startswith("avx512.mask.psllv") || // Added in 4.0
183 Name.startswith("avx512.mask.psrav") || // Added in 4.0
184 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
185 Name.startswith("sse41.pmovsx") || // Added in 3.8
186 Name.startswith("sse41.pmovzx") || // Added in 3.9
187 Name.startswith("avx2.pmovsx") || // Added in 3.9
188 Name.startswith("avx2.pmovzx") || // Added in 3.9
189 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
190 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
191 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
192 Name == "sse2.cvtdq2pd" || // Added in 3.9
193 Name == "sse2.cvtps2pd" || // Added in 3.9
194 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
195 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
196 Name.startswith("avx.vinsertf128.") || // Added in 3.7
197 Name == "avx2.vinserti128" || // Added in 3.7
198 Name.startswith("avx512.mask.insert") || // Added in 4.0
199 Name.startswith("avx.vextractf128.") || // Added in 3.7
200 Name == "avx2.vextracti128" || // Added in 3.7
201 Name.startswith("avx512.mask.vextract") || // Added in 4.0
202 Name.startswith("sse4a.movnt.") || // Added in 3.9
203 Name.startswith("avx.movnt.") || // Added in 3.2
204 Name.startswith("avx512.storent.") || // Added in 3.9
205 Name == "sse41.movntdqa" || // Added in 5.0
206 Name == "avx2.movntdqa" || // Added in 5.0
207 Name == "avx512.movntdqa" || // Added in 5.0
208 Name == "sse2.storel.dq" || // Added in 3.9
209 Name.startswith("sse.storeu.") || // Added in 3.9
210 Name.startswith("sse2.storeu.") || // Added in 3.9
211 Name.startswith("avx.storeu.") || // Added in 3.9
212 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
213 Name.startswith("avx512.mask.store.p") || // Added in 3.9
214 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
215 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
216 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
217 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
218 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
219 Name.startswith("avx512.mask.load.") || // Added in 3.9
220 Name == "sse42.crc32.64.8" || // Added in 3.4
221 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
222 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
223 Name.startswith("avx512.mask.valign.") || // Added in 4.0
224 Name.startswith("sse2.psll.dq") || // Added in 3.7
225 Name.startswith("sse2.psrl.dq") || // Added in 3.7
226 Name.startswith("avx2.psll.dq") || // Added in 3.7
227 Name.startswith("avx2.psrl.dq") || // Added in 3.7
228 Name.startswith("avx512.psll.dq") || // Added in 3.9
229 Name.startswith("avx512.psrl.dq") || // Added in 3.9
230 Name == "sse41.pblendw" || // Added in 3.7
231 Name.startswith("sse41.blendp") || // Added in 3.7
232 Name.startswith("avx.blend.p") || // Added in 3.7
233 Name == "avx2.pblendw" || // Added in 3.7
234 Name.startswith("avx2.pblendd.") || // Added in 3.7
235 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
236 Name == "avx2.vbroadcasti128" || // Added in 3.7
237 Name == "xop.vpcmov" || // Added in 3.8
238 Name == "xop.vpcmov.256" || // Added in 5.0
239 Name.startswith("avx512.mask.move.s") || // Added in 4.0
240 Name.startswith("avx512.cvtmask2") || // Added in 5.0
241 (Name.startswith("xop.vpcom") && // Added in 3.2
// Handle declaration-level upgrades for x86 intrinsics. Name is the intrinsic
// name with the "llvm." prefix already removed by the caller; this function
// strips the following "x86." prefix itself. When a replacement declaration is
// found it is stored into NewFn.
248 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
250 // Only handle intrinsics that start with "x86.".
251 if (!Name.startswith("x86."))
253 // Remove "x86." prefix.
254 Name = Name.substr(4);
// Intrinsics matched by the name list are handled separately from the
// signature-based checks that follow.
256 if (ShouldUpgradeX86Intrinsic(F, Name)) {
261 // SSE4.1 ptest functions may have an old signature.
262 if (Name.startswith("sse41.ptest")) { // Added in 3.2
// "sse41.ptest" is 11 characters long, so substr(11) is the variant suffix.
263 if (Name.substr(11) == "c")
264 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
265 if (Name.substr(11) == "z")
266 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
267 if (Name.substr(11) == "nzc")
268 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
270 // Several blend and other instructions with masks used the wrong number of
// bits: their immediate mask argument was i32 and is now i8.
272 if (Name == "sse41.insertps") // Added in 3.6
273 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
275 if (Name == "sse41.dppd") // Added in 3.6
276 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
278 if (Name == "sse41.dpps") // Added in 3.6
279 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
281 if (Name == "sse41.mpsadbw") // Added in 3.6
282 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
284 if (Name == "avx.dp.ps.256") // Added in 3.6
285 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
287 if (Name == "avx2.mpsadbw") // Added in 3.6
288 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
291 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
// Only declarations that still take two arguments are treated as old.
292 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
294 NewFn = Intrinsic::getDeclaration(F->getParent(),
295 Intrinsic::x86_xop_vfrcz_ss);
298 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
300 NewFn = Intrinsic::getDeclaration(F->getParent(),
301 Intrinsic::x86_xop_vfrcz_sd);
304 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
305 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
// The index operand is the third parameter.
306 auto Idx = F->getFunctionType()->getParamType(2);
307 if (Idx->isFPOrFPVectorTy()) {
// Pick the replacement from the element width (64 = pd, 32 = ps) and the
// total vector width (128 or 256 bits) of the index operand.
309 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
310 unsigned EltSize = Idx->getScalarSizeInBits();
311 Intrinsic::ID Permil2ID;
312 if (EltSize == 64 && IdxSize == 128)
313 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
314 else if (EltSize == 32 && IdxSize == 128)
315 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
316 else if (EltSize == 64 && IdxSize == 256)
317 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
319 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
320 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
// Inspect the declaration F and, if it names an intrinsic that has been
// replaced or re-mangled, store the replacement declaration into NewFn.
// Matching is done on F's name after the leading "llvm." has been removed.
328 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
329 assert(F && "Illegal to upgrade a non-existent Function.");
331 // Quickly eliminate it, if it's not a candidate.
332 StringRef Name = F->getName();
333 if (Name.size() <= 8 || !Name.startswith("llvm."))
335 Name = Name.substr(5); // Strip off "llvm."
// ARM/AArch64 rbit maps to the generic bitreverse intrinsic, overloaded on
// the type of the first argument.
340 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
341 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
342 F->arg_begin()->getType());
345 if (Name.startswith("arm.neon.vclz")) {
347 F->arg_begin()->getType(),
348 Type::getInt1Ty(F->getContext())
350 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
351 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*, reusing the type suffix that follows "arm.neon.vclz.".
353 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
354 NewFn = Function::Create(fType, F->getLinkage(),
355 "llvm.ctlz." + Name.substr(14), F->getParent());
// NEON vcnt maps to the generic ctpop intrinsic.
358 if (Name.startswith("arm.neon.vcnt")) {
359 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
360 F->arg_begin()->getType());
// NEON vld1-4 and vld2-4lane declarations whose name carries only a single
// vector type suffix get the same signature under a name extended with ".p0i8".
363 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
364 if (vldRegex.match(Name)) {
365 auto fArgs = F->getFunctionType()->params();
366 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
367 // Can't use Intrinsic::getDeclaration here as the return types might
368 // then only be structurally equal.
369 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
370 NewFn = Function::Create(fType, F->getLinkage(),
371 "llvm." + Name + ".p0i8", F->getParent());
// The matching NEON store forms are re-declared through
// Intrinsic::getDeclaration, overloaded on the first two parameter types.
374 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
375 if (vstRegex.match(Name)) {
376 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
377 Intrinsic::arm_neon_vst2,
378 Intrinsic::arm_neon_vst3,
379 Intrinsic::arm_neon_vst4};
381 static const Intrinsic::ID StoreLaneInts[] = {
382 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
383 Intrinsic::arm_neon_vst4lane
386 auto fArgs = F->getFunctionType()->params();
387 Type *Tys[] = {fArgs[0], fArgs[1]};
// The table index is derived from the parameter count: size - 3 for the
// plain stores and size - 5 for the lane stores.
388 if (Name.find("lane") == StringRef::npos)
389 NewFn = Intrinsic::getDeclaration(F->getParent(),
390 StoreInts[fArgs.size() - 3], Tys);
392 NewFn = Intrinsic::getDeclaration(F->getParent(),
393 StoreLaneInts[fArgs.size() - 5], Tys);
// The target-specific thread pointer intrinsics map to the generic one.
396 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
397 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
// ctlz/cttz declarations that still take a single argument are replaced by
// the current declaration for the same argument type.
404 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
406 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
407 F->arg_begin()->getType());
410 if (Name.startswith("cttz.") && F->arg_size() == 1) {
412 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
413 F->arg_begin()->getType());
// lifetime.start / invariant.start: replaced when the current name differs
// from the name mangled on the type of the second parameter.
420 bool IsLifetimeStart = Name.startswith("lifetime.start");
421 if (IsLifetimeStart || Name.startswith("invariant.start")) {
422 Intrinsic::ID ID = IsLifetimeStart ?
423 Intrinsic::lifetime_start : Intrinsic::invariant_start;
424 auto Args = F->getFunctionType()->params();
425 Type* ObjectPtr[1] = {Args[1]};
426 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
428 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// lifetime.end / invariant.end: same scheme; the parameter used for the
// mangling is index 1 for lifetime.end and index 2 for invariant.end.
433 bool IsLifetimeEnd = Name.startswith("lifetime.end");
434 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
435 Intrinsic::ID ID = IsLifetimeEnd ?
436 Intrinsic::lifetime_end : Intrinsic::invariant_end;
438 auto Args = F->getFunctionType()->params();
439 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
440 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
442 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// masked.load: mangled on the return type and the first argument type.
449 if (Name.startswith("masked.load.")) {
450 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
451 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
453 NewFn = Intrinsic::getDeclaration(F->getParent(),
454 Intrinsic::masked_load,
// masked.store: mangled on the first two parameter types.
459 if (Name.startswith("masked.store.")) {
460 auto Args = F->getFunctionType()->params();
461 Type *Tys[] = { Args[0], Args[1] };
462 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
464 NewFn = Intrinsic::getDeclaration(F->getParent(),
465 Intrinsic::masked_store,
473 if (Name.startswith("nvvm.")) {
// Drop the "nvvm." prefix before matching.
474 Name = Name.substr(5);
476 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
477 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
478 .Cases("brev32", "brev64", Intrinsic::bitreverse)
479 .Case("clz.i", Intrinsic::ctlz)
480 .Case("popc.i", Intrinsic::ctpop)
481 .Default(Intrinsic::not_intrinsic);
// Only single-argument declarations are mapped; the replacement is
// overloaded on the return type.
482 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
483 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
484 {F->getReturnType()});
488 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
489 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
491 // TODO: We could add lohi.i2d.
492 bool Expand = StringSwitch<bool>(Name)
493 .Cases("abs.i", "abs.ll", true)
494 .Cases("clz.ll", "popc.ll", "h2f", true)
495 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
496 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
505 // We only need to change the name to match the mangling including the
// overloaded types (the return type and the first argument type below).
507 if (Name.startswith("objectsize.")) {
508 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
// A two-argument declaration is always replaced, regardless of its name.
509 if (F->arg_size() == 2 ||
510 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
512 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
520 if (Name == "stackprotectorcheck") {
// x86-specific declaration upgrades live in their own helper.
527 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
530 // Remangle our intrinsic since we upgrade the mangling
531 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
532 if (Result != None) {
533 NewFn = Result.getValue();
537 // This may not belong here. This function is effectively being overloaded
538 // to both detect an intrinsic which needs upgrading, and to provide the
539 // upgraded form of the intrinsic. We should perhaps have two separate
540 // functions for this.
// Public entry point: delegates the upgrade decision to
// UpgradeIntrinsicFunction1 and then refreshes F's attributes when F is a
// recognized intrinsic.
544 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
546 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// An upgrade must never hand back the function it was asked to replace.
547 assert(F != NewFn && "Intrinsic function upgraded to the same function");
549 // Upgrade intrinsic attributes. This does not change the function.
// getIntrinsicID() yields a non-zero ID only for recognized intrinsics.
552 if (Intrinsic::ID id = F->getIntrinsicID())
553 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Upgrade hook for global variables; per the comment below there is currently
// nothing to upgrade.
557 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
558 // Nothing to do yet.
562 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles. Op is the vector to shift and Shift is the shift amount
// in bytes; the result has the same type as Op.
564 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
565 Value *Op, unsigned Shift) {
566 Type *ResultTy = Op->getType();
// Eight bytes per 64-bit element gives the byte count of the vector.
567 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
569 // Bitcast from a 64-bit element type to a byte element type.
570 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
571 Op = Builder.CreateBitCast(Op, VecTy, "cast");
573 // We'll be shuffling in zeroes.
574 Value *Res = Constant::getNullValue(VecTy);
576 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
577 // we'll just return the zero vector.
580 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes within a lane.
581 for (unsigned l = 0; l != NumElts; l += 16)
582 for (unsigned i = 0; i != 16; ++i) {
583 unsigned Idx = NumElts + i - Shift;
585 Idx -= NumElts - 16; // end of lane, switch operand.
586 Idxs[l + i] = Idx + l;
// The zero vector is the first shuffle operand and Op the second.
589 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
592 // Bitcast back to a 64-bit element type.
593 return Builder.CreateBitCast(Res, ResultTy, "cast");
596 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles. Op is the vector to shift; the result has the same type
// as Op.
598 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
600 Type *ResultTy = Op->getType();
// Eight bytes per 64-bit element gives the byte count of the vector.
601 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
603 // Bitcast from a 64-bit element type to a byte element type.
604 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
605 Op = Builder.CreateBitCast(Op, VecTy, "cast");
607 // We'll be shuffling in zeroes.
608 Value *Res = Constant::getNullValue(VecTy);
610 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
611 // we'll just return the zero vector.
614 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes within a lane.
615 for (unsigned l = 0; l != NumElts; l += 16)
616 for (unsigned i = 0; i != 16; ++i) {
617 unsigned Idx = i + Shift;
619 Idx += NumElts - 16; // end of lane, switch operand.
620 Idxs[l + i] = Idx + l;
// Op is the first shuffle operand and the zero vector the second.
623 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
626 // Bitcast back to a 64-bit element type.
627 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer mask into a vector of i1 with NumElts elements.
630 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
// First reinterpret the integer as a vector with one i1 per bit of its width.
632 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
633 cast<IntegerType>(Mask->getType())->getBitWidth());
634 Mask = Builder.CreateBitCast(Mask, MaskTy);
636 // If we have less than 8 elements, then the starting mask was an i8 and
637 // we need to extract down to the right number of elements.
640 for (unsigned i = 0; i != NumElts; ++i)
// The shuffle keeps only the NumElts selected mask bits.
642 Mask = Builder.CreateShuffleVector(Mask, Mask,
643 makeArrayRef(Indices, NumElts),
// Emit a per-element select between Op0 and Op1 controlled by the integer
// mask Mask, with a shortcut for a constant all-ones mask.
650 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
651 Value *Op0, Value *Op1) {
652 // If the mask is all ones just emit the align operation.
// NOTE(review): "align operation" reflects only one caller; this helper is
// also used by the min/max and masked-shift upgrades in this file.
653 if (const auto *C = dyn_cast<Constant>(Mask))
654 if (C->isAllOnesValue())
// Otherwise expand the integer mask to one i1 per element of Op0.
657 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
658 return Builder.CreateSelect(Mask, Op0, Op1);
661 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
662 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
663 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// The aligned value is built as a shuffle of Op1 and Op0 and then blended
// with Passthru under Mask.
664 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
665 Value *Op1, Value *Shift,
666 Value *Passthru, Value *Mask,
// Shift must be a ConstantInt; cast<> asserts otherwise.
668 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
670 unsigned NumElts = Op0->getType()->getVectorNumElements();
671 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
672 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
673 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
675 // Mask the immediate for VALIGN.
// NumElts is a power of two (asserted above), so this is ShiftVal % NumElts.
677 ShiftVal &= (NumElts - 1);
679 // If palignr is shifting the pair of vectors more than the size of two
// lanes, the result is all zeroes.
682 return llvm::Constant::getNullValue(Op0->getType());
684 // If palignr is shifting the pair of input vectors more than one lane,
685 // but less than two lanes, convert to shifting in zeroes.
689 Op0 = llvm::Constant::getNullValue(Op0->getType());
// 64 entries bounds the largest element count handled here.
692 uint32_t Indices[64];
693 // 256-bit palignr operates on 128-bit lanes so we need to handle that
694 for (unsigned l = 0; l < NumElts; l += 16) {
695 for (unsigned i = 0; i != 16; ++i) {
696 unsigned Idx = ShiftVal + i;
697 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
698 Idx += NumElts - 16; // End of lane, switch operand.
699 Indices[l + i] = Idx + l;
// Op1 is the first shuffle operand and Op0 the second.
703 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
704 makeArrayRef(Indices, NumElts),
707 return EmitX86Select(Builder, Mask, Align, Passthru);
// Emit the replacement for an x86 masked store intrinsic: a regular aligned
// store when Mask is a constant all-ones value, otherwise a masked store.
710 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
711 Value *Ptr, Value *Data, Value *Mask,
713 // Cast the pointer to the right type.
714 Ptr = Builder.CreateBitCast(Ptr,
715 llvm::PointerType::getUnqual(Data->getType()));
// Aligned forms use the full vector size in bytes as the alignment,
// unaligned forms use 1.
717 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
719 // If the mask is all ones just emit a regular store.
720 if (const auto *C = dyn_cast<Constant>(Mask))
721 if (C->isAllOnesValue())
722 return Builder.CreateAlignedStore(Data, Ptr, Align);
724 // Convert the mask from an integer type to a vector of i1.
725 unsigned NumElts = Data->getType()->getVectorNumElements();
726 Mask = getX86MaskVec(Builder, Mask, NumElts);
727 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Emit the replacement for an x86 masked load intrinsic: a regular aligned
// load when Mask is a constant all-ones value, otherwise a masked load that
// uses Passthru for the masked-off elements.
730 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
731 Value *Ptr, Value *Passthru, Value *Mask,
733 // Cast the pointer to the right type.
734 Ptr = Builder.CreateBitCast(Ptr,
735 llvm::PointerType::getUnqual(Passthru->getType()));
// Aligned forms use the full vector size in bytes as the alignment,
// unaligned forms use 1.
737 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
739 // If the mask is all ones just emit a regular load.
740 if (const auto *C = dyn_cast<Constant>(Mask))
741 if (C->isAllOnesValue())
742 return Builder.CreateAlignedLoad(Ptr, Align);
744 // Convert the mask from an integer type to a vector of i1.
745 unsigned NumElts = Passthru->getType()->getVectorNumElements();
746 Mask = getX86MaskVec(Builder, Mask, NumElts);
747 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Expand an integer min/max intrinsic call into an icmp with predicate Pred
// followed by a select between the two operands.
750 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
751 ICmpInst::Predicate Pred) {
752 Value *Op0 = CI.getArgOperand(0);
753 Value *Op1 = CI.getArgOperand(1);
754 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
755 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
// Four-operand calls are the masked forms: operand 2 is the pass-through
// value and operand 3 the mask.
757 if (CI.getNumArgOperands() == 4)
758 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Expand a masked integer vector compare into an icmp with predicate Pred,
// ANDed with the mask operand, and returned as an integer bitmask.
763 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
764 ICmpInst::Predicate Pred) {
765 Value *Op0 = CI.getArgOperand(0);
766 unsigned NumElts = Op0->getType()->getVectorNumElements();
767 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
// The AND is skipped when the mask is a constant all-ones value.
769 Value *Mask = CI.getArgOperand(2);
770 const auto *C = dyn_cast<Constant>(Mask);
771 if (!C || !C->isAllOnesValue())
772 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
// Indices up to NumElts refer to the compare result; the remaining ones, up
// to 8, refer to elements of the zero vector passed as second operand.
776 for (unsigned i = 0; i != NumElts; ++i)
778 for (unsigned i = NumElts; i != 8; ++i)
779 Indices[i] = NumElts + i % NumElts;
780 Cmp = Builder.CreateShuffleVector(Cmp,
781 Constant::getNullValue(Cmp->getType()),
// The result integer is at least 8 bits wide.
784 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
785 std::max(NumElts, 8U)));
788 // Replace a masked intrinsic with an older unmasked intrinsic.
// The unmasked intrinsic identified by IID is called with the first two
// operands of CI; its result is then blended with the pass-through value
// (operand 2) under the mask (operand 3).
789 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
791 Function *F = CI.getCalledFunction();
792 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
793 Value *Rep = Builder.CreateCall(Intrin,
794 { CI.getArgOperand(0), CI.getArgOperand(1) });
795 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
// Expand a masked scalar move: element 0 of the result is taken from B when
// bit 0 of Mask is set and from Src otherwise; all other elements come from A.
798 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
799 Value* A = CI.getArgOperand(0);
800 Value* B = CI.getArgOperand(1);
801 Value* Src = CI.getArgOperand(2);
802 Value* Mask = CI.getArgOperand(3);
// Only the lowest bit of the mask is tested; the constant is 8 bits wide.
804 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
805 Value* Cmp = Builder.CreateIsNotNull(AndNode);
806 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
807 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
808 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
809 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
// Expand a mask-to-vector conversion: the integer mask operand becomes a
// vector of i1 that is sign-extended to the call's vector return type.
813 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
814 Value* Op = CI.getArgOperand(0);
815 Type* ReturnOp = CI.getType();
// One mask bit per element of the result vector.
816 unsigned NumElts = CI.getType()->getVectorNumElements();
817 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
818 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
821 /// Upgrade a call to an old intrinsic. All argument and return casting must be
822 /// provided to seamlessly integrate with existing context.
823 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
824 Function *F = CI->getCalledFunction();
825 LLVMContext &C = CI->getContext();
826 IRBuilder<> Builder(C);
827 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
829 assert(F && "Intrinsic call is not direct?");
832 // Get the Function's name.
833 StringRef Name = F->getName();
835 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
836 Name = Name.substr(5);
838 bool IsX86 = Name.startswith("x86.");
840 Name = Name.substr(4);
841 bool IsNVVM = Name.startswith("nvvm.");
843 Name = Name.substr(5);
845 if (IsX86 && Name.startswith("sse4a.movnt.")) {
846 Module *M = F->getParent();
847 SmallVector<Metadata *, 1> Elts;
849 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
850 MDNode *Node = MDNode::get(C, Elts);
852 Value *Arg0 = CI->getArgOperand(0);
853 Value *Arg1 = CI->getArgOperand(1);
855 // Nontemporal (unaligned) store of the 0'th element of the float/double
857 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
858 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
859 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
861 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
863 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
864 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
867 CI->eraseFromParent();
871 if (IsX86 && (Name.startswith("avx.movnt.") ||
872 Name.startswith("avx512.storent."))) {
873 Module *M = F->getParent();
874 SmallVector<Metadata *, 1> Elts;
876 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
877 MDNode *Node = MDNode::get(C, Elts);
879 Value *Arg0 = CI->getArgOperand(0);
880 Value *Arg1 = CI->getArgOperand(1);
882 // Convert the type of the pointer to a pointer to the stored type.
883 Value *BC = Builder.CreateBitCast(Arg0,
884 PointerType::getUnqual(Arg1->getType()),
886 VectorType *VTy = cast<VectorType>(Arg1->getType());
887 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
888 VTy->getBitWidth() / 8);
889 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
892 CI->eraseFromParent();
896 if (IsX86 && Name == "sse2.storel.dq") {
897 Value *Arg0 = CI->getArgOperand(0);
898 Value *Arg1 = CI->getArgOperand(1);
900 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
901 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
902 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
903 Value *BC = Builder.CreateBitCast(Arg0,
904 PointerType::getUnqual(Elt->getType()),
906 Builder.CreateAlignedStore(Elt, BC, 1);
909 CI->eraseFromParent();
913 if (IsX86 && (Name.startswith("sse.storeu.") ||
914 Name.startswith("sse2.storeu.") ||
915 Name.startswith("avx.storeu."))) {
916 Value *Arg0 = CI->getArgOperand(0);
917 Value *Arg1 = CI->getArgOperand(1);
919 Arg0 = Builder.CreateBitCast(Arg0,
920 PointerType::getUnqual(Arg1->getType()),
922 Builder.CreateAlignedStore(Arg1, Arg0, 1);
925 CI->eraseFromParent();
929 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
930 // "avx512.mask.storeu." or "avx512.mask.store."
931 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
932 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
933 CI->getArgOperand(2), Aligned);
936 CI->eraseFromParent();
941 // Upgrade packed integer vector compare intrinsics to compare instructions.
942 if (IsX86 && (Name.startswith("sse2.pcmp") ||
943 Name.startswith("avx2.pcmp"))) {
944 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
945 bool CmpEq = Name[9] == 'e';
946 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
947 CI->getArgOperand(0), CI->getArgOperand(1));
948 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
949 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
950 Type *I32Ty = Type::getInt32Ty(C);
951 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
952 ConstantInt::get(I32Ty, 0));
953 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
954 ConstantInt::get(I32Ty, 0));
955 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
956 Builder.CreateFAdd(Elt0, Elt1),
957 ConstantInt::get(I32Ty, 0));
958 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
959 Type *I32Ty = Type::getInt32Ty(C);
960 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
961 ConstantInt::get(I32Ty, 0));
962 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
963 ConstantInt::get(I32Ty, 0));
964 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
965 Builder.CreateFSub(Elt0, Elt1),
966 ConstantInt::get(I32Ty, 0));
967 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
968 Type *I32Ty = Type::getInt32Ty(C);
969 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
970 ConstantInt::get(I32Ty, 0));
971 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
972 ConstantInt::get(I32Ty, 0));
973 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
974 Builder.CreateFMul(Elt0, Elt1),
975 ConstantInt::get(I32Ty, 0));
976 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
977 Type *I32Ty = Type::getInt32Ty(C);
978 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
979 ConstantInt::get(I32Ty, 0));
980 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
981 ConstantInt::get(I32Ty, 0));
982 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
983 Builder.CreateFDiv(Elt0, Elt1),
984 ConstantInt::get(I32Ty, 0));
985 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
986 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
987 bool CmpEq = Name[16] == 'e';
988 Rep = upgradeMaskedCompare(Builder, *CI,
989 CmpEq ? ICmpInst::ICMP_EQ
990 : ICmpInst::ICMP_SGT);
991 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
992 Name == "sse2.pmaxs.w" ||
993 Name == "sse41.pmaxsd" ||
994 Name.startswith("avx2.pmaxs") ||
995 Name.startswith("avx512.mask.pmaxs"))) {
996 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
997 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
998 Name == "sse41.pmaxuw" ||
999 Name == "sse41.pmaxud" ||
1000 Name.startswith("avx2.pmaxu") ||
1001 Name.startswith("avx512.mask.pmaxu"))) {
1002 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1003 } else if (IsX86 && (Name == "sse41.pminsb" ||
1004 Name == "sse2.pmins.w" ||
1005 Name == "sse41.pminsd" ||
1006 Name.startswith("avx2.pmins") ||
1007 Name.startswith("avx512.mask.pmins"))) {
1008 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1009 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1010 Name == "sse41.pminuw" ||
1011 Name == "sse41.pminud" ||
1012 Name.startswith("avx2.pminu") ||
1013 Name.startswith("avx512.mask.pminu"))) {
1014 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1015 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1016 Name == "sse2.cvtps2pd" ||
1017 Name == "avx.cvtdq2.pd.256" ||
1018 Name == "avx.cvt.ps2.pd.256" ||
1019 Name.startswith("avx512.mask.cvtdq2pd.") ||
1020 Name.startswith("avx512.mask.cvtudq2pd."))) {
1021 // Lossless i32/float to double conversion.
1022 // Extract the bottom elements if necessary and convert to double vector.
1023 Value *Src = CI->getArgOperand(0);
1024 VectorType *SrcTy = cast<VectorType>(Src->getType());
1025 VectorType *DstTy = cast<VectorType>(CI->getType());
1026 Rep = CI->getArgOperand(0);
1028 unsigned NumDstElts = DstTy->getNumElements();
1029 if (NumDstElts < SrcTy->getNumElements()) {
1030 assert(NumDstElts == 2 && "Unexpected vector size");
1031 uint32_t ShuffleMask[2] = { 0, 1 };
1032 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1036 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1037 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1039 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1040 else if (UInt2Double)
1041 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1043 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1045 if (CI->getNumArgOperands() == 3)
1046 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1047 CI->getArgOperand(1));
1048 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1049 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1050 CI->getArgOperand(1), CI->getArgOperand(2),
1052 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1053 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1054 CI->getArgOperand(1),CI->getArgOperand(2),
1056 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1057 Intrinsic::ID intID;
1058 if (Name.endswith("ub"))
1059 intID = Intrinsic::x86_xop_vpcomub;
1060 else if (Name.endswith("uw"))
1061 intID = Intrinsic::x86_xop_vpcomuw;
1062 else if (Name.endswith("ud"))
1063 intID = Intrinsic::x86_xop_vpcomud;
1064 else if (Name.endswith("uq"))
1065 intID = Intrinsic::x86_xop_vpcomuq;
1066 else if (Name.endswith("b"))
1067 intID = Intrinsic::x86_xop_vpcomb;
1068 else if (Name.endswith("w"))
1069 intID = Intrinsic::x86_xop_vpcomw;
1070 else if (Name.endswith("d"))
1071 intID = Intrinsic::x86_xop_vpcomd;
1072 else if (Name.endswith("q"))
1073 intID = Intrinsic::x86_xop_vpcomq;
1075 llvm_unreachable("Unknown suffix");
1077 Name = Name.substr(9); // strip off "xop.vpcom"
1079 if (Name.startswith("lt"))
1081 else if (Name.startswith("le"))
1083 else if (Name.startswith("gt"))
1085 else if (Name.startswith("ge"))
1087 else if (Name.startswith("eq"))
1089 else if (Name.startswith("ne"))
1091 else if (Name.startswith("false"))
1093 else if (Name.startswith("true"))
1096 llvm_unreachable("Unknown condition");
1098 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1100 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1101 Builder.getInt8(Imm)});
1102 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1103 Value *Sel = CI->getArgOperand(2);
1104 Value *NotSel = Builder.CreateNot(Sel);
1105 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1106 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1107 Rep = Builder.CreateOr(Sel0, Sel1);
1108 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1109 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1110 Intrinsic::x86_sse42_crc32_32_8);
1111 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1112 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1113 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1114 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1115 // Replace broadcasts with a series of insertelements.
1116 Type *VecTy = CI->getType();
1117 Type *EltTy = VecTy->getVectorElementType();
1118 unsigned EltNum = VecTy->getVectorNumElements();
1119 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1120 EltTy->getPointerTo());
1121 Value *Load = Builder.CreateLoad(EltTy, Cast);
1122 Type *I32Ty = Type::getInt32Ty(C);
1123 Rep = UndefValue::get(VecTy);
1124 for (unsigned I = 0; I < EltNum; ++I)
1125 Rep = Builder.CreateInsertElement(Rep, Load,
1126 ConstantInt::get(I32Ty, I));
1127 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1128 Name.startswith("sse41.pmovzx") ||
1129 Name.startswith("avx2.pmovsx") ||
1130 Name.startswith("avx2.pmovzx") ||
1131 Name.startswith("avx512.mask.pmovsx") ||
1132 Name.startswith("avx512.mask.pmovzx"))) {
1133 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1134 VectorType *DstTy = cast<VectorType>(CI->getType());
1135 unsigned NumDstElts = DstTy->getNumElements();
1137 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1138 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1139 for (unsigned i = 0; i != NumDstElts; ++i)
1142 Value *SV = Builder.CreateShuffleVector(
1143 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1145 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1146 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1147 : Builder.CreateZExt(SV, DstTy);
1148 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1149 if (CI->getNumArgOperands() == 3)
1150 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1151 CI->getArgOperand(1));
1152 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1153 Name == "avx2.vbroadcasti128")) {
1154 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1155 Type *EltTy = CI->getType()->getVectorElementType();
1156 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1157 Type *VT = VectorType::get(EltTy, NumSrcElts);
1158 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1159 PointerType::getUnqual(VT));
1160 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1161 if (NumSrcElts == 2)
1162 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1165 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1166 { 0, 1, 2, 3, 0, 1, 2, 3 });
1167 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1168 Name.startswith("avx2.vbroadcast") ||
1169 Name.startswith("avx512.pbroadcast") ||
1170 Name.startswith("avx512.mask.broadcast.s"))) {
1171 // Replace vp?broadcasts with a vector shuffle.
1172 Value *Op = CI->getArgOperand(0);
1173 unsigned NumElts = CI->getType()->getVectorNumElements();
1174 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1175 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1176 Constant::getNullValue(MaskTy));
1178 if (CI->getNumArgOperands() == 3)
1179 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1180 CI->getArgOperand(1));
1181 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1182 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1183 CI->getArgOperand(1),
1184 CI->getArgOperand(2),
1185 CI->getArgOperand(3),
1186 CI->getArgOperand(4),
1188 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1189 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1190 CI->getArgOperand(1),
1191 CI->getArgOperand(2),
1192 CI->getArgOperand(3),
1193 CI->getArgOperand(4),
1195 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1196 Name == "avx2.psll.dq")) {
1197 // 128/256-bit shift left specified in bits.
1198 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1199 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1200 Shift / 8); // Shift is in bits.
1201 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1202 Name == "avx2.psrl.dq")) {
1203 // 128/256-bit shift right specified in bits.
1204 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1205 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1206 Shift / 8); // Shift is in bits.
1207 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1208 Name == "avx2.psll.dq.bs" ||
1209 Name == "avx512.psll.dq.512")) {
1210 // 128/256/512-bit shift left specified in bytes.
1211 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1212 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1213 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1214 Name == "avx2.psrl.dq.bs" ||
1215 Name == "avx512.psrl.dq.512")) {
1216 // 128/256/512-bit shift right specified in bytes.
1217 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1218 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1219 } else if (IsX86 && (Name == "sse41.pblendw" ||
1220 Name.startswith("sse41.blendp") ||
1221 Name.startswith("avx.blend.p") ||
1222 Name == "avx2.pblendw" ||
1223 Name.startswith("avx2.pblendd."))) {
1224 Value *Op0 = CI->getArgOperand(0);
1225 Value *Op1 = CI->getArgOperand(1);
1226 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1227 VectorType *VecTy = cast<VectorType>(CI->getType());
1228 unsigned NumElts = VecTy->getNumElements();
1230 SmallVector<uint32_t, 16> Idxs(NumElts);
1231 for (unsigned i = 0; i != NumElts; ++i)
1232 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1234 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1235 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1236 Name == "avx2.vinserti128" ||
1237 Name.startswith("avx512.mask.insert"))) {
1238 Value *Op0 = CI->getArgOperand(0);
1239 Value *Op1 = CI->getArgOperand(1);
1240 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1241 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1242 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1243 unsigned Scale = DstNumElts / SrcNumElts;
1245 // Mask off the high bits of the immediate value; hardware ignores those.
1248 // Extend the second operand into a vector the size of the destination.
1249 Value *UndefV = UndefValue::get(Op1->getType());
1250 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1251 for (unsigned i = 0; i != SrcNumElts; ++i)
1253 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1254 Idxs[i] = SrcNumElts;
1255 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1257 // Insert the second operand into the first operand.
1259 // Note that there is no guarantee that instruction lowering will actually
1260 // produce a vinsertf128 instruction for the created shuffles. In
1261 // particular, the 0 immediate case involves no lane changes, so it can
1262 // be handled as a blend.
1264 // Example of shuffle mask for 32-bit elements:
1265 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1266 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1268 // First fill with identity mask.
1269 for (unsigned i = 0; i != DstNumElts; ++i)
1271 // Then replace the elements where we need to insert.
1272 for (unsigned i = 0; i != SrcNumElts; ++i)
1273 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1274 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1276 // If the intrinsic has a mask operand, handle that.
1277 if (CI->getNumArgOperands() == 5)
1278 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1279 CI->getArgOperand(3));
1280 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1281 Name == "avx2.vextracti128" ||
1282 Name.startswith("avx512.mask.vextract"))) {
1283 Value *Op0 = CI->getArgOperand(0);
1284 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1285 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1286 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1287 unsigned Scale = SrcNumElts / DstNumElts;
1289 // Mask off the high bits of the immediate value; hardware ignores those.
1292 // Get indexes for the subvector of the input vector.
1293 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1294 for (unsigned i = 0; i != DstNumElts; ++i) {
1295 Idxs[i] = i + (Imm * DstNumElts);
1297 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1299 // If the intrinsic has a mask operand, handle that.
1300 if (CI->getNumArgOperands() == 4)
1301 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1302 CI->getArgOperand(2));
1303 } else if (!IsX86 && Name == "stackprotectorcheck") {
1305 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1306 Name.startswith("avx512.mask.perm.di."))) {
1307 Value *Op0 = CI->getArgOperand(0);
1308 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1309 VectorType *VecTy = cast<VectorType>(CI->getType());
1310 unsigned NumElts = VecTy->getNumElements();
1312 SmallVector<uint32_t, 8> Idxs(NumElts);
1313 for (unsigned i = 0; i != NumElts; ++i)
1314 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1316 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1318 if (CI->getNumArgOperands() == 4)
1319 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1320 CI->getArgOperand(2));
1321 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1322 Name == "sse2.pshuf.d" ||
1323 Name.startswith("avx512.mask.vpermil.p") ||
1324 Name.startswith("avx512.mask.pshuf.d."))) {
1325 Value *Op0 = CI->getArgOperand(0);
1326 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1327 VectorType *VecTy = cast<VectorType>(CI->getType());
1328 unsigned NumElts = VecTy->getNumElements();
1329 // Calculate the size of each index in the immediate.
1330 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1331 unsigned IdxMask = ((1 << IdxSize) - 1);
1333 SmallVector<uint32_t, 8> Idxs(NumElts);
1334 // Look up the bits for this element, wrapping around the immediate every
1335 // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
1336 // to offset by the first index of each group.
1337 for (unsigned i = 0; i != NumElts; ++i)
1338 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1340 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1342 if (CI->getNumArgOperands() == 4)
1343 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1344 CI->getArgOperand(2));
1345 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1346 Name.startswith("avx512.mask.pshufl.w."))) {
1347 Value *Op0 = CI->getArgOperand(0);
1348 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1349 unsigned NumElts = CI->getType()->getVectorNumElements();
1351 SmallVector<uint32_t, 16> Idxs(NumElts);
1352 for (unsigned l = 0; l != NumElts; l += 8) {
1353 for (unsigned i = 0; i != 4; ++i)
1354 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1355 for (unsigned i = 4; i != 8; ++i)
1356 Idxs[i + l] = i + l;
1359 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1361 if (CI->getNumArgOperands() == 4)
1362 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1363 CI->getArgOperand(2));
1364 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1365 Name.startswith("avx512.mask.pshufh.w."))) {
1366 Value *Op0 = CI->getArgOperand(0);
1367 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1368 unsigned NumElts = CI->getType()->getVectorNumElements();
1370 SmallVector<uint32_t, 16> Idxs(NumElts);
1371 for (unsigned l = 0; l != NumElts; l += 8) {
1372 for (unsigned i = 0; i != 4; ++i)
1373 Idxs[i + l] = i + l;
1374 for (unsigned i = 0; i != 4; ++i)
1375 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1378 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1380 if (CI->getNumArgOperands() == 4)
1381 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1382 CI->getArgOperand(2));
1383 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1384 Value *Op0 = CI->getArgOperand(0);
1385 Value *Op1 = CI->getArgOperand(1);
1386 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1387 unsigned NumElts = CI->getType()->getVectorNumElements();
1389 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1390 unsigned HalfLaneElts = NumLaneElts / 2;
1392 SmallVector<uint32_t, 16> Idxs(NumElts);
1393 for (unsigned i = 0; i != NumElts; ++i) {
1394 // Base index is the starting element of the lane.
1395 Idxs[i] = i - (i % NumLaneElts);
1396 // If we are half way through the lane switch to the other source.
1397 if ((i % NumLaneElts) >= HalfLaneElts)
1399 // Now select the specific element by adding HalfLaneElts bits from the
1400 // immediate, wrapping around the immediate every 8 bits.
1401 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1404 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1406 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1407 CI->getArgOperand(3));
1408 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1409 Name.startswith("avx512.mask.movshdup") ||
1410 Name.startswith("avx512.mask.movsldup"))) {
1411 Value *Op0 = CI->getArgOperand(0);
1412 unsigned NumElts = CI->getType()->getVectorNumElements();
1413 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1415 unsigned Offset = 0;
1416 if (Name.startswith("avx512.mask.movshdup."))
1419 SmallVector<uint32_t, 16> Idxs(NumElts);
1420 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1421 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1422 Idxs[i + l + 0] = i + l + Offset;
1423 Idxs[i + l + 1] = i + l + Offset;
1426 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1428 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1429 CI->getArgOperand(1));
1430 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1431 Name.startswith("avx512.mask.unpckl."))) {
1432 Value *Op0 = CI->getArgOperand(0);
1433 Value *Op1 = CI->getArgOperand(1);
1434 int NumElts = CI->getType()->getVectorNumElements();
1435 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1437 SmallVector<uint32_t, 64> Idxs(NumElts);
1438 for (int l = 0; l != NumElts; l += NumLaneElts)
1439 for (int i = 0; i != NumLaneElts; ++i)
1440 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1442 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1444 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1445 CI->getArgOperand(2));
1446 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1447 Name.startswith("avx512.mask.unpckh."))) {
1448 Value *Op0 = CI->getArgOperand(0);
1449 Value *Op1 = CI->getArgOperand(1);
1450 int NumElts = CI->getType()->getVectorNumElements();
1451 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1453 SmallVector<uint32_t, 64> Idxs(NumElts);
1454 for (int l = 0; l != NumElts; l += NumLaneElts)
1455 for (int i = 0; i != NumLaneElts; ++i)
1456 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1458 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1460 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1461 CI->getArgOperand(2));
1462 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1463 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1464 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1465 CI->getArgOperand(2));
1466 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1467 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1468 CI->getArgOperand(1));
1469 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1470 CI->getArgOperand(2));
1471 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1472 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1473 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1474 CI->getArgOperand(2));
1475 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1476 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1477 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1478 CI->getArgOperand(2));
1479 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1480 VectorType *FTy = cast<VectorType>(CI->getType());
1481 VectorType *ITy = VectorType::getInteger(FTy);
1482 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1483 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1484 Rep = Builder.CreateBitCast(Rep, FTy);
1485 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1486 CI->getArgOperand(2));
1487 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1488 VectorType *FTy = cast<VectorType>(CI->getType());
1489 VectorType *ITy = VectorType::getInteger(FTy);
1490 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1491 Rep = Builder.CreateAnd(Rep,
1492 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1493 Rep = Builder.CreateBitCast(Rep, FTy);
1494 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1495 CI->getArgOperand(2));
1496 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1497 VectorType *FTy = cast<VectorType>(CI->getType());
1498 VectorType *ITy = VectorType::getInteger(FTy);
1499 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1500 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1501 Rep = Builder.CreateBitCast(Rep, FTy);
1502 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1503 CI->getArgOperand(2));
1504 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1505 VectorType *FTy = cast<VectorType>(CI->getType());
1506 VectorType *ITy = VectorType::getInteger(FTy);
1507 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1508 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1509 Rep = Builder.CreateBitCast(Rep, FTy);
1510 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1511 CI->getArgOperand(2));
1512 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1513 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1514 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1515 CI->getArgOperand(2));
1516 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1517 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1518 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1519 CI->getArgOperand(2));
1520 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1521 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1522 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1523 CI->getArgOperand(2));
1524 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1525 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1526 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1527 CI->getArgOperand(2));
1528 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1529 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1530 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1531 CI->getArgOperand(2));
1532 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1533 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1534 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1535 CI->getArgOperand(2));
1536 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1537 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1538 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1539 CI->getArgOperand(2));
1540 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1541 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1544 { CI->getArgOperand(0), Builder.getInt1(false) });
1545 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1546 CI->getArgOperand(1));
1547 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1548 Name.startswith("avx512.mask.min.p"))) {
1549 bool IsMin = Name[13] == 'i';
1550 VectorType *VecTy = cast<VectorType>(CI->getType());
1551 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1552 unsigned EltWidth = VecTy->getScalarSizeInBits();
1554 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1555 IID = Intrinsic::x86_sse_max_ps;
1556 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1557 IID = Intrinsic::x86_sse2_max_pd;
1558 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1559 IID = Intrinsic::x86_avx_max_ps_256;
1560 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1561 IID = Intrinsic::x86_avx_max_pd_256;
1562 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1563 IID = Intrinsic::x86_sse_min_ps;
1564 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1565 IID = Intrinsic::x86_sse2_min_pd;
1566 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1567 IID = Intrinsic::x86_avx_min_ps_256;
1568 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1569 IID = Intrinsic::x86_avx_min_pd_256;
1571 llvm_unreachable("Unexpected intrinsic");
1573 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1574 { CI->getArgOperand(0), CI->getArgOperand(1) });
1575 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1576 CI->getArgOperand(2));
1577 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1578 VectorType *VecTy = cast<VectorType>(CI->getType());
1580 if (VecTy->getPrimitiveSizeInBits() == 128)
1581 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1582 else if (VecTy->getPrimitiveSizeInBits() == 256)
1583 IID = Intrinsic::x86_avx2_pshuf_b;
1584 else if (VecTy->getPrimitiveSizeInBits() == 512)
1585 IID = Intrinsic::x86_avx512_pshuf_b_512;
1587 llvm_unreachable("Unexpected intrinsic");
1589 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1590 { CI->getArgOperand(0), CI->getArgOperand(1) });
1591 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1592 CI->getArgOperand(2));
1593 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1594 Name.startswith("avx512.mask.pmulu.dq."))) {
1595 bool IsUnsigned = Name[16] == 'u';
1596 VectorType *VecTy = cast<VectorType>(CI->getType());
1598 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1599 IID = Intrinsic::x86_sse41_pmuldq;
1600 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1601 IID = Intrinsic::x86_avx2_pmul_dq;
1602 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1603 IID = Intrinsic::x86_avx512_pmul_dq_512;
1604 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1605 IID = Intrinsic::x86_sse2_pmulu_dq;
1606 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1607 IID = Intrinsic::x86_avx2_pmulu_dq;
1608 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1609 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1611 llvm_unreachable("Unexpected intrinsic");
1613 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1614 { CI->getArgOperand(0), CI->getArgOperand(1) });
1615 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1616 CI->getArgOperand(2));
1617 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1618 bool IsUnsigned = Name[16] == 'u';
1619 bool IsDW = Name[18] == 'd';
1620 VectorType *VecTy = cast<VectorType>(CI->getType());
1622 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1623 IID = Intrinsic::x86_sse2_packsswb_128;
1624 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1625 IID = Intrinsic::x86_avx2_packsswb;
1626 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1627 IID = Intrinsic::x86_avx512_packsswb_512;
1628 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1629 IID = Intrinsic::x86_sse2_packssdw_128;
1630 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1631 IID = Intrinsic::x86_avx2_packssdw;
1632 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1633 IID = Intrinsic::x86_avx512_packssdw_512;
1634 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1635 IID = Intrinsic::x86_sse2_packuswb_128;
1636 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1637 IID = Intrinsic::x86_avx2_packuswb;
1638 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1639 IID = Intrinsic::x86_avx512_packuswb_512;
1640 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1641 IID = Intrinsic::x86_sse41_packusdw;
1642 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1643 IID = Intrinsic::x86_avx2_packusdw;
1644 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1645 IID = Intrinsic::x86_avx512_packusdw_512;
1647 llvm_unreachable("Unexpected intrinsic");
1649 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1650 { CI->getArgOperand(0), CI->getArgOperand(1) });
1651 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1652 CI->getArgOperand(2));
1653 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1654 bool IsImmediate = Name[16] == 'i' ||
1655 (Name.size() > 18 && Name[18] == 'i');
1656 bool IsVariable = Name[16] == 'v';
1657 char Size = Name[16] == '.' ? Name[17] :
1658 Name[17] == '.' ? Name[18] :
1659 Name[18] == '.' ? Name[19] :
1663 if (IsVariable && Name[17] != '.') {
1664 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1665 IID = Intrinsic::x86_avx2_psllv_q;
1666 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1667 IID = Intrinsic::x86_avx2_psllv_q_256;
1668 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1669 IID = Intrinsic::x86_avx2_psllv_d;
1670 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1671 IID = Intrinsic::x86_avx2_psllv_d_256;
1672 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1673 IID = Intrinsic::x86_avx512_psllv_w_128;
1674 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1675 IID = Intrinsic::x86_avx512_psllv_w_256;
1676 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1677 IID = Intrinsic::x86_avx512_psllv_w_512;
1679 llvm_unreachable("Unexpected size");
1680 } else if (Name.endswith(".128")) {
1681 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1682 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1683 : Intrinsic::x86_sse2_psll_d;
1684 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1685 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1686 : Intrinsic::x86_sse2_psll_q;
1687 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1688 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1689 : Intrinsic::x86_sse2_psll_w;
1691 llvm_unreachable("Unexpected size");
1692 } else if (Name.endswith(".256")) {
1693 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1694 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1695 : Intrinsic::x86_avx2_psll_d;
1696 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1697 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1698 : Intrinsic::x86_avx2_psll_q;
1699 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1700 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1701 : Intrinsic::x86_avx2_psll_w;
1703 llvm_unreachable("Unexpected size");
1705 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1706 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1707 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1708 Intrinsic::x86_avx512_psll_d_512;
1709 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1710 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1711 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1712 Intrinsic::x86_avx512_psll_q_512;
1713 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1714 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1715 : Intrinsic::x86_avx512_psll_w_512;
1717 llvm_unreachable("Unexpected size");
1720 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1721 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1722 bool IsImmediate = Name[16] == 'i' ||
1723 (Name.size() > 18 && Name[18] == 'i');
1724 bool IsVariable = Name[16] == 'v';
1725 char Size = Name[16] == '.' ? Name[17] :
1726 Name[17] == '.' ? Name[18] :
1727 Name[18] == '.' ? Name[19] :
1731 if (IsVariable && Name[17] != '.') {
1732 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1733 IID = Intrinsic::x86_avx2_psrlv_q;
1734 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1735 IID = Intrinsic::x86_avx2_psrlv_q_256;
1736 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1737 IID = Intrinsic::x86_avx2_psrlv_d;
1738 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1739 IID = Intrinsic::x86_avx2_psrlv_d_256;
1740 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1741 IID = Intrinsic::x86_avx512_psrlv_w_128;
1742 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1743 IID = Intrinsic::x86_avx512_psrlv_w_256;
1744 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1745 IID = Intrinsic::x86_avx512_psrlv_w_512;
1747 llvm_unreachable("Unexpected size");
1748 } else if (Name.endswith(".128")) {
1749 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1750 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1751 : Intrinsic::x86_sse2_psrl_d;
1752 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1753 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1754 : Intrinsic::x86_sse2_psrl_q;
1755 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1756 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1757 : Intrinsic::x86_sse2_psrl_w;
1759 llvm_unreachable("Unexpected size");
1760 } else if (Name.endswith(".256")) {
1761 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1762 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1763 : Intrinsic::x86_avx2_psrl_d;
1764 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1765 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1766 : Intrinsic::x86_avx2_psrl_q;
1767 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1768 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1769 : Intrinsic::x86_avx2_psrl_w;
1771 llvm_unreachable("Unexpected size");
1773 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1774 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1775 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1776 Intrinsic::x86_avx512_psrl_d_512;
1777 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1778 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1779 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1780 Intrinsic::x86_avx512_psrl_q_512;
1781 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1782 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1783 : Intrinsic::x86_avx512_psrl_w_512;
1785 llvm_unreachable("Unexpected size");
1788 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1789 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1790 bool IsImmediate = Name[16] == 'i' ||
1791 (Name.size() > 18 && Name[18] == 'i');
1792 bool IsVariable = Name[16] == 'v';
1793 char Size = Name[16] == '.' ? Name[17] :
1794 Name[17] == '.' ? Name[18] :
1795 Name[18] == '.' ? Name[19] :
1799 if (IsVariable && Name[17] != '.') {
1800 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1801 IID = Intrinsic::x86_avx2_psrav_d;
1802 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1803 IID = Intrinsic::x86_avx2_psrav_d_256;
1804 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1805 IID = Intrinsic::x86_avx512_psrav_w_128;
1806 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1807 IID = Intrinsic::x86_avx512_psrav_w_256;
1808 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1809 IID = Intrinsic::x86_avx512_psrav_w_512;
1811 llvm_unreachable("Unexpected size");
1812 } else if (Name.endswith(".128")) {
1813 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1814 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1815 : Intrinsic::x86_sse2_psra_d;
1816 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1817 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1818 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1819 Intrinsic::x86_avx512_psra_q_128;
1820 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1821 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1822 : Intrinsic::x86_sse2_psra_w;
1824 llvm_unreachable("Unexpected size");
1825 } else if (Name.endswith(".256")) {
1826 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1827 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1828 : Intrinsic::x86_avx2_psra_d;
1829 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1830 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1831 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1832 Intrinsic::x86_avx512_psra_q_256;
1833 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1834 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1835 : Intrinsic::x86_avx2_psra_w;
1837 llvm_unreachable("Unexpected size");
1839 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1840 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1841 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1842 Intrinsic::x86_avx512_psra_d_512;
1843 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1844 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1845 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1846 Intrinsic::x86_avx512_psra_q_512;
1847 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1848 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1849 : Intrinsic::x86_avx512_psra_w_512;
1851 llvm_unreachable("Unexpected size");
1854 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1855 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1856 Rep = upgradeMaskedMove(Builder, *CI);
1857 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
1858 Rep = UpgradeMaskToInt(Builder, *CI);
1859 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1861 if (Name.endswith("ps.128"))
1862 IID = Intrinsic::x86_avx_vpermilvar_ps;
1863 else if (Name.endswith("pd.128"))
1864 IID = Intrinsic::x86_avx_vpermilvar_pd;
1865 else if (Name.endswith("ps.256"))
1866 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1867 else if (Name.endswith("pd.256"))
1868 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1869 else if (Name.endswith("ps.512"))
1870 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1871 else if (Name.endswith("pd.512"))
1872 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1874 llvm_unreachable("Unexpected vpermilvar intrinsic");
1876 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1877 Rep = Builder.CreateCall(Intrin,
1878 { CI->getArgOperand(0), CI->getArgOperand(1) });
1879 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1880 CI->getArgOperand(2));
1881 } else if (IsX86 && Name.endswith(".movntdqa")) {
1882 Module *M = F->getParent();
1883 MDNode *Node = MDNode::get(
1884 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1886 Value *Ptr = CI->getArgOperand(0);
1887 VectorType *VTy = cast<VectorType>(CI->getType());
1889 // Convert the type of the pointer to a pointer to the stored type.
1891 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
1892 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
1893 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
1895 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
1896 Value *Arg = CI->getArgOperand(0);
1897 Value *Neg = Builder.CreateNeg(Arg, "neg");
1898 Value *Cmp = Builder.CreateICmpSGE(
1899 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
1900 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
1901 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
1902 Name == "max.ui" || Name == "max.ull")) {
1903 Value *Arg0 = CI->getArgOperand(0);
1904 Value *Arg1 = CI->getArgOperand(1);
1905 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1906 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
1907 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
1908 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
1909 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
1910 Name == "min.ui" || Name == "min.ull")) {
1911 Value *Arg0 = CI->getArgOperand(0);
1912 Value *Arg1 = CI->getArgOperand(1);
1913 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1914 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
1915 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
1916 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
1917 } else if (IsNVVM && Name == "clz.ll") {
1918 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
1919 Value *Arg = CI->getArgOperand(0);
1920 Value *Ctlz = Builder.CreateCall(
1921 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
1923 {Arg, Builder.getFalse()}, "ctlz");
1924 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
1925 } else if (IsNVVM && Name == "popc.ll") {
1926 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
1928 Value *Arg = CI->getArgOperand(0);
1929 Value *Popc = Builder.CreateCall(
1930 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
1933 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
1934 } else if (IsNVVM && Name == "h2f") {
1935 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
1936 F->getParent(), Intrinsic::convert_from_fp16,
1937 {Builder.getFloatTy()}),
1938 CI->getArgOperand(0), "h2f");
1940 llvm_unreachable("Unknown function for CallInst upgrade.");
1944 CI->replaceAllUsesWith(Rep);
1945 CI->eraseFromParent();
1949 CallInst *NewCall = nullptr;
1950 switch (NewFn->getIntrinsicID()) {
1952 // Handle generic mangling change, but nothing else
1954 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
1955 "Unknown function for CallInst upgrade and isn't just a name change");
1956 CI->setCalledFunction(NewFn);
1960 case Intrinsic::arm_neon_vld1:
1961 case Intrinsic::arm_neon_vld2:
1962 case Intrinsic::arm_neon_vld3:
1963 case Intrinsic::arm_neon_vld4:
1964 case Intrinsic::arm_neon_vld2lane:
1965 case Intrinsic::arm_neon_vld3lane:
1966 case Intrinsic::arm_neon_vld4lane:
1967 case Intrinsic::arm_neon_vst1:
1968 case Intrinsic::arm_neon_vst2:
1969 case Intrinsic::arm_neon_vst3:
1970 case Intrinsic::arm_neon_vst4:
1971 case Intrinsic::arm_neon_vst2lane:
1972 case Intrinsic::arm_neon_vst3lane:
1973 case Intrinsic::arm_neon_vst4lane: {
1974 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1975 CI->arg_operands().end());
1976 NewCall = Builder.CreateCall(NewFn, Args);
1980 case Intrinsic::bitreverse:
1981 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
1984 case Intrinsic::ctlz:
1985 case Intrinsic::cttz:
1986 assert(CI->getNumArgOperands() == 1 &&
1987 "Mismatch between function args and call args");
1989 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
1992 case Intrinsic::objectsize: {
1993 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
1994 ? Builder.getFalse()
1995 : CI->getArgOperand(2);
1996 NewCall = Builder.CreateCall(
1997 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2001 case Intrinsic::ctpop:
2002 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2005 case Intrinsic::convert_from_fp16:
2006 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2009 case Intrinsic::x86_xop_vfrcz_ss:
2010 case Intrinsic::x86_xop_vfrcz_sd:
2011 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2014 case Intrinsic::x86_xop_vpermil2pd:
2015 case Intrinsic::x86_xop_vpermil2ps:
2016 case Intrinsic::x86_xop_vpermil2pd_256:
2017 case Intrinsic::x86_xop_vpermil2ps_256: {
2018 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2019 CI->arg_operands().end());
2020 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2021 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2022 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2023 NewCall = Builder.CreateCall(NewFn, Args);
2027 case Intrinsic::x86_sse41_ptestc:
2028 case Intrinsic::x86_sse41_ptestz:
2029 case Intrinsic::x86_sse41_ptestnzc: {
2030 // The arguments for these intrinsics used to be v4f32, and changed
2031 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
2032 // So, the only thing required is a bitcast for both arguments.
2033 // First, check the arguments have the old type.
2034 Value *Arg0 = CI->getArgOperand(0);
2035 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2038 // Old intrinsic, add bitcasts
2039 Value *Arg1 = CI->getArgOperand(1);
2041 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2043 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2044 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2046 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2050 case Intrinsic::x86_sse41_insertps:
2051 case Intrinsic::x86_sse41_dppd:
2052 case Intrinsic::x86_sse41_dpps:
2053 case Intrinsic::x86_sse41_mpsadbw:
2054 case Intrinsic::x86_avx_dp_ps_256:
2055 case Intrinsic::x86_avx2_mpsadbw: {
2056 // Need to truncate the last argument from i32 to i8 -- this argument models
2057 // an inherently 8-bit immediate operand to these x86 instructions.
2058 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2059 CI->arg_operands().end());
2061 // Replace the last argument with a trunc.
2062 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2063 NewCall = Builder.CreateCall(NewFn, Args);
2067 case Intrinsic::thread_pointer: {
2068 NewCall = Builder.CreateCall(NewFn, {});
2072 case Intrinsic::invariant_start:
2073 case Intrinsic::invariant_end:
2074 case Intrinsic::masked_load:
2075 case Intrinsic::masked_store: {
2076 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2077 CI->arg_operands().end());
2078 NewCall = Builder.CreateCall(NewFn, Args);
2082 assert(NewCall && "Should have either set this variable or returned through "
2083 "the default case");
2084 std::string Name = CI->getName();
2085 if (!Name.empty()) {
2086 CI->setName(Name + ".old");
2087 NewCall->setName(Name);
2089 CI->replaceAllUsesWith(NewCall);
2090 CI->eraseFromParent();
2093 void llvm::UpgradeCallsToIntrinsic(Function *F) {
2094 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
2096 // Check if this function should be upgraded and get the replacement function
2099 if (UpgradeIntrinsicFunction(F, NewFn)) {
2100 // Replace all users of the old function with the new function or new
2101 // instructions. This is not a range loop because the call is deleted.
2102 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
2103 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
2104 UpgradeIntrinsicCall(CI, NewFn);
2106 // Remove old function, no longer used, from the module.
2107 F->eraseFromParent();
2111 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
2112 // Check if the tag uses struct-path aware TBAA format.
2113 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
2116 auto &Context = MD.getContext();
2117 if (MD.getNumOperands() == 3) {
2118 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
2119 MDNode *ScalarType = MDNode::get(Context, Elts);
2120 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
2121 Metadata *Elts2[] = {ScalarType, ScalarType,
2122 ConstantAsMetadata::get(
2123 Constant::getNullValue(Type::getInt64Ty(Context))),
2125 return MDNode::get(Context, Elts2);
2127 // Create a MDNode <MD, MD, offset 0>
2128 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
2129 Type::getInt64Ty(Context)))};
2130 return MDNode::get(Context, Elts);
2133 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
2134 Instruction *&Temp) {
2135 if (Opc != Instruction::BitCast)
2139 Type *SrcTy = V->getType();
2140 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2141 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2142 LLVMContext &Context = V->getContext();
2144 // We have no information about target data layout, so we assume that
2145 // the maximum pointer size is 64bit.
2146 Type *MidTy = Type::getInt64Ty(Context);
2147 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
2149 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
2155 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
2156 if (Opc != Instruction::BitCast)
2159 Type *SrcTy = C->getType();
2160 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2161 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2162 LLVMContext &Context = C->getContext();
2164 // We have no information about target data layout, so we assume that
2165 // the maximum pointer size is 64bit.
2166 Type *MidTy = Type::getInt64Ty(Context);
2168 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2175 /// Check the debug info version number, if it is out-dated, drop the debug
2176 /// info. Return true if module is modified.
2177 bool llvm::UpgradeDebugInfo(Module &M) {
2178 unsigned Version = getDebugMetadataVersionFromModule(M);
2179 if (Version == DEBUG_METADATA_VERSION)
2182 bool RetCode = StripDebugInfo(M);
2184 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2185 M.getContext().diagnose(DiagVersion);
2190 bool llvm::UpgradeModuleFlags(Module &M) {
2191 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2195 bool HasObjCFlag = false, HasClassProperties = false;
2196 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2197 MDNode *Op = ModFlags->getOperand(I);
2198 if (Op->getNumOperands() < 2)
2200 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2203 if (ID->getString() == "Objective-C Image Info Version")
2205 if (ID->getString() == "Objective-C Class Properties")
2206 HasClassProperties = true;
2208 // "Objective-C Class Properties" is recently added for Objective-C. We
2209 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2210 // flag of value 0, so we can correclty downgrade this flag when trying to
2211 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2212 // this module flag.
2213 if (HasObjCFlag && !HasClassProperties) {
2214 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
2221 static bool isOldLoopArgument(Metadata *MD) {
2222 auto *T = dyn_cast_or_null<MDTuple>(MD);
2225 if (T->getNumOperands() < 1)
2227 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2230 return S->getString().startswith("llvm.vectorizer.");
2233 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2234 StringRef OldPrefix = "llvm.vectorizer.";
2235 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2237 if (OldTag == "llvm.vectorizer.unroll")
2238 return MDString::get(C, "llvm.loop.interleave.count");
2240 return MDString::get(
2241 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2245 static Metadata *upgradeLoopArgument(Metadata *MD) {
2246 auto *T = dyn_cast_or_null<MDTuple>(MD);
2249 if (T->getNumOperands() < 1)
2251 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2254 if (!OldTag->getString().startswith("llvm.vectorizer."))
2257 // This has an old tag. Upgrade it.
2258 SmallVector<Metadata *, 8> Ops;
2259 Ops.reserve(T->getNumOperands());
2260 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2261 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2262 Ops.push_back(T->getOperand(I));
2264 return MDTuple::get(T->getContext(), Ops);
2267 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2268 auto *T = dyn_cast<MDTuple>(&N);
2272 if (none_of(T->operands(), isOldLoopArgument))
2275 SmallVector<Metadata *, 8> Ops;
2276 Ops.reserve(T->getNumOperands());
2277 for (Metadata *MD : T->operands())
2278 Ops.push_back(upgradeLoopArgument(MD));
2280 return MDTuple::get(T->getContext(), Ops);