1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
//
// F is the candidate declaration and IID the intrinsic it should map to. When
// the old form is detected, F is renamed with a ".old" suffix and NewFn is set
// to the declaration returned by Intrinsic::getDeclaration for IID.
// NOTE(review): the tail of the signature, the return statements and the
// closing brace are not visible in this view, so the exact return values are
// not documented here.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
38 // Check whether this is an old version of the function, which received
// a <4 x float> first parameter (that is the type compared against below).
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
// Renaming first frees the intrinsic's name for the fresh declaration.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
//
// F is the candidate declaration and IID the intrinsic it should map to. Only
// the type of the last parameter is inspected to decide whether F is old.
// NOTE(review): the tail of the signature and the return statements are not
// visible in this view.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
// The old declaration keeps its signature under a ".old" name; NewFn receives
// the current declaration of IID.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Inspect the name (and, for some intrinsics, the signature) of F to decide
// whether it is an outdated intrinsic declaration.
//
// The name is matched against a sequence of known prefixes / exact names.
// Depending on the match, F may be renamed (usually with a ".old" suffix) and
// NewFn is set to a replacement declaration in the same module.
// NOTE(review): many lines of this definition (returns, closing braces, some
// branch headers) are not visible in this view; the comments below describe
// only what the visible statements do.
66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
67 assert(F && "Illegal to upgrade a non-existent Function.");
69 // Quickly eliminate it, if it's not a candidate.
70 StringRef Name = F->getName();
71 if (Name.size() <= 8 || !Name.startswith("llvm."))
73 Name = Name.substr(5); // Strip off "llvm."
// arm.neon.vclz.* -> llvm.ctlz.*: the replacement takes the original operand
// type plus an extra i1 parameter.
78 if (Name.startswith("arm.neon.vclz")) {
80 F->arg_begin()->getType(),
81 Type::getInt1Ty(F->getContext())
83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
84 // the end of the name. Change name from llvm.arm.neon.vclz.* to
86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
// substr(14) drops "arm.neon.vclz." so only the type suffix is kept.
87 NewFn = Function::Create(fType, F->getLinkage(),
88 "llvm.ctlz." + Name.substr(14), F->getParent());
// arm.neon.vcnt* -> ctpop overloaded on the first argument's type.
91 if (Name.startswith("arm.neon.vcnt")) {
92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
93 F->arg_begin()->getType());
// NEON vld1-4 / vld2-4lane with only a vector-type suffix: recreate the
// function under the same name with ".p0i8" appended.
96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
97 if (vldRegex.match(Name)) {
98 auto fArgs = F->getFunctionType()->params();
99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
100 // Can't use Intrinsic::getDeclaration here as the return types might
101 // then only be structurally equal.
102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
103 NewFn = Function::Create(fType, F->getLinkage(),
104 "llvm." + Name + ".p0i8", F->getParent());
// NEON vst1-4 / vst2-4lane: the replacement intrinsic is selected from the
// tables below using the parameter count of the old declaration.
107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
108 if (vstRegex.match(Name)) {
109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
110 Intrinsic::arm_neon_vst2,
111 Intrinsic::arm_neon_vst3,
112 Intrinsic::arm_neon_vst4};
114 static const Intrinsic::ID StoreLaneInts[] = {
115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
116 Intrinsic::arm_neon_vst4lane
// The first two parameter types are used as the overload types.
119 auto fArgs = F->getFunctionType()->params();
120 Type *Tys[] = {fArgs[0], fArgs[1]};
// NOTE(review): the "- 3" / "- 5" offsets presumably map the old parameter
// counts onto table indices 0..3 / 0..2 -- confirm against the intrinsic
// definitions.
121 if (Name.find("lane") == StringRef::npos)
122 NewFn = Intrinsic::getDeclaration(F->getParent(),
123 StoreInts[fArgs.size() - 3], Tys);
125 NewFn = Intrinsic::getDeclaration(F->getParent(),
126 StoreLaneInts[fArgs.size() - 5], Tys);
// Target-specific thread pointer intrinsics map to the generic one.
129 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
// Single-argument ctlz/cttz declarations are moved aside and redeclared.
// NOTE(review): Name no longer contains the "llvm." prefix here (stripped
// above), so the old function is renamed to Name + ".old" without it.
137 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
138 F->setName(Name + ".old");
139 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
140 F->arg_begin()->getType());
143 if (Name.startswith("cttz.") && F->arg_size() == 1) {
144 F->setName(Name + ".old");
145 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
146 F->arg_begin()->getType());
// masked.load / masked.store: upgrade only when the current name differs from
// the name Intrinsic::getName produces for the overload types.
153 if (Name.startswith("masked.load.")) {
154 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
155 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
156 F->setName(Name + ".old");
157 NewFn = Intrinsic::getDeclaration(F->getParent(),
158 Intrinsic::masked_load,
163 if (Name.startswith("masked.store.")) {
164 auto Args = F->getFunctionType()->params();
165 Type *Tys[] = { Args[0], Args[1] };
166 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
167 F->setName(Name + ".old");
168 NewFn = Intrinsic::getDeclaration(F->getParent(),
169 Intrinsic::masked_store,
178 // We only need to change the name to match the mangling including the
// (the remainder of the comment above is not visible in this view)
180 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
181 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
182 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
183 F->setName(Name + ".old");
184 NewFn = Intrinsic::getDeclaration(F->getParent(),
185 Intrinsic::objectsize, Tys);
192 if (Name == "stackprotectorcheck") {
// From here on the x86-specific names are handled; "x86." is stripped so the
// comparisons below are written relative to that prefix.
198 bool IsX86 = Name.startswith("x86.");
200 Name = Name.substr(4);
// Large list of x86 intrinsic names / prefixes recognised as upgrade
// candidates. NOTE(review): the statement that opens this condition and the
// action taken on a match are not visible in this view.
203 (Name.startswith("sse2.pcmpeq.") ||
204 Name.startswith("sse2.pcmpgt.") ||
205 Name.startswith("avx2.pcmpeq.") ||
206 Name.startswith("avx2.pcmpgt.") ||
207 Name.startswith("avx512.mask.pcmpeq.") ||
208 Name.startswith("avx512.mask.pcmpgt.") ||
209 Name == "sse41.pmaxsb" ||
210 Name == "sse2.pmaxs.w" ||
211 Name == "sse41.pmaxsd" ||
212 Name == "sse2.pmaxu.b" ||
213 Name == "sse41.pmaxuw" ||
214 Name == "sse41.pmaxud" ||
215 Name == "sse41.pminsb" ||
216 Name == "sse2.pmins.w" ||
217 Name == "sse41.pminsd" ||
218 Name == "sse2.pminu.b" ||
219 Name == "sse41.pminuw" ||
220 Name == "sse41.pminud" ||
221 Name.startswith("avx2.pmax") ||
222 Name.startswith("avx2.pmin") ||
223 Name.startswith("avx2.vbroadcast") ||
224 Name.startswith("avx2.pbroadcast") ||
225 Name.startswith("avx.vpermil.") ||
226 Name.startswith("sse2.pshuf") ||
227 Name.startswith("avx512.pbroadcast") ||
228 Name.startswith("avx512.mask.broadcast.s") ||
229 Name.startswith("avx512.mask.movddup") ||
230 Name.startswith("avx512.mask.movshdup") ||
231 Name.startswith("avx512.mask.movsldup") ||
232 Name.startswith("avx512.mask.pshuf.d.") ||
233 Name.startswith("avx512.mask.pshufl.w.") ||
234 Name.startswith("avx512.mask.pshufh.w.") ||
235 Name.startswith("avx512.mask.vpermil.p") ||
236 Name.startswith("avx512.mask.perm.df.") ||
237 Name.startswith("avx512.mask.perm.di.") ||
238 Name.startswith("avx512.mask.punpckl") ||
239 Name.startswith("avx512.mask.punpckh") ||
240 Name.startswith("avx512.mask.unpckl.") ||
241 Name.startswith("avx512.mask.unpckh.") ||
242 Name.startswith("avx512.mask.pand.") ||
243 Name.startswith("avx512.mask.pandn.") ||
244 Name.startswith("avx512.mask.por.") ||
245 Name.startswith("avx512.mask.pxor.") ||
246 Name.startswith("sse41.pmovsx") ||
247 Name.startswith("sse41.pmovzx") ||
248 Name.startswith("avx2.pmovsx") ||
249 Name.startswith("avx2.pmovzx") ||
250 Name == "sse2.cvtdq2pd" ||
251 Name == "sse2.cvtps2pd" ||
252 Name == "avx.cvtdq2.pd.256" ||
253 Name == "avx.cvt.ps2.pd.256" ||
254 Name == "sse2.cvttps2dq" ||
255 Name.startswith("avx.cvtt.") ||
256 Name.startswith("avx.vinsertf128.") ||
257 Name == "avx2.vinserti128" ||
258 Name.startswith("avx.vextractf128.") ||
259 Name == "avx2.vextracti128" ||
260 Name.startswith("sse4a.movnt.") ||
261 Name.startswith("avx.movnt.") ||
262 Name.startswith("avx512.storent.") ||
263 Name == "sse2.storel.dq" ||
264 Name.startswith("sse.storeu.") ||
265 Name.startswith("sse2.storeu.") ||
266 Name.startswith("avx.storeu.") ||
267 Name.startswith("avx512.mask.storeu.p") ||
268 Name.startswith("avx512.mask.storeu.b.") ||
269 Name.startswith("avx512.mask.storeu.w.") ||
270 Name.startswith("avx512.mask.storeu.d.") ||
271 Name.startswith("avx512.mask.storeu.q.") ||
272 Name.startswith("avx512.mask.store.p") ||
273 Name.startswith("avx512.mask.store.b.") ||
274 Name.startswith("avx512.mask.store.w.") ||
275 Name.startswith("avx512.mask.store.d.") ||
276 Name.startswith("avx512.mask.store.q.") ||
277 Name.startswith("avx512.mask.loadu.p") ||
278 Name.startswith("avx512.mask.loadu.b.") ||
279 Name.startswith("avx512.mask.loadu.w.") ||
280 Name.startswith("avx512.mask.loadu.d.") ||
281 Name.startswith("avx512.mask.loadu.q.") ||
282 Name.startswith("avx512.mask.load.p") ||
283 Name.startswith("avx512.mask.load.b.") ||
284 Name.startswith("avx512.mask.load.w.") ||
285 Name.startswith("avx512.mask.load.d.") ||
286 Name.startswith("avx512.mask.load.q.") ||
287 Name == "sse42.crc32.64.8" ||
288 Name.startswith("avx.vbroadcast.s") ||
289 Name.startswith("avx512.mask.palignr.") ||
290 Name.startswith("sse2.psll.dq") ||
291 Name.startswith("sse2.psrl.dq") ||
292 Name.startswith("avx2.psll.dq") ||
293 Name.startswith("avx2.psrl.dq") ||
294 Name.startswith("avx512.psll.dq") ||
295 Name.startswith("avx512.psrl.dq") ||
296 Name == "sse41.pblendw" ||
297 Name.startswith("sse41.blendp") ||
298 Name.startswith("avx.blend.p") ||
299 Name == "avx2.pblendw" ||
300 Name.startswith("avx2.pblendd.") ||
301 Name == "avx2.vbroadcasti128" ||
302 Name == "xop.vpcmov" ||
303 (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
307 // SSE4.1 ptest functions may have an old signature.
// substr(11) is what follows "sse41.ptest" and selects the ptest variant.
308 if (IsX86 && Name.startswith("sse41.ptest")) {
309 if (Name.substr(11) == "c")
310 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
311 if (Name.substr(11) == "z")
312 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
313 if (Name.substr(11) == "nzc")
314 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
316 // Several blend and other instructions with masks used the wrong number of
// (the remainder of the comment above is not visible in this view)
318 if (IsX86 && Name == "sse41.insertps")
319 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
321 if (IsX86 && Name == "sse41.dppd")
322 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
324 if (IsX86 && Name == "sse41.dpps")
325 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
327 if (IsX86 && Name == "sse41.mpsadbw")
328 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
330 if (IsX86 && Name == "avx.dp.ps.256")
331 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
333 if (IsX86 && Name == "avx2.mpsadbw")
334 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
337 // frcz.ss/sd may need to have an argument dropped
// Only the two-argument form is treated as old.
338 if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
339 F->setName(Name + ".old");
340 NewFn = Intrinsic::getDeclaration(F->getParent(),
341 Intrinsic::x86_xop_vfrcz_ss);
344 if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
345 F->setName(Name + ".old");
346 NewFn = Intrinsic::getDeclaration(F->getParent(),
347 Intrinsic::x86_xop_vfrcz_sd);
// avx512.mask.{pslli,psrai,psrli}.*: characters 12..15 (just after
// "avx512.mask.") name the shift kind, and character 18 ('d' or otherwise)
// picks the di/qi 512-bit replacement.
350 if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
351 Name.startswith("avx512.mask.psrai.") ||
352 Name.startswith("avx512.mask.psrli."))) {
353 Intrinsic::ID ShiftID;
354 if (Name.slice(12, 16) == "psll")
355 ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
356 : Intrinsic::x86_avx512_mask_psll_qi_512;
357 else if (Name.slice(12, 16) == "psra")
358 ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
359 : Intrinsic::x86_avx512_mask_psra_qi_512;
361 ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
362 : Intrinsic::x86_avx512_mask_psrl_qi_512;
// Here the full "llvm.x86." prefix is re-added before appending ".old".
363 F->setName("llvm.x86." + Name + ".old");
364 NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
367 // Fix the FMA4 intrinsics to remove the 4
// substr(5) drops "fma4."; the function is renamed in place.
368 if (IsX86 && Name.startswith("fma4.")) {
369 F->setName("llvm.x86.fma" + Name.substr(5));
373 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
374 if (IsX86 && Name.startswith("xop.vpermil2")) {
375 auto Params = F->getFunctionType()->params();
// The third parameter is the index operand whose type is inspected.
376 auto Idx = Params[2];
377 if (Idx->getScalarType()->isFloatingPointTy()) {
378 F->setName("llvm.x86." + Name + ".old");
// Pick the replacement from the element width and total vector width of the
// old index operand.
379 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
380 unsigned EltSize = Idx->getScalarSizeInBits();
381 Intrinsic::ID Permil2ID;
382 if (EltSize == 64 && IdxSize == 128)
383 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
384 else if (EltSize == 32 && IdxSize == 128)
385 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
386 else if (EltSize == 64 && IdxSize == 256)
387 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
389 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
390 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
398 // This may not belong here. This function is effectively being overloaded
399 // to both detect an intrinsic which needs upgrading, and to provide the
400 // upgraded form of the intrinsic. We should perhaps have two separate
401 // functions for this.
// Public entry point: runs UpgradeIntrinsicFunction1 on F and then refreshes
// F's attributes when F has an intrinsic ID.
// NOTE(review): the initialisation of NewFn and the return statement are not
// visible in this view; presumably Upgraded is the return value -- confirm.
405 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
407 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// An upgrade must yield a different function object than the one inspected.
408 assert(F != NewFn && "Intrinsic function upgraded to the same function");
410 // Upgrade intrinsic attributes. This does not change the function.
// A zero intrinsic ID converts to false, so such functions are left untouched.
413 if (Intrinsic::ID id = F->getIntrinsicID())
414 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Hook for upgrading global variables. Per the comment below it currently has
// no work to do. NOTE(review): the return statement is not visible in this
// view.
418 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
419 // Nothing to do yet.
423 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// into a shufflevector on a byte-element view of the operand.
//
// Op is the vector operand and Shift the byte count to shift by. The result
// is bitcast back to Op's original type.
// NOTE(review): the declaration of Idxs and the conditions guarding the
// shuffle are not visible in this view.
425 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
426 Value *Op, unsigned Shift) {
427 Type *ResultTy = Op->getType();
// Multiply by 8 to get the byte count (the operand has 64-bit elements, per
// the comment below).
428 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
430 // Bitcast from a 64-bit element type to a byte element type.
431 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
432 Op = Builder.CreateBitCast(Op, VecTy, "cast");
434 // We'll be shuffling in zeroes.
435 Value *Res = Constant::getNullValue(VecTy);
437 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
438 // we'll just return the zero vector.
441 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes, i the bytes within a lane.
442 for (unsigned l = 0; l != NumElts; l += 16)
443 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from the second shuffle operand (Op below).
444 unsigned Idx = NumElts + i - Shift;
446 Idx -= NumElts - 16; // end of lane, switch operand.
447 Idxs[l + i] = Idx + l;
// The zero vector is the first shuffle operand and Op the second.
450 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
453 // Bitcast back to a 64-bit element type.
454 return Builder.CreateBitCast(Res, ResultTy, "cast");
457 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// into a shufflevector on a byte-element view of the operand.
//
// Op is the vector operand; the result is bitcast back to Op's original type.
// NOTE(review): the tail of the signature (the Shift parameter used below),
// the declaration of Idxs and the conditions guarding the shuffle are not
// visible in this view.
459 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
461 Type *ResultTy = Op->getType();
// Multiply by 8 to get the byte count (the operand has 64-bit elements, per
// the comment below).
462 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
464 // Bitcast from a 64-bit element type to a byte element type.
465 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
466 Op = Builder.CreateBitCast(Op, VecTy, "cast");
468 // We'll be shuffling in zeroes.
469 Value *Res = Constant::getNullValue(VecTy);
471 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
472 // we'll just return the zero vector.
475 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes, i the bytes within a lane.
476 for (unsigned l = 0; l != NumElts; l += 16)
477 for (unsigned i = 0; i != 16; ++i) {
478 unsigned Idx = i + Shift;
// Adding NumElts - 16 moves the index into the second shuffle operand (the
// zero vector below).
480 Idx += NumElts - 16; // end of lane, switch operand.
481 Idxs[l + i] = Idx + l;
// Op is the first shuffle operand and the zero vector the second.
484 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
487 // Bitcast back to a 64-bit element type.
488 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer mask value into a vector of i1.
//
// The mask is bitcast to a vector of i1 with one element per bit of the
// integer type, and then narrowed with a shuffle to NumElts elements.
// NOTE(review): the tail of the signature, the declaration of Indices, the
// condition guarding the shuffle and the return are not visible in this view.
491 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
// Mask is required to have an integer type (cast<> asserts otherwise).
493 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
494 cast<IntegerType>(Mask->getType())->getBitWidth());
495 Mask = Builder.CreateBitCast(Mask, MaskTy);
497 // If we have less than 8 elements, then the starting mask was an i8 and
498 // we need to extract down to the right number of elements.
501 for (unsigned i = 0; i != NumElts; ++i)
// Shuffling the mask with itself keeps only the first NumElts indices.
503 Mask = Builder.CreateShuffleVector(Mask, Mask,
504 makeArrayRef(Indices, NumElts),
// Emit a per-element select between Op0 and Op1 controlled by an integer
// mask: the mask is converted to a vector of i1 with one element per element
// of Op0 and used as the select condition.
511 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
512 Value *Op0, Value *Op1) {
513 // If the mask is all ones just emit the align operation.
// NOTE(review): the statement executed for an all-ones constant mask is not
// visible in this view; presumably Op0 is returned unchanged -- confirm.
514 if (const auto *C = dyn_cast<Constant>(Mask))
515 if (C->isAllOnesValue())
518 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
519 return Builder.CreateSelect(Mask, Op0, Op1);
// Lower a masked palignr intrinsic to a shufflevector of the two operands
// followed by a mask-controlled select against Passthru.
//
// Shift must be a ConstantInt (cast<> asserts otherwise) and the element
// count of Op0 must be a multiple of 16.
// NOTE(review): the conditions guarding the two early special cases below and
// the operand switch inside the loop are not visible in this view.
522 static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
523 Value *Op0, Value *Op1, Value *Shift,
524 Value *Passthru, Value *Mask) {
525 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
527 unsigned NumElts = Op0->getType()->getVectorNumElements();
528 assert(NumElts % 16 == 0);
530 // If palignr is shifting the pair of vectors more than the size of two
// (the remainder of the comment above is not visible in this view)
533 return llvm::Constant::getNullValue(Op0->getType());
535 // If palignr is shifting the pair of input vectors more than one lane,
536 // but less than two lanes, convert to shifting in zeroes.
540 Op0 = llvm::Constant::getNullValue(Op0->getType());
// Fixed-size index buffer; only the first NumElts entries are passed on.
543 uint32_t Indices[64];
544 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// l walks the 16-element lanes, i the elements within a lane.
545 for (unsigned l = 0; l != NumElts; l += 16) {
546 for (unsigned i = 0; i != 16; ++i) {
547 unsigned Idx = ShiftVal + i;
549 Idx += NumElts - 16; // End of lane, switch operand.
550 Indices[l + i] = Idx + l;
// Op1 is the first shuffle operand and Op0 the second.
554 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
555 makeArrayRef(Indices, NumElts),
// Apply the write mask: masked-off elements come from Passthru.
558 return EmitX86Select(Builder, Mask, Align, Passthru);
// Lower an x86 masked store intrinsic: emits a plain aligned store when the
// mask is an all-ones constant, otherwise a masked store with a vector-of-i1
// mask.
// NOTE(review): the tail of the signature (the Aligned flag used below) and
// the declaration that the alignment expression initialises are not visible
// in this view.
561 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
562 Value *Ptr, Value *Data, Value *Mask,
564 // Cast the pointer to the right type.
565 Ptr = Builder.CreateBitCast(Ptr,
566 llvm::PointerType::getUnqual(Data->getType()));
// Alignment is the vector size in bytes when Aligned is set, otherwise 1.
568 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
570 // If the mask is all ones just emit a regular store.
571 if (const auto *C = dyn_cast<Constant>(Mask))
572 if (C->isAllOnesValue())
573 return Builder.CreateAlignedStore(Data, Ptr, Align);
575 // Convert the mask from an integer type to a vector of i1.
576 unsigned NumElts = Data->getType()->getVectorNumElements();
577 Mask = getX86MaskVec(Builder, Mask, NumElts);
578 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Lower an x86 masked load intrinsic: emits a plain aligned load when the
// mask is an all-ones constant, otherwise a masked load that takes Passthru
// as its pass-through operand.
// NOTE(review): the tail of the signature (the Aligned flag used below) and
// the declaration that the alignment expression initialises are not visible
// in this view.
581 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
582 Value *Ptr, Value *Passthru, Value *Mask,
584 // Cast the pointer to the right type.
585 Ptr = Builder.CreateBitCast(Ptr,
586 llvm::PointerType::getUnqual(Passthru->getType()));
// Alignment is the vector size in bytes when Aligned is set, otherwise 1.
588 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
590 // If the mask is all ones just emit a regular load.
591 if (const auto *C = dyn_cast<Constant>(Mask))
592 if (C->isAllOnesValue())
593 return Builder.CreateAlignedLoad(Ptr, Align);
595 // Convert the mask from an integer type to a vector of i1.
596 unsigned NumElts = Passthru->getType()->getVectorNumElements();
597 Mask = getX86MaskVec(Builder, Mask, NumElts);
598 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Lower an integer min/max intrinsic call to an icmp + select pair.
//
// Pred is the comparison applied to the call's first two operands; the select
// yields operand 0 where the comparison holds and operand 1 otherwise, so the
// choice of predicate determines min vs. max and signed vs. unsigned.
601 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
602 ICmpInst::Predicate Pred) {
603 Value *Op0 = CI.getArgOperand(0);
604 Value *Op1 = CI.getArgOperand(1);
605 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
606 return Builder.CreateSelect(Cmp, Op0, Op1);
// Lower a masked vector compare intrinsic call to an icmp whose result is
// ANDed with the mask (operand 2) and returned as an integer bitmask.
//
// The returned integer has max(NumElts, 8) bits, where NumElts is the element
// count of the first operand.
// NOTE(review): the declaration of Indices, the condition guarding the
// widening shuffle and some loop bodies are not visible in this view.
609 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
610 ICmpInst::Predicate Pred) {
611 Value *Op0 = CI.getArgOperand(0);
612 unsigned NumElts = Op0->getType()->getVectorNumElements();
613 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
615 Value *Mask = CI.getArgOperand(2);
// Skip the AND when the mask is a constant with all bits set.
616 const auto *C = dyn_cast<Constant>(Mask);
617 if (!C || !C->isAllOnesValue())
618 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
622 for (unsigned i = 0; i != NumElts; ++i)
// Indices from NumElts up to 8 refer to the second shuffle operand, which is
// the zero vector passed below.
624 for (unsigned i = NumElts; i != 8; ++i)
625 Indices[i] = NumElts + i % NumElts;
626 Cmp = Builder.CreateShuffleVector(Cmp,
627 Constant::getNullValue(Cmp->getType()),
// Reinterpret the i1 vector as a single integer of the same total width.
630 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
631 std::max(NumElts, 8U)));
634 /// Upgrade a call to an old intrinsic. All argument and return casting must be
635 /// provided to seamlessly integrate with existing context.
636 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
637 Function *F = CI->getCalledFunction();
638 LLVMContext &C = CI->getContext();
639 IRBuilder<> Builder(C);
640 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
642 assert(F && "Intrinsic call is not direct?");
645 // Get the Function's name.
646 StringRef Name = F->getName();
648 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
649 Name = Name.substr(5);
651 bool IsX86 = Name.startswith("x86.");
653 Name = Name.substr(4);
656 // Upgrade packed integer vector compare intrinsics to compare instructions.
657 if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
658 Name.startswith("avx2.pcmpeq."))) {
659 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
661 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
662 } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
663 Name.startswith("avx2.pcmpgt."))) {
664 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
666 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
667 } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
668 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
669 } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
670 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
671 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
672 Name == "sse2.pmaxs.w" ||
673 Name == "sse41.pmaxsd" ||
674 Name.startswith("avx2.pmaxs"))) {
675 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
676 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
677 Name == "sse41.pmaxuw" ||
678 Name == "sse41.pmaxud" ||
679 Name.startswith("avx2.pmaxu"))) {
680 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
681 } else if (IsX86 && (Name == "sse41.pminsb" ||
682 Name == "sse2.pmins.w" ||
683 Name == "sse41.pminsd" ||
684 Name.startswith("avx2.pmins"))) {
685 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
686 } else if (IsX86 && (Name == "sse2.pminu.b" ||
687 Name == "sse41.pminuw" ||
688 Name == "sse41.pminud" ||
689 Name.startswith("avx2.pminu"))) {
690 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
691 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
692 Name == "sse2.cvtps2pd" ||
693 Name == "avx.cvtdq2.pd.256" ||
694 Name == "avx.cvt.ps2.pd.256")) {
695 // Lossless i32/float to double conversion.
696 // Extract the bottom elements if necessary and convert to double vector.
697 Value *Src = CI->getArgOperand(0);
698 VectorType *SrcTy = cast<VectorType>(Src->getType());
699 VectorType *DstTy = cast<VectorType>(CI->getType());
700 Rep = CI->getArgOperand(0);
702 unsigned NumDstElts = DstTy->getNumElements();
703 if (NumDstElts < SrcTy->getNumElements()) {
704 assert(NumDstElts == 2 && "Unexpected vector size");
705 uint32_t ShuffleMask[2] = { 0, 1 };
706 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
710 bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
712 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
714 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
715 } else if (IsX86 && (Name == "sse2.cvttps2dq" ||
716 Name.startswith("avx.cvtt."))) {
717 // Truncation (round to zero) float/double to i32 vector conversion.
718 Value *Src = CI->getArgOperand(0);
719 VectorType *DstTy = cast<VectorType>(CI->getType());
720 Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
721 } else if (IsX86 && Name.startswith("sse4a.movnt.")) {
722 Module *M = F->getParent();
723 SmallVector<Metadata *, 1> Elts;
725 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
726 MDNode *Node = MDNode::get(C, Elts);
728 Value *Arg0 = CI->getArgOperand(0);
729 Value *Arg1 = CI->getArgOperand(1);
731 // Nontemporal (unaligned) store of the 0'th element of the float/double
733 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
734 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
735 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
737 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
739 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
740 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
743 CI->eraseFromParent();
745 } else if (IsX86 && (Name.startswith("avx.movnt.") ||
746 Name.startswith("avx512.storent."))) {
747 Module *M = F->getParent();
748 SmallVector<Metadata *, 1> Elts;
750 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
751 MDNode *Node = MDNode::get(C, Elts);
753 Value *Arg0 = CI->getArgOperand(0);
754 Value *Arg1 = CI->getArgOperand(1);
756 // Convert the type of the pointer to a pointer to the stored type.
757 Value *BC = Builder.CreateBitCast(Arg0,
758 PointerType::getUnqual(Arg1->getType()),
760 VectorType *VTy = cast<VectorType>(Arg1->getType());
761 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
762 VTy->getBitWidth() / 8);
763 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
766 CI->eraseFromParent();
768 } else if (IsX86 && Name == "sse2.storel.dq") {
769 Value *Arg0 = CI->getArgOperand(0);
770 Value *Arg1 = CI->getArgOperand(1);
772 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
773 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
774 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
775 Value *BC = Builder.CreateBitCast(Arg0,
776 PointerType::getUnqual(Elt->getType()),
778 Builder.CreateAlignedStore(Elt, BC, 1);
781 CI->eraseFromParent();
783 } else if (IsX86 && (Name.startswith("sse.storeu.") ||
784 Name.startswith("sse2.storeu.") ||
785 Name.startswith("avx.storeu."))) {
786 Value *Arg0 = CI->getArgOperand(0);
787 Value *Arg1 = CI->getArgOperand(1);
789 Arg0 = Builder.CreateBitCast(Arg0,
790 PointerType::getUnqual(Arg1->getType()),
792 Builder.CreateAlignedStore(Arg1, Arg0, 1);
795 CI->eraseFromParent();
797 } else if (IsX86 && (Name.startswith("avx512.mask.storeu.p") ||
798 Name.startswith("avx512.mask.storeu.b.") ||
799 Name.startswith("avx512.mask.storeu.w.") ||
800 Name.startswith("avx512.mask.storeu.d.") ||
801 Name.startswith("avx512.mask.storeu.q."))) {
802 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
803 CI->getArgOperand(2), /*Aligned*/false);
806 CI->eraseFromParent();
808 } else if (IsX86 && (Name.startswith("avx512.mask.store.p") ||
809 Name.startswith("avx512.mask.store.b.") ||
810 Name.startswith("avx512.mask.store.w.") ||
811 Name.startswith("avx512.mask.store.d.") ||
812 Name.startswith("avx512.mask.store.q."))) {
813 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
814 CI->getArgOperand(2), /*Aligned*/true);
817 CI->eraseFromParent();
819 } else if (IsX86 && (Name.startswith("avx512.mask.loadu.p") ||
820 Name.startswith("avx512.mask.loadu.b.") ||
821 Name.startswith("avx512.mask.loadu.w.") ||
822 Name.startswith("avx512.mask.loadu.d.") ||
823 Name.startswith("avx512.mask.loadu.q."))) {
824 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
825 CI->getArgOperand(1), CI->getArgOperand(2),
827 } else if (IsX86 && (Name.startswith("avx512.mask.load.p") ||
828 Name.startswith("avx512.mask.load.b.") ||
829 Name.startswith("avx512.mask.load.w.") ||
830 Name.startswith("avx512.mask.load.d.") ||
831 Name.startswith("avx512.mask.load.q."))) {
832 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
833 CI->getArgOperand(1),CI->getArgOperand(2),
835 } else if (IsX86 && Name.startswith("xop.vpcom")) {
837 if (Name.endswith("ub"))
838 intID = Intrinsic::x86_xop_vpcomub;
839 else if (Name.endswith("uw"))
840 intID = Intrinsic::x86_xop_vpcomuw;
841 else if (Name.endswith("ud"))
842 intID = Intrinsic::x86_xop_vpcomud;
843 else if (Name.endswith("uq"))
844 intID = Intrinsic::x86_xop_vpcomuq;
845 else if (Name.endswith("b"))
846 intID = Intrinsic::x86_xop_vpcomb;
847 else if (Name.endswith("w"))
848 intID = Intrinsic::x86_xop_vpcomw;
849 else if (Name.endswith("d"))
850 intID = Intrinsic::x86_xop_vpcomd;
851 else if (Name.endswith("q"))
852 intID = Intrinsic::x86_xop_vpcomq;
854 llvm_unreachable("Unknown suffix");
856 Name = Name.substr(9); // strip off "xop.vpcom"
858 if (Name.startswith("lt"))
860 else if (Name.startswith("le"))
862 else if (Name.startswith("gt"))
864 else if (Name.startswith("ge"))
866 else if (Name.startswith("eq"))
868 else if (Name.startswith("ne"))
870 else if (Name.startswith("false"))
872 else if (Name.startswith("true"))
875 llvm_unreachable("Unknown condition");
877 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
879 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
880 Builder.getInt8(Imm)});
881 } else if (IsX86 && Name == "xop.vpcmov") {
882 Value *Arg0 = CI->getArgOperand(0);
883 Value *Arg1 = CI->getArgOperand(1);
884 Value *Sel = CI->getArgOperand(2);
885 unsigned NumElts = CI->getType()->getVectorNumElements();
886 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
887 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
888 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
889 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
890 Rep = Builder.CreateOr(Sel0, Sel1);
891 } else if (IsX86 && Name == "sse42.crc32.64.8") {
892 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
893 Intrinsic::x86_sse42_crc32_32_8);
894 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
895 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
896 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
897 } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
898 // Replace broadcasts with a series of insertelements.
899 Type *VecTy = CI->getType();
900 Type *EltTy = VecTy->getVectorElementType();
901 unsigned EltNum = VecTy->getVectorNumElements();
902 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
903 EltTy->getPointerTo());
904 Value *Load = Builder.CreateLoad(EltTy, Cast);
905 Type *I32Ty = Type::getInt32Ty(C);
906 Rep = UndefValue::get(VecTy);
907 for (unsigned I = 0; I < EltNum; ++I)
908 Rep = Builder.CreateInsertElement(Rep, Load,
909 ConstantInt::get(I32Ty, I));
910 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
911 Name.startswith("sse41.pmovzx") ||
912 Name.startswith("avx2.pmovsx") ||
913 Name.startswith("avx2.pmovzx"))) {
914 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
915 VectorType *DstTy = cast<VectorType>(CI->getType());
916 unsigned NumDstElts = DstTy->getNumElements();
918 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
919 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
920 for (unsigned i = 0; i != NumDstElts; ++i)
923 Value *SV = Builder.CreateShuffleVector(
924 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
926 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
927 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
928 : Builder.CreateZExt(SV, DstTy);
929 } else if (IsX86 && Name == "avx2.vbroadcasti128") {
930 // Replace vbroadcasts with a vector shuffle.
931 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
932 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
933 PointerType::getUnqual(VT));
934 Value *Load = Builder.CreateLoad(VT, Op);
935 uint32_t Idxs[4] = { 0, 1, 0, 1 };
936 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
938 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
939 Name.startswith("avx2.vbroadcast") ||
940 Name.startswith("avx512.pbroadcast") ||
941 Name.startswith("avx512.mask.broadcast.s"))) {
942 // Replace vp?broadcasts with a vector shuffle.
943 Value *Op = CI->getArgOperand(0);
944 unsigned NumElts = CI->getType()->getVectorNumElements();
945 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
946 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
947 Constant::getNullValue(MaskTy));
949 if (CI->getNumArgOperands() == 3)
950 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
951 CI->getArgOperand(1));
952 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
953 Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
954 CI->getArgOperand(1),
955 CI->getArgOperand(2),
956 CI->getArgOperand(3),
957 CI->getArgOperand(4));
958 } else if (IsX86 && (Name == "sse2.psll.dq" ||
959 Name == "avx2.psll.dq")) {
960 // 128/256-bit shift left specified in bits.
961 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
962 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
963 Shift / 8); // Shift is in bits.
964 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
965 Name == "avx2.psrl.dq")) {
966 // 128/256-bit shift right specified in bits.
967 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
968 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
969 Shift / 8); // Shift is in bits.
970 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
971 Name == "avx2.psll.dq.bs" ||
972 Name == "avx512.psll.dq.512")) {
973 // 128/256/512-bit shift left specified in bytes.
974 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
975 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
976 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
977 Name == "avx2.psrl.dq.bs" ||
978 Name == "avx512.psrl.dq.512")) {
979 // 128/256/512-bit shift right specified in bytes.
980 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
981 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
982 } else if (IsX86 && (Name == "sse41.pblendw" ||
983 Name.startswith("sse41.blendp") ||
984 Name.startswith("avx.blend.p") ||
985 Name == "avx2.pblendw" ||
986 Name.startswith("avx2.pblendd."))) {
987 Value *Op0 = CI->getArgOperand(0);
988 Value *Op1 = CI->getArgOperand(1);
989 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
990 VectorType *VecTy = cast<VectorType>(CI->getType());
991 unsigned NumElts = VecTy->getNumElements();
993 SmallVector<uint32_t, 16> Idxs(NumElts);
994 for (unsigned i = 0; i != NumElts; ++i)
995 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
997 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
998 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
999 Name == "avx2.vinserti128")) {
1000 Value *Op0 = CI->getArgOperand(0);
1001 Value *Op1 = CI->getArgOperand(1);
1002 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1003 VectorType *VecTy = cast<VectorType>(CI->getType());
1004 unsigned NumElts = VecTy->getNumElements();
1006 // Mask off the high bits of the immediate value; hardware ignores those.
1009 // Extend the second operand into a vector that is twice as big.
1010 Value *UndefV = UndefValue::get(Op1->getType());
1011 SmallVector<uint32_t, 8> Idxs(NumElts);
1012 for (unsigned i = 0; i != NumElts; ++i)
1014 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1016 // Insert the second operand into the first operand.
1018 // Note that there is no guarantee that instruction lowering will actually
1019 // produce a vinsertf128 instruction for the created shuffles. In
1020 // particular, the 0 immediate case involves no lane changes, so it can
1021 // be handled as a blend.
1023 // Example of shuffle mask for 32-bit elements:
1024 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1025 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1027 // The low half of the result is either the low half of the 1st operand
1028 // or the low half of the 2nd operand (the inserted vector).
1029 for (unsigned i = 0; i != NumElts / 2; ++i)
1030 Idxs[i] = Imm ? i : (i + NumElts);
1031 // The high half of the result is either the low half of the 2nd operand
1032 // (the inserted vector) or the high half of the 1st operand.
1033 for (unsigned i = NumElts / 2; i != NumElts; ++i)
1034 Idxs[i] = Imm ? (i + NumElts / 2) : i;
1035 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1036 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1037 Name == "avx2.vextracti128")) {
1038 Value *Op0 = CI->getArgOperand(0);
1039 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1040 VectorType *VecTy = cast<VectorType>(CI->getType());
1041 unsigned NumElts = VecTy->getNumElements();
1043 // Mask off the high bits of the immediate value; hardware ignores those.
1046 // Get indexes for either the high half or low half of the input vector.
1047 SmallVector<uint32_t, 4> Idxs(NumElts);
1048 for (unsigned i = 0; i != NumElts; ++i) {
1049 Idxs[i] = Imm ? (i + NumElts) : i;
1052 Value *UndefV = UndefValue::get(Op0->getType());
1053 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
1054 } else if (!IsX86 && Name == "stackprotectorcheck") {
1056 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1057 Name.startswith("avx512.mask.perm.di."))) {
1058 Value *Op0 = CI->getArgOperand(0);
1059 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1060 VectorType *VecTy = cast<VectorType>(CI->getType());
1061 unsigned NumElts = VecTy->getNumElements();
1063 SmallVector<uint32_t, 8> Idxs(NumElts);
1064 for (unsigned i = 0; i != NumElts; ++i)
1065 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1067 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1069 if (CI->getNumArgOperands() == 4)
1070 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1071 CI->getArgOperand(2));
1072 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1073 Name == "sse2.pshuf.d" ||
1074 Name.startswith("avx512.mask.vpermil.p") ||
1075 Name.startswith("avx512.mask.pshuf.d."))) {
1076 Value *Op0 = CI->getArgOperand(0);
1077 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1078 VectorType *VecTy = cast<VectorType>(CI->getType());
1079 unsigned NumElts = VecTy->getNumElements();
1080 // Calculate the size of each index in the immediate.
1081 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1082 unsigned IdxMask = ((1 << IdxSize) - 1);
1084 SmallVector<uint32_t, 8> Idxs(NumElts);
1085 // Lookup the bits for this element, wrapping around the immediate every
1086 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1087 // to offset by the first index of each group.
1088 for (unsigned i = 0; i != NumElts; ++i)
1089 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1091 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1093 if (CI->getNumArgOperands() == 4)
1094 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1095 CI->getArgOperand(2));
1096 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1097 Name.startswith("avx512.mask.pshufl.w."))) {
1098 Value *Op0 = CI->getArgOperand(0);
1099 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1100 unsigned NumElts = CI->getType()->getVectorNumElements();
1102 SmallVector<uint32_t, 16> Idxs(NumElts);
1103 for (unsigned l = 0; l != NumElts; l += 8) {
1104 for (unsigned i = 0; i != 4; ++i)
1105 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1106 for (unsigned i = 4; i != 8; ++i)
1107 Idxs[i + l] = i + l;
1110 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1112 if (CI->getNumArgOperands() == 4)
1113 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1114 CI->getArgOperand(2));
1115 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1116 Name.startswith("avx512.mask.pshufh.w."))) {
1117 Value *Op0 = CI->getArgOperand(0);
1118 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1119 unsigned NumElts = CI->getType()->getVectorNumElements();
1121 SmallVector<uint32_t, 16> Idxs(NumElts);
1122 for (unsigned l = 0; l != NumElts; l += 8) {
1123 for (unsigned i = 0; i != 4; ++i)
1124 Idxs[i + l] = i + l;
1125 for (unsigned i = 0; i != 4; ++i)
1126 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1129 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1131 if (CI->getNumArgOperands() == 4)
1132 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1133 CI->getArgOperand(2));
1134 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1135 Name.startswith("avx512.mask.movshdup") ||
1136 Name.startswith("avx512.mask.movsldup"))) {
1137 Value *Op0 = CI->getArgOperand(0);
1138 unsigned NumElts = CI->getType()->getVectorNumElements();
1139 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1141 unsigned Offset = 0;
1142 if (Name.startswith("avx512.mask.movshdup."))
1145 SmallVector<uint32_t, 16> Idxs(NumElts);
1146 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1147 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1148 Idxs[i + l + 0] = i + l + Offset;
1149 Idxs[i + l + 1] = i + l + Offset;
1152 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1154 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1155 CI->getArgOperand(1));
1156 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1157 Name.startswith("avx512.mask.unpckl."))) {
1158 Value *Op0 = CI->getArgOperand(0);
1159 Value *Op1 = CI->getArgOperand(1);
1160 int NumElts = CI->getType()->getVectorNumElements();
1161 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1163 SmallVector<uint32_t, 64> Idxs(NumElts);
1164 for (int l = 0; l != NumElts; l += NumLaneElts)
1165 for (int i = 0; i != NumLaneElts; ++i)
1166 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1168 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1170 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1171 CI->getArgOperand(2));
1172 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1173 Name.startswith("avx512.mask.unpckh."))) {
1174 Value *Op0 = CI->getArgOperand(0);
1175 Value *Op1 = CI->getArgOperand(1);
1176 int NumElts = CI->getType()->getVectorNumElements();
1177 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1179 SmallVector<uint32_t, 64> Idxs(NumElts);
1180 for (int l = 0; l != NumElts; l += NumLaneElts)
1181 for (int i = 0; i != NumLaneElts; ++i)
1182 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1184 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1186 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1187 CI->getArgOperand(2));
1188 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1189 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1190 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1191 CI->getArgOperand(2));
1192 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1193 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1194 CI->getArgOperand(1));
1195 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1196 CI->getArgOperand(2));
1197 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1198 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1199 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1200 CI->getArgOperand(2));
1201 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1202 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1203 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1204 CI->getArgOperand(2));
1206 llvm_unreachable("Unknown function for CallInst upgrade.");
1210 CI->replaceAllUsesWith(Rep);
1211 CI->eraseFromParent();
1215 std::string Name = CI->getName();
1217 CI->setName(Name + ".old");
1219 switch (NewFn->getIntrinsicID()) {
1221 llvm_unreachable("Unknown function for CallInst upgrade.");
1223 case Intrinsic::x86_avx512_mask_psll_di_512:
1224 case Intrinsic::x86_avx512_mask_psra_di_512:
1225 case Intrinsic::x86_avx512_mask_psrl_di_512:
1226 case Intrinsic::x86_avx512_mask_psll_qi_512:
1227 case Intrinsic::x86_avx512_mask_psra_qi_512:
1228 case Intrinsic::x86_avx512_mask_psrl_qi_512:
1229 case Intrinsic::arm_neon_vld1:
1230 case Intrinsic::arm_neon_vld2:
1231 case Intrinsic::arm_neon_vld3:
1232 case Intrinsic::arm_neon_vld4:
1233 case Intrinsic::arm_neon_vld2lane:
1234 case Intrinsic::arm_neon_vld3lane:
1235 case Intrinsic::arm_neon_vld4lane:
1236 case Intrinsic::arm_neon_vst1:
1237 case Intrinsic::arm_neon_vst2:
1238 case Intrinsic::arm_neon_vst3:
1239 case Intrinsic::arm_neon_vst4:
1240 case Intrinsic::arm_neon_vst2lane:
1241 case Intrinsic::arm_neon_vst3lane:
1242 case Intrinsic::arm_neon_vst4lane: {
1243 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1244 CI->arg_operands().end());
1245 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1246 CI->eraseFromParent();
1250 case Intrinsic::ctlz:
1251 case Intrinsic::cttz:
1252 assert(CI->getNumArgOperands() == 1 &&
1253 "Mismatch between function args and call args");
1254 CI->replaceAllUsesWith(Builder.CreateCall(
1255 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
1256 CI->eraseFromParent();
1259 case Intrinsic::objectsize:
1260 CI->replaceAllUsesWith(Builder.CreateCall(
1261 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
1262 CI->eraseFromParent();
1265 case Intrinsic::ctpop: {
1266 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1267 CI->eraseFromParent();
1271 case Intrinsic::x86_xop_vfrcz_ss:
1272 case Intrinsic::x86_xop_vfrcz_sd:
1273 CI->replaceAllUsesWith(
1274 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
1275 CI->eraseFromParent();
1278 case Intrinsic::x86_xop_vpermil2pd:
1279 case Intrinsic::x86_xop_vpermil2ps:
1280 case Intrinsic::x86_xop_vpermil2pd_256:
1281 case Intrinsic::x86_xop_vpermil2ps_256: {
1282 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1283 CI->arg_operands().end());
1284 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
1285 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
1286 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
1287 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
1288 CI->eraseFromParent();
1292 case Intrinsic::x86_sse41_ptestc:
1293 case Intrinsic::x86_sse41_ptestz:
1294 case Intrinsic::x86_sse41_ptestnzc: {
1295 // The arguments for these intrinsics used to be v4f32, and changed
1296 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
1297 // So, the only thing required is a bitcast for both arguments.
1298 // First, check the arguments have the old type.
1299 Value *Arg0 = CI->getArgOperand(0);
1300 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
1303 // Old intrinsic, add bitcasts
1304 Value *Arg1 = CI->getArgOperand(1);
1306 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1308 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
1309 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1311 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
1312 CI->replaceAllUsesWith(NewCall);
1313 CI->eraseFromParent();
1317 case Intrinsic::x86_sse41_insertps:
1318 case Intrinsic::x86_sse41_dppd:
1319 case Intrinsic::x86_sse41_dpps:
1320 case Intrinsic::x86_sse41_mpsadbw:
1321 case Intrinsic::x86_avx_dp_ps_256:
1322 case Intrinsic::x86_avx2_mpsadbw: {
1323 // Need to truncate the last argument from i32 to i8 -- this argument models
1324 // an inherently 8-bit immediate operand to these x86 instructions.
1325 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1326 CI->arg_operands().end());
1328 // Replace the last argument with a trunc.
1329 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
1331 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
1332 CI->replaceAllUsesWith(NewCall);
1333 CI->eraseFromParent();
1337 case Intrinsic::thread_pointer: {
1338 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
1339 CI->eraseFromParent();
1343 case Intrinsic::masked_load:
1344 case Intrinsic::masked_store: {
1345 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1346 CI->arg_operands().end());
1347 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1348 CI->eraseFromParent();
1354 void llvm::UpgradeCallsToIntrinsic(Function *F) {
1355 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1357 // Check if this function should be upgraded and get the replacement function
1360 if (UpgradeIntrinsicFunction(F, NewFn)) {
1361 // Replace all users of the old function with the new function or new
1362 // instructions. This is not a range loop because the call is deleted.
1363 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
1364 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
1365 UpgradeIntrinsicCall(CI, NewFn);
1367 // Remove old function, no longer used, from the module.
1368 F->eraseFromParent();
1372 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
1373 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
1374 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
1375 // Check if the tag uses struct-path aware TBAA format.
1376 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
1379 if (MD->getNumOperands() == 3) {
1380 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
1381 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
1382 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
1383 Metadata *Elts2[] = {ScalarType, ScalarType,
1384 ConstantAsMetadata::get(Constant::getNullValue(
1385 Type::getInt64Ty(I->getContext()))),
1387 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
1389 // Create a MDNode <MD, MD, offset 0>
1390 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
1391 Type::getInt64Ty(I->getContext())))};
1392 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
1396 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
1397 Instruction *&Temp) {
1398 if (Opc != Instruction::BitCast)
1402 Type *SrcTy = V->getType();
1403 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1404 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1405 LLVMContext &Context = V->getContext();
1407 // We have no information about target data layout, so we assume that
1408 // the maximum pointer size is 64bit.
1409 Type *MidTy = Type::getInt64Ty(Context);
1410 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
1412 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
1418 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
1419 if (Opc != Instruction::BitCast)
1422 Type *SrcTy = C->getType();
1423 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1424 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1425 LLVMContext &Context = C->getContext();
1427 // We have no information about target data layout, so we assume that
1428 // the maximum pointer size is 64bit.
1429 Type *MidTy = Type::getInt64Ty(Context);
1431 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
1438 /// Check the debug info version number, if it is out-dated, drop the debug
1439 /// info. Return true if module is modified.
1440 bool llvm::UpgradeDebugInfo(Module &M) {
1441 unsigned Version = getDebugMetadataVersionFromModule(M);
1442 if (Version == DEBUG_METADATA_VERSION)
1445 bool RetCode = StripDebugInfo(M);
1447 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
1448 M.getContext().diagnose(DiagVersion);
1453 bool llvm::UpgradeModuleFlags(Module &M) {
1454 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
1458 bool HasObjCFlag = false, HasClassProperties = false;
1459 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
1460 MDNode *Op = ModFlags->getOperand(I);
1461 if (Op->getNumOperands() < 2)
1463 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
1466 if (ID->getString() == "Objective-C Image Info Version")
1468 if (ID->getString() == "Objective-C Class Properties")
1469 HasClassProperties = true;
1471 // "Objective-C Class Properties" is recently added for Objective-C. We
1472 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
1473 // flag of value 0, so we can correclty report error when trying to link
1474 // an ObjC bitcode without this module flag with an ObjC bitcode with this
1476 if (HasObjCFlag && !HasClassProperties) {
1477 M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
1484 static bool isOldLoopArgument(Metadata *MD) {
1485 auto *T = dyn_cast_or_null<MDTuple>(MD);
1488 if (T->getNumOperands() < 1)
1490 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
1493 return S->getString().startswith("llvm.vectorizer.");
1496 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
1497 StringRef OldPrefix = "llvm.vectorizer.";
1498 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
1500 if (OldTag == "llvm.vectorizer.unroll")
1501 return MDString::get(C, "llvm.loop.interleave.count");
1503 return MDString::get(
1504 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
1508 static Metadata *upgradeLoopArgument(Metadata *MD) {
1509 auto *T = dyn_cast_or_null<MDTuple>(MD);
1512 if (T->getNumOperands() < 1)
1514 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
1517 if (!OldTag->getString().startswith("llvm.vectorizer."))
1520 // This has an old tag. Upgrade it.
1521 SmallVector<Metadata *, 8> Ops;
1522 Ops.reserve(T->getNumOperands());
1523 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
1524 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
1525 Ops.push_back(T->getOperand(I));
1527 return MDTuple::get(T->getContext(), Ops);
1530 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
1531 auto *T = dyn_cast<MDTuple>(&N);
1535 if (!llvm::any_of(T->operands(), isOldLoopArgument))
1538 SmallVector<Metadata *, 8> Ops;
1539 Ops.reserve(T->getNumOperands());
1540 for (Metadata *MD : T->operands())
1541 Ops.push_back(upgradeLoopArgument(MD));
1543 return MDTuple::get(T->getContext(), Ops);