1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
38 // Check whether this is an old version of the function, which received
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// NOTE(review): this listing is a numbered paste with many physical lines
// missing — the embedded line numbers skip values exactly where the
// `return true;`/`return false;` statements, closing braces, `else` lines,
// and the `switch (Name[0])` scaffolding of upstream LLVM's AutoUpgrade.cpp
// belonged. The annotations below describe intent only; the missing lines
// must be restored from the upstream file before this can compile.
//
// Detects a declaration of an obsolete intrinsic and, when possible, creates
// the replacement declaration in NewFn. Returns true when an upgrade is
// needed (NewFn may be null, meaning the call itself must be rewritten by
// UpgradeIntrinsicCall).
66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
67   assert(F && "Illegal to upgrade a non-existent Function.");
69   // Quickly eliminate it, if it's not a candidate.
70   StringRef Name = F->getName();
71   if (Name.size() <= 8 || !Name.startswith("llvm."))
73   Name = Name.substr(5); // Strip off "llvm."
   // --- ARM NEON: llvm.arm.neon.vclz.* is renamed to llvm.ctlz.* by hand
   // because getDeclaration would append an extra ".i1" suffix.
78   if (Name.startswith("arm.neon.vclz")) {
80       F->arg_begin()->getType(),
81       Type::getInt1Ty(F->getContext())
83     // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
84     // the end of the name. Change name from llvm.arm.neon.vclz.* to
86     FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
87     NewFn = Function::Create(fType, F->getLinkage(),
88                              "llvm.ctlz." + Name.substr(14), F->getParent());
   // vcnt maps directly onto the generic ctpop intrinsic.
91   if (Name.startswith("arm.neon.vcnt")) {
92     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
93                                       F->arg_begin()->getType());
   // vld1..vld4 (and lane variants): append an explicit ".p0i8" pointer
   // suffix; created by hand so return types stay nominally (not just
   // structurally) equal.
96   Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
97   if (vldRegex.match(Name)) {
98     auto fArgs = F->getFunctionType()->params();
99     SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
100     // Can't use Intrinsic::getDeclaration here as the return types might
101     // then only be structurally equal.
102     FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
103     NewFn = Function::Create(fType, F->getLinkage(),
104                              "llvm." + Name + ".p0i8", F->getParent());
   // vst1..vst4 (and lane variants): the arity of the old declaration
   // selects the replacement ID (arg count - 3 for plain stores,
   // arg count - 5 for lane stores).
107   Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
108   if (vstRegex.match(Name)) {
109     static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
110                                               Intrinsic::arm_neon_vst2,
111                                               Intrinsic::arm_neon_vst3,
112                                               Intrinsic::arm_neon_vst4};
114     static const Intrinsic::ID StoreLaneInts[] = {
115       Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
116       Intrinsic::arm_neon_vst4lane
119     auto fArgs = F->getFunctionType()->params();
120     Type *Tys[] = {fArgs[0], fArgs[1]};
121     if (Name.find("lane") == StringRef::npos)
122       NewFn = Intrinsic::getDeclaration(F->getParent(),
123                                         StoreInts[fArgs.size() - 3], Tys);
125       NewFn = Intrinsic::getDeclaration(F->getParent(),
126                                         StoreLaneInts[fArgs.size() - 5], Tys);
129   if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
130     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
   // Generic ctlz/cttz declared with a single argument are re-declared with
   // the current two-argument (value, is-zero-undef) mangling.
137   if (Name.startswith("ctlz.") && F->arg_size() == 1) {
138     F->setName(Name + ".old");
139     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
140                                       F->arg_begin()->getType());
143   if (Name.startswith("cttz.") && F->arg_size() == 1) {
144     F->setName(Name + ".old");
145     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
146                                       F->arg_begin()->getType());
   // masked.load/masked.store: upgrade only when the existing name does not
   // already match the current type mangling.
153   if (Name.startswith("masked.load.")) {
154     Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
155     if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
156       F->setName(Name + ".old");
157       NewFn = Intrinsic::getDeclaration(F->getParent(),
158                                         Intrinsic::masked_load,
163   if (Name.startswith("masked.store.")) {
164     auto Args = F->getFunctionType()->params();
165     Type *Tys[] = { Args[0], Args[1] };
166     if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
167       F->setName(Name + ".old");
168       NewFn = Intrinsic::getDeclaration(F->getParent(),
169                                         Intrinsic::masked_store,
178     // We only need to change the name to match the mangling including the
180   if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
181     Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
182     if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
183       F->setName(Name + ".old");
184       NewFn = Intrinsic::getDeclaration(F->getParent(),
185                                         Intrinsic::objectsize, Tys);
   // stackprotectorcheck was removed entirely; presumably NewFn stays null
   // here so the call is dropped/rewritten elsewhere — confirm upstream.
192     if (Name == "stackprotectorcheck") {
   // --- x86 section: strip the "x86." prefix first.
198   bool IsX86 = Name.startswith("x86.");
200     Name = Name.substr(4);
   // This large alternation lists intrinsics whose declarations are fine but
   // whose *calls* must be rewritten by UpgradeIntrinsicCall (NewFn remains
   // null in the upstream body for this branch).
203       (Name.startswith("sse2.pcmpeq.") ||
204        Name.startswith("sse2.pcmpgt.") ||
205        Name.startswith("avx2.pcmpeq.") ||
206        Name.startswith("avx2.pcmpgt.") ||
207        Name.startswith("avx512.mask.pcmpeq.") ||
208        Name.startswith("avx512.mask.pcmpgt.") ||
209        Name == "sse41.pmaxsb" ||
210        Name == "sse2.pmaxs.w" ||
211        Name == "sse41.pmaxsd" ||
212        Name == "sse2.pmaxu.b" ||
213        Name == "sse41.pmaxuw" ||
214        Name == "sse41.pmaxud" ||
215        Name == "sse41.pminsb" ||
216        Name == "sse2.pmins.w" ||
217        Name == "sse41.pminsd" ||
218        Name == "sse2.pminu.b" ||
219        Name == "sse41.pminuw" ||
220        Name == "sse41.pminud" ||
221        Name.startswith("avx2.pmax") ||
222        Name.startswith("avx2.pmin") ||
223        Name.startswith("avx2.vbroadcast") ||
224        Name.startswith("avx2.pbroadcast") ||
225        Name.startswith("avx.vpermil.") ||
226        Name.startswith("sse2.pshuf") ||
227        Name.startswith("avx512.pbroadcast") ||
228        Name.startswith("avx512.mask.broadcast.s") ||
229        Name.startswith("avx512.mask.movddup") ||
230        Name.startswith("avx512.mask.movshdup") ||
231        Name.startswith("avx512.mask.movsldup") ||
232        Name.startswith("avx512.mask.pshuf.d.") ||
233        Name.startswith("avx512.mask.pshufl.w.") ||
234        Name.startswith("avx512.mask.pshufh.w.") ||
235        Name.startswith("avx512.mask.vpermil.p") ||
236        Name.startswith("avx512.mask.perm.df.") ||
237        Name.startswith("avx512.mask.perm.di.") ||
238        Name.startswith("avx512.mask.punpckl") ||
239        Name.startswith("avx512.mask.punpckh") ||
240        Name.startswith("avx512.mask.unpckl.") ||
241        Name.startswith("avx512.mask.unpckh.") ||
242        Name.startswith("avx512.mask.pand.") ||
243        Name.startswith("avx512.mask.pandn.") ||
244        Name.startswith("avx512.mask.por.") ||
245        Name.startswith("avx512.mask.pxor.") ||
246        Name.startswith("sse41.pmovsx") ||
247        Name.startswith("sse41.pmovzx") ||
248        Name.startswith("avx2.pmovsx") ||
249        Name.startswith("avx2.pmovzx") ||
250        Name == "sse2.cvtdq2pd" ||
251        Name == "sse2.cvtps2pd" ||
252        Name == "avx.cvtdq2.pd.256" ||
253        Name == "avx.cvt.ps2.pd.256" ||
254        Name.startswith("avx.vinsertf128.") ||
255        Name == "avx2.vinserti128" ||
256        Name.startswith("avx.vextractf128.") ||
257        Name == "avx2.vextracti128" ||
258        Name.startswith("sse4a.movnt.") ||
259        Name.startswith("avx.movnt.") ||
260        Name.startswith("avx512.storent.") ||
261        Name == "sse2.storel.dq" ||
262        Name.startswith("sse.storeu.") ||
263        Name.startswith("sse2.storeu.") ||
264        Name.startswith("avx.storeu.") ||
265        Name.startswith("avx512.mask.storeu.p") ||
266        Name.startswith("avx512.mask.storeu.b.") ||
267        Name.startswith("avx512.mask.storeu.w.") ||
268        Name.startswith("avx512.mask.storeu.d.") ||
269        Name.startswith("avx512.mask.storeu.q.") ||
270        Name.startswith("avx512.mask.store.p") ||
271        Name.startswith("avx512.mask.store.b.") ||
272        Name.startswith("avx512.mask.store.w.") ||
273        Name.startswith("avx512.mask.store.d.") ||
274        Name.startswith("avx512.mask.store.q.") ||
275        Name.startswith("avx512.mask.loadu.p") ||
276        Name.startswith("avx512.mask.loadu.b.") ||
277        Name.startswith("avx512.mask.loadu.w.") ||
278        Name.startswith("avx512.mask.loadu.d.") ||
279        Name.startswith("avx512.mask.loadu.q.") ||
280        Name.startswith("avx512.mask.load.p") ||
281        Name.startswith("avx512.mask.load.b.") ||
282        Name.startswith("avx512.mask.load.w.") ||
283        Name.startswith("avx512.mask.load.d.") ||
284        Name.startswith("avx512.mask.load.q.") ||
285        Name == "sse42.crc32.64.8" ||
286        Name.startswith("avx.vbroadcast.s") ||
287        Name.startswith("avx512.mask.palignr.") ||
288        Name.startswith("sse2.psll.dq") ||
289        Name.startswith("sse2.psrl.dq") ||
290        Name.startswith("avx2.psll.dq") ||
291        Name.startswith("avx2.psrl.dq") ||
292        Name.startswith("avx512.psll.dq") ||
293        Name.startswith("avx512.psrl.dq") ||
294        Name == "sse41.pblendw" ||
295        Name.startswith("sse41.blendp") ||
296        Name.startswith("avx.blend.p") ||
297        Name == "avx2.pblendw" ||
298        Name.startswith("avx2.pblendd.") ||
299        Name == "avx2.vbroadcasti128" ||
300        Name == "xop.vpcmov" ||
301        (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
305   // SSE4.1 ptest functions may have an old signature.
306   if (IsX86 && Name.startswith("sse41.ptest")) {
307     if (Name.substr(11) == "c")
308       return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
309     if (Name.substr(11) == "z")
310       return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
311     if (Name.substr(11) == "nzc")
312       return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
314   // Several blend and other instructions with masks used the wrong number of
316   if (IsX86 && Name == "sse41.insertps")
317     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
319   if (IsX86 && Name == "sse41.dppd")
320     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
322   if (IsX86 && Name == "sse41.dpps")
323     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
325   if (IsX86 && Name == "sse41.mpsadbw")
326     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
328   if (IsX86 && Name == "avx.dp.ps.256")
329     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
331   if (IsX86 && Name == "avx2.mpsadbw")
332     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
335   // frcz.ss/sd may need to have an argument dropped
336   if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
337     F->setName(Name + ".old");
338     NewFn = Intrinsic::getDeclaration(F->getParent(),
339                                       Intrinsic::x86_xop_vfrcz_ss);
342   if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
343     F->setName(Name + ".old");
344     NewFn = Intrinsic::getDeclaration(F->getParent(),
345                                       Intrinsic::x86_xop_vfrcz_sd);
   // avx512 immediate shifts: decode psll/psra/psrl and the d/q element
   // width out of the name itself (Name[18] is the element-type letter).
348   if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
349                 Name.startswith("avx512.mask.psrai.") ||
350                 Name.startswith("avx512.mask.psrli."))) {
351     Intrinsic::ID ShiftID;
352     if (Name.slice(12, 16) == "psll")
353       ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
354                                 : Intrinsic::x86_avx512_mask_psll_qi_512;
355     else if (Name.slice(12, 16) == "psra")
356       ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
357                                 : Intrinsic::x86_avx512_mask_psra_qi_512;
359       ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
360                                 : Intrinsic::x86_avx512_mask_psrl_qi_512;
361     F->setName("llvm.x86." + Name + ".old");
362     NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
365   // Fix the FMA4 intrinsics to remove the 4
366   if (IsX86 && Name.startswith("fma4.")) {
367     F->setName("llvm.x86.fma" + Name.substr(5));
371   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
372   if (IsX86 && Name.startswith("xop.vpermil2")) {
373     auto Params = F->getFunctionType()->params();
374     auto Idx = Params[2];
375     if (Idx->getScalarType()->isFloatingPointTy()) {
376       F->setName("llvm.x86." + Name + ".old");
377       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
378       unsigned EltSize = Idx->getScalarSizeInBits();
379       Intrinsic::ID Permil2ID;
380       if (EltSize == 64 && IdxSize == 128)
381         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
382       else if (EltSize == 32 && IdxSize == 128)
383         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
384       else if (EltSize == 64 && IdxSize == 256)
385         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
387         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
388       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
396   // This may not belong here. This function is effectively being overloaded
397   // to both detect an intrinsic which needs upgrading, and to provide the
398   // upgraded form of the intrinsic. We should perhaps have two separate
399   // functions for this.
403 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
405 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
406 assert(F != NewFn && "Intrinsic function upgraded to the same function");
408 // Upgrade intrinsic attributes. This does not change the function.
411 if (Intrinsic::ID id = F->getIntrinsicID())
412 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
416 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
417 // Nothing to do yet.
421 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
423 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
424 Value *Op, unsigned Shift) {
425 Type *ResultTy = Op->getType();
426 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
428 // Bitcast from a 64-bit element type to a byte element type.
429 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
430 Op = Builder.CreateBitCast(Op, VecTy, "cast");
432 // We'll be shuffling in zeroes.
433 Value *Res = Constant::getNullValue(VecTy);
435 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
436 // we'll just return the zero vector.
439 // 256/512-bit version is split into 2/4 16-byte lanes.
440 for (unsigned l = 0; l != NumElts; l += 16)
441 for (unsigned i = 0; i != 16; ++i) {
442 unsigned Idx = NumElts + i - Shift;
444 Idx -= NumElts - 16; // end of lane, switch operand.
445 Idxs[l + i] = Idx + l;
448 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
451 // Bitcast back to a 64-bit element type.
452 return Builder.CreateBitCast(Res, ResultTy, "cast");
455 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
457 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
459 Type *ResultTy = Op->getType();
460 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
462 // Bitcast from a 64-bit element type to a byte element type.
463 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
464 Op = Builder.CreateBitCast(Op, VecTy, "cast");
466 // We'll be shuffling in zeroes.
467 Value *Res = Constant::getNullValue(VecTy);
469 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
470 // we'll just return the zero vector.
473 // 256/512-bit version is split into 2/4 16-byte lanes.
474 for (unsigned l = 0; l != NumElts; l += 16)
475 for (unsigned i = 0; i != 16; ++i) {
476 unsigned Idx = i + Shift;
478 Idx += NumElts - 16; // end of lane, switch operand.
479 Idxs[l + i] = Idx + l;
482 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
485 // Bitcast back to a 64-bit element type.
486 return Builder.CreateBitCast(Res, ResultTy, "cast");
489 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
491 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
492 cast<IntegerType>(Mask->getType())->getBitWidth());
493 Mask = Builder.CreateBitCast(Mask, MaskTy);
495 // If we have less than 8 elements, then the starting mask was an i8 and
496 // we need to extract down to the right number of elements.
499 for (unsigned i = 0; i != NumElts; ++i)
501 Mask = Builder.CreateShuffleVector(Mask, Mask,
502 makeArrayRef(Indices, NumElts),
509 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
510 Value *Op0, Value *Op1) {
511 // If the mask is all ones just emit the align operation.
512 if (const auto *C = dyn_cast<Constant>(Mask))
513 if (C->isAllOnesValue())
516 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
517 return Builder.CreateSelect(Mask, Op0, Op1);
520 static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
521 Value *Op0, Value *Op1, Value *Shift,
522 Value *Passthru, Value *Mask) {
523 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
525 unsigned NumElts = Op0->getType()->getVectorNumElements();
526 assert(NumElts % 16 == 0);
528 // If palignr is shifting the pair of vectors more than the size of two
531 return llvm::Constant::getNullValue(Op0->getType());
533 // If palignr is shifting the pair of input vectors more than one lane,
534 // but less than two lanes, convert to shifting in zeroes.
538 Op0 = llvm::Constant::getNullValue(Op0->getType());
541 uint32_t Indices[64];
542 // 256-bit palignr operates on 128-bit lanes so we need to handle that
543 for (unsigned l = 0; l != NumElts; l += 16) {
544 for (unsigned i = 0; i != 16; ++i) {
545 unsigned Idx = ShiftVal + i;
547 Idx += NumElts - 16; // End of lane, switch operand.
548 Indices[l + i] = Idx + l;
552 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
553 makeArrayRef(Indices, NumElts),
556 return EmitX86Select(Builder, Mask, Align, Passthru);
559 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
560 Value *Ptr, Value *Data, Value *Mask,
562 // Cast the pointer to the right type.
563 Ptr = Builder.CreateBitCast(Ptr,
564 llvm::PointerType::getUnqual(Data->getType()));
566 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
568 // If the mask is all ones just emit a regular store.
569 if (const auto *C = dyn_cast<Constant>(Mask))
570 if (C->isAllOnesValue())
571 return Builder.CreateAlignedStore(Data, Ptr, Align);
573 // Convert the mask from an integer type to a vector of i1.
574 unsigned NumElts = Data->getType()->getVectorNumElements();
575 Mask = getX86MaskVec(Builder, Mask, NumElts);
576 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
579 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
580 Value *Ptr, Value *Passthru, Value *Mask,
582 // Cast the pointer to the right type.
583 Ptr = Builder.CreateBitCast(Ptr,
584 llvm::PointerType::getUnqual(Passthru->getType()));
586 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
588 // If the mask is all ones just emit a regular store.
589 if (const auto *C = dyn_cast<Constant>(Mask))
590 if (C->isAllOnesValue())
591 return Builder.CreateAlignedLoad(Ptr, Align);
593 // Convert the mask from an integer type to a vector of i1.
594 unsigned NumElts = Passthru->getType()->getVectorNumElements();
595 Mask = getX86MaskVec(Builder, Mask, NumElts);
596 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
599 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
600 ICmpInst::Predicate Pred) {
601 Value *Op0 = CI.getArgOperand(0);
602 Value *Op1 = CI.getArgOperand(1);
603 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
604 return Builder.CreateSelect(Cmp, Op0, Op1);
607 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
608 ICmpInst::Predicate Pred) {
609 Value *Op0 = CI.getArgOperand(0);
610 unsigned NumElts = Op0->getType()->getVectorNumElements();
611 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
613 Value *Mask = CI.getArgOperand(2);
614 const auto *C = dyn_cast<Constant>(Mask);
615 if (!C || !C->isAllOnesValue())
616 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
620 for (unsigned i = 0; i != NumElts; ++i)
622 for (unsigned i = NumElts; i != 8; ++i)
623 Indices[i] = NumElts + i % NumElts;
624 Cmp = Builder.CreateShuffleVector(Cmp,
625 Constant::getNullValue(Cmp->getType()),
628 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
629 std::max(NumElts, 8U)));
632 /// Upgrade a call to an old intrinsic. All argument and return casting must be
633 /// provided to seamlessly integrate with existing context.
634 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
635 Function *F = CI->getCalledFunction();
636 LLVMContext &C = CI->getContext();
637 IRBuilder<> Builder(C);
638 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
640 assert(F && "Intrinsic call is not direct?");
643 // Get the Function's name.
644 StringRef Name = F->getName();
646 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
647 Name = Name.substr(5);
649 bool IsX86 = Name.startswith("x86.");
651 Name = Name.substr(4);
654 // Upgrade packed integer vector compare intrinsics to compare instructions.
655 if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
656 Name.startswith("avx2.pcmpeq."))) {
657 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
659 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
660 } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
661 Name.startswith("avx2.pcmpgt."))) {
662 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
664 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
665 } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
666 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
667 } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
668 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
669 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
670 Name == "sse2.pmaxs.w" ||
671 Name == "sse41.pmaxsd" ||
672 Name.startswith("avx2.pmaxs"))) {
673 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
674 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
675 Name == "sse41.pmaxuw" ||
676 Name == "sse41.pmaxud" ||
677 Name.startswith("avx2.pmaxu"))) {
678 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
679 } else if (IsX86 && (Name == "sse41.pminsb" ||
680 Name == "sse2.pmins.w" ||
681 Name == "sse41.pminsd" ||
682 Name.startswith("avx2.pmins"))) {
683 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
684 } else if (IsX86 && (Name == "sse2.pminu.b" ||
685 Name == "sse41.pminuw" ||
686 Name == "sse41.pminud" ||
687 Name.startswith("avx2.pminu"))) {
688 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
689 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
690 Name == "sse2.cvtps2pd" ||
691 Name == "avx.cvtdq2.pd.256" ||
692 Name == "avx.cvt.ps2.pd.256")) {
693 // Lossless i32/float to double conversion.
694 // Extract the bottom elements if necessary and convert to double vector.
695 Value *Src = CI->getArgOperand(0);
696 VectorType *SrcTy = cast<VectorType>(Src->getType());
697 VectorType *DstTy = cast<VectorType>(CI->getType());
698 Rep = CI->getArgOperand(0);
700 unsigned NumDstElts = DstTy->getNumElements();
701 if (NumDstElts < SrcTy->getNumElements()) {
702 assert(NumDstElts == 2 && "Unexpected vector size");
703 uint32_t ShuffleMask[2] = { 0, 1 };
704 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
708 bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
710 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
712 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
713 } else if (IsX86 && Name.startswith("sse4a.movnt.")) {
714 Module *M = F->getParent();
715 SmallVector<Metadata *, 1> Elts;
717 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
718 MDNode *Node = MDNode::get(C, Elts);
720 Value *Arg0 = CI->getArgOperand(0);
721 Value *Arg1 = CI->getArgOperand(1);
723 // Nontemporal (unaligned) store of the 0'th element of the float/double
725 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
726 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
727 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
729 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
731 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
732 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
735 CI->eraseFromParent();
737 } else if (IsX86 && (Name.startswith("avx.movnt.") ||
738 Name.startswith("avx512.storent."))) {
739 Module *M = F->getParent();
740 SmallVector<Metadata *, 1> Elts;
742 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
743 MDNode *Node = MDNode::get(C, Elts);
745 Value *Arg0 = CI->getArgOperand(0);
746 Value *Arg1 = CI->getArgOperand(1);
748 // Convert the type of the pointer to a pointer to the stored type.
749 Value *BC = Builder.CreateBitCast(Arg0,
750 PointerType::getUnqual(Arg1->getType()),
752 VectorType *VTy = cast<VectorType>(Arg1->getType());
753 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
754 VTy->getBitWidth() / 8);
755 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
758 CI->eraseFromParent();
760 } else if (IsX86 && Name == "sse2.storel.dq") {
761 Value *Arg0 = CI->getArgOperand(0);
762 Value *Arg1 = CI->getArgOperand(1);
764 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
765 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
766 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
767 Value *BC = Builder.CreateBitCast(Arg0,
768 PointerType::getUnqual(Elt->getType()),
770 Builder.CreateAlignedStore(Elt, BC, 1);
773 CI->eraseFromParent();
775 } else if (IsX86 && (Name.startswith("sse.storeu.") ||
776 Name.startswith("sse2.storeu.") ||
777 Name.startswith("avx.storeu."))) {
778 Value *Arg0 = CI->getArgOperand(0);
779 Value *Arg1 = CI->getArgOperand(1);
781 Arg0 = Builder.CreateBitCast(Arg0,
782 PointerType::getUnqual(Arg1->getType()),
784 Builder.CreateAlignedStore(Arg1, Arg0, 1);
787 CI->eraseFromParent();
789 } else if (IsX86 && (Name.startswith("avx512.mask.storeu.p") ||
790 Name.startswith("avx512.mask.storeu.b.") ||
791 Name.startswith("avx512.mask.storeu.w.") ||
792 Name.startswith("avx512.mask.storeu.d.") ||
793 Name.startswith("avx512.mask.storeu.q."))) {
794 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
795 CI->getArgOperand(2), /*Aligned*/false);
798 CI->eraseFromParent();
800 } else if (IsX86 && (Name.startswith("avx512.mask.store.p") ||
801 Name.startswith("avx512.mask.store.b.") ||
802 Name.startswith("avx512.mask.store.w.") ||
803 Name.startswith("avx512.mask.store.d.") ||
804 Name.startswith("avx512.mask.store.q."))) {
805 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
806 CI->getArgOperand(2), /*Aligned*/true);
809 CI->eraseFromParent();
811 } else if (IsX86 && (Name.startswith("avx512.mask.loadu.p") ||
812 Name.startswith("avx512.mask.loadu.b.") ||
813 Name.startswith("avx512.mask.loadu.w.") ||
814 Name.startswith("avx512.mask.loadu.d.") ||
815 Name.startswith("avx512.mask.loadu.q."))) {
816 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
817 CI->getArgOperand(1), CI->getArgOperand(2),
819 } else if (IsX86 && (Name.startswith("avx512.mask.load.p") ||
820 Name.startswith("avx512.mask.load.b.") ||
821 Name.startswith("avx512.mask.load.w.") ||
822 Name.startswith("avx512.mask.load.d.") ||
823 Name.startswith("avx512.mask.load.q."))) {
824 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
825 CI->getArgOperand(1),CI->getArgOperand(2),
827 } else if (IsX86 && Name.startswith("xop.vpcom")) {
829 if (Name.endswith("ub"))
830 intID = Intrinsic::x86_xop_vpcomub;
831 else if (Name.endswith("uw"))
832 intID = Intrinsic::x86_xop_vpcomuw;
833 else if (Name.endswith("ud"))
834 intID = Intrinsic::x86_xop_vpcomud;
835 else if (Name.endswith("uq"))
836 intID = Intrinsic::x86_xop_vpcomuq;
837 else if (Name.endswith("b"))
838 intID = Intrinsic::x86_xop_vpcomb;
839 else if (Name.endswith("w"))
840 intID = Intrinsic::x86_xop_vpcomw;
841 else if (Name.endswith("d"))
842 intID = Intrinsic::x86_xop_vpcomd;
843 else if (Name.endswith("q"))
844 intID = Intrinsic::x86_xop_vpcomq;
846 llvm_unreachable("Unknown suffix");
848 Name = Name.substr(9); // strip off "xop.vpcom"
850 if (Name.startswith("lt"))
852 else if (Name.startswith("le"))
854 else if (Name.startswith("gt"))
856 else if (Name.startswith("ge"))
858 else if (Name.startswith("eq"))
860 else if (Name.startswith("ne"))
862 else if (Name.startswith("false"))
864 else if (Name.startswith("true"))
867 llvm_unreachable("Unknown condition");
869 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
871 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
872 Builder.getInt8(Imm)});
873 } else if (IsX86 && Name == "xop.vpcmov") {
874 Value *Arg0 = CI->getArgOperand(0);
875 Value *Arg1 = CI->getArgOperand(1);
876 Value *Sel = CI->getArgOperand(2);
877 unsigned NumElts = CI->getType()->getVectorNumElements();
878 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
879 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
880 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
881 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
882 Rep = Builder.CreateOr(Sel0, Sel1);
883 } else if (IsX86 && Name == "sse42.crc32.64.8") {
884 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
885 Intrinsic::x86_sse42_crc32_32_8);
886 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
887 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
888 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
889 } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
890 // Replace broadcasts with a series of insertelements.
891 Type *VecTy = CI->getType();
892 Type *EltTy = VecTy->getVectorElementType();
893 unsigned EltNum = VecTy->getVectorNumElements();
894 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
895 EltTy->getPointerTo());
896 Value *Load = Builder.CreateLoad(EltTy, Cast);
897 Type *I32Ty = Type::getInt32Ty(C);
898 Rep = UndefValue::get(VecTy);
899 for (unsigned I = 0; I < EltNum; ++I)
900 Rep = Builder.CreateInsertElement(Rep, Load,
901 ConstantInt::get(I32Ty, I));
902 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
903 Name.startswith("sse41.pmovzx") ||
904 Name.startswith("avx2.pmovsx") ||
905 Name.startswith("avx2.pmovzx"))) {
906 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
907 VectorType *DstTy = cast<VectorType>(CI->getType());
908 unsigned NumDstElts = DstTy->getNumElements();
910 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
911 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
912 for (unsigned i = 0; i != NumDstElts; ++i)
915 Value *SV = Builder.CreateShuffleVector(
916 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
918 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
919 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
920 : Builder.CreateZExt(SV, DstTy);
921 } else if (IsX86 && Name == "avx2.vbroadcasti128") {
922 // Replace vbroadcasts with a vector shuffle.
923 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
924 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
925 PointerType::getUnqual(VT));
926 Value *Load = Builder.CreateLoad(VT, Op);
927 uint32_t Idxs[4] = { 0, 1, 0, 1 };
928 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
930 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
931 Name.startswith("avx2.vbroadcast") ||
932 Name.startswith("avx512.pbroadcast") ||
933 Name.startswith("avx512.mask.broadcast.s"))) {
934 // Replace vp?broadcasts with a vector shuffle.
935 Value *Op = CI->getArgOperand(0);
936 unsigned NumElts = CI->getType()->getVectorNumElements();
937 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
938 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
939 Constant::getNullValue(MaskTy));
941 if (CI->getNumArgOperands() == 3)
942 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
943 CI->getArgOperand(1));
944 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
945 Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
946 CI->getArgOperand(1),
947 CI->getArgOperand(2),
948 CI->getArgOperand(3),
949 CI->getArgOperand(4));
950 } else if (IsX86 && (Name == "sse2.psll.dq" ||
951 Name == "avx2.psll.dq")) {
952 // 128/256-bit shift left specified in bits.
953 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
954 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
955 Shift / 8); // Shift is in bits.
956 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
957 Name == "avx2.psrl.dq")) {
958 // 128/256-bit shift right specified in bits.
959 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
960 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
961 Shift / 8); // Shift is in bits.
962 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
963 Name == "avx2.psll.dq.bs" ||
964 Name == "avx512.psll.dq.512")) {
965 // 128/256/512-bit shift left specified in bytes.
966 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
967 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
968 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
969 Name == "avx2.psrl.dq.bs" ||
970 Name == "avx512.psrl.dq.512")) {
971 // 128/256/512-bit shift right specified in bytes.
972 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
973 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
974 } else if (IsX86 && (Name == "sse41.pblendw" ||
975 Name.startswith("sse41.blendp") ||
976 Name.startswith("avx.blend.p") ||
977 Name == "avx2.pblendw" ||
978 Name.startswith("avx2.pblendd."))) {
979 Value *Op0 = CI->getArgOperand(0);
980 Value *Op1 = CI->getArgOperand(1);
981 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
982 VectorType *VecTy = cast<VectorType>(CI->getType());
983 unsigned NumElts = VecTy->getNumElements();
985 SmallVector<uint32_t, 16> Idxs(NumElts);
986 for (unsigned i = 0; i != NumElts; ++i)
987 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
989 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
990 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
991 Name == "avx2.vinserti128")) {
992 Value *Op0 = CI->getArgOperand(0);
993 Value *Op1 = CI->getArgOperand(1);
994 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
995 VectorType *VecTy = cast<VectorType>(CI->getType());
996 unsigned NumElts = VecTy->getNumElements();
998 // Mask off the high bits of the immediate value; hardware ignores those.
1001 // Extend the second operand into a vector that is twice as big.
1002 Value *UndefV = UndefValue::get(Op1->getType());
1003 SmallVector<uint32_t, 8> Idxs(NumElts);
1004 for (unsigned i = 0; i != NumElts; ++i)
1006 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1008 // Insert the second operand into the first operand.
1010 // Note that there is no guarantee that instruction lowering will actually
1011 // produce a vinsertf128 instruction for the created shuffles. In
1012 // particular, the 0 immediate case involves no lane changes, so it can
1013 // be handled as a blend.
1015 // Example of shuffle mask for 32-bit elements:
1016 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1017 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1019 // The low half of the result is either the low half of the 1st operand
1020 // or the low half of the 2nd operand (the inserted vector).
1021 for (unsigned i = 0; i != NumElts / 2; ++i)
1022 Idxs[i] = Imm ? i : (i + NumElts);
1023 // The high half of the result is either the low half of the 2nd operand
1024 // (the inserted vector) or the high half of the 1st operand.
1025 for (unsigned i = NumElts / 2; i != NumElts; ++i)
1026 Idxs[i] = Imm ? (i + NumElts / 2) : i;
1027 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1028 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1029 Name == "avx2.vextracti128")) {
1030 Value *Op0 = CI->getArgOperand(0);
1031 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1032 VectorType *VecTy = cast<VectorType>(CI->getType());
1033 unsigned NumElts = VecTy->getNumElements();
1035 // Mask off the high bits of the immediate value; hardware ignores those.
1038 // Get indexes for either the high half or low half of the input vector.
1039 SmallVector<uint32_t, 4> Idxs(NumElts);
1040 for (unsigned i = 0; i != NumElts; ++i) {
1041 Idxs[i] = Imm ? (i + NumElts) : i;
1044 Value *UndefV = UndefValue::get(Op0->getType());
1045 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
1046 } else if (!IsX86 && Name == "stackprotectorcheck") {
1048 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1049 Name.startswith("avx512.mask.perm.di."))) {
1050 Value *Op0 = CI->getArgOperand(0);
1051 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1052 VectorType *VecTy = cast<VectorType>(CI->getType());
1053 unsigned NumElts = VecTy->getNumElements();
1055 SmallVector<uint32_t, 8> Idxs(NumElts);
1056 for (unsigned i = 0; i != NumElts; ++i)
1057 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1059 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1061 if (CI->getNumArgOperands() == 4)
1062 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1063 CI->getArgOperand(2));
1064 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1065 Name == "sse2.pshuf.d" ||
1066 Name.startswith("avx512.mask.vpermil.p") ||
1067 Name.startswith("avx512.mask.pshuf.d."))) {
1068 Value *Op0 = CI->getArgOperand(0);
1069 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1070 VectorType *VecTy = cast<VectorType>(CI->getType());
1071 unsigned NumElts = VecTy->getNumElements();
1072 // Calculate the size of each index in the immediate.
1073 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1074 unsigned IdxMask = ((1 << IdxSize) - 1);
1076 SmallVector<uint32_t, 8> Idxs(NumElts);
1077 // Lookup the bits for this element, wrapping around the immediate every
1078 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1079 // to offset by the first index of each group.
1080 for (unsigned i = 0; i != NumElts; ++i)
1081 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1083 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1085 if (CI->getNumArgOperands() == 4)
1086 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1087 CI->getArgOperand(2));
1088 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1089 Name.startswith("avx512.mask.pshufl.w."))) {
1090 Value *Op0 = CI->getArgOperand(0);
1091 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1092 unsigned NumElts = CI->getType()->getVectorNumElements();
1094 SmallVector<uint32_t, 16> Idxs(NumElts);
1095 for (unsigned l = 0; l != NumElts; l += 8) {
1096 for (unsigned i = 0; i != 4; ++i)
1097 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1098 for (unsigned i = 4; i != 8; ++i)
1099 Idxs[i + l] = i + l;
1102 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1104 if (CI->getNumArgOperands() == 4)
1105 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1106 CI->getArgOperand(2));
1107 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1108 Name.startswith("avx512.mask.pshufh.w."))) {
1109 Value *Op0 = CI->getArgOperand(0);
1110 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1111 unsigned NumElts = CI->getType()->getVectorNumElements();
1113 SmallVector<uint32_t, 16> Idxs(NumElts);
1114 for (unsigned l = 0; l != NumElts; l += 8) {
1115 for (unsigned i = 0; i != 4; ++i)
1116 Idxs[i + l] = i + l;
1117 for (unsigned i = 0; i != 4; ++i)
1118 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1121 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1123 if (CI->getNumArgOperands() == 4)
1124 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1125 CI->getArgOperand(2));
1126 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1127 Name.startswith("avx512.mask.movshdup") ||
1128 Name.startswith("avx512.mask.movsldup"))) {
1129 Value *Op0 = CI->getArgOperand(0);
1130 unsigned NumElts = CI->getType()->getVectorNumElements();
1131 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1133 unsigned Offset = 0;
1134 if (Name.startswith("avx512.mask.movshdup."))
1137 SmallVector<uint32_t, 16> Idxs(NumElts);
1138 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1139 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1140 Idxs[i + l + 0] = i + l + Offset;
1141 Idxs[i + l + 1] = i + l + Offset;
1144 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1146 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1147 CI->getArgOperand(1));
1148 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1149 Name.startswith("avx512.mask.unpckl."))) {
1150 Value *Op0 = CI->getArgOperand(0);
1151 Value *Op1 = CI->getArgOperand(1);
1152 int NumElts = CI->getType()->getVectorNumElements();
1153 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1155 SmallVector<uint32_t, 64> Idxs(NumElts);
1156 for (int l = 0; l != NumElts; l += NumLaneElts)
1157 for (int i = 0; i != NumLaneElts; ++i)
1158 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1160 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1162 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1163 CI->getArgOperand(2));
1164 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1165 Name.startswith("avx512.mask.unpckh."))) {
1166 Value *Op0 = CI->getArgOperand(0);
1167 Value *Op1 = CI->getArgOperand(1);
1168 int NumElts = CI->getType()->getVectorNumElements();
1169 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1171 SmallVector<uint32_t, 64> Idxs(NumElts);
1172 for (int l = 0; l != NumElts; l += NumLaneElts)
1173 for (int i = 0; i != NumLaneElts; ++i)
1174 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1176 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1178 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1179 CI->getArgOperand(2));
1180 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1181 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1182 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1183 CI->getArgOperand(2));
1184 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1185 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1186 CI->getArgOperand(1));
1187 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1188 CI->getArgOperand(2));
1189 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1190 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1191 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1192 CI->getArgOperand(2));
1193 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1194 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1195 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1196 CI->getArgOperand(2));
1198 llvm_unreachable("Unknown function for CallInst upgrade.");
1202 CI->replaceAllUsesWith(Rep);
1203 CI->eraseFromParent();
1207 std::string Name = CI->getName();
1209 CI->setName(Name + ".old");
1211 switch (NewFn->getIntrinsicID()) {
1213 llvm_unreachable("Unknown function for CallInst upgrade.");
1215 case Intrinsic::x86_avx512_mask_psll_di_512:
1216 case Intrinsic::x86_avx512_mask_psra_di_512:
1217 case Intrinsic::x86_avx512_mask_psrl_di_512:
1218 case Intrinsic::x86_avx512_mask_psll_qi_512:
1219 case Intrinsic::x86_avx512_mask_psra_qi_512:
1220 case Intrinsic::x86_avx512_mask_psrl_qi_512:
1221 case Intrinsic::arm_neon_vld1:
1222 case Intrinsic::arm_neon_vld2:
1223 case Intrinsic::arm_neon_vld3:
1224 case Intrinsic::arm_neon_vld4:
1225 case Intrinsic::arm_neon_vld2lane:
1226 case Intrinsic::arm_neon_vld3lane:
1227 case Intrinsic::arm_neon_vld4lane:
1228 case Intrinsic::arm_neon_vst1:
1229 case Intrinsic::arm_neon_vst2:
1230 case Intrinsic::arm_neon_vst3:
1231 case Intrinsic::arm_neon_vst4:
1232 case Intrinsic::arm_neon_vst2lane:
1233 case Intrinsic::arm_neon_vst3lane:
1234 case Intrinsic::arm_neon_vst4lane: {
1235 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1236 CI->arg_operands().end());
1237 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1238 CI->eraseFromParent();
1242 case Intrinsic::ctlz:
1243 case Intrinsic::cttz:
1244 assert(CI->getNumArgOperands() == 1 &&
1245 "Mismatch between function args and call args");
1246 CI->replaceAllUsesWith(Builder.CreateCall(
1247 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
1248 CI->eraseFromParent();
1251 case Intrinsic::objectsize:
1252 CI->replaceAllUsesWith(Builder.CreateCall(
1253 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
1254 CI->eraseFromParent();
1257 case Intrinsic::ctpop: {
1258 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1259 CI->eraseFromParent();
1263 case Intrinsic::x86_xop_vfrcz_ss:
1264 case Intrinsic::x86_xop_vfrcz_sd:
1265 CI->replaceAllUsesWith(
1266 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
1267 CI->eraseFromParent();
1270 case Intrinsic::x86_xop_vpermil2pd:
1271 case Intrinsic::x86_xop_vpermil2ps:
1272 case Intrinsic::x86_xop_vpermil2pd_256:
1273 case Intrinsic::x86_xop_vpermil2ps_256: {
1274 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1275 CI->arg_operands().end());
1276 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
1277 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
1278 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
1279 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
1280 CI->eraseFromParent();
1284 case Intrinsic::x86_sse41_ptestc:
1285 case Intrinsic::x86_sse41_ptestz:
1286 case Intrinsic::x86_sse41_ptestnzc: {
1287 // The arguments for these intrinsics used to be v4f32, and changed
1288 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
1289 // So, the only thing required is a bitcast for both arguments.
1290 // First, check the arguments have the old type.
1291 Value *Arg0 = CI->getArgOperand(0);
1292 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
1295 // Old intrinsic, add bitcasts
1296 Value *Arg1 = CI->getArgOperand(1);
1298 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1300 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
1301 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1303 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
1304 CI->replaceAllUsesWith(NewCall);
1305 CI->eraseFromParent();
1309 case Intrinsic::x86_sse41_insertps:
1310 case Intrinsic::x86_sse41_dppd:
1311 case Intrinsic::x86_sse41_dpps:
1312 case Intrinsic::x86_sse41_mpsadbw:
1313 case Intrinsic::x86_avx_dp_ps_256:
1314 case Intrinsic::x86_avx2_mpsadbw: {
1315 // Need to truncate the last argument from i32 to i8 -- this argument models
1316 // an inherently 8-bit immediate operand to these x86 instructions.
1317 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1318 CI->arg_operands().end());
1320 // Replace the last argument with a trunc.
1321 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
1323 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
1324 CI->replaceAllUsesWith(NewCall);
1325 CI->eraseFromParent();
1329 case Intrinsic::thread_pointer: {
1330 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
1331 CI->eraseFromParent();
1335 case Intrinsic::masked_load:
1336 case Intrinsic::masked_store: {
1337 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1338 CI->arg_operands().end());
1339 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1340 CI->eraseFromParent();
1346 void llvm::UpgradeCallsToIntrinsic(Function *F) {
1347 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1349 // Check if this function should be upgraded and get the replacement function
1352 if (UpgradeIntrinsicFunction(F, NewFn)) {
1353 // Replace all users of the old function with the new function or new
1354 // instructions. This is not a range loop because the call is deleted.
1355 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
1356 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
1357 UpgradeIntrinsicCall(CI, NewFn);
1359 // Remove old function, no longer used, from the module.
1360 F->eraseFromParent();
1364 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
1365 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
1366 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
1367 // Check if the tag uses struct-path aware TBAA format.
1368 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
1371 if (MD->getNumOperands() == 3) {
1372 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
1373 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
1374 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
1375 Metadata *Elts2[] = {ScalarType, ScalarType,
1376 ConstantAsMetadata::get(Constant::getNullValue(
1377 Type::getInt64Ty(I->getContext()))),
1379 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
1381 // Create a MDNode <MD, MD, offset 0>
1382 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
1383 Type::getInt64Ty(I->getContext())))};
1384 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
1388 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
1389 Instruction *&Temp) {
1390 if (Opc != Instruction::BitCast)
1394 Type *SrcTy = V->getType();
1395 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1396 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1397 LLVMContext &Context = V->getContext();
1399 // We have no information about target data layout, so we assume that
1400 // the maximum pointer size is 64bit.
1401 Type *MidTy = Type::getInt64Ty(Context);
1402 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
1404 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
1410 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
1411 if (Opc != Instruction::BitCast)
1414 Type *SrcTy = C->getType();
1415 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1416 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1417 LLVMContext &Context = C->getContext();
1419 // We have no information about target data layout, so we assume that
1420 // the maximum pointer size is 64bit.
1421 Type *MidTy = Type::getInt64Ty(Context);
1423 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
1430 /// Check the debug info version number, if it is out-dated, drop the debug
1431 /// info. Return true if module is modified.
1432 bool llvm::UpgradeDebugInfo(Module &M) {
1433 unsigned Version = getDebugMetadataVersionFromModule(M);
1434 if (Version == DEBUG_METADATA_VERSION)
1437 bool RetCode = StripDebugInfo(M);
1439 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
1440 M.getContext().diagnose(DiagVersion);
1445 bool llvm::UpgradeModuleFlags(Module &M) {
1446 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
1450 bool HasObjCFlag = false, HasClassProperties = false;
1451 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
1452 MDNode *Op = ModFlags->getOperand(I);
1453 if (Op->getNumOperands() < 2)
1455 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
1458 if (ID->getString() == "Objective-C Image Info Version")
1460 if (ID->getString() == "Objective-C Class Properties")
1461 HasClassProperties = true;
1463 // "Objective-C Class Properties" is recently added for Objective-C. We
1464 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
1465 // flag of value 0, so we can correclty report error when trying to link
1466 // an ObjC bitcode without this module flag with an ObjC bitcode with this
1468 if (HasObjCFlag && !HasClassProperties) {
1469 M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
1476 static bool isOldLoopArgument(Metadata *MD) {
1477 auto *T = dyn_cast_or_null<MDTuple>(MD);
1480 if (T->getNumOperands() < 1)
1482 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
1485 return S->getString().startswith("llvm.vectorizer.");
1488 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
1489 StringRef OldPrefix = "llvm.vectorizer.";
1490 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
1492 if (OldTag == "llvm.vectorizer.unroll")
1493 return MDString::get(C, "llvm.loop.interleave.count");
1495 return MDString::get(
1496 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
1500 static Metadata *upgradeLoopArgument(Metadata *MD) {
1501 auto *T = dyn_cast_or_null<MDTuple>(MD);
1504 if (T->getNumOperands() < 1)
1506 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
1509 if (!OldTag->getString().startswith("llvm.vectorizer."))
1512 // This has an old tag. Upgrade it.
1513 SmallVector<Metadata *, 8> Ops;
1514 Ops.reserve(T->getNumOperands());
1515 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
1516 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
1517 Ops.push_back(T->getOperand(I));
1519 return MDTuple::get(T->getContext(), Ops);
1522 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
1523 auto *T = dyn_cast<MDTuple>(&N);
1527 if (!llvm::any_of(T->operands(), isOldLoopArgument))
1530 SmallVector<Metadata *, 8> Ops;
1531 Ops.reserve(T->getNumOperands());
1532 for (Metadata *MD : T->operands())
1533 Ops.push_back(upgradeLoopArgument(MD));
1535 return MDTuple::get(T->getContext(), Ops);