1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
36 // Upgrade the declarations of the SSE4.1 functions whose arguments have
37 // changed their type from v4f32 to v2i64.
38 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
40 // Check whether this is an old version of the function, which received
42 Type *Arg0Type = F->getFunctionType()->getParamType(0);
43 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
46 // Yes, it's old, replace it with new version.
48 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
56 // Check that the last argument is an i32.
57 Type *LastArgType = F->getFunctionType()->getParamType(
58 F->getFunctionType()->getNumParams() - 1);
59 if (!LastArgType->isIntegerTy(32))
62 // Move this function aside and map down.
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
68 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
69 assert(F && "Illegal to upgrade a non-existent Function.");
71 // Quickly eliminate it, if it's not a candidate.
72 StringRef Name = F->getName();
73 if (Name.size() <= 8 || !Name.startswith("llvm."))
75 Name = Name.substr(5); // Strip off "llvm."
80 if (Name.startswith("arm.neon.vclz")) {
82 F->arg_begin()->getType(),
83 Type::getInt1Ty(F->getContext())
85 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
86 // the end of the name. Change name from llvm.arm.neon.vclz.* to
88 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
89 NewFn = Function::Create(fType, F->getLinkage(),
90 "llvm.ctlz." + Name.substr(14), F->getParent());
93 if (Name.startswith("arm.neon.vcnt")) {
94 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
95 F->arg_begin()->getType());
98 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
99 if (vldRegex.match(Name)) {
100 auto fArgs = F->getFunctionType()->params();
101 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
102 // Can't use Intrinsic::getDeclaration here as the return types might
103 // then only be structurally equal.
104 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
105 NewFn = Function::Create(fType, F->getLinkage(),
106 "llvm." + Name + ".p0i8", F->getParent());
109 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
110 if (vstRegex.match(Name)) {
111 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
112 Intrinsic::arm_neon_vst2,
113 Intrinsic::arm_neon_vst3,
114 Intrinsic::arm_neon_vst4};
116 static const Intrinsic::ID StoreLaneInts[] = {
117 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
118 Intrinsic::arm_neon_vst4lane
121 auto fArgs = F->getFunctionType()->params();
122 Type *Tys[] = {fArgs[0], fArgs[1]};
123 if (Name.find("lane") == StringRef::npos)
124 NewFn = Intrinsic::getDeclaration(F->getParent(),
125 StoreInts[fArgs.size() - 3], Tys);
127 NewFn = Intrinsic::getDeclaration(F->getParent(),
128 StoreLaneInts[fArgs.size() - 5], Tys);
131 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
132 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
139 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
141 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
142 F->arg_begin()->getType());
145 if (Name.startswith("cttz.") && F->arg_size() == 1) {
147 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
148 F->arg_begin()->getType());
154 if (Name.startswith("invariant.start")) {
155 auto Args = F->getFunctionType()->params();
156 Type* ObjectPtr[1] = {Args[1]};
158 Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
160 NewFn = Intrinsic::getDeclaration(
161 F->getParent(), Intrinsic::invariant_start, ObjectPtr);
165 if (Name.startswith("invariant.end")) {
166 auto Args = F->getFunctionType()->params();
167 Type* ObjectPtr[1] = {Args[2]};
169 Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
171 NewFn = Intrinsic::getDeclaration(F->getParent(),
172 Intrinsic::invariant_end, ObjectPtr);
179 if (Name.startswith("masked.load.")) {
180 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
181 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
183 NewFn = Intrinsic::getDeclaration(F->getParent(),
184 Intrinsic::masked_load,
189 if (Name.startswith("masked.store.")) {
190 auto Args = F->getFunctionType()->params();
191 Type *Tys[] = { Args[0], Args[1] };
192 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
194 NewFn = Intrinsic::getDeclaration(F->getParent(),
195 Intrinsic::masked_store,
204 // We only need to change the name to match the mangling including the
206 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
207 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
208 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
210 NewFn = Intrinsic::getDeclaration(F->getParent(),
211 Intrinsic::objectsize, Tys);
218 if (Name == "stackprotectorcheck") {
225 bool IsX86 = Name.startswith("x86.");
227 Name = Name.substr(4);
229 // All of the intrinsics matches below should be marked with which llvm
230 // version started autoupgrading them. At some point in the future we would
231 // like to use this information to remove upgrade code for some older
232 // intrinsics. It is currently undecided how we will determine that future
235 (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
236 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
237 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
238 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
239 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
240 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
241 Name == "sse.add.ss" || // Added in 4.0
242 Name == "sse2.add.sd" || // Added in 4.0
243 Name == "sse.sub.ss" || // Added in 4.0
244 Name == "sse2.sub.sd" || // Added in 4.0
245 Name == "sse.mul.ss" || // Added in 4.0
246 Name == "sse2.mul.sd" || // Added in 4.0
247 Name == "sse.div.ss" || // Added in 4.0
248 Name == "sse2.div.sd" || // Added in 4.0
249 Name == "sse41.pmaxsb" || // Added in 3.9
250 Name == "sse2.pmaxs.w" || // Added in 3.9
251 Name == "sse41.pmaxsd" || // Added in 3.9
252 Name == "sse2.pmaxu.b" || // Added in 3.9
253 Name == "sse41.pmaxuw" || // Added in 3.9
254 Name == "sse41.pmaxud" || // Added in 3.9
255 Name == "sse41.pminsb" || // Added in 3.9
256 Name == "sse2.pmins.w" || // Added in 3.9
257 Name == "sse41.pminsd" || // Added in 3.9
258 Name == "sse2.pminu.b" || // Added in 3.9
259 Name == "sse41.pminuw" || // Added in 3.9
260 Name == "sse41.pminud" || // Added in 3.9
261 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
262 Name.startswith("avx2.pmax") || // Added in 3.9
263 Name.startswith("avx2.pmin") || // Added in 3.9
264 Name.startswith("avx512.mask.pmax") || // Added in 4.0
265 Name.startswith("avx512.mask.pmin") || // Added in 4.0
266 Name.startswith("avx2.vbroadcast") || // Added in 3.8
267 Name.startswith("avx2.pbroadcast") || // Added in 3.8
268 Name.startswith("avx.vpermil.") || // Added in 3.1
269 Name.startswith("sse2.pshuf") || // Added in 3.9
270 Name.startswith("avx512.pbroadcast") || // Added in 3.9
271 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
272 Name.startswith("avx512.mask.movddup") || // Added in 3.9
273 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
274 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
275 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
276 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
277 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
278 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
279 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
280 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
281 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
282 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
283 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
284 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
285 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
286 Name.startswith("avx512.mask.pand.") || // Added in 3.9
287 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
288 Name.startswith("avx512.mask.por.") || // Added in 3.9
289 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
290 Name.startswith("avx512.mask.and.") || // Added in 3.9
291 Name.startswith("avx512.mask.andn.") || // Added in 3.9
292 Name.startswith("avx512.mask.or.") || // Added in 3.9
293 Name.startswith("avx512.mask.xor.") || // Added in 3.9
294 Name.startswith("avx512.mask.padd.") || // Added in 4.0
295 Name.startswith("avx512.mask.psub.") || // Added in 4.0
296 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
297 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
298 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
299 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
300 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
301 Name == "avx512.mask.add.pd.128" || // Added in 4.0
302 Name == "avx512.mask.add.pd.256" || // Added in 4.0
303 Name == "avx512.mask.add.ps.128" || // Added in 4.0
304 Name == "avx512.mask.add.ps.256" || // Added in 4.0
305 Name == "avx512.mask.div.pd.128" || // Added in 4.0
306 Name == "avx512.mask.div.pd.256" || // Added in 4.0
307 Name == "avx512.mask.div.ps.128" || // Added in 4.0
308 Name == "avx512.mask.div.ps.256" || // Added in 4.0
309 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
310 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
311 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
312 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
313 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
314 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
315 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
316 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
317 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
318 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
319 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
320 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
321 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
322 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
323 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
324 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
325 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
326 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
327 Name.startswith("avx512.mask.pslli") || // Added in 4.0
328 Name.startswith("avx512.mask.psrai") || // Added in 4.0
329 Name.startswith("avx512.mask.psrli") || // Added in 4.0
330 Name.startswith("avx512.mask.psllv") || // Added in 4.0
331 Name.startswith("avx512.mask.psrav") || // Added in 4.0
332 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
333 Name.startswith("sse41.pmovsx") || // Added in 3.8
334 Name.startswith("sse41.pmovzx") || // Added in 3.9
335 Name.startswith("avx2.pmovsx") || // Added in 3.9
336 Name.startswith("avx2.pmovzx") || // Added in 3.9
337 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
338 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
339 Name == "sse2.cvtdq2pd" || // Added in 3.9
340 Name == "sse2.cvtps2pd" || // Added in 3.9
341 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
342 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
343 Name.startswith("avx.vinsertf128.") || // Added in 3.7
344 Name == "avx2.vinserti128" || // Added in 3.7
345 Name.startswith("avx.vextractf128.") || // Added in 3.7
346 Name == "avx2.vextracti128" || // Added in 3.7
347 Name.startswith("sse4a.movnt.") || // Added in 3.9
348 Name.startswith("avx.movnt.") || // Added in 3.2
349 Name.startswith("avx512.storent.") || // Added in 3.9
350 Name == "sse2.storel.dq" || // Added in 3.9
351 Name.startswith("sse.storeu.") || // Added in 3.9
352 Name.startswith("sse2.storeu.") || // Added in 3.9
353 Name.startswith("avx.storeu.") || // Added in 3.9
354 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
355 Name.startswith("avx512.mask.store.p") || // Added in 3.9
356 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
357 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
358 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
359 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
360 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361 Name.startswith("avx512.mask.load.") || // Added in 3.9
362 Name == "sse42.crc32.64.8" || // Added in 3.4
363 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
364 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
365 Name.startswith("avx512.mask.valign.") || // Added in 4.0
366 Name.startswith("sse2.psll.dq") || // Added in 3.7
367 Name.startswith("sse2.psrl.dq") || // Added in 3.7
368 Name.startswith("avx2.psll.dq") || // Added in 3.7
369 Name.startswith("avx2.psrl.dq") || // Added in 3.7
370 Name.startswith("avx512.psll.dq") || // Added in 3.9
371 Name.startswith("avx512.psrl.dq") || // Added in 3.9
372 Name == "sse41.pblendw" || // Added in 3.7
373 Name.startswith("sse41.blendp") || // Added in 3.7
374 Name.startswith("avx.blend.p") || // Added in 3.7
375 Name == "avx2.pblendw" || // Added in 3.7
376 Name.startswith("avx2.pblendd.") || // Added in 3.7
377 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
378 Name == "avx2.vbroadcasti128" || // Added in 3.7
379 Name == "xop.vpcmov" || // Added in 3.8
380 Name.startswith("avx512.mask.move.s") || // Added in 4.0
381 (Name.startswith("xop.vpcom") && // Added in 3.2
382 F->arg_size() == 2))) {
386 // SSE4.1 ptest functions may have an old signature.
387 if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
388 if (Name.substr(11) == "c")
389 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
390 if (Name.substr(11) == "z")
391 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
392 if (Name.substr(11) == "nzc")
393 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
395 // Several blend and other instructions with masks used the wrong number of
397 if (IsX86 && Name == "sse41.insertps") // Added in 3.6
398 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
400 if (IsX86 && Name == "sse41.dppd") // Added in 3.6
401 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
403 if (IsX86 && Name == "sse41.dpps") // Added in 3.6
404 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
406 if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
407 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
409 if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
410 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
412 if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
413 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
416 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
417 if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
419 NewFn = Intrinsic::getDeclaration(F->getParent(),
420 Intrinsic::x86_xop_vfrcz_ss);
423 if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
425 NewFn = Intrinsic::getDeclaration(F->getParent(),
426 Intrinsic::x86_xop_vfrcz_sd);
429 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
430 if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
431 auto Params = F->getFunctionType()->params();
432 auto Idx = Params[2];
433 if (Idx->getScalarType()->isFloatingPointTy()) {
435 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
436 unsigned EltSize = Idx->getScalarSizeInBits();
437 Intrinsic::ID Permil2ID;
438 if (EltSize == 64 && IdxSize == 128)
439 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
440 else if (EltSize == 32 && IdxSize == 128)
441 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
442 else if (EltSize == 64 && IdxSize == 256)
443 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
445 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
446 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
454 // This may not belong here. This function is effectively being overloaded
455 // to both detect an intrinsic which needs upgrading, and to provide the
456 // upgraded form of the intrinsic. We should perhaps have two separate
457 // functions for this.
461 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
463 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
464 assert(F != NewFn && "Intrinsic function upgraded to the same function");
466 // Upgrade intrinsic attributes. This does not change the function.
469 if (Intrinsic::ID id = F->getIntrinsicID())
470 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
474 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
475 // Nothing to do yet.
479 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
481 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
482 Value *Op, unsigned Shift) {
483 Type *ResultTy = Op->getType();
484 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
486 // Bitcast from a 64-bit element type to a byte element type.
487 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
488 Op = Builder.CreateBitCast(Op, VecTy, "cast");
490 // We'll be shuffling in zeroes.
491 Value *Res = Constant::getNullValue(VecTy);
493 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
494 // we'll just return the zero vector.
497 // 256/512-bit version is split into 2/4 16-byte lanes.
498 for (unsigned l = 0; l != NumElts; l += 16)
499 for (unsigned i = 0; i != 16; ++i) {
500 unsigned Idx = NumElts + i - Shift;
502 Idx -= NumElts - 16; // end of lane, switch operand.
503 Idxs[l + i] = Idx + l;
506 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
509 // Bitcast back to a 64-bit element type.
510 return Builder.CreateBitCast(Res, ResultTy, "cast");
513 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
515 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
517 Type *ResultTy = Op->getType();
518 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
520 // Bitcast from a 64-bit element type to a byte element type.
521 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
522 Op = Builder.CreateBitCast(Op, VecTy, "cast");
524 // We'll be shuffling in zeroes.
525 Value *Res = Constant::getNullValue(VecTy);
527 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
528 // we'll just return the zero vector.
531 // 256/512-bit version is split into 2/4 16-byte lanes.
532 for (unsigned l = 0; l != NumElts; l += 16)
533 for (unsigned i = 0; i != 16; ++i) {
534 unsigned Idx = i + Shift;
536 Idx += NumElts - 16; // end of lane, switch operand.
537 Idxs[l + i] = Idx + l;
540 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
543 // Bitcast back to a 64-bit element type.
544 return Builder.CreateBitCast(Res, ResultTy, "cast");
547 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
549 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
550 cast<IntegerType>(Mask->getType())->getBitWidth());
551 Mask = Builder.CreateBitCast(Mask, MaskTy);
553 // If we have less than 8 elements, then the starting mask was an i8 and
554 // we need to extract down to the right number of elements.
557 for (unsigned i = 0; i != NumElts; ++i)
559 Mask = Builder.CreateShuffleVector(Mask, Mask,
560 makeArrayRef(Indices, NumElts),
567 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
568 Value *Op0, Value *Op1) {
569 // If the mask is all ones just emit the align operation.
570 if (const auto *C = dyn_cast<Constant>(Mask))
571 if (C->isAllOnesValue())
574 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
575 return Builder.CreateSelect(Mask, Op0, Op1);
578 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
579 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
580 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
581 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
582 Value *Op1, Value *Shift,
583 Value *Passthru, Value *Mask,
585 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
587 unsigned NumElts = Op0->getType()->getVectorNumElements();
588 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
589 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
590 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
592 // Mask the immediate for VALIGN.
594 ShiftVal &= (NumElts - 1);
596 // If palignr is shifting the pair of vectors more than the size of two
599 return llvm::Constant::getNullValue(Op0->getType());
601 // If palignr is shifting the pair of input vectors more than one lane,
602 // but less than two lanes, convert to shifting in zeroes.
606 Op0 = llvm::Constant::getNullValue(Op0->getType());
609 uint32_t Indices[64];
610 // 256-bit palignr operates on 128-bit lanes so we need to handle that
611 for (unsigned l = 0; l < NumElts; l += 16) {
612 for (unsigned i = 0; i != 16; ++i) {
613 unsigned Idx = ShiftVal + i;
614 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
615 Idx += NumElts - 16; // End of lane, switch operand.
616 Indices[l + i] = Idx + l;
620 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
621 makeArrayRef(Indices, NumElts),
624 return EmitX86Select(Builder, Mask, Align, Passthru);
627 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
628 Value *Ptr, Value *Data, Value *Mask,
630 // Cast the pointer to the right type.
631 Ptr = Builder.CreateBitCast(Ptr,
632 llvm::PointerType::getUnqual(Data->getType()));
634 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
636 // If the mask is all ones just emit a regular store.
637 if (const auto *C = dyn_cast<Constant>(Mask))
638 if (C->isAllOnesValue())
639 return Builder.CreateAlignedStore(Data, Ptr, Align);
641 // Convert the mask from an integer type to a vector of i1.
642 unsigned NumElts = Data->getType()->getVectorNumElements();
643 Mask = getX86MaskVec(Builder, Mask, NumElts);
644 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
647 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
648 Value *Ptr, Value *Passthru, Value *Mask,
650 // Cast the pointer to the right type.
651 Ptr = Builder.CreateBitCast(Ptr,
652 llvm::PointerType::getUnqual(Passthru->getType()));
654 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
656 // If the mask is all ones just emit a regular store.
657 if (const auto *C = dyn_cast<Constant>(Mask))
658 if (C->isAllOnesValue())
659 return Builder.CreateAlignedLoad(Ptr, Align);
661 // Convert the mask from an integer type to a vector of i1.
662 unsigned NumElts = Passthru->getType()->getVectorNumElements();
663 Mask = getX86MaskVec(Builder, Mask, NumElts);
664 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
667 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
668 ICmpInst::Predicate Pred) {
669 Value *Op0 = CI.getArgOperand(0);
670 Value *Op1 = CI.getArgOperand(1);
671 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
672 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
674 if (CI.getNumArgOperands() == 4)
675 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
680 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
681 ICmpInst::Predicate Pred) {
682 Value *Op0 = CI.getArgOperand(0);
683 unsigned NumElts = Op0->getType()->getVectorNumElements();
684 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
686 Value *Mask = CI.getArgOperand(2);
687 const auto *C = dyn_cast<Constant>(Mask);
688 if (!C || !C->isAllOnesValue())
689 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
693 for (unsigned i = 0; i != NumElts; ++i)
695 for (unsigned i = NumElts; i != 8; ++i)
696 Indices[i] = NumElts + i % NumElts;
697 Cmp = Builder.CreateShuffleVector(Cmp,
698 Constant::getNullValue(Cmp->getType()),
701 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
702 std::max(NumElts, 8U)));
705 // Replace a masked intrinsic with an older unmasked intrinsic.
706 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
708 Function *F = CI.getCalledFunction();
709 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
710 Value *Rep = Builder.CreateCall(Intrin,
711 { CI.getArgOperand(0), CI.getArgOperand(1) });
712 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
715 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
716 Value* A = CI.getArgOperand(0);
717 Value* B = CI.getArgOperand(1);
718 Value* Src = CI.getArgOperand(2);
719 Value* Mask = CI.getArgOperand(3);
721 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
722 Value* Cmp = Builder.CreateIsNotNull(AndNode);
723 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
724 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
725 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
726 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
729 /// Upgrade a call to an old intrinsic. All argument and return casting must be
730 /// provided to seamlessly integrate with existing context.
731 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
732 Function *F = CI->getCalledFunction();
733 LLVMContext &C = CI->getContext();
734 IRBuilder<> Builder(C);
735 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
737 assert(F && "Intrinsic call is not direct?");
740 // Get the Function's name.
741 StringRef Name = F->getName();
743 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
744 Name = Name.substr(5);
746 bool IsX86 = Name.startswith("x86.");
748 Name = Name.substr(4);
750 if (IsX86 && Name.startswith("sse4a.movnt.")) {
751 Module *M = F->getParent();
752 SmallVector<Metadata *, 1> Elts;
754 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
755 MDNode *Node = MDNode::get(C, Elts);
757 Value *Arg0 = CI->getArgOperand(0);
758 Value *Arg1 = CI->getArgOperand(1);
760 // Nontemporal (unaligned) store of the 0'th element of the float/double
762 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
763 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
764 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
766 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
768 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
769 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
772 CI->eraseFromParent();
776 if (IsX86 && (Name.startswith("avx.movnt.") ||
777 Name.startswith("avx512.storent."))) {
778 Module *M = F->getParent();
779 SmallVector<Metadata *, 1> Elts;
781 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
782 MDNode *Node = MDNode::get(C, Elts);
784 Value *Arg0 = CI->getArgOperand(0);
785 Value *Arg1 = CI->getArgOperand(1);
787 // Convert the type of the pointer to a pointer to the stored type.
788 Value *BC = Builder.CreateBitCast(Arg0,
789 PointerType::getUnqual(Arg1->getType()),
791 VectorType *VTy = cast<VectorType>(Arg1->getType());
792 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
793 VTy->getBitWidth() / 8);
794 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
797 CI->eraseFromParent();
801 if (IsX86 && Name == "sse2.storel.dq") {
802 Value *Arg0 = CI->getArgOperand(0);
803 Value *Arg1 = CI->getArgOperand(1);
805 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
806 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
807 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
808 Value *BC = Builder.CreateBitCast(Arg0,
809 PointerType::getUnqual(Elt->getType()),
811 Builder.CreateAlignedStore(Elt, BC, 1);
814 CI->eraseFromParent();
818 if (IsX86 && (Name.startswith("sse.storeu.") ||
819 Name.startswith("sse2.storeu.") ||
820 Name.startswith("avx.storeu."))) {
821 Value *Arg0 = CI->getArgOperand(0);
822 Value *Arg1 = CI->getArgOperand(1);
824 Arg0 = Builder.CreateBitCast(Arg0,
825 PointerType::getUnqual(Arg1->getType()),
827 Builder.CreateAlignedStore(Arg1, Arg0, 1);
830 CI->eraseFromParent();
834 if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
835 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
836 CI->getArgOperand(2), /*Aligned*/false);
839 CI->eraseFromParent();
843 if (IsX86 && (Name.startswith("avx512.mask.store."))) {
844 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
845 CI->getArgOperand(2), /*Aligned*/true);
848 CI->eraseFromParent();
853 // Upgrade packed integer vector compare intrinsics to compare instructions.
854 if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
855 Name.startswith("avx2.pcmpeq."))) {
856 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
858 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
859 } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
860 Name.startswith("avx2.pcmpgt."))) {
861 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
863 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
864 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
865 Type *I32Ty = Type::getInt32Ty(C);
866 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
867 ConstantInt::get(I32Ty, 0));
868 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
869 ConstantInt::get(I32Ty, 0));
870 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
871 Builder.CreateFAdd(Elt0, Elt1),
872 ConstantInt::get(I32Ty, 0));
873 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
874 Type *I32Ty = Type::getInt32Ty(C);
875 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
876 ConstantInt::get(I32Ty, 0));
877 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
878 ConstantInt::get(I32Ty, 0));
879 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
880 Builder.CreateFSub(Elt0, Elt1),
881 ConstantInt::get(I32Ty, 0));
882 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
883 Type *I32Ty = Type::getInt32Ty(C);
884 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
885 ConstantInt::get(I32Ty, 0));
886 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
887 ConstantInt::get(I32Ty, 0));
888 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
889 Builder.CreateFMul(Elt0, Elt1),
890 ConstantInt::get(I32Ty, 0));
891 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
892 Type *I32Ty = Type::getInt32Ty(C);
893 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
894 ConstantInt::get(I32Ty, 0));
895 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
896 ConstantInt::get(I32Ty, 0));
897 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
898 Builder.CreateFDiv(Elt0, Elt1),
899 ConstantInt::get(I32Ty, 0));
900 } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
901 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
902 } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
903 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
904 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
905 Name == "sse2.pmaxs.w" ||
906 Name == "sse41.pmaxsd" ||
907 Name.startswith("avx2.pmaxs") ||
908 Name.startswith("avx512.mask.pmaxs"))) {
909 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
910 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
911 Name == "sse41.pmaxuw" ||
912 Name == "sse41.pmaxud" ||
913 Name.startswith("avx2.pmaxu") ||
914 Name.startswith("avx512.mask.pmaxu"))) {
915 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
916 } else if (IsX86 && (Name == "sse41.pminsb" ||
917 Name == "sse2.pmins.w" ||
918 Name == "sse41.pminsd" ||
919 Name.startswith("avx2.pmins") ||
920 Name.startswith("avx512.mask.pmins"))) {
921 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
922 } else if (IsX86 && (Name == "sse2.pminu.b" ||
923 Name == "sse41.pminuw" ||
924 Name == "sse41.pminud" ||
925 Name.startswith("avx2.pminu") ||
926 Name.startswith("avx512.mask.pminu"))) {
927 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
928 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
929 Name == "sse2.cvtps2pd" ||
930 Name == "avx.cvtdq2.pd.256" ||
931 Name == "avx.cvt.ps2.pd.256" ||
932 Name.startswith("avx512.mask.cvtdq2pd.") ||
933 Name.startswith("avx512.mask.cvtudq2pd."))) {
934 // Lossless i32/float to double conversion.
935 // Extract the bottom elements if necessary and convert to double vector.
936 Value *Src = CI->getArgOperand(0);
937 VectorType *SrcTy = cast<VectorType>(Src->getType());
938 VectorType *DstTy = cast<VectorType>(CI->getType());
939 Rep = CI->getArgOperand(0);
941 unsigned NumDstElts = DstTy->getNumElements();
942 if (NumDstElts < SrcTy->getNumElements()) {
943 assert(NumDstElts == 2 && "Unexpected vector size");
944 uint32_t ShuffleMask[2] = { 0, 1 };
945 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
949 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
950 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
952 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
953 else if (UInt2Double)
954 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
956 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
958 if (CI->getNumArgOperands() == 3)
959 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
960 CI->getArgOperand(1));
961 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
962 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
963 CI->getArgOperand(1), CI->getArgOperand(2),
965 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
966 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
967 CI->getArgOperand(1),CI->getArgOperand(2),
969 } else if (IsX86 && Name.startswith("xop.vpcom")) {
971 if (Name.endswith("ub"))
972 intID = Intrinsic::x86_xop_vpcomub;
973 else if (Name.endswith("uw"))
974 intID = Intrinsic::x86_xop_vpcomuw;
975 else if (Name.endswith("ud"))
976 intID = Intrinsic::x86_xop_vpcomud;
977 else if (Name.endswith("uq"))
978 intID = Intrinsic::x86_xop_vpcomuq;
979 else if (Name.endswith("b"))
980 intID = Intrinsic::x86_xop_vpcomb;
981 else if (Name.endswith("w"))
982 intID = Intrinsic::x86_xop_vpcomw;
983 else if (Name.endswith("d"))
984 intID = Intrinsic::x86_xop_vpcomd;
985 else if (Name.endswith("q"))
986 intID = Intrinsic::x86_xop_vpcomq;
988 llvm_unreachable("Unknown suffix");
990 Name = Name.substr(9); // strip off "xop.vpcom"
992 if (Name.startswith("lt"))
994 else if (Name.startswith("le"))
996 else if (Name.startswith("gt"))
998 else if (Name.startswith("ge"))
1000 else if (Name.startswith("eq"))
1002 else if (Name.startswith("ne"))
1004 else if (Name.startswith("false"))
1006 else if (Name.startswith("true"))
1009 llvm_unreachable("Unknown condition");
1011 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1013 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1014 Builder.getInt8(Imm)});
1015 } else if (IsX86 && Name == "xop.vpcmov") {
1016 Value *Arg0 = CI->getArgOperand(0);
1017 Value *Arg1 = CI->getArgOperand(1);
1018 Value *Sel = CI->getArgOperand(2);
1019 unsigned NumElts = CI->getType()->getVectorNumElements();
1020 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
1021 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
1022 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
1023 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
1024 Rep = Builder.CreateOr(Sel0, Sel1);
1025 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1026 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1027 Intrinsic::x86_sse42_crc32_32_8);
1028 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1029 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1030 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1031 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1032 // Replace broadcasts with a series of insertelements.
1033 Type *VecTy = CI->getType();
1034 Type *EltTy = VecTy->getVectorElementType();
1035 unsigned EltNum = VecTy->getVectorNumElements();
1036 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1037 EltTy->getPointerTo());
1038 Value *Load = Builder.CreateLoad(EltTy, Cast);
1039 Type *I32Ty = Type::getInt32Ty(C);
1040 Rep = UndefValue::get(VecTy);
1041 for (unsigned I = 0; I < EltNum; ++I)
1042 Rep = Builder.CreateInsertElement(Rep, Load,
1043 ConstantInt::get(I32Ty, I));
1044 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1045 Name.startswith("sse41.pmovzx") ||
1046 Name.startswith("avx2.pmovsx") ||
1047 Name.startswith("avx2.pmovzx") ||
1048 Name.startswith("avx512.mask.pmovsx") ||
1049 Name.startswith("avx512.mask.pmovzx"))) {
1050 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1051 VectorType *DstTy = cast<VectorType>(CI->getType());
1052 unsigned NumDstElts = DstTy->getNumElements();
1054 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1055 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1056 for (unsigned i = 0; i != NumDstElts; ++i)
1059 Value *SV = Builder.CreateShuffleVector(
1060 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1062 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1063 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1064 : Builder.CreateZExt(SV, DstTy);
1065 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1066 if (CI->getNumArgOperands() == 3)
1067 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1068 CI->getArgOperand(1));
1069 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1070 Name == "avx2.vbroadcasti128")) {
1071 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1072 Type *EltTy = CI->getType()->getVectorElementType();
1073 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1074 Type *VT = VectorType::get(EltTy, NumSrcElts);
1075 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1076 PointerType::getUnqual(VT));
1077 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1078 if (NumSrcElts == 2)
1079 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1082 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1083 { 0, 1, 2, 3, 0, 1, 2, 3 });
1084 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1085 Name.startswith("avx2.vbroadcast") ||
1086 Name.startswith("avx512.pbroadcast") ||
1087 Name.startswith("avx512.mask.broadcast.s"))) {
1088 // Replace vp?broadcasts with a vector shuffle.
1089 Value *Op = CI->getArgOperand(0);
1090 unsigned NumElts = CI->getType()->getVectorNumElements();
1091 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1092 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1093 Constant::getNullValue(MaskTy));
1095 if (CI->getNumArgOperands() == 3)
1096 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1097 CI->getArgOperand(1));
1098 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1099 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1100 CI->getArgOperand(1),
1101 CI->getArgOperand(2),
1102 CI->getArgOperand(3),
1103 CI->getArgOperand(4),
1105 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1106 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1107 CI->getArgOperand(1),
1108 CI->getArgOperand(2),
1109 CI->getArgOperand(3),
1110 CI->getArgOperand(4),
1112 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1113 Name == "avx2.psll.dq")) {
1114 // 128/256-bit shift left specified in bits.
1115 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1116 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1117 Shift / 8); // Shift is in bits.
1118 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1119 Name == "avx2.psrl.dq")) {
1120 // 128/256-bit shift right specified in bits.
1121 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1122 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1123 Shift / 8); // Shift is in bits.
1124 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1125 Name == "avx2.psll.dq.bs" ||
1126 Name == "avx512.psll.dq.512")) {
1127 // 128/256/512-bit shift left specified in bytes.
1128 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1129 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1130 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1131 Name == "avx2.psrl.dq.bs" ||
1132 Name == "avx512.psrl.dq.512")) {
1133 // 128/256/512-bit shift right specified in bytes.
1134 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1135 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1136 } else if (IsX86 && (Name == "sse41.pblendw" ||
1137 Name.startswith("sse41.blendp") ||
1138 Name.startswith("avx.blend.p") ||
1139 Name == "avx2.pblendw" ||
1140 Name.startswith("avx2.pblendd."))) {
1141 Value *Op0 = CI->getArgOperand(0);
1142 Value *Op1 = CI->getArgOperand(1);
1143 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1144 VectorType *VecTy = cast<VectorType>(CI->getType());
1145 unsigned NumElts = VecTy->getNumElements();
1147 SmallVector<uint32_t, 16> Idxs(NumElts);
1148 for (unsigned i = 0; i != NumElts; ++i)
1149 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1151 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1152 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1153 Name == "avx2.vinserti128")) {
1154 Value *Op0 = CI->getArgOperand(0);
1155 Value *Op1 = CI->getArgOperand(1);
1156 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1157 VectorType *VecTy = cast<VectorType>(CI->getType());
1158 unsigned NumElts = VecTy->getNumElements();
1160 // Mask off the high bits of the immediate value; hardware ignores those.
1163 // Extend the second operand into a vector that is twice as big.
1164 Value *UndefV = UndefValue::get(Op1->getType());
1165 SmallVector<uint32_t, 8> Idxs(NumElts);
1166 for (unsigned i = 0; i != NumElts; ++i)
1168 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1170 // Insert the second operand into the first operand.
1172 // Note that there is no guarantee that instruction lowering will actually
1173 // produce a vinsertf128 instruction for the created shuffles. In
1174 // particular, the 0 immediate case involves no lane changes, so it can
1175 // be handled as a blend.
1177 // Example of shuffle mask for 32-bit elements:
1178 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1179 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1181 // The low half of the result is either the low half of the 1st operand
1182 // or the low half of the 2nd operand (the inserted vector).
1183 for (unsigned i = 0; i != NumElts / 2; ++i)
1184 Idxs[i] = Imm ? i : (i + NumElts);
1185 // The high half of the result is either the low half of the 2nd operand
1186 // (the inserted vector) or the high half of the 1st operand.
1187 for (unsigned i = NumElts / 2; i != NumElts; ++i)
1188 Idxs[i] = Imm ? (i + NumElts / 2) : i;
1189 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1190 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1191 Name == "avx2.vextracti128")) {
1192 Value *Op0 = CI->getArgOperand(0);
1193 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1194 VectorType *VecTy = cast<VectorType>(CI->getType());
1195 unsigned NumElts = VecTy->getNumElements();
1197 // Mask off the high bits of the immediate value; hardware ignores those.
1200 // Get indexes for either the high half or low half of the input vector.
1201 SmallVector<uint32_t, 4> Idxs(NumElts);
1202 for (unsigned i = 0; i != NumElts; ++i) {
1203 Idxs[i] = Imm ? (i + NumElts) : i;
1206 Value *UndefV = UndefValue::get(Op0->getType());
1207 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
1208 } else if (!IsX86 && Name == "stackprotectorcheck") {
1210 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1211 Name.startswith("avx512.mask.perm.di."))) {
1212 Value *Op0 = CI->getArgOperand(0);
1213 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1214 VectorType *VecTy = cast<VectorType>(CI->getType());
1215 unsigned NumElts = VecTy->getNumElements();
1217 SmallVector<uint32_t, 8> Idxs(NumElts);
1218 for (unsigned i = 0; i != NumElts; ++i)
1219 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1221 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1223 if (CI->getNumArgOperands() == 4)
1224 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1225 CI->getArgOperand(2));
1226 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1227 Name == "sse2.pshuf.d" ||
1228 Name.startswith("avx512.mask.vpermil.p") ||
1229 Name.startswith("avx512.mask.pshuf.d."))) {
1230 Value *Op0 = CI->getArgOperand(0);
1231 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1232 VectorType *VecTy = cast<VectorType>(CI->getType());
1233 unsigned NumElts = VecTy->getNumElements();
1234 // Calculate the size of each index in the immediate.
1235 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1236 unsigned IdxMask = ((1 << IdxSize) - 1);
1238 SmallVector<uint32_t, 8> Idxs(NumElts);
1239 // Lookup the bits for this element, wrapping around the immediate every
1240 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1241 // to offset by the first index of each group.
1242 for (unsigned i = 0; i != NumElts; ++i)
1243 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1245 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1247 if (CI->getNumArgOperands() == 4)
1248 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1249 CI->getArgOperand(2));
1250 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1251 Name.startswith("avx512.mask.pshufl.w."))) {
1252 Value *Op0 = CI->getArgOperand(0);
1253 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1254 unsigned NumElts = CI->getType()->getVectorNumElements();
1256 SmallVector<uint32_t, 16> Idxs(NumElts);
1257 for (unsigned l = 0; l != NumElts; l += 8) {
1258 for (unsigned i = 0; i != 4; ++i)
1259 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1260 for (unsigned i = 4; i != 8; ++i)
1261 Idxs[i + l] = i + l;
1264 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1266 if (CI->getNumArgOperands() == 4)
1267 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1268 CI->getArgOperand(2));
1269 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1270 Name.startswith("avx512.mask.pshufh.w."))) {
1271 Value *Op0 = CI->getArgOperand(0);
1272 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1273 unsigned NumElts = CI->getType()->getVectorNumElements();
1275 SmallVector<uint32_t, 16> Idxs(NumElts);
1276 for (unsigned l = 0; l != NumElts; l += 8) {
1277 for (unsigned i = 0; i != 4; ++i)
1278 Idxs[i + l] = i + l;
1279 for (unsigned i = 0; i != 4; ++i)
1280 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1283 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1285 if (CI->getNumArgOperands() == 4)
1286 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1287 CI->getArgOperand(2));
1288 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1289 Value *Op0 = CI->getArgOperand(0);
1290 Value *Op1 = CI->getArgOperand(1);
1291 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1292 unsigned NumElts = CI->getType()->getVectorNumElements();
1294 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1295 unsigned HalfLaneElts = NumLaneElts / 2;
1297 SmallVector<uint32_t, 16> Idxs(NumElts);
1298 for (unsigned i = 0; i != NumElts; ++i) {
1299 // Base index is the starting element of the lane.
1300 Idxs[i] = i - (i % NumLaneElts);
1301 // If we are half way through the lane switch to the other source.
1302 if ((i % NumLaneElts) >= HalfLaneElts)
1304 // Now select the specific element by adding HalfLaneElts bits from
1305 // the immediate, wrapping around the immediate every 8 bits.
1306 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1309 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1311 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1312 CI->getArgOperand(3));
1313 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1314 Name.startswith("avx512.mask.movshdup") ||
1315 Name.startswith("avx512.mask.movsldup"))) {
1316 Value *Op0 = CI->getArgOperand(0);
1317 unsigned NumElts = CI->getType()->getVectorNumElements();
1318 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1320 unsigned Offset = 0;
1321 if (Name.startswith("avx512.mask.movshdup."))
1324 SmallVector<uint32_t, 16> Idxs(NumElts);
1325 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1326 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1327 Idxs[i + l + 0] = i + l + Offset;
1328 Idxs[i + l + 1] = i + l + Offset;
1331 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1333 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1334 CI->getArgOperand(1));
1335 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1336 Name.startswith("avx512.mask.unpckl."))) {
1337 Value *Op0 = CI->getArgOperand(0);
1338 Value *Op1 = CI->getArgOperand(1);
1339 int NumElts = CI->getType()->getVectorNumElements();
1340 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1342 SmallVector<uint32_t, 64> Idxs(NumElts);
1343 for (int l = 0; l != NumElts; l += NumLaneElts)
1344 for (int i = 0; i != NumLaneElts; ++i)
1345 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1347 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1349 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1350 CI->getArgOperand(2));
1351 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1352 Name.startswith("avx512.mask.unpckh."))) {
1353 Value *Op0 = CI->getArgOperand(0);
1354 Value *Op1 = CI->getArgOperand(1);
1355 int NumElts = CI->getType()->getVectorNumElements();
1356 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1358 SmallVector<uint32_t, 64> Idxs(NumElts);
1359 for (int l = 0; l != NumElts; l += NumLaneElts)
1360 for (int i = 0; i != NumLaneElts; ++i)
1361 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1363 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1365 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1366 CI->getArgOperand(2));
1367 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1368 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1369 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1370 CI->getArgOperand(2));
1371 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1372 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1373 CI->getArgOperand(1));
1374 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1375 CI->getArgOperand(2));
1376 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1377 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1378 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1379 CI->getArgOperand(2));
1380 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1381 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1382 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1383 CI->getArgOperand(2));
1384 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1385 VectorType *FTy = cast<VectorType>(CI->getType());
1386 VectorType *ITy = VectorType::getInteger(FTy);
1387 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1388 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1389 Rep = Builder.CreateBitCast(Rep, FTy);
1390 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1391 CI->getArgOperand(2));
1392 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1393 VectorType *FTy = cast<VectorType>(CI->getType());
1394 VectorType *ITy = VectorType::getInteger(FTy);
1395 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1396 Rep = Builder.CreateAnd(Rep,
1397 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1398 Rep = Builder.CreateBitCast(Rep, FTy);
1399 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1400 CI->getArgOperand(2));
1401 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1402 VectorType *FTy = cast<VectorType>(CI->getType());
1403 VectorType *ITy = VectorType::getInteger(FTy);
1404 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1405 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1406 Rep = Builder.CreateBitCast(Rep, FTy);
1407 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1408 CI->getArgOperand(2));
1409 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1410 VectorType *FTy = cast<VectorType>(CI->getType());
1411 VectorType *ITy = VectorType::getInteger(FTy);
1412 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1413 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1414 Rep = Builder.CreateBitCast(Rep, FTy);
1415 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1416 CI->getArgOperand(2));
1417 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1418 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1419 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1420 CI->getArgOperand(2));
1421 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1422 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1423 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1424 CI->getArgOperand(2));
1425 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1426 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1427 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1428 CI->getArgOperand(2));
1429 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1430 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1431 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1432 CI->getArgOperand(2));
1433 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1434 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1435 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1436 CI->getArgOperand(2));
1437 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1438 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1439 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1440 CI->getArgOperand(2));
1441 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1442 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1443 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1444 CI->getArgOperand(2));
1445 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1446 VectorType *VecTy = cast<VectorType>(CI->getType());
1448 if (VecTy->getPrimitiveSizeInBits() == 128)
1449 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1450 else if (VecTy->getPrimitiveSizeInBits() == 256)
1451 IID = Intrinsic::x86_avx2_pshuf_b;
1452 else if (VecTy->getPrimitiveSizeInBits() == 512)
1453 IID = Intrinsic::x86_avx512_pshuf_b_512;
1455 llvm_unreachable("Unexpected intrinsic");
1457 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1458 { CI->getArgOperand(0), CI->getArgOperand(1) });
1459 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1460 CI->getArgOperand(2));
1461 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1462 Name.startswith("avx512.mask.pmulu.dq."))) {
1463 bool IsUnsigned = Name[16] == 'u';
1464 VectorType *VecTy = cast<VectorType>(CI->getType());
1466 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1467 IID = Intrinsic::x86_sse41_pmuldq;
1468 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1469 IID = Intrinsic::x86_avx2_pmul_dq;
1470 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1471 IID = Intrinsic::x86_avx512_pmul_dq_512;
1472 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1473 IID = Intrinsic::x86_sse2_pmulu_dq;
1474 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1475 IID = Intrinsic::x86_avx2_pmulu_dq;
1476 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1477 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1479 llvm_unreachable("Unexpected intrinsic");
1481 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1482 { CI->getArgOperand(0), CI->getArgOperand(1) });
1483 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1484 CI->getArgOperand(2));
1485 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1486 bool IsImmediate = Name[16] == 'i' ||
1487 (Name.size() > 18 && Name[18] == 'i');
1488 bool IsVariable = Name[16] == 'v';
1489 char Size = Name[16] == '.' ? Name[17] :
1490 Name[17] == '.' ? Name[18] :
1491 Name[18] == '.' ? Name[19] :
1495 if (IsVariable && Name[17] != '.') {
1496 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1497 IID = Intrinsic::x86_avx2_psllv_q;
1498 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1499 IID = Intrinsic::x86_avx2_psllv_q_256;
1500 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1501 IID = Intrinsic::x86_avx2_psllv_d;
1502 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1503 IID = Intrinsic::x86_avx2_psllv_d_256;
1504 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1505 IID = Intrinsic::x86_avx512_psllv_w_128;
1506 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1507 IID = Intrinsic::x86_avx512_psllv_w_256;
1508 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1509 IID = Intrinsic::x86_avx512_psllv_w_512;
1511 llvm_unreachable("Unexpected size");
1512 } else if (Name.endswith(".128")) {
1513 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1514 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1515 : Intrinsic::x86_sse2_psll_d;
1516 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1517 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1518 : Intrinsic::x86_sse2_psll_q;
1519 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1520 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1521 : Intrinsic::x86_sse2_psll_w;
1523 llvm_unreachable("Unexpected size");
1524 } else if (Name.endswith(".256")) {
1525 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1526 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1527 : Intrinsic::x86_avx2_psll_d;
1528 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1529 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1530 : Intrinsic::x86_avx2_psll_q;
1531 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1532 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1533 : Intrinsic::x86_avx2_psll_w;
1535 llvm_unreachable("Unexpected size");
1537 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1538 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1539 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1540 Intrinsic::x86_avx512_psll_d_512;
1541 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1542 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1543 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1544 Intrinsic::x86_avx512_psll_q_512;
1545 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1546 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1547 : Intrinsic::x86_avx512_psll_w_512;
1549 llvm_unreachable("Unexpected size");
1552 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1553 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1554 bool IsImmediate = Name[16] == 'i' ||
1555 (Name.size() > 18 && Name[18] == 'i');
1556 bool IsVariable = Name[16] == 'v';
1557 char Size = Name[16] == '.' ? Name[17] :
1558 Name[17] == '.' ? Name[18] :
1559 Name[18] == '.' ? Name[19] :
1563 if (IsVariable && Name[17] != '.') {
1564 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1565 IID = Intrinsic::x86_avx2_psrlv_q;
1566 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1567 IID = Intrinsic::x86_avx2_psrlv_q_256;
1568 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1569 IID = Intrinsic::x86_avx2_psrlv_d;
1570 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1571 IID = Intrinsic::x86_avx2_psrlv_d_256;
1572 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1573 IID = Intrinsic::x86_avx512_psrlv_w_128;
1574 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1575 IID = Intrinsic::x86_avx512_psrlv_w_256;
1576 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1577 IID = Intrinsic::x86_avx512_psrlv_w_512;
1579 llvm_unreachable("Unexpected size");
1580 } else if (Name.endswith(".128")) {
1581 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1582 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1583 : Intrinsic::x86_sse2_psrl_d;
1584 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1585 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1586 : Intrinsic::x86_sse2_psrl_q;
1587 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1588 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1589 : Intrinsic::x86_sse2_psrl_w;
1591 llvm_unreachable("Unexpected size");
1592 } else if (Name.endswith(".256")) {
1593 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1594 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1595 : Intrinsic::x86_avx2_psrl_d;
1596 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1597 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1598 : Intrinsic::x86_avx2_psrl_q;
1599 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1600 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1601 : Intrinsic::x86_avx2_psrl_w;
1603 llvm_unreachable("Unexpected size");
1605 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1606 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1607 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1608 Intrinsic::x86_avx512_psrl_d_512;
1609 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1610 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1611 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1612 Intrinsic::x86_avx512_psrl_q_512;
1613 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1614 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1615 : Intrinsic::x86_avx512_psrl_w_512;
1617 llvm_unreachable("Unexpected size");
1620 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1621 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1622 bool IsImmediate = Name[16] == 'i' ||
1623 (Name.size() > 18 && Name[18] == 'i');
1624 bool IsVariable = Name[16] == 'v';
1625 char Size = Name[16] == '.' ? Name[17] :
1626 Name[17] == '.' ? Name[18] :
1627 Name[18] == '.' ? Name[19] :
1631 if (IsVariable && Name[17] != '.') {
1632 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1633 IID = Intrinsic::x86_avx2_psrav_d;
1634 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1635 IID = Intrinsic::x86_avx2_psrav_d_256;
1636 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1637 IID = Intrinsic::x86_avx512_psrav_w_128;
1638 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1639 IID = Intrinsic::x86_avx512_psrav_w_256;
1640 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1641 IID = Intrinsic::x86_avx512_psrav_w_512;
1643 llvm_unreachable("Unexpected size");
1644 } else if (Name.endswith(".128")) {
1645 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1646 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1647 : Intrinsic::x86_sse2_psra_d;
1648 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1649 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1650 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1651 Intrinsic::x86_avx512_psra_q_128;
1652 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1653 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1654 : Intrinsic::x86_sse2_psra_w;
1656 llvm_unreachable("Unexpected size");
1657 } else if (Name.endswith(".256")) {
1658 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1659 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1660 : Intrinsic::x86_avx2_psra_d;
1661 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1662 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1663 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1664 Intrinsic::x86_avx512_psra_q_256;
1665 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1666 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1667 : Intrinsic::x86_avx2_psra_w;
1669 llvm_unreachable("Unexpected size");
1671 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1672 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1673 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1674 Intrinsic::x86_avx512_psra_d_512;
1675 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1676 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1677 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1678 Intrinsic::x86_avx512_psra_q_512;
1679 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1680 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1681 : Intrinsic::x86_avx512_psra_w_512;
1683 llvm_unreachable("Unexpected size");
1686 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1687 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1688 Rep = upgradeMaskedMove(Builder, *CI);
1689 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1691 if (Name.endswith("ps.128"))
1692 IID = Intrinsic::x86_avx_vpermilvar_ps;
1693 else if (Name.endswith("pd.128"))
1694 IID = Intrinsic::x86_avx_vpermilvar_pd;
1695 else if (Name.endswith("ps.256"))
1696 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1697 else if (Name.endswith("pd.256"))
1698 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1699 else if (Name.endswith("ps.512"))
1700 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1701 else if (Name.endswith("pd.512"))
1702 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1704 llvm_unreachable("Unexpected vpermilvar intrinsic");
1706 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1707 Rep = Builder.CreateCall(Intrin,
1708 { CI->getArgOperand(0), CI->getArgOperand(1) });
1709 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1710 CI->getArgOperand(2));
1712 llvm_unreachable("Unknown function for CallInst upgrade.");
1716 CI->replaceAllUsesWith(Rep);
1717 CI->eraseFromParent();
1721 std::string Name = CI->getName();
1723 CI->setName(Name + ".old");
1725 switch (NewFn->getIntrinsicID()) {
1727 llvm_unreachable("Unknown function for CallInst upgrade.");
1729 case Intrinsic::arm_neon_vld1:
1730 case Intrinsic::arm_neon_vld2:
1731 case Intrinsic::arm_neon_vld3:
1732 case Intrinsic::arm_neon_vld4:
1733 case Intrinsic::arm_neon_vld2lane:
1734 case Intrinsic::arm_neon_vld3lane:
1735 case Intrinsic::arm_neon_vld4lane:
1736 case Intrinsic::arm_neon_vst1:
1737 case Intrinsic::arm_neon_vst2:
1738 case Intrinsic::arm_neon_vst3:
1739 case Intrinsic::arm_neon_vst4:
1740 case Intrinsic::arm_neon_vst2lane:
1741 case Intrinsic::arm_neon_vst3lane:
1742 case Intrinsic::arm_neon_vst4lane: {
1743 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1744 CI->arg_operands().end());
1745 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1746 CI->eraseFromParent();
1750 case Intrinsic::ctlz:
1751 case Intrinsic::cttz:
1752 assert(CI->getNumArgOperands() == 1 &&
1753 "Mismatch between function args and call args");
1754 CI->replaceAllUsesWith(Builder.CreateCall(
1755 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
1756 CI->eraseFromParent();
1759 case Intrinsic::objectsize:
1760 CI->replaceAllUsesWith(Builder.CreateCall(
1761 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
1762 CI->eraseFromParent();
1765 case Intrinsic::ctpop: {
1766 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1767 CI->eraseFromParent();
1771 case Intrinsic::x86_xop_vfrcz_ss:
1772 case Intrinsic::x86_xop_vfrcz_sd:
1773 CI->replaceAllUsesWith(
1774 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
1775 CI->eraseFromParent();
1778 case Intrinsic::x86_xop_vpermil2pd:
1779 case Intrinsic::x86_xop_vpermil2ps:
1780 case Intrinsic::x86_xop_vpermil2pd_256:
1781 case Intrinsic::x86_xop_vpermil2ps_256: {
1782 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1783 CI->arg_operands().end());
1784 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
1785 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
1786 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
1787 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
1788 CI->eraseFromParent();
1792 case Intrinsic::x86_sse41_ptestc:
1793 case Intrinsic::x86_sse41_ptestz:
1794 case Intrinsic::x86_sse41_ptestnzc: {
1795 // The arguments for these intrinsics used to be v4f32, and changed
1796 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
1797 // So, the only thing required is a bitcast for both arguments.
1798 // First, check the arguments have the old type.
1799 Value *Arg0 = CI->getArgOperand(0);
1800 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
1803 // Old intrinsic, add bitcasts
1804 Value *Arg1 = CI->getArgOperand(1);
1806 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1808 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
1809 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1811 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
1812 CI->replaceAllUsesWith(NewCall);
1813 CI->eraseFromParent();
1817 case Intrinsic::x86_sse41_insertps:
1818 case Intrinsic::x86_sse41_dppd:
1819 case Intrinsic::x86_sse41_dpps:
1820 case Intrinsic::x86_sse41_mpsadbw:
1821 case Intrinsic::x86_avx_dp_ps_256:
1822 case Intrinsic::x86_avx2_mpsadbw: {
1823 // Need to truncate the last argument from i32 to i8 -- this argument models
1824 // an inherently 8-bit immediate operand to these x86 instructions.
1825 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1826 CI->arg_operands().end());
1828 // Replace the last argument with a trunc.
1829 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
1831 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
1832 CI->replaceAllUsesWith(NewCall);
1833 CI->eraseFromParent();
1837 case Intrinsic::thread_pointer: {
1838 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
1839 CI->eraseFromParent();
1843 case Intrinsic::invariant_start:
1844 case Intrinsic::invariant_end:
1845 case Intrinsic::masked_load:
1846 case Intrinsic::masked_store: {
1847 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1848 CI->arg_operands().end());
1849 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1850 CI->eraseFromParent();
1856 void llvm::UpgradeCallsToIntrinsic(Function *F) {
1857 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1859 // Check if this function should be upgraded and get the replacement function
1862 if (UpgradeIntrinsicFunction(F, NewFn)) {
1863 // Replace all users of the old function with the new function or new
1864 // instructions. This is not a range loop because the call is deleted.
1865 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
1866 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
1867 UpgradeIntrinsicCall(CI, NewFn);
1869 // Remove old function, no longer used, from the module.
1870 F->eraseFromParent();
1874 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
1875 // Check if the tag uses struct-path aware TBAA format.
1876 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
1879 auto &Context = MD.getContext();
1880 if (MD.getNumOperands() == 3) {
1881 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
1882 MDNode *ScalarType = MDNode::get(Context, Elts);
1883 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
1884 Metadata *Elts2[] = {ScalarType, ScalarType,
1885 ConstantAsMetadata::get(
1886 Constant::getNullValue(Type::getInt64Ty(Context))),
1888 return MDNode::get(Context, Elts2);
1890 // Create a MDNode <MD, MD, offset 0>
1891 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
1892 Type::getInt64Ty(Context)))};
1893 return MDNode::get(Context, Elts);
1896 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
1897 Instruction *&Temp) {
1898 if (Opc != Instruction::BitCast)
1902 Type *SrcTy = V->getType();
1903 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1904 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1905 LLVMContext &Context = V->getContext();
1907 // We have no information about target data layout, so we assume that
1908 // the maximum pointer size is 64bit.
1909 Type *MidTy = Type::getInt64Ty(Context);
1910 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
1912 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
1918 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
1919 if (Opc != Instruction::BitCast)
1922 Type *SrcTy = C->getType();
1923 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1924 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1925 LLVMContext &Context = C->getContext();
1927 // We have no information about target data layout, so we assume that
1928 // the maximum pointer size is 64bit.
1929 Type *MidTy = Type::getInt64Ty(Context);
1931 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
1938 /// Check the debug info version number, if it is out-dated, drop the debug
1939 /// info. Return true if module is modified.
1940 bool llvm::UpgradeDebugInfo(Module &M) {
1941 unsigned Version = getDebugMetadataVersionFromModule(M);
1942 if (Version == DEBUG_METADATA_VERSION)
1945 bool RetCode = StripDebugInfo(M);
1947 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
1948 M.getContext().diagnose(DiagVersion);
1953 bool llvm::UpgradeModuleFlags(Module &M) {
1954 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
1958 bool HasObjCFlag = false, HasClassProperties = false;
1959 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
1960 MDNode *Op = ModFlags->getOperand(I);
1961 if (Op->getNumOperands() < 2)
1963 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
1966 if (ID->getString() == "Objective-C Image Info Version")
1968 if (ID->getString() == "Objective-C Class Properties")
1969 HasClassProperties = true;
1971 // "Objective-C Class Properties" is recently added for Objective-C. We
1972 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
1973 // flag of value 0, so we can correclty downgrade this flag when trying to
1974 // link an ObjC bitcode without this module flag with an ObjC bitcode with
1975 // this module flag.
1976 if (HasObjCFlag && !HasClassProperties) {
1977 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
1984 static bool isOldLoopArgument(Metadata *MD) {
1985 auto *T = dyn_cast_or_null<MDTuple>(MD);
1988 if (T->getNumOperands() < 1)
1990 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
1993 return S->getString().startswith("llvm.vectorizer.");
1996 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
1997 StringRef OldPrefix = "llvm.vectorizer.";
1998 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2000 if (OldTag == "llvm.vectorizer.unroll")
2001 return MDString::get(C, "llvm.loop.interleave.count");
2003 return MDString::get(
2004 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2008 static Metadata *upgradeLoopArgument(Metadata *MD) {
2009 auto *T = dyn_cast_or_null<MDTuple>(MD);
2012 if (T->getNumOperands() < 1)
2014 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2017 if (!OldTag->getString().startswith("llvm.vectorizer."))
2020 // This has an old tag. Upgrade it.
2021 SmallVector<Metadata *, 8> Ops;
2022 Ops.reserve(T->getNumOperands());
2023 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2024 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2025 Ops.push_back(T->getOperand(I));
2027 return MDTuple::get(T->getContext(), Ops);
2030 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2031 auto *T = dyn_cast<MDTuple>(&N);
2035 if (none_of(T->operands(), isOldLoopArgument))
2038 SmallVector<Metadata *, 8> Ops;
2039 Ops.reserve(T->getNumOperands());
2040 for (Metadata *MD : T->operands())
2041 Ops.push_back(upgradeLoopArgument(MD));
2043 return MDTuple::get(T->getContext(), Ops);