1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
36 // Upgrade the declarations of the SSE4.1 functions whose arguments have
37 // changed their type from v4f32 to v2i64.
38 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
40 // Check whether this is an old version of the function, which received
42 Type *Arg0Type = F->getFunctionType()->getParamType(0);
43 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
46 // Yes, it's old, replace it with new version.
48 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
56 // Check that the last argument is an i32.
57 Type *LastArgType = F->getFunctionType()->getParamType(
58 F->getFunctionType()->getNumParams() - 1);
59 if (!LastArgType->isIntegerTy(32))
62 // Move this function aside and map down.
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Decide whether the declaration F is a deprecated intrinsic that needs
// upgrading.  Returns true when an upgrade is required; NewFn is set to the
// replacement declaration when a direct mapping exists, and is left for
// UpgradeIntrinsicCall to handle otherwise.
//
// NOTE(review): this chunk appears to have lost a number of physical lines
// during extraction (several `return` statements, closing braces, an `else`,
// and some condition continuations).  The visible tokens are preserved
// untouched below; reconcile against the upstream file before compiling.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."

  // --- ARM NEON upgrades -------------------------------------------------
  // vclz.* maps onto the generic llvm.ctlz.* intrinsic.
  if (Name.startswith("arm.neon.vclz")) {
      F->arg_begin()->getType(),
      Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());
  // vcnt.* maps onto the generic population-count intrinsic.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());
  // NEON loads gained an explicit i8* pointer in the mangled name.
  Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vldRegex.match(Name)) {
    auto fArgs = F->getFunctionType()->params();
    SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
    // Can't use Intrinsic::getDeclaration here as the return types might
    // then only be structurally equal.
    FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm." + Name + ".p0i8", F->getParent());
  // NEON stores: select the right vstN / vstNlane replacement by arg count.
  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vstRegex.match(Name)) {
    static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                              Intrinsic::arm_neon_vst2,
                                              Intrinsic::arm_neon_vst3,
                                              Intrinsic::arm_neon_vst4};

    static const Intrinsic::ID StoreLaneInts[] = {
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    // vstN takes N+2 args, vstNlane takes N+3; index the tables accordingly.
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);
  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);

  // --- Generic intrinsics ------------------------------------------------
  // ctlz/cttz grew a second "is_zero_undef" argument; single-arg forms are old.
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());
  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());
  // invariant.start/end became overloaded on the object pointer type; only
  // upgrade when the mangled name doesn't match the current scheme.
  if (Name.startswith("invariant.start")) {
    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[1]};
        Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::invariant_start, ObjectPtr);
  if (Name.startswith("invariant.end")) {
    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[2]};
        Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::invariant_end, ObjectPtr);
  // masked.load/store remangling: name must include the pointer type.
  if (Name.startswith("masked.load.")) {
    Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_load,
  if (Name.startswith("masked.store.")) {
    auto Args = F->getFunctionType()->params();
    Type *Tys[] = { Args[0], Args[1] };
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_store,
  // We only need to change the name to match the mangling including the
  if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::objectsize, Tys);
  if (Name == "stackprotectorcheck") {

  // --- X86 upgrades ------------------------------------------------------
  // Strip the "x86." prefix so the matches below stay readable.
  bool IsX86 = Name.startswith("x86.");
    Name = Name.substr(4);

  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // These entries have no 1:1 replacement declaration; UpgradeIntrinsicCall
  // rewrites each call site to plain IR instead.
      (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
       Name == "sse.add.ss" || // Added in 4.0
       Name == "sse2.add.sd" || // Added in 4.0
       Name == "sse.sub.ss" || // Added in 4.0
       Name == "sse2.sub.sd" || // Added in 4.0
       Name == "sse.mul.ss" || // Added in 4.0
       Name == "sse2.mul.sd" || // Added in 4.0
       Name == "sse.div.ss" || // Added in 4.0
       Name == "sse2.div.sd" || // Added in 4.0
       Name == "sse41.pmaxsb" || // Added in 3.9
       Name == "sse2.pmaxs.w" || // Added in 3.9
       Name == "sse41.pmaxsd" || // Added in 3.9
       Name == "sse2.pmaxu.b" || // Added in 3.9
       Name == "sse41.pmaxuw" || // Added in 3.9
       Name == "sse41.pmaxud" || // Added in 3.9
       Name == "sse41.pminsb" || // Added in 3.9
       Name == "sse2.pmins.w" || // Added in 3.9
       Name == "sse41.pminsd" || // Added in 3.9
       Name == "sse2.pminu.b" || // Added in 3.9
       Name == "sse41.pminuw" || // Added in 3.9
       Name == "sse41.pminud" || // Added in 3.9
       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
       Name.startswith("avx2.pmax") || // Added in 3.9
       Name.startswith("avx2.pmin") || // Added in 3.9
       Name.startswith("avx512.mask.pmax") || // Added in 4.0
       Name.startswith("avx512.mask.pmin") || // Added in 4.0
       Name.startswith("avx2.vbroadcast") || // Added in 3.8
       Name.startswith("avx2.pbroadcast") || // Added in 3.8
       Name.startswith("avx.vpermil.") || // Added in 3.1
       Name.startswith("sse2.pshuf") || // Added in 3.9
       Name.startswith("avx512.pbroadcast") || // Added in 3.9
       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
       Name.startswith("avx512.mask.movddup") || // Added in 3.9
       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
       Name.startswith("avx512.mask.pand.") || // Added in 3.9
       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
       Name.startswith("avx512.mask.por.") || // Added in 3.9
       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
       Name.startswith("avx512.mask.and.") || // Added in 3.9
       Name.startswith("avx512.mask.andn.") || // Added in 3.9
       Name.startswith("avx512.mask.or.") || // Added in 3.9
       Name.startswith("avx512.mask.xor.") || // Added in 3.9
       Name.startswith("avx512.mask.padd.") || // Added in 4.0
       Name.startswith("avx512.mask.psub.") || // Added in 4.0
       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
       Name == "avx512.mask.add.pd.128" || // Added in 4.0
       Name == "avx512.mask.add.pd.256" || // Added in 4.0
       Name == "avx512.mask.add.ps.128" || // Added in 4.0
       Name == "avx512.mask.add.ps.256" || // Added in 4.0
       Name == "avx512.mask.div.pd.128" || // Added in 4.0
       Name == "avx512.mask.div.pd.256" || // Added in 4.0
       Name == "avx512.mask.div.ps.128" || // Added in 4.0
       Name == "avx512.mask.div.ps.256" || // Added in 4.0
       Name == "avx512.mask.mul.pd.128" || // Added in 4.0
       Name == "avx512.mask.mul.pd.256" || // Added in 4.0
       Name == "avx512.mask.mul.ps.128" || // Added in 4.0
       Name == "avx512.mask.mul.ps.256" || // Added in 4.0
       Name == "avx512.mask.sub.pd.128" || // Added in 4.0
       Name == "avx512.mask.sub.pd.256" || // Added in 4.0
       Name == "avx512.mask.sub.ps.128" || // Added in 4.0
       Name == "avx512.mask.sub.ps.256" || // Added in 4.0
       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
       Name.startswith("avx512.mask.pslli") || // Added in 4.0
       Name.startswith("avx512.mask.psrai") || // Added in 4.0
       Name.startswith("avx512.mask.psrli") || // Added in 4.0
       Name.startswith("avx512.mask.psllv") || // Added in 4.0
       Name.startswith("avx512.mask.psrav") || // Added in 4.0
       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
       Name.startswith("sse41.pmovsx") || // Added in 3.8
       Name.startswith("sse41.pmovzx") || // Added in 3.9
       Name.startswith("avx2.pmovsx") || // Added in 3.9
       Name.startswith("avx2.pmovzx") || // Added in 3.9
       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
       Name == "sse2.cvtdq2pd" || // Added in 3.9
       Name == "sse2.cvtps2pd" || // Added in 3.9
       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
       Name.startswith("avx.vinsertf128.") || // Added in 3.7
       Name == "avx2.vinserti128" || // Added in 3.7
       Name.startswith("avx512.mask.insert") || // Added in 4.0
       Name.startswith("avx.vextractf128.") || // Added in 3.7
       Name == "avx2.vextracti128" || // Added in 3.7
       Name.startswith("avx512.mask.vextract") || // Added in 4.0
       Name.startswith("sse4a.movnt.") || // Added in 3.9
       Name.startswith("avx.movnt.") || // Added in 3.2
       Name.startswith("avx512.storent.") || // Added in 3.9
       Name == "sse2.storel.dq" || // Added in 3.9
       Name.startswith("sse.storeu.") || // Added in 3.9
       Name.startswith("sse2.storeu.") || // Added in 3.9
       Name.startswith("avx.storeu.") || // Added in 3.9
       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
       Name.startswith("avx512.mask.store.p") || // Added in 3.9
       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
       Name.startswith("avx512.mask.load.") || // Added in 3.9
       Name == "sse42.crc32.64.8" || // Added in 3.4
       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
       Name.startswith("avx512.mask.valign.") || // Added in 4.0
       Name.startswith("sse2.psll.dq") || // Added in 3.7
       Name.startswith("sse2.psrl.dq") || // Added in 3.7
       Name.startswith("avx2.psll.dq") || // Added in 3.7
       Name.startswith("avx2.psrl.dq") || // Added in 3.7
       Name.startswith("avx512.psll.dq") || // Added in 3.9
       Name.startswith("avx512.psrl.dq") || // Added in 3.9
       Name == "sse41.pblendw" || // Added in 3.7
       Name.startswith("sse41.blendp") || // Added in 3.7
       Name.startswith("avx.blend.p") || // Added in 3.7
       Name == "avx2.pblendw" || // Added in 3.7
       Name.startswith("avx2.pblendd.") || // Added in 3.7
       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
       Name == "avx2.vbroadcasti128" || // Added in 3.7
       Name == "xop.vpcmov" || // Added in 3.8
       Name.startswith("avx512.mask.move.s") || // Added in 4.0
       (Name.startswith("xop.vpcom") && // Added in 3.2
        F->arg_size() == 2))) {

  // SSE4.1 ptest functions may have an old signature.
  if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  // Several blend and other instructions with masks used the wrong number of
  if (IsX86 && Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
  if (IsX86 && Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
  if (IsX86 && Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
  if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
  if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
  if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
  if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Params = F->getFunctionType()->params();
    auto Idx = Params[2];
    if (Idx->getScalarType()->isFloatingPointTy()) {
      // Pick the integer-index variant matching the element/vector widths.
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);

  //  This may not belong here. This function is effectively being overloaded
  //  to both detect an intrinsic which needs upgrading, and to provide the
  //  upgraded form of the intrinsic. We should perhaps have two separate
  //  functions for this.
463 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
465 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
466 assert(F != NewFn && "Intrinsic function upgraded to the same function");
468 // Upgrade intrinsic attributes. This does not change the function.
471 if (Intrinsic::ID id = F->getIntrinsicID())
472 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
476 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
477 // Nothing to do yet.
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to an equivalent byte shuffle that shifts zeroes in from the low end.
//
// Op is a vector of 64-bit elements; Shift is the byte shift amount.
// NOTE(review): some physical lines (the `if (Shift < 16)` guard, the `Idxs`
// declaration and closing braces) appear to have been lost in extraction;
// visible tokens are preserved untouched.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  // Work on bytes: 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to an equivalent byte shuffle that shifts zeroes in from the high end.
//
// Op is a vector of 64-bit elements; Shift is the byte shift amount.
// NOTE(review): some physical lines (the `if (Shift < 16)` guard, the `Idxs`
// declaration and closing braces) appear to have been lost in extraction;
// visible tokens are preserved untouched.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
  Type *ResultTy = Op->getType();
  // Work on bytes: 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer AVX-512 mask value into a <NumElts x i1> vector by
// bitcasting and, when fewer than 8 elements are needed, extracting the low
// lanes with a shuffle.
// NOTE(review): several lines (the `NumElts` parameter continuation, the
// `if (NumElts < 8)` guard, `Indices` declaration and the returns) appear to
// have been lost in extraction; visible tokens are preserved untouched.
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
    for (unsigned i = 0; i != NumElts; ++i)
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
569 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
570 Value *Op0, Value *Op1) {
571 // If the mask is all ones just emit the align operation.
572 if (const auto *C = dyn_cast<Constant>(Mask))
573 if (C->isAllOnesValue())
576 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
577 return Builder.CreateSelect(Mask, Op0, Op1);
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
//
// Op0/Op1 are the two source vectors, Shift the immediate, Passthru/Mask the
// usual AVX-512 merge operands.
// NOTE(review): several lines (the `IsVALIGN` parameter continuation and a
// few `if` guards/braces) appear to have been lost in extraction; visible
// tokens are preserved untouched.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
    Op0 = llvm::Constant::getNullValue(Op0->getType());

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),

  // Merge with the passthru value under the mask.
  return EmitX86Select(Builder, Mask, Align, Passthru);
629 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
630 Value *Ptr, Value *Data, Value *Mask,
632 // Cast the pointer to the right type.
633 Ptr = Builder.CreateBitCast(Ptr,
634 llvm::PointerType::getUnqual(Data->getType()));
636 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
638 // If the mask is all ones just emit a regular store.
639 if (const auto *C = dyn_cast<Constant>(Mask))
640 if (C->isAllOnesValue())
641 return Builder.CreateAlignedStore(Data, Ptr, Align);
643 // Convert the mask from an integer type to a vector of i1.
644 unsigned NumElts = Data->getType()->getVectorNumElements();
645 Mask = getX86MaskVec(Builder, Mask, NumElts);
646 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
649 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
650 Value *Ptr, Value *Passthru, Value *Mask,
652 // Cast the pointer to the right type.
653 Ptr = Builder.CreateBitCast(Ptr,
654 llvm::PointerType::getUnqual(Passthru->getType()));
656 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
658 // If the mask is all ones just emit a regular store.
659 if (const auto *C = dyn_cast<Constant>(Mask))
660 if (C->isAllOnesValue())
661 return Builder.CreateAlignedLoad(Ptr, Align);
663 // Convert the mask from an integer type to a vector of i1.
664 unsigned NumElts = Passthru->getType()->getVectorNumElements();
665 Mask = getX86MaskVec(Builder, Mask, NumElts);
666 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
669 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
670 ICmpInst::Predicate Pred) {
671 Value *Op0 = CI.getArgOperand(0);
672 Value *Op1 = CI.getArgOperand(1);
673 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
674 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
676 if (CI.getNumArgOperands() == 4)
677 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Lower an avx512.mask.pcmpeq/pcmpgt intrinsic to an icmp, AND the result
// with the incoming mask, and bitcast the i1 vector back to an integer mask
// (padded to at least 8 bits).
// NOTE(review): the `if (NumElts < 8)` scaffold around the padding shuffle
// (and its `Indices` declaration/closing lines) appears to have been lost in
// extraction; visible tokens are preserved untouched.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();
  Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));

  // A non-all-ones mask zeroes out the lanes it disables.
  Value *Mask = CI.getArgOperand(2);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  // Pad the compare result out to 8 lanes with zeroes before bitcasting.
    for (unsigned i = 0; i != NumElts; ++i)
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Cmp = Builder.CreateShuffleVector(Cmp,
                                      Constant::getNullValue(Cmp->getType()),
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
707 // Replace a masked intrinsic with an older unmasked intrinsic.
708 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
710 Function *F = CI.getCalledFunction();
711 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
712 Value *Rep = Builder.CreateCall(Intrin,
713 { CI.getArgOperand(0), CI.getArgOperand(1) });
714 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
717 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
718 Value* A = CI.getArgOperand(0);
719 Value* B = CI.getArgOperand(1);
720 Value* Src = CI.getArgOperand(2);
721 Value* Mask = CI.getArgOperand(3);
723 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
724 Value* Cmp = Builder.CreateIsNotNull(AndNode);
725 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
726 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
727 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
728 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
731 /// Upgrade a call to an old intrinsic. All argument and return casting must be
732 /// provided to seamlessly integrate with existing context.
733 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
734 Function *F = CI->getCalledFunction();
735 LLVMContext &C = CI->getContext();
736 IRBuilder<> Builder(C);
737 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
739 assert(F && "Intrinsic call is not direct?");
742 // Get the Function's name.
743 StringRef Name = F->getName();
745 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
746 Name = Name.substr(5);
748 bool IsX86 = Name.startswith("x86.");
750 Name = Name.substr(4);
752 if (IsX86 && Name.startswith("sse4a.movnt.")) {
753 Module *M = F->getParent();
754 SmallVector<Metadata *, 1> Elts;
756 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
757 MDNode *Node = MDNode::get(C, Elts);
759 Value *Arg0 = CI->getArgOperand(0);
760 Value *Arg1 = CI->getArgOperand(1);
762 // Nontemporal (unaligned) store of the 0'th element of the float/double
764 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
765 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
766 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
768 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
770 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
771 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
774 CI->eraseFromParent();
778 if (IsX86 && (Name.startswith("avx.movnt.") ||
779 Name.startswith("avx512.storent."))) {
780 Module *M = F->getParent();
781 SmallVector<Metadata *, 1> Elts;
783 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
784 MDNode *Node = MDNode::get(C, Elts);
786 Value *Arg0 = CI->getArgOperand(0);
787 Value *Arg1 = CI->getArgOperand(1);
789 // Convert the type of the pointer to a pointer to the stored type.
790 Value *BC = Builder.CreateBitCast(Arg0,
791 PointerType::getUnqual(Arg1->getType()),
793 VectorType *VTy = cast<VectorType>(Arg1->getType());
794 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
795 VTy->getBitWidth() / 8);
796 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
799 CI->eraseFromParent();
803 if (IsX86 && Name == "sse2.storel.dq") {
804 Value *Arg0 = CI->getArgOperand(0);
805 Value *Arg1 = CI->getArgOperand(1);
807 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
808 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
809 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
810 Value *BC = Builder.CreateBitCast(Arg0,
811 PointerType::getUnqual(Elt->getType()),
813 Builder.CreateAlignedStore(Elt, BC, 1);
816 CI->eraseFromParent();
820 if (IsX86 && (Name.startswith("sse.storeu.") ||
821 Name.startswith("sse2.storeu.") ||
822 Name.startswith("avx.storeu."))) {
823 Value *Arg0 = CI->getArgOperand(0);
824 Value *Arg1 = CI->getArgOperand(1);
826 Arg0 = Builder.CreateBitCast(Arg0,
827 PointerType::getUnqual(Arg1->getType()),
829 Builder.CreateAlignedStore(Arg1, Arg0, 1);
832 CI->eraseFromParent();
836 if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
837 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
838 CI->getArgOperand(2), /*Aligned*/false);
841 CI->eraseFromParent();
845 if (IsX86 && (Name.startswith("avx512.mask.store."))) {
846 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
847 CI->getArgOperand(2), /*Aligned*/true);
850 CI->eraseFromParent();
855 // Upgrade packed integer vector compare intrinsics to compare instructions.
856 if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
857 Name.startswith("avx2.pcmpeq."))) {
858 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
860 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
861 } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
862 Name.startswith("avx2.pcmpgt."))) {
863 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
865 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
866 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
867 Type *I32Ty = Type::getInt32Ty(C);
868 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
869 ConstantInt::get(I32Ty, 0));
870 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
871 ConstantInt::get(I32Ty, 0));
872 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
873 Builder.CreateFAdd(Elt0, Elt1),
874 ConstantInt::get(I32Ty, 0));
875 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
876 Type *I32Ty = Type::getInt32Ty(C);
877 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
878 ConstantInt::get(I32Ty, 0));
879 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
880 ConstantInt::get(I32Ty, 0));
881 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
882 Builder.CreateFSub(Elt0, Elt1),
883 ConstantInt::get(I32Ty, 0));
884 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
885 Type *I32Ty = Type::getInt32Ty(C);
886 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
887 ConstantInt::get(I32Ty, 0));
888 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
889 ConstantInt::get(I32Ty, 0));
890 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
891 Builder.CreateFMul(Elt0, Elt1),
892 ConstantInt::get(I32Ty, 0));
893 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
894 Type *I32Ty = Type::getInt32Ty(C);
895 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
896 ConstantInt::get(I32Ty, 0));
897 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
898 ConstantInt::get(I32Ty, 0));
899 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
900 Builder.CreateFDiv(Elt0, Elt1),
901 ConstantInt::get(I32Ty, 0));
902 } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
903 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
904 } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
905 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
906 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
907 Name == "sse2.pmaxs.w" ||
908 Name == "sse41.pmaxsd" ||
909 Name.startswith("avx2.pmaxs") ||
910 Name.startswith("avx512.mask.pmaxs"))) {
911 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
912 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
913 Name == "sse41.pmaxuw" ||
914 Name == "sse41.pmaxud" ||
915 Name.startswith("avx2.pmaxu") ||
916 Name.startswith("avx512.mask.pmaxu"))) {
917 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
918 } else if (IsX86 && (Name == "sse41.pminsb" ||
919 Name == "sse2.pmins.w" ||
920 Name == "sse41.pminsd" ||
921 Name.startswith("avx2.pmins") ||
922 Name.startswith("avx512.mask.pmins"))) {
923 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
924 } else if (IsX86 && (Name == "sse2.pminu.b" ||
925 Name == "sse41.pminuw" ||
926 Name == "sse41.pminud" ||
927 Name.startswith("avx2.pminu") ||
928 Name.startswith("avx512.mask.pminu"))) {
929 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
930 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
931 Name == "sse2.cvtps2pd" ||
932 Name == "avx.cvtdq2.pd.256" ||
933 Name == "avx.cvt.ps2.pd.256" ||
934 Name.startswith("avx512.mask.cvtdq2pd.") ||
935 Name.startswith("avx512.mask.cvtudq2pd."))) {
936 // Lossless i32/float to double conversion.
937 // Extract the bottom elements if necessary and convert to double vector.
938 Value *Src = CI->getArgOperand(0);
939 VectorType *SrcTy = cast<VectorType>(Src->getType());
940 VectorType *DstTy = cast<VectorType>(CI->getType());
941 Rep = CI->getArgOperand(0);
943 unsigned NumDstElts = DstTy->getNumElements();
944 if (NumDstElts < SrcTy->getNumElements()) {
945 assert(NumDstElts == 2 && "Unexpected vector size");
946 uint32_t ShuffleMask[2] = { 0, 1 };
947 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
951 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
952 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
954 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
955 else if (UInt2Double)
956 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
958 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
960 if (CI->getNumArgOperands() == 3)
961 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
962 CI->getArgOperand(1));
963 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
964 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
965 CI->getArgOperand(1), CI->getArgOperand(2),
967 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
968 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
969 CI->getArgOperand(1),CI->getArgOperand(2),
971 } else if (IsX86 && Name.startswith("xop.vpcom")) {
973 if (Name.endswith("ub"))
974 intID = Intrinsic::x86_xop_vpcomub;
975 else if (Name.endswith("uw"))
976 intID = Intrinsic::x86_xop_vpcomuw;
977 else if (Name.endswith("ud"))
978 intID = Intrinsic::x86_xop_vpcomud;
979 else if (Name.endswith("uq"))
980 intID = Intrinsic::x86_xop_vpcomuq;
981 else if (Name.endswith("b"))
982 intID = Intrinsic::x86_xop_vpcomb;
983 else if (Name.endswith("w"))
984 intID = Intrinsic::x86_xop_vpcomw;
985 else if (Name.endswith("d"))
986 intID = Intrinsic::x86_xop_vpcomd;
987 else if (Name.endswith("q"))
988 intID = Intrinsic::x86_xop_vpcomq;
990 llvm_unreachable("Unknown suffix");
992 Name = Name.substr(9); // strip off "xop.vpcom"
994 if (Name.startswith("lt"))
996 else if (Name.startswith("le"))
998 else if (Name.startswith("gt"))
1000 else if (Name.startswith("ge"))
1002 else if (Name.startswith("eq"))
1004 else if (Name.startswith("ne"))
1006 else if (Name.startswith("false"))
1008 else if (Name.startswith("true"))
1011 llvm_unreachable("Unknown condition");
1013 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1015 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1016 Builder.getInt8(Imm)});
1017 } else if (IsX86 && Name == "xop.vpcmov") {
1018 Value *Arg0 = CI->getArgOperand(0);
1019 Value *Arg1 = CI->getArgOperand(1);
1020 Value *Sel = CI->getArgOperand(2);
1021 unsigned NumElts = CI->getType()->getVectorNumElements();
1022 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
1023 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
1024 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
1025 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
1026 Rep = Builder.CreateOr(Sel0, Sel1);
1027 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1028 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1029 Intrinsic::x86_sse42_crc32_32_8);
1030 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1031 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1032 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1033 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1034 // Replace broadcasts with a series of insertelements.
1035 Type *VecTy = CI->getType();
1036 Type *EltTy = VecTy->getVectorElementType();
1037 unsigned EltNum = VecTy->getVectorNumElements();
1038 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1039 EltTy->getPointerTo());
1040 Value *Load = Builder.CreateLoad(EltTy, Cast);
1041 Type *I32Ty = Type::getInt32Ty(C);
1042 Rep = UndefValue::get(VecTy);
1043 for (unsigned I = 0; I < EltNum; ++I)
1044 Rep = Builder.CreateInsertElement(Rep, Load,
1045 ConstantInt::get(I32Ty, I));
1046 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1047 Name.startswith("sse41.pmovzx") ||
1048 Name.startswith("avx2.pmovsx") ||
1049 Name.startswith("avx2.pmovzx") ||
1050 Name.startswith("avx512.mask.pmovsx") ||
1051 Name.startswith("avx512.mask.pmovzx"))) {
1052 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1053 VectorType *DstTy = cast<VectorType>(CI->getType());
1054 unsigned NumDstElts = DstTy->getNumElements();
1056 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1057 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1058 for (unsigned i = 0; i != NumDstElts; ++i)
1061 Value *SV = Builder.CreateShuffleVector(
1062 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1064 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1065 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1066 : Builder.CreateZExt(SV, DstTy);
1067 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1068 if (CI->getNumArgOperands() == 3)
1069 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1070 CI->getArgOperand(1));
1071 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1072 Name == "avx2.vbroadcasti128")) {
1073 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1074 Type *EltTy = CI->getType()->getVectorElementType();
1075 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1076 Type *VT = VectorType::get(EltTy, NumSrcElts);
1077 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1078 PointerType::getUnqual(VT));
1079 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1080 if (NumSrcElts == 2)
1081 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1084 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1085 { 0, 1, 2, 3, 0, 1, 2, 3 });
1086 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1087 Name.startswith("avx2.vbroadcast") ||
1088 Name.startswith("avx512.pbroadcast") ||
1089 Name.startswith("avx512.mask.broadcast.s"))) {
1090 // Replace vp?broadcasts with a vector shuffle.
1091 Value *Op = CI->getArgOperand(0);
1092 unsigned NumElts = CI->getType()->getVectorNumElements();
1093 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1094 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1095 Constant::getNullValue(MaskTy));
1097 if (CI->getNumArgOperands() == 3)
1098 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1099 CI->getArgOperand(1));
1100 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1101 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1102 CI->getArgOperand(1),
1103 CI->getArgOperand(2),
1104 CI->getArgOperand(3),
1105 CI->getArgOperand(4),
1107 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1108 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1109 CI->getArgOperand(1),
1110 CI->getArgOperand(2),
1111 CI->getArgOperand(3),
1112 CI->getArgOperand(4),
1114 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1115 Name == "avx2.psll.dq")) {
1116 // 128/256-bit shift left specified in bits.
1117 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1118 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1119 Shift / 8); // Shift is in bits.
1120 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1121 Name == "avx2.psrl.dq")) {
1122 // 128/256-bit shift right specified in bits.
1123 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1124 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1125 Shift / 8); // Shift is in bits.
1126 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1127 Name == "avx2.psll.dq.bs" ||
1128 Name == "avx512.psll.dq.512")) {
1129 // 128/256/512-bit shift left specified in bytes.
1130 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1131 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1132 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1133 Name == "avx2.psrl.dq.bs" ||
1134 Name == "avx512.psrl.dq.512")) {
1135 // 128/256/512-bit shift right specified in bytes.
1136 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1137 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1138 } else if (IsX86 && (Name == "sse41.pblendw" ||
1139 Name.startswith("sse41.blendp") ||
1140 Name.startswith("avx.blend.p") ||
1141 Name == "avx2.pblendw" ||
1142 Name.startswith("avx2.pblendd."))) {
1143 Value *Op0 = CI->getArgOperand(0);
1144 Value *Op1 = CI->getArgOperand(1);
1145 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1146 VectorType *VecTy = cast<VectorType>(CI->getType());
1147 unsigned NumElts = VecTy->getNumElements();
1149 SmallVector<uint32_t, 16> Idxs(NumElts);
1150 for (unsigned i = 0; i != NumElts; ++i)
1151 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1153 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1154 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1155 Name == "avx2.vinserti128" ||
1156 Name.startswith("avx512.mask.insert"))) {
1157 Value *Op0 = CI->getArgOperand(0);
1158 Value *Op1 = CI->getArgOperand(1);
1159 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1160 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1161 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1162 unsigned Scale = DstNumElts / SrcNumElts;
1164 // Mask off the high bits of the immediate value; hardware ignores those.
1167 // Extend the second operand into a vector the size of the destination.
1168 Value *UndefV = UndefValue::get(Op1->getType());
1169 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1170 for (unsigned i = 0; i != SrcNumElts; ++i)
1172 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1173 Idxs[i] = SrcNumElts;
1174 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1176 // Insert the second operand into the first operand.
1178 // Note that there is no guarantee that instruction lowering will actually
1179 // produce a vinsertf128 instruction for the created shuffles. In
1180 // particular, the 0 immediate case involves no lane changes, so it can
1181 // be handled as a blend.
1183 // Example of shuffle mask for 32-bit elements:
1184 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1185 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1187 // First fill with identify mask.
1188 for (unsigned i = 0; i != DstNumElts; ++i)
1190 // Then replace the elements where we need to insert.
1191 for (unsigned i = 0; i != SrcNumElts; ++i)
1192 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1193 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1195 // If the intrinsic has a mask operand, handle that.
1196 if (CI->getNumArgOperands() == 5)
1197 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1198 CI->getArgOperand(3));
1199 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1200 Name == "avx2.vextracti128" ||
1201 Name.startswith("avx512.mask.vextract"))) {
1202 Value *Op0 = CI->getArgOperand(0);
1203 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1204 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1205 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1206 unsigned Scale = SrcNumElts / DstNumElts;
1208 // Mask off the high bits of the immediate value; hardware ignores those.
1211 // Get indexes for the subvector of the input vector.
1212 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1213 for (unsigned i = 0; i != DstNumElts; ++i) {
1214 Idxs[i] = i + (Imm * DstNumElts);
1216 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1218 // If the intrinsic has a mask operand, handle that.
1219 if (CI->getNumArgOperands() == 4)
1220 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1221 CI->getArgOperand(2));
1222 } else if (!IsX86 && Name == "stackprotectorcheck") {
1224 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1225 Name.startswith("avx512.mask.perm.di."))) {
1226 Value *Op0 = CI->getArgOperand(0);
1227 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1228 VectorType *VecTy = cast<VectorType>(CI->getType());
1229 unsigned NumElts = VecTy->getNumElements();
1231 SmallVector<uint32_t, 8> Idxs(NumElts);
1232 for (unsigned i = 0; i != NumElts; ++i)
1233 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1235 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1237 if (CI->getNumArgOperands() == 4)
1238 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1239 CI->getArgOperand(2));
1240 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1241 Name == "sse2.pshuf.d" ||
1242 Name.startswith("avx512.mask.vpermil.p") ||
1243 Name.startswith("avx512.mask.pshuf.d."))) {
1244 Value *Op0 = CI->getArgOperand(0);
1245 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1246 VectorType *VecTy = cast<VectorType>(CI->getType());
1247 unsigned NumElts = VecTy->getNumElements();
1248 // Calculate the size of each index in the immediate.
1249 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1250 unsigned IdxMask = ((1 << IdxSize) - 1);
1252 SmallVector<uint32_t, 8> Idxs(NumElts);
1253 // Lookup the bits for this element, wrapping around the immediate every
1254 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1255 // to offset by the first index of each group.
1256 for (unsigned i = 0; i != NumElts; ++i)
1257 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1259 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1261 if (CI->getNumArgOperands() == 4)
1262 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1263 CI->getArgOperand(2));
1264 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1265 Name.startswith("avx512.mask.pshufl.w."))) {
1266 Value *Op0 = CI->getArgOperand(0);
1267 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1268 unsigned NumElts = CI->getType()->getVectorNumElements();
1270 SmallVector<uint32_t, 16> Idxs(NumElts);
1271 for (unsigned l = 0; l != NumElts; l += 8) {
1272 for (unsigned i = 0; i != 4; ++i)
1273 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1274 for (unsigned i = 4; i != 8; ++i)
1275 Idxs[i + l] = i + l;
1278 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1280 if (CI->getNumArgOperands() == 4)
1281 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1282 CI->getArgOperand(2));
1283 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1284 Name.startswith("avx512.mask.pshufh.w."))) {
1285 Value *Op0 = CI->getArgOperand(0);
1286 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1287 unsigned NumElts = CI->getType()->getVectorNumElements();
1289 SmallVector<uint32_t, 16> Idxs(NumElts);
1290 for (unsigned l = 0; l != NumElts; l += 8) {
1291 for (unsigned i = 0; i != 4; ++i)
1292 Idxs[i + l] = i + l;
1293 for (unsigned i = 0; i != 4; ++i)
1294 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1297 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1299 if (CI->getNumArgOperands() == 4)
1300 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1301 CI->getArgOperand(2));
1302 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1303 Value *Op0 = CI->getArgOperand(0);
1304 Value *Op1 = CI->getArgOperand(1);
1305 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1306 unsigned NumElts = CI->getType()->getVectorNumElements();
1308 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1309 unsigned HalfLaneElts = NumLaneElts / 2;
1311 SmallVector<uint32_t, 16> Idxs(NumElts);
1312 for (unsigned i = 0; i != NumElts; ++i) {
1313 // Base index is the starting element of the lane.
1314 Idxs[i] = i - (i % NumLaneElts);
1315 // If we are half way through the lane switch to the other source.
1316 if ((i % NumLaneElts) >= HalfLaneElts)
1318 // Now select the specific element. By adding HalfLaneElts bits from
1319 // the immediate. Wrapping around the immediate every 8-bits.
1320 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1323 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1325 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1326 CI->getArgOperand(3));
1327 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1328 Name.startswith("avx512.mask.movshdup") ||
1329 Name.startswith("avx512.mask.movsldup"))) {
1330 Value *Op0 = CI->getArgOperand(0);
1331 unsigned NumElts = CI->getType()->getVectorNumElements();
1332 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1334 unsigned Offset = 0;
1335 if (Name.startswith("avx512.mask.movshdup."))
1338 SmallVector<uint32_t, 16> Idxs(NumElts);
1339 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1340 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1341 Idxs[i + l + 0] = i + l + Offset;
1342 Idxs[i + l + 1] = i + l + Offset;
1345 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1347 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1348 CI->getArgOperand(1));
1349 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1350 Name.startswith("avx512.mask.unpckl."))) {
1351 Value *Op0 = CI->getArgOperand(0);
1352 Value *Op1 = CI->getArgOperand(1);
1353 int NumElts = CI->getType()->getVectorNumElements();
1354 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1356 SmallVector<uint32_t, 64> Idxs(NumElts);
1357 for (int l = 0; l != NumElts; l += NumLaneElts)
1358 for (int i = 0; i != NumLaneElts; ++i)
1359 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1361 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1363 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1364 CI->getArgOperand(2));
1365 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1366 Name.startswith("avx512.mask.unpckh."))) {
1367 Value *Op0 = CI->getArgOperand(0);
1368 Value *Op1 = CI->getArgOperand(1);
1369 int NumElts = CI->getType()->getVectorNumElements();
1370 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1372 SmallVector<uint32_t, 64> Idxs(NumElts);
1373 for (int l = 0; l != NumElts; l += NumLaneElts)
1374 for (int i = 0; i != NumLaneElts; ++i)
1375 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1377 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1379 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1380 CI->getArgOperand(2));
1381 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1382 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1383 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1384 CI->getArgOperand(2));
1385 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1386 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1387 CI->getArgOperand(1));
1388 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1389 CI->getArgOperand(2));
1390 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1391 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1392 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1393 CI->getArgOperand(2));
1394 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1395 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1396 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1397 CI->getArgOperand(2));
1398 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1399 VectorType *FTy = cast<VectorType>(CI->getType());
1400 VectorType *ITy = VectorType::getInteger(FTy);
1401 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1402 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1403 Rep = Builder.CreateBitCast(Rep, FTy);
1404 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1405 CI->getArgOperand(2));
1406 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1407 VectorType *FTy = cast<VectorType>(CI->getType());
1408 VectorType *ITy = VectorType::getInteger(FTy);
1409 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1410 Rep = Builder.CreateAnd(Rep,
1411 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1412 Rep = Builder.CreateBitCast(Rep, FTy);
1413 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1414 CI->getArgOperand(2));
1415 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1416 VectorType *FTy = cast<VectorType>(CI->getType());
1417 VectorType *ITy = VectorType::getInteger(FTy);
1418 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1419 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1420 Rep = Builder.CreateBitCast(Rep, FTy);
1421 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1422 CI->getArgOperand(2));
1423 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1424 VectorType *FTy = cast<VectorType>(CI->getType());
1425 VectorType *ITy = VectorType::getInteger(FTy);
1426 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1427 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1428 Rep = Builder.CreateBitCast(Rep, FTy);
1429 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1430 CI->getArgOperand(2));
1431 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1432 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1433 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1434 CI->getArgOperand(2));
1435 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1436 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1437 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1438 CI->getArgOperand(2));
1439 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1440 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1441 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1442 CI->getArgOperand(2));
1443 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1444 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1445 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1446 CI->getArgOperand(2));
1447 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1448 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1449 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1450 CI->getArgOperand(2));
1451 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1452 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1453 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1454 CI->getArgOperand(2));
1455 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1456 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1457 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1458 CI->getArgOperand(2));
1459 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1460 VectorType *VecTy = cast<VectorType>(CI->getType());
1462 if (VecTy->getPrimitiveSizeInBits() == 128)
1463 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1464 else if (VecTy->getPrimitiveSizeInBits() == 256)
1465 IID = Intrinsic::x86_avx2_pshuf_b;
1466 else if (VecTy->getPrimitiveSizeInBits() == 512)
1467 IID = Intrinsic::x86_avx512_pshuf_b_512;
1469 llvm_unreachable("Unexpected intrinsic");
1471 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1472 { CI->getArgOperand(0), CI->getArgOperand(1) });
1473 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1474 CI->getArgOperand(2));
1475 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1476 Name.startswith("avx512.mask.pmulu.dq."))) {
1477 bool IsUnsigned = Name[16] == 'u';
1478 VectorType *VecTy = cast<VectorType>(CI->getType());
1480 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1481 IID = Intrinsic::x86_sse41_pmuldq;
1482 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1483 IID = Intrinsic::x86_avx2_pmul_dq;
1484 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1485 IID = Intrinsic::x86_avx512_pmul_dq_512;
1486 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1487 IID = Intrinsic::x86_sse2_pmulu_dq;
1488 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1489 IID = Intrinsic::x86_avx2_pmulu_dq;
1490 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1491 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1493 llvm_unreachable("Unexpected intrinsic");
1495 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1496 { CI->getArgOperand(0), CI->getArgOperand(1) });
1497 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1498 CI->getArgOperand(2));
1499 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1500 bool IsImmediate = Name[16] == 'i' ||
1501 (Name.size() > 18 && Name[18] == 'i');
1502 bool IsVariable = Name[16] == 'v';
1503 char Size = Name[16] == '.' ? Name[17] :
1504 Name[17] == '.' ? Name[18] :
1505 Name[18] == '.' ? Name[19] :
1509 if (IsVariable && Name[17] != '.') {
1510 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1511 IID = Intrinsic::x86_avx2_psllv_q;
1512 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1513 IID = Intrinsic::x86_avx2_psllv_q_256;
1514 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1515 IID = Intrinsic::x86_avx2_psllv_d;
1516 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1517 IID = Intrinsic::x86_avx2_psllv_d_256;
1518 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1519 IID = Intrinsic::x86_avx512_psllv_w_128;
1520 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1521 IID = Intrinsic::x86_avx512_psllv_w_256;
1522 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1523 IID = Intrinsic::x86_avx512_psllv_w_512;
1525 llvm_unreachable("Unexpected size");
1526 } else if (Name.endswith(".128")) {
1527 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1528 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1529 : Intrinsic::x86_sse2_psll_d;
1530 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1531 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1532 : Intrinsic::x86_sse2_psll_q;
1533 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1534 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1535 : Intrinsic::x86_sse2_psll_w;
1537 llvm_unreachable("Unexpected size");
1538 } else if (Name.endswith(".256")) {
1539 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1540 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1541 : Intrinsic::x86_avx2_psll_d;
1542 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1543 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1544 : Intrinsic::x86_avx2_psll_q;
1545 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1546 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1547 : Intrinsic::x86_avx2_psll_w;
1549 llvm_unreachable("Unexpected size");
1551 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1552 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1553 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1554 Intrinsic::x86_avx512_psll_d_512;
1555 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1556 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1557 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1558 Intrinsic::x86_avx512_psll_q_512;
1559 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1560 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1561 : Intrinsic::x86_avx512_psll_w_512;
1563 llvm_unreachable("Unexpected size");
1566 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1567 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1568 bool IsImmediate = Name[16] == 'i' ||
1569 (Name.size() > 18 && Name[18] == 'i');
1570 bool IsVariable = Name[16] == 'v';
1571 char Size = Name[16] == '.' ? Name[17] :
1572 Name[17] == '.' ? Name[18] :
1573 Name[18] == '.' ? Name[19] :
1577 if (IsVariable && Name[17] != '.') {
1578 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1579 IID = Intrinsic::x86_avx2_psrlv_q;
1580 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1581 IID = Intrinsic::x86_avx2_psrlv_q_256;
1582 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1583 IID = Intrinsic::x86_avx2_psrlv_d;
1584 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1585 IID = Intrinsic::x86_avx2_psrlv_d_256;
1586 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1587 IID = Intrinsic::x86_avx512_psrlv_w_128;
1588 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1589 IID = Intrinsic::x86_avx512_psrlv_w_256;
1590 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1591 IID = Intrinsic::x86_avx512_psrlv_w_512;
1593 llvm_unreachable("Unexpected size");
1594 } else if (Name.endswith(".128")) {
1595 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1596 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1597 : Intrinsic::x86_sse2_psrl_d;
1598 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1599 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1600 : Intrinsic::x86_sse2_psrl_q;
1601 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1602 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1603 : Intrinsic::x86_sse2_psrl_w;
1605 llvm_unreachable("Unexpected size");
1606 } else if (Name.endswith(".256")) {
1607 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1608 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1609 : Intrinsic::x86_avx2_psrl_d;
1610 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1611 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1612 : Intrinsic::x86_avx2_psrl_q;
1613 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1614 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1615 : Intrinsic::x86_avx2_psrl_w;
1617 llvm_unreachable("Unexpected size");
1619 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1620 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1621 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1622 Intrinsic::x86_avx512_psrl_d_512;
1623 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1624 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1625 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1626 Intrinsic::x86_avx512_psrl_q_512;
1627 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1628 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1629 : Intrinsic::x86_avx512_psrl_w_512;
1631 llvm_unreachable("Unexpected size");
1634 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1635 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1636 bool IsImmediate = Name[16] == 'i' ||
1637 (Name.size() > 18 && Name[18] == 'i');
1638 bool IsVariable = Name[16] == 'v';
1639 char Size = Name[16] == '.' ? Name[17] :
1640 Name[17] == '.' ? Name[18] :
1641 Name[18] == '.' ? Name[19] :
1645 if (IsVariable && Name[17] != '.') {
1646 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1647 IID = Intrinsic::x86_avx2_psrav_d;
1648 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1649 IID = Intrinsic::x86_avx2_psrav_d_256;
1650 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1651 IID = Intrinsic::x86_avx512_psrav_w_128;
1652 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1653 IID = Intrinsic::x86_avx512_psrav_w_256;
1654 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1655 IID = Intrinsic::x86_avx512_psrav_w_512;
1657 llvm_unreachable("Unexpected size");
1658 } else if (Name.endswith(".128")) {
1659 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1660 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1661 : Intrinsic::x86_sse2_psra_d;
1662 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1663 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1664 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1665 Intrinsic::x86_avx512_psra_q_128;
1666 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1667 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1668 : Intrinsic::x86_sse2_psra_w;
1670 llvm_unreachable("Unexpected size");
1671 } else if (Name.endswith(".256")) {
1672 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1673 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1674 : Intrinsic::x86_avx2_psra_d;
1675 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1676 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1677 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1678 Intrinsic::x86_avx512_psra_q_256;
1679 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1680 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1681 : Intrinsic::x86_avx2_psra_w;
1683 llvm_unreachable("Unexpected size");
1685 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1686 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1687 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1688 Intrinsic::x86_avx512_psra_d_512;
1689 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1690 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1691 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1692 Intrinsic::x86_avx512_psra_q_512;
1693 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1694 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1695 : Intrinsic::x86_avx512_psra_w_512;
1697 llvm_unreachable("Unexpected size");
1700 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1701 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1702 Rep = upgradeMaskedMove(Builder, *CI);
1703 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1705 if (Name.endswith("ps.128"))
1706 IID = Intrinsic::x86_avx_vpermilvar_ps;
1707 else if (Name.endswith("pd.128"))
1708 IID = Intrinsic::x86_avx_vpermilvar_pd;
1709 else if (Name.endswith("ps.256"))
1710 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1711 else if (Name.endswith("pd.256"))
1712 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1713 else if (Name.endswith("ps.512"))
1714 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1715 else if (Name.endswith("pd.512"))
1716 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1718 llvm_unreachable("Unexpected vpermilvar intrinsic");
1720 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1721 Rep = Builder.CreateCall(Intrin,
1722 { CI->getArgOperand(0), CI->getArgOperand(1) });
1723 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1724 CI->getArgOperand(2));
1726 llvm_unreachable("Unknown function for CallInst upgrade.");
1730 CI->replaceAllUsesWith(Rep);
1731 CI->eraseFromParent();
1735 std::string Name = CI->getName();
1737 CI->setName(Name + ".old");
1739 switch (NewFn->getIntrinsicID()) {
1741 llvm_unreachable("Unknown function for CallInst upgrade.");
1743 case Intrinsic::arm_neon_vld1:
1744 case Intrinsic::arm_neon_vld2:
1745 case Intrinsic::arm_neon_vld3:
1746 case Intrinsic::arm_neon_vld4:
1747 case Intrinsic::arm_neon_vld2lane:
1748 case Intrinsic::arm_neon_vld3lane:
1749 case Intrinsic::arm_neon_vld4lane:
1750 case Intrinsic::arm_neon_vst1:
1751 case Intrinsic::arm_neon_vst2:
1752 case Intrinsic::arm_neon_vst3:
1753 case Intrinsic::arm_neon_vst4:
1754 case Intrinsic::arm_neon_vst2lane:
1755 case Intrinsic::arm_neon_vst3lane:
1756 case Intrinsic::arm_neon_vst4lane: {
1757 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1758 CI->arg_operands().end());
1759 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1760 CI->eraseFromParent();
1764 case Intrinsic::ctlz:
1765 case Intrinsic::cttz:
1766 assert(CI->getNumArgOperands() == 1 &&
1767 "Mismatch between function args and call args");
1768 CI->replaceAllUsesWith(Builder.CreateCall(
1769 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
1770 CI->eraseFromParent();
1773 case Intrinsic::objectsize:
1774 CI->replaceAllUsesWith(Builder.CreateCall(
1775 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
1776 CI->eraseFromParent();
1779 case Intrinsic::ctpop: {
1780 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1781 CI->eraseFromParent();
1785 case Intrinsic::x86_xop_vfrcz_ss:
1786 case Intrinsic::x86_xop_vfrcz_sd:
1787 CI->replaceAllUsesWith(
1788 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
1789 CI->eraseFromParent();
1792 case Intrinsic::x86_xop_vpermil2pd:
1793 case Intrinsic::x86_xop_vpermil2ps:
1794 case Intrinsic::x86_xop_vpermil2pd_256:
1795 case Intrinsic::x86_xop_vpermil2ps_256: {
1796 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1797 CI->arg_operands().end());
1798 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
1799 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
1800 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
1801 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
1802 CI->eraseFromParent();
1806 case Intrinsic::x86_sse41_ptestc:
1807 case Intrinsic::x86_sse41_ptestz:
1808 case Intrinsic::x86_sse41_ptestnzc: {
1809 // The arguments for these intrinsics used to be v4f32, and changed
1810 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
1811 // So, the only thing required is a bitcast for both arguments.
1812 // First, check the arguments have the old type.
1813 Value *Arg0 = CI->getArgOperand(0);
1814 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
1817 // Old intrinsic, add bitcasts
1818 Value *Arg1 = CI->getArgOperand(1);
1820 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1822 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
1823 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1825 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
1826 CI->replaceAllUsesWith(NewCall);
1827 CI->eraseFromParent();
1831 case Intrinsic::x86_sse41_insertps:
1832 case Intrinsic::x86_sse41_dppd:
1833 case Intrinsic::x86_sse41_dpps:
1834 case Intrinsic::x86_sse41_mpsadbw:
1835 case Intrinsic::x86_avx_dp_ps_256:
1836 case Intrinsic::x86_avx2_mpsadbw: {
1837 // Need to truncate the last argument from i32 to i8 -- this argument models
1838 // an inherently 8-bit immediate operand to these x86 instructions.
1839 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1840 CI->arg_operands().end());
1842 // Replace the last argument with a trunc.
1843 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
1845 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
1846 CI->replaceAllUsesWith(NewCall);
1847 CI->eraseFromParent();
1851 case Intrinsic::thread_pointer: {
1852 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
1853 CI->eraseFromParent();
1857 case Intrinsic::invariant_start:
1858 case Intrinsic::invariant_end:
1859 case Intrinsic::masked_load:
1860 case Intrinsic::masked_store: {
1861 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1862 CI->arg_operands().end());
1863 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1864 CI->eraseFromParent();
// Upgrade every CallInst user of the (possibly deprecated) intrinsic F in
// place, then erase F from its module. Non-call users (e.g. a function
// pointer taken through a cast) are not rewritten by this routine.
1870 void llvm::UpgradeCallsToIntrinsic(Function *F) {
1871   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1873   // Check if this function should be upgraded and get the replacement function
1876   if (UpgradeIntrinsicFunction(F, NewFn)) {
1877     // Replace all users of the old function with the new function or new
1878     // instructions. This is not a range loop because the call is deleted.
1879     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
         // Advance the iterator before the call is upgraded, since
         // UpgradeIntrinsicCall erases the CallInst it is handed.
1880       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
1881         UpgradeIntrinsicCall(CI, NewFn);
1883     // Remove old function, no longer used, from the module.
1884     F->eraseFromParent();
// Upgrade an old scalar-format TBAA tag to the struct-path aware format
// <base type, access type, offset, [const]>. A tag whose first operand is
// already an MDNode (i.e. already struct-path form) is left untouched.
1888 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
1889   // Check if the tag uses struct-path aware TBAA format.
1890   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
1893   auto &Context = MD.getContext();
1894   if (MD.getNumOperands() == 3) {
       // Old scalar tag carrying an extra is-constant flag: first build a
       // scalar type node from the name and parent operands.
1895     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
1896     MDNode *ScalarType = MDNode::get(Context, Elts);
1897     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
1898     Metadata *Elts2[] = {ScalarType, ScalarType,
1899                          ConstantAsMetadata::get(
1900                              Constant::getNullValue(Type::getInt64Ty(Context))),
1902     return MDNode::get(Context, Elts2);
1904   // Create a MDNode <MD, MD, offset 0>
     // Two-operand scalar tag: the node doubles as both base and access type,
     // with an i64 0 offset appended.
1905   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
1906                          Type::getInt64Ty(Context)))};
1907   return MDNode::get(Context, Elts);
// Legalize an old-style bitcast between pointers in different address
// spaces, which is no longer a valid BitCast. It is split into a
// PtrToInt (returned via Temp, for the caller to insert) followed by an
// IntToPtr, which is the instruction returned. Other opcodes/type pairs
// are not rewritten here.
1910 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
1911                                       Instruction *&Temp) {
1912   if (Opc != Instruction::BitCast)
1916   Type *SrcTy = V->getType();
     // Only a pointer(-vector) -> pointer(-vector) cast that crosses address
     // spaces needs the ptrtoint/inttoptr expansion.
1917   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1918       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1919     LLVMContext &Context = V->getContext();
1921     // We have no information about target data layout, so we assume that
1922     // the maximum pointer size is 64bit.
1923     Type *MidTy = Type::getInt64Ty(Context);
1924     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
1926     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of UpgradeBitCastInst: rewrite an
// old-style bitcast constant between pointers in different address spaces
// as a ptrtoint/inttoptr constant-expression pair.
1932 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
1933   if (Opc != Instruction::BitCast)
1936   Type *SrcTy = C->getType();
     // Same legality condition as the instruction form: pointer(-vector) to
     // pointer(-vector) with differing address spaces.
1937   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1938       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1939     LLVMContext &Context = C->getContext();
1941     // We have no information about target data layout, so we assume that
1942     // the maximum pointer size is 64bit.
1943     Type *MidTy = Type::getInt64Ty(Context);
1945     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
1952 /// Check the debug info version number, if it is out-dated, drop the debug
1953 /// info. Return true if module is modified.
1954 bool llvm::UpgradeDebugInfo(Module &M) {
1955   unsigned Version = getDebugMetadataVersionFromModule(M);
     // Up-to-date debug info needs no upgrade.
1956   if (Version == DEBUG_METADATA_VERSION)
1959   bool RetCode = StripDebugInfo(M);
       // Report the dropped, out-of-date debug-info version through the
       // context's diagnostic handler so the user knows it was stripped.
1961     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
1962     M.getContext().diagnose(DiagVersion);
// Upgrade module-level flags. Currently this scans the module flags for the
// ObjC image-info flag and, if present without a companion
// "Objective-C Class Properties" flag, adds that flag with value 0.
// Returns true if the module was modified (flag added).
1967 bool llvm::UpgradeModuleFlags(Module &M) {
1968   const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
1972   bool HasObjCFlag = false, HasClassProperties = false;
1973   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
1974     MDNode *Op = ModFlags->getOperand(I);
       // A well-formed module flag is <behavior, ID string, value>; skip
       // malformed entries.
1975     if (Op->getNumOperands() < 2)
1977     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
1980     if (ID->getString() == "Objective-C Image Info Version")
1982     if (ID->getString() == "Objective-C Class Properties")
1983       HasClassProperties = true;
1985   // "Objective-C Class Properties" is recently added for Objective-C. We
1986   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
1987   // flag of value 0, so we can correctly downgrade this flag when trying to
1988   // link an ObjC bitcode without this module flag with an ObjC bitcode with
1989   // this module flag.
1990   if (HasObjCFlag && !HasClassProperties) {
1991     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
// Return true if MD is an MDTuple whose first operand is an MDString tag
// beginning with the legacy "llvm.vectorizer." prefix (the pre-"llvm.loop."
// spelling of loop hints).
1998 static bool isOldLoopArgument(Metadata *MD) {
1999   auto *T = dyn_cast_or_null<MDTuple>(MD);
     // An empty tuple carries no tag and cannot be an old-style argument.
2002   if (T->getNumOperands() < 1)
2004   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2007   return S->getString().startswith("llvm.vectorizer.");
// Translate a legacy "llvm.vectorizer.*" loop-hint tag into the current
// "llvm.loop.*" spelling. "llvm.vectorizer.unroll" becomes
// "llvm.loop.interleave.count"; every other tag keeps its suffix under the
// "llvm.loop.vectorize." prefix.
2010 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2011   StringRef OldPrefix = "llvm.vectorizer.";
2012   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
     // Special case: the old "unroll" hint actually controlled interleaving.
2014   if (OldTag == "llvm.vectorizer.unroll")
2015     return MDString::get(C, "llvm.loop.interleave.count");
2017   return MDString::get(
2018       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
// Upgrade one operand of a loop metadata tuple: if it is an old-style
// "llvm.vectorizer.*" hint tuple, rebuild it with the tag renamed via
// upgradeLoopTag and the remaining operands copied through. Operands that
// are not old-style hints pass through unchanged.
2022 static Metadata *upgradeLoopArgument(Metadata *MD) {
2023   auto *T = dyn_cast_or_null<MDTuple>(MD);
2026   if (T->getNumOperands() < 1)
2028   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2031   if (!OldTag->getString().startswith("llvm.vectorizer."))
2034   // This has an old tag. Upgrade it.
2035   SmallVector<Metadata *, 8> Ops;
2036   Ops.reserve(T->getNumOperands());
     // New tag first, then the original payload operands verbatim.
2037   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2038   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2039     Ops.push_back(T->getOperand(I));
2041   return MDTuple::get(T->getContext(), Ops);
// Upgrade an instruction's !llvm.loop attachment: if none of the tuple's
// operands use the legacy "llvm.vectorizer." tags, the node is left alone;
// otherwise a new tuple is built with each operand run through
// upgradeLoopArgument.
2044 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2045   auto *T = dyn_cast<MDTuple>(&N);
     // Fast path: nothing old-style present, keep the existing node.
2049   if (none_of(T->operands(), isOldLoopArgument))
2052   SmallVector<Metadata *, 8> Ops;
2053   Ops.reserve(T->getNumOperands());
2054   for (Metadata *MD : T->operands())
2055     Ops.push_back(upgradeLoopArgument(MD));
2057   return MDTuple::get(T->getContext(), Ops);