1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
36 // Upgrade the declarations of the SSE4.1 functions whose arguments have
37 // changed their type from v4f32 to v2i64.
//
// F is the declaration under inspection and IID is the intrinsic whose current
// declaration is fetched from F's parent module and stored in NewFn.
// NOTE(review): the tail of the parameter list and the return statements are
// not visible in this excerpt; the ptest callers pass NewFn as the third
// argument and return this function's result directly.
38 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
40 // Check whether this is an old version of the function, which received
// a <4 x float> first parameter (that is the type compared against below).
42 Type *Arg0Type = F->getFunctionType()->getParamType(0);
43 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
46 // Yes, it's old, replace it with new version.
48 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
//
// F is the declaration under inspection; IID names the intrinsic whose current
// declaration is looked up in F's parent module and stored in NewFn.
// NOTE(review): the tail of the parameter list and the return statements are
// not visible in this excerpt.
54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
56 // Check that the last argument is an i32.
57 Type *LastArgType = F->getFunctionType()->getParamType(
58 F->getFunctionType()->getNumParams() - 1);
59 if (!LastArgType->isIntegerTy(32))
62 // Move this function aside and map down.
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Inspect the name of F and, for the legacy intrinsic names recognized below,
// set NewFn to a replacement declaration. The name is matched after the
// leading "llvm." has been stripped.
// NOTE(review): many lines of this function (renames, returns, some `if`
// headers and closing braces) are not visible in this excerpt, so the exact
// return value on each path cannot be confirmed from here.
68 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
69 assert(F && "Illegal to upgrade a non-existent Function.");
71 // Quickly eliminate it, if it's not a candidate.
72 StringRef Name = F->getName();
73 if (Name.size() <= 8 || !Name.startswith("llvm."))
75 Name = Name.substr(5); // Strip off "llvm."
// ARM/AArch64 rbit: replacement is the bitreverse intrinsic overloaded on the
// type of F's first argument.
80 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
81 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
82 F->arg_begin()->getType());
// ARM NEON vclz: a new function named "llvm.ctlz." plus the original type
// suffix is created by hand; the i1 type below is part of its parameter list.
85 if (Name.startswith("arm.neon.vclz")) {
87 F->arg_begin()->getType(),
88 Type::getInt1Ty(F->getContext())
90 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
91 // the end of the name. Change name from llvm.arm.neon.vclz.* to
93 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
94 NewFn = Function::Create(fType, F->getLinkage(),
// substr(14) drops "arm.neon.vclz." (14 characters), keeping the type suffix.
95 "llvm.ctlz." + Name.substr(14), F->getParent());
// ARM NEON vcnt: replacement is ctpop overloaded on the first argument type.
98 if (Name.startswith("arm.neon.vcnt")) {
99 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
100 F->arg_begin()->getType());
// ARM NEON vld1-4 / vld2-4lane with only a vector type suffix: the new
// function keeps F's signature and gets ".p0i8" appended to the name.
103 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
104 if (vldRegex.match(Name)) {
105 auto fArgs = F->getFunctionType()->params();
106 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
107 // Can't use Intrinsic::getDeclaration here as the return types might
108 // then only be structurally equal.
109 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
110 NewFn = Function::Create(fType, F->getLinkage(),
111 "llvm." + Name + ".p0i8", F->getParent());
// ARM NEON vst1-4 / vst2-4lane: the replacement intrinsic is selected from the
// tables below using F's parameter count and is overloaded on the first two
// parameter types.
114 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
115 if (vstRegex.match(Name)) {
116 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
117 Intrinsic::arm_neon_vst2,
118 Intrinsic::arm_neon_vst3,
119 Intrinsic::arm_neon_vst4};
121 static const Intrinsic::ID StoreLaneInts[] = {
122 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
123 Intrinsic::arm_neon_vst4lane
126 auto fArgs = F->getFunctionType()->params();
127 Type *Tys[] = {fArgs[0], fArgs[1]};
// Non-lane stores index StoreInts with (parameter count - 3); lane stores
// index StoreLaneInts with (parameter count - 5).
128 if (Name.find("lane") == StringRef::npos)
129 NewFn = Intrinsic::getDeclaration(F->getParent(),
130 StoreInts[fArgs.size() - 3], Tys);
132 NewFn = Intrinsic::getDeclaration(F->getParent(),
133 StoreLaneInts[fArgs.size() - 5], Tys);
// Target-specific thread pointer intrinsics map to the generic thread_pointer.
136 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
137 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
// One-argument ctlz/cttz declarations are replaced by the current declaration
// overloaded on the argument type.
144 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
146 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
147 F->arg_begin()->getType());
150 if (Name.startswith("cttz.") && F->arg_size() == 1) {
152 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
153 F->arg_begin()->getType());
// invariant.start / invariant.end: the expected mangled name is computed from
// the pointer parameter type (parameter 1 for start, parameter 2 for end).
159 if (Name.startswith("invariant.start")) {
160 auto Args = F->getFunctionType()->params();
161 Type* ObjectPtr[1] = {Args[1]};
163 Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
165 NewFn = Intrinsic::getDeclaration(
166 F->getParent(), Intrinsic::invariant_start, ObjectPtr);
170 if (Name.startswith("invariant.end")) {
171 auto Args = F->getFunctionType()->params();
172 Type* ObjectPtr[1] = {Args[2]};
174 Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
176 NewFn = Intrinsic::getDeclaration(F->getParent(),
177 Intrinsic::invariant_end, ObjectPtr);
// masked.load / masked.store: redeclare only when F's name differs from the
// name the current mangling produces for its types.
184 if (Name.startswith("masked.load.")) {
185 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
186 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
188 NewFn = Intrinsic::getDeclaration(F->getParent(),
189 Intrinsic::masked_load,
194 if (Name.startswith("masked.store.")) {
195 auto Args = F->getFunctionType()->params();
196 Type *Tys[] = { Args[0], Args[1] };
197 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
199 NewFn = Intrinsic::getDeclaration(F->getParent(),
200 Intrinsic::masked_store,
209 // We only need to change the name to match the mangling including the
211 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
212 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
213 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
215 NewFn = Intrinsic::getDeclaration(F->getParent(),
216 Intrinsic::objectsize, Tys);
223 if (Name == "stackprotectorcheck") {
// From here on the checks are for x86 intrinsics; substr(4) below drops a
// four-character prefix ("x86." is four characters long).
230 bool IsX86 = Name.startswith("x86.");
232 Name = Name.substr(4);
234 // All of the intrinsics matches below should be marked with which llvm
235 // version started autoupgrading them. At some point in the future we would
236 // like to use this information to remove upgrade code for some older
237 // intrinsics. It is currently undecided how we will determine that future
240 (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
241 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
242 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
243 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
244 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
245 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
246 Name == "sse.add.ss" || // Added in 4.0
247 Name == "sse2.add.sd" || // Added in 4.0
248 Name == "sse.sub.ss" || // Added in 4.0
249 Name == "sse2.sub.sd" || // Added in 4.0
250 Name == "sse.mul.ss" || // Added in 4.0
251 Name == "sse2.mul.sd" || // Added in 4.0
252 Name == "sse.div.ss" || // Added in 4.0
253 Name == "sse2.div.sd" || // Added in 4.0
254 Name == "sse41.pmaxsb" || // Added in 3.9
255 Name == "sse2.pmaxs.w" || // Added in 3.9
256 Name == "sse41.pmaxsd" || // Added in 3.9
257 Name == "sse2.pmaxu.b" || // Added in 3.9
258 Name == "sse41.pmaxuw" || // Added in 3.9
259 Name == "sse41.pmaxud" || // Added in 3.9
260 Name == "sse41.pminsb" || // Added in 3.9
261 Name == "sse2.pmins.w" || // Added in 3.9
262 Name == "sse41.pminsd" || // Added in 3.9
263 Name == "sse2.pminu.b" || // Added in 3.9
264 Name == "sse41.pminuw" || // Added in 3.9
265 Name == "sse41.pminud" || // Added in 3.9
266 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
267 Name.startswith("avx2.pmax") || // Added in 3.9
268 Name.startswith("avx2.pmin") || // Added in 3.9
269 Name.startswith("avx512.mask.pmax") || // Added in 4.0
270 Name.startswith("avx512.mask.pmin") || // Added in 4.0
271 Name.startswith("avx2.vbroadcast") || // Added in 3.8
272 Name.startswith("avx2.pbroadcast") || // Added in 3.8
273 Name.startswith("avx.vpermil.") || // Added in 3.1
274 Name.startswith("sse2.pshuf") || // Added in 3.9
275 Name.startswith("avx512.pbroadcast") || // Added in 3.9
276 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
277 Name.startswith("avx512.mask.movddup") || // Added in 3.9
278 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
279 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
280 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
281 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
282 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
283 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
284 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
285 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
286 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
287 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
288 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
289 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
290 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
291 Name.startswith("avx512.mask.pand.") || // Added in 3.9
292 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
293 Name.startswith("avx512.mask.por.") || // Added in 3.9
294 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
295 Name.startswith("avx512.mask.and.") || // Added in 3.9
296 Name.startswith("avx512.mask.andn.") || // Added in 3.9
297 Name.startswith("avx512.mask.or.") || // Added in 3.9
298 Name.startswith("avx512.mask.xor.") || // Added in 3.9
299 Name.startswith("avx512.mask.padd.") || // Added in 4.0
300 Name.startswith("avx512.mask.psub.") || // Added in 4.0
301 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
302 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
303 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
304 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
305 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
306 Name == "avx512.mask.add.pd.128" || // Added in 4.0
307 Name == "avx512.mask.add.pd.256" || // Added in 4.0
308 Name == "avx512.mask.add.ps.128" || // Added in 4.0
309 Name == "avx512.mask.add.ps.256" || // Added in 4.0
310 Name == "avx512.mask.div.pd.128" || // Added in 4.0
311 Name == "avx512.mask.div.pd.256" || // Added in 4.0
312 Name == "avx512.mask.div.ps.128" || // Added in 4.0
313 Name == "avx512.mask.div.ps.256" || // Added in 4.0
314 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
315 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
316 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
317 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
318 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
319 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
320 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
321 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
322 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
323 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
324 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
325 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
326 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
327 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
328 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
329 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
330 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
331 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
332 Name.startswith("avx512.mask.pslli") || // Added in 4.0
333 Name.startswith("avx512.mask.psrai") || // Added in 4.0
334 Name.startswith("avx512.mask.psrli") || // Added in 4.0
335 Name.startswith("avx512.mask.psllv") || // Added in 4.0
336 Name.startswith("avx512.mask.psrav") || // Added in 4.0
337 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
338 Name.startswith("sse41.pmovsx") || // Added in 3.8
339 Name.startswith("sse41.pmovzx") || // Added in 3.9
340 Name.startswith("avx2.pmovsx") || // Added in 3.9
341 Name.startswith("avx2.pmovzx") || // Added in 3.9
342 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
343 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
344 Name == "sse2.cvtdq2pd" || // Added in 3.9
345 Name == "sse2.cvtps2pd" || // Added in 3.9
346 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
347 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
348 Name.startswith("avx.vinsertf128.") || // Added in 3.7
349 Name == "avx2.vinserti128" || // Added in 3.7
350 Name.startswith("avx512.mask.insert") || // Added in 4.0
351 Name.startswith("avx.vextractf128.") || // Added in 3.7
352 Name == "avx2.vextracti128" || // Added in 3.7
353 Name.startswith("avx512.mask.vextract") || // Added in 4.0
354 Name.startswith("sse4a.movnt.") || // Added in 3.9
355 Name.startswith("avx.movnt.") || // Added in 3.2
356 Name.startswith("avx512.storent.") || // Added in 3.9
357 Name == "sse2.storel.dq" || // Added in 3.9
358 Name.startswith("sse.storeu.") || // Added in 3.9
359 Name.startswith("sse2.storeu.") || // Added in 3.9
360 Name.startswith("avx.storeu.") || // Added in 3.9
361 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
362 Name.startswith("avx512.mask.store.p") || // Added in 3.9
363 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
364 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
365 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
366 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
367 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
368 Name.startswith("avx512.mask.load.") || // Added in 3.9
369 Name == "sse42.crc32.64.8" || // Added in 3.4
370 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
371 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
372 Name.startswith("avx512.mask.valign.") || // Added in 4.0
373 Name.startswith("sse2.psll.dq") || // Added in 3.7
374 Name.startswith("sse2.psrl.dq") || // Added in 3.7
375 Name.startswith("avx2.psll.dq") || // Added in 3.7
376 Name.startswith("avx2.psrl.dq") || // Added in 3.7
377 Name.startswith("avx512.psll.dq") || // Added in 3.9
378 Name.startswith("avx512.psrl.dq") || // Added in 3.9
379 Name == "sse41.pblendw" || // Added in 3.7
380 Name.startswith("sse41.blendp") || // Added in 3.7
381 Name.startswith("avx.blend.p") || // Added in 3.7
382 Name == "avx2.pblendw" || // Added in 3.7
383 Name.startswith("avx2.pblendd.") || // Added in 3.7
384 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
385 Name == "avx2.vbroadcasti128" || // Added in 3.7
386 Name == "xop.vpcmov" || // Added in 3.8
387 Name.startswith("avx512.mask.move.s") || // Added in 4.0
388 (Name.startswith("xop.vpcom") && // Added in 3.2
389 F->arg_size() == 2))) {
393 // SSE4.1 ptest functions may have an old signature.
394 if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
// substr(11) is whatever follows "sse41.ptest" (11 characters).
395 if (Name.substr(11) == "c")
396 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
397 if (Name.substr(11) == "z")
398 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
399 if (Name.substr(11) == "nzc")
400 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
402 // Several blend and other instructions with masks used the wrong number of
404 if (IsX86 && Name == "sse41.insertps") // Added in 3.6
405 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
407 if (IsX86 && Name == "sse41.dppd") // Added in 3.6
408 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
410 if (IsX86 && Name == "sse41.dpps") // Added in 3.6
411 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
413 if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
414 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
416 if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
417 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
419 if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
420 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
423 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
424 if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
426 NewFn = Intrinsic::getDeclaration(F->getParent(),
427 Intrinsic::x86_xop_vfrcz_ss);
430 if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
432 NewFn = Intrinsic::getDeclaration(F->getParent(),
433 Intrinsic::x86_xop_vfrcz_sd);
436 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
437 if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
438 auto Params = F->getFunctionType()->params();
439 auto Idx = Params[2];
440 if (Idx->getScalarType()->isFloatingPointTy()) {
// The replacement is chosen from the index vector's element width and total
// width in bits; any combination not matched explicitly uses the 256-bit ps
// variant.
442 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
443 unsigned EltSize = Idx->getScalarSizeInBits();
444 Intrinsic::ID Permil2ID;
445 if (EltSize == 64 && IdxSize == 128)
446 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
447 else if (EltSize == 32 && IdxSize == 128)
448 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
449 else if (EltSize == 64 && IdxSize == 256)
450 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
452 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
453 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
461 // This may not belong here. This function is effectively being overloaded
462 // to both detect an intrinsic which needs upgrading, and to provide the
463 // upgraded form of the intrinsic. We should perhaps have two separate
464 // functions for this.
// Public entry point: delegates the name-based detection to
// UpgradeIntrinsicFunction1 and then refreshes the attributes of an intrinsic
// declaration.
// NOTE(review): some statements of this function (including its return) are
// not visible in this excerpt.
468 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
470 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// A replacement must always be a different Function object than the original.
471 assert(F != NewFn && "Intrinsic function upgraded to the same function");
473 // Upgrade intrinsic attributes. This does not change the function.
// Attributes are only reset when F has a non-zero intrinsic ID.
476 if (Intrinsic::ID id = F->getIntrinsicID())
477 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Hook for upgrading global variables; per the comment below there is
// currently no upgrade logic here.
// NOTE(review): the return statement is not visible in this excerpt.
481 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
482 // Nothing to do yet.
486 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
//
// Op is the vector operand and Shift the byte shift amount. The operand is
// reinterpreted as a vector of bytes, shuffled against a zero vector within
// each 16-byte lane, and the result is cast back to Op's original type.
488 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
489 Value *Op, unsigned Shift) {
490 Type *ResultTy = Op->getType();
// Byte count of the vector: each element of the incoming type counts as 8
// bytes (the comments below describe the elements as 64-bit).
491 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
493 // Bitcast from a 64-bit element type to a byte element type.
494 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
495 Op = Builder.CreateBitCast(Op, VecTy, "cast");
497 // We'll be shuffling in zeroes.
498 Value *Res = Constant::getNullValue(VecTy);
500 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
501 // we'll just return the zero vector.
504 // 256/512-bit version is split into 2/4 16-byte lanes.
// l is the byte offset of the current lane, i the byte position inside it.
505 for (unsigned l = 0; l != NumElts; l += 16)
506 for (unsigned i = 0; i != 16; ++i) {
// The zero vector is the first shuffle operand and Op the second, so indices
// of NumElts and above select bytes of Op.
507 unsigned Idx = NumElts + i - Shift;
509 Idx -= NumElts - 16; // end of lane, switch operand.
510 Idxs[l + i] = Idx + l;
513 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
516 // Bitcast back to a 64-bit element type.
517 return Builder.CreateBitCast(Res, ResultTy, "cast");
520 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
//
// Mirror image of the PSLLDQ helper: Op is reinterpreted as a vector of bytes,
// shuffled against a zero vector within each 16-byte lane, and the result is
// cast back to Op's original type.
// NOTE(review): the remainder of the parameter list is not visible here; the
// body uses a value named Shift as the byte shift amount.
522 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
524 Type *ResultTy = Op->getType();
// Byte count of the vector: each element of the incoming type counts as 8
// bytes (the comments below describe the elements as 64-bit).
525 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
527 // Bitcast from a 64-bit element type to a byte element type.
528 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
529 Op = Builder.CreateBitCast(Op, VecTy, "cast");
531 // We'll be shuffling in zeroes.
532 Value *Res = Constant::getNullValue(VecTy);
534 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
535 // we'll just return the zero vector.
538 // 256/512-bit version is split into 2/4 16-byte lanes.
// l is the byte offset of the current lane, i the byte position inside it.
539 for (unsigned l = 0; l != NumElts; l += 16)
540 for (unsigned i = 0; i != 16; ++i) {
// Here Op is the first shuffle operand and the zero vector the second, so
// indices of NumElts and above select zero bytes.
541 unsigned Idx = i + Shift;
543 Idx += NumElts - 16; // end of lane, switch operand.
544 Idxs[l + i] = Idx + l;
547 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
550 // Bitcast back to a 64-bit element type.
551 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer mask value into a vector of i1 by bitcasting it to a
// vector with one i1 element per bit of the integer type, then (per the
// comment below) narrowing it with a shuffle when fewer elements are needed.
// NOTE(review): the remainder of the parameter list, the guard around the
// shuffle and the return statement are not visible in this excerpt; callers
// pass the wanted element count as the third argument.
554 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
556 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
557 cast<IntegerType>(Mask->getType())->getBitWidth());
558 Mask = Builder.CreateBitCast(Mask, MaskTy);
560 // If we have less than 8 elements, then the starting mask was an i8 and
561 // we need to extract down to the right number of elements.
564 for (unsigned i = 0; i != NumElts; ++i)
// Only the first NumElts entries of Indices are used by the shuffle.
566 Mask = Builder.CreateShuffleVector(Mask, Mask,
567 makeArrayRef(Indices, NumElts),
// Emit a per-element select between Op0 and Op1 controlled by an integer
// Mask, which is first converted to a vector of i1 with as many elements as
// Op0's vector type.
574 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
575 Value *Op0, Value *Op1) {
576 // A constant all-ones mask is special-cased before any select is built.
// NOTE(review): the guarded statement is not visible in this excerpt;
// presumably Op0 is returned unchanged - confirm. (The previous wording here,
// "just emit the align operation", referred to one particular caller.)
577 if (const auto *C = dyn_cast<Constant>(Mask))
578 if (C->isAllOnesValue())
581 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
582 return Builder.CreateSelect(Mask, Op0, Op1);
585 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
586 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
587 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
//
// Shift must be a ConstantInt (it is cast unconditionally below). The aligned
// value is built as a shuffle of Op1 and Op0 and then blended with Passthru
// under Mask via EmitX86Select.
// NOTE(review): the last parameter is not visible in this excerpt; the body
// uses a flag named IsVALIGN.
588 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
589 Value *Op1, Value *Shift,
590 Value *Passthru, Value *Mask,
592 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
594 unsigned NumElts = Op0->getType()->getVectorNumElements();
595 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
596 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
597 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
599 // Mask the immediate for VALIGN.
// NumElts is a power of two (asserted above), so this is ShiftVal % NumElts.
601 ShiftVal &= (NumElts - 1);
603 // If palignr is shifting the pair of vectors more than the size of two
// lanes, the result is the zero vector returned below.
606 return llvm::Constant::getNullValue(Op0->getType());
608 // If palignr is shifting the pair of input vectors more than one lane,
609 // but less than two lanes, convert to shifting in zeroes.
613 Op0 = llvm::Constant::getNullValue(Op0->getType());
// Room for the largest supported shuffle mask (64 indices).
616 uint32_t Indices[64];
617 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// l is the first element of the current 16-element lane, i the position
// inside it; only the first NumElts entries are passed to the shuffle.
618 for (unsigned l = 0; l < NumElts; l += 16) {
619 for (unsigned i = 0; i != 16; ++i) {
620 unsigned Idx = ShiftVal + i;
621 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
622 Idx += NumElts - 16; // End of lane, switch operand.
623 Indices[l + i] = Idx + l;
// Op1 is the first shuffle operand and Op0 the second.
627 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
628 makeArrayRef(Indices, NumElts),
631 return EmitX86Select(Builder, Mask, Align, Passthru);
// Emit a store of Data through Ptr under an integer Mask: a plain aligned
// store when Mask is a constant with all bits set, otherwise a masked store.
// NOTE(review): the last parameter and the declaration that receives the
// alignment expression are not visible in this excerpt; the body uses a flag
// named Aligned and a value named Align.
634 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
635 Value *Ptr, Value *Data, Value *Mask,
637 // Cast the pointer to the right type.
638 Ptr = Builder.CreateBitCast(Ptr,
639 llvm::PointerType::getUnqual(Data->getType()));
// Alignment in bytes: the full vector width when Aligned is set, otherwise 1.
641 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
643 // If the mask is all ones just emit a regular store.
644 if (const auto *C = dyn_cast<Constant>(Mask))
645 if (C->isAllOnesValue())
646 return Builder.CreateAlignedStore(Data, Ptr, Align);
648 // Convert the mask from an integer type to a vector of i1.
649 unsigned NumElts = Data->getType()->getVectorNumElements();
650 Mask = getX86MaskVec(Builder, Mask, NumElts);
651 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Emit a load through Ptr of Passthru's type under an integer Mask: a plain
// aligned load when Mask is a constant with all bits set, otherwise a masked
// load that takes Passthru as its pass-through operand.
// NOTE(review): the last parameter and the declaration that receives the
// alignment expression are not visible in this excerpt; the body uses a flag
// named Aligned and a value named Align.
654 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
655 Value *Ptr, Value *Passthru, Value *Mask,
657 // Cast the pointer to the right type.
658 Ptr = Builder.CreateBitCast(Ptr,
659 llvm::PointerType::getUnqual(Passthru->getType()));
// Alignment in bytes: the full vector width when Aligned is set, otherwise 1.
661 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
663 // If the mask is all ones just emit a regular load.
664 if (const auto *C = dyn_cast<Constant>(Mask))
665 if (C->isAllOnesValue())
666 return Builder.CreateAlignedLoad(Ptr, Align);
668 // Convert the mask from an integer type to a vector of i1.
669 unsigned NumElts = Passthru->getType()->getVectorNumElements();
670 Mask = getX86MaskVec(Builder, Mask, NumElts);
671 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Expand an integer min/max intrinsic call into an icmp with predicate Pred
// on its first two operands followed by a select of those same operands.
// NOTE(review): the return statement is not visible in this excerpt.
674 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
675 ICmpInst::Predicate Pred) {
676 Value *Op0 = CI.getArgOperand(0);
677 Value *Op1 = CI.getArgOperand(1);
678 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
679 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
// Four-operand calls are the masked forms: operand 3 is the mask and operand 2
// the value selected where the mask is clear.
681 if (CI.getNumArgOperands() == 4)
682 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Expand a masked vector compare: operands 0 and 1 are compared with Pred,
// the result is ANDed with the mask in operand 2 (unless that mask is a
// constant with all bits set), and the i1 vector is returned bitcast to an
// integer of max(NumElts, 8) bits.
// NOTE(review): the guard around the widening shuffle and the declaration of
// Indices are not visible in this excerpt.
687 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
688 ICmpInst::Predicate Pred) {
689 Value *Op0 = CI.getArgOperand(0);
690 unsigned NumElts = Op0->getType()->getVectorNumElements();
691 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
693 Value *Mask = CI.getArgOperand(2);
694 const auto *C = dyn_cast<Constant>(Mask);
695 if (!C || !C->isAllOnesValue())
696 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
700 for (unsigned i = 0; i != NumElts; ++i)
// Positions NumElts..7 take indices at or above NumElts, i.e. elements of the
// second shuffle operand, which is the zero vector below.
702 for (unsigned i = NumElts; i != 8; ++i)
703 Indices[i] = NumElts + i % NumElts;
704 Cmp = Builder.CreateShuffleVector(Cmp,
705 Constant::getNullValue(Cmp->getType()),
708 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
709 std::max(NumElts, 8U)));
712 // Replace a masked intrinsic with an older unmasked intrinsic.
//
// The unmasked intrinsic IID is called with operands 0 and 1 of CI; its result
// is then blended with operand 2 under the mask in operand 3.
// NOTE(review): the last parameter is not visible in this excerpt; the body
// uses a value named IID as the intrinsic to declare.
713 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
715 Function *F = CI.getCalledFunction();
716 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
717 Value *Rep = Builder.CreateCall(Intrin,
718 { CI.getArgOperand(0), CI.getArgOperand(1) });
719 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
722 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
723 Value* A = CI.getArgOperand(0);
724 Value* B = CI.getArgOperand(1);
725 Value* Src = CI.getArgOperand(2);
726 Value* Mask = CI.getArgOperand(3);
728 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
729 Value* Cmp = Builder.CreateIsNotNull(AndNode);
730 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
731 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
732 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
733 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
736 /// Upgrade a call to an old intrinsic. All argument and return casting must be
737 /// provided to seamlessly integrate with existing context.
738 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
739 Function *F = CI->getCalledFunction();
740 LLVMContext &C = CI->getContext();
741 IRBuilder<> Builder(C);
742 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
744 assert(F && "Intrinsic call is not direct?");
747 // Get the Function's name.
748 StringRef Name = F->getName();
750 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
751 Name = Name.substr(5);
753 bool IsX86 = Name.startswith("x86.");
755 Name = Name.substr(4);
757 if (IsX86 && Name.startswith("sse4a.movnt.")) {
758 Module *M = F->getParent();
759 SmallVector<Metadata *, 1> Elts;
761 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
762 MDNode *Node = MDNode::get(C, Elts);
764 Value *Arg0 = CI->getArgOperand(0);
765 Value *Arg1 = CI->getArgOperand(1);
767 // Nontemporal (unaligned) store of the 0'th element of the float/double
769 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
770 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
771 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
773 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
775 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
776 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
779 CI->eraseFromParent();
783 if (IsX86 && (Name.startswith("avx.movnt.") ||
784 Name.startswith("avx512.storent."))) {
785 Module *M = F->getParent();
786 SmallVector<Metadata *, 1> Elts;
788 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
789 MDNode *Node = MDNode::get(C, Elts);
791 Value *Arg0 = CI->getArgOperand(0);
792 Value *Arg1 = CI->getArgOperand(1);
794 // Convert the type of the pointer to a pointer to the stored type.
795 Value *BC = Builder.CreateBitCast(Arg0,
796 PointerType::getUnqual(Arg1->getType()),
798 VectorType *VTy = cast<VectorType>(Arg1->getType());
799 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
800 VTy->getBitWidth() / 8);
801 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
804 CI->eraseFromParent();
808 if (IsX86 && Name == "sse2.storel.dq") {
809 Value *Arg0 = CI->getArgOperand(0);
810 Value *Arg1 = CI->getArgOperand(1);
812 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
813 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
814 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
815 Value *BC = Builder.CreateBitCast(Arg0,
816 PointerType::getUnqual(Elt->getType()),
818 Builder.CreateAlignedStore(Elt, BC, 1);
821 CI->eraseFromParent();
825 if (IsX86 && (Name.startswith("sse.storeu.") ||
826 Name.startswith("sse2.storeu.") ||
827 Name.startswith("avx.storeu."))) {
828 Value *Arg0 = CI->getArgOperand(0);
829 Value *Arg1 = CI->getArgOperand(1);
831 Arg0 = Builder.CreateBitCast(Arg0,
832 PointerType::getUnqual(Arg1->getType()),
834 Builder.CreateAlignedStore(Arg1, Arg0, 1);
837 CI->eraseFromParent();
841 if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
842 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
843 CI->getArgOperand(2), /*Aligned*/false);
846 CI->eraseFromParent();
850 if (IsX86 && (Name.startswith("avx512.mask.store."))) {
851 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
852 CI->getArgOperand(2), /*Aligned*/true);
855 CI->eraseFromParent();
860 // Upgrade packed integer vector compare intrinsics to compare instructions.
861 if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
862 Name.startswith("avx2.pcmpeq."))) {
863 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
865 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
866 } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
867 Name.startswith("avx2.pcmpgt."))) {
868 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
870 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
871 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
872 Type *I32Ty = Type::getInt32Ty(C);
873 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
874 ConstantInt::get(I32Ty, 0));
875 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
876 ConstantInt::get(I32Ty, 0));
877 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
878 Builder.CreateFAdd(Elt0, Elt1),
879 ConstantInt::get(I32Ty, 0));
880 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
881 Type *I32Ty = Type::getInt32Ty(C);
882 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
883 ConstantInt::get(I32Ty, 0));
884 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
885 ConstantInt::get(I32Ty, 0));
886 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
887 Builder.CreateFSub(Elt0, Elt1),
888 ConstantInt::get(I32Ty, 0));
889 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
890 Type *I32Ty = Type::getInt32Ty(C);
891 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
892 ConstantInt::get(I32Ty, 0));
893 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
894 ConstantInt::get(I32Ty, 0));
895 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
896 Builder.CreateFMul(Elt0, Elt1),
897 ConstantInt::get(I32Ty, 0));
898 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
899 Type *I32Ty = Type::getInt32Ty(C);
900 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
901 ConstantInt::get(I32Ty, 0));
902 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
903 ConstantInt::get(I32Ty, 0));
904 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
905 Builder.CreateFDiv(Elt0, Elt1),
906 ConstantInt::get(I32Ty, 0));
907 } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
908 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
909 } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
910 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
911 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
912 Name == "sse2.pmaxs.w" ||
913 Name == "sse41.pmaxsd" ||
914 Name.startswith("avx2.pmaxs") ||
915 Name.startswith("avx512.mask.pmaxs"))) {
916 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
917 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
918 Name == "sse41.pmaxuw" ||
919 Name == "sse41.pmaxud" ||
920 Name.startswith("avx2.pmaxu") ||
921 Name.startswith("avx512.mask.pmaxu"))) {
922 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
923 } else if (IsX86 && (Name == "sse41.pminsb" ||
924 Name == "sse2.pmins.w" ||
925 Name == "sse41.pminsd" ||
926 Name.startswith("avx2.pmins") ||
927 Name.startswith("avx512.mask.pmins"))) {
928 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
929 } else if (IsX86 && (Name == "sse2.pminu.b" ||
930 Name == "sse41.pminuw" ||
931 Name == "sse41.pminud" ||
932 Name.startswith("avx2.pminu") ||
933 Name.startswith("avx512.mask.pminu"))) {
934 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
935 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
936 Name == "sse2.cvtps2pd" ||
937 Name == "avx.cvtdq2.pd.256" ||
938 Name == "avx.cvt.ps2.pd.256" ||
939 Name.startswith("avx512.mask.cvtdq2pd.") ||
940 Name.startswith("avx512.mask.cvtudq2pd."))) {
941 // Lossless i32/float to double conversion.
942 // Extract the bottom elements if necessary and convert to double vector.
943 Value *Src = CI->getArgOperand(0);
944 VectorType *SrcTy = cast<VectorType>(Src->getType());
945 VectorType *DstTy = cast<VectorType>(CI->getType());
946 Rep = CI->getArgOperand(0);
948 unsigned NumDstElts = DstTy->getNumElements();
949 if (NumDstElts < SrcTy->getNumElements()) {
950 assert(NumDstElts == 2 && "Unexpected vector size");
951 uint32_t ShuffleMask[2] = { 0, 1 };
952 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
956 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
957 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
959 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
960 else if (UInt2Double)
961 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
963 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
965 if (CI->getNumArgOperands() == 3)
966 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
967 CI->getArgOperand(1));
968 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
969 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
970 CI->getArgOperand(1), CI->getArgOperand(2),
972 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
973 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
974 CI->getArgOperand(1),CI->getArgOperand(2),
976 } else if (IsX86 && Name.startswith("xop.vpcom")) {
978 if (Name.endswith("ub"))
979 intID = Intrinsic::x86_xop_vpcomub;
980 else if (Name.endswith("uw"))
981 intID = Intrinsic::x86_xop_vpcomuw;
982 else if (Name.endswith("ud"))
983 intID = Intrinsic::x86_xop_vpcomud;
984 else if (Name.endswith("uq"))
985 intID = Intrinsic::x86_xop_vpcomuq;
986 else if (Name.endswith("b"))
987 intID = Intrinsic::x86_xop_vpcomb;
988 else if (Name.endswith("w"))
989 intID = Intrinsic::x86_xop_vpcomw;
990 else if (Name.endswith("d"))
991 intID = Intrinsic::x86_xop_vpcomd;
992 else if (Name.endswith("q"))
993 intID = Intrinsic::x86_xop_vpcomq;
995 llvm_unreachable("Unknown suffix");
997 Name = Name.substr(9); // strip off "xop.vpcom"
999 if (Name.startswith("lt"))
1001 else if (Name.startswith("le"))
1003 else if (Name.startswith("gt"))
1005 else if (Name.startswith("ge"))
1007 else if (Name.startswith("eq"))
1009 else if (Name.startswith("ne"))
1011 else if (Name.startswith("false"))
1013 else if (Name.startswith("true"))
1016 llvm_unreachable("Unknown condition");
1018 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1020 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1021 Builder.getInt8(Imm)});
1022 } else if (IsX86 && Name == "xop.vpcmov") {
1023 Value *Arg0 = CI->getArgOperand(0);
1024 Value *Arg1 = CI->getArgOperand(1);
1025 Value *Sel = CI->getArgOperand(2);
1026 unsigned NumElts = CI->getType()->getVectorNumElements();
1027 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
1028 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
1029 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
1030 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
1031 Rep = Builder.CreateOr(Sel0, Sel1);
1032 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1033 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1034 Intrinsic::x86_sse42_crc32_32_8);
1035 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1036 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1037 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1038 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1039 // Replace broadcasts with a series of insertelements.
1040 Type *VecTy = CI->getType();
1041 Type *EltTy = VecTy->getVectorElementType();
1042 unsigned EltNum = VecTy->getVectorNumElements();
1043 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1044 EltTy->getPointerTo());
1045 Value *Load = Builder.CreateLoad(EltTy, Cast);
1046 Type *I32Ty = Type::getInt32Ty(C);
1047 Rep = UndefValue::get(VecTy);
1048 for (unsigned I = 0; I < EltNum; ++I)
1049 Rep = Builder.CreateInsertElement(Rep, Load,
1050 ConstantInt::get(I32Ty, I));
1051 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1052 Name.startswith("sse41.pmovzx") ||
1053 Name.startswith("avx2.pmovsx") ||
1054 Name.startswith("avx2.pmovzx") ||
1055 Name.startswith("avx512.mask.pmovsx") ||
1056 Name.startswith("avx512.mask.pmovzx"))) {
1057 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1058 VectorType *DstTy = cast<VectorType>(CI->getType());
1059 unsigned NumDstElts = DstTy->getNumElements();
1061 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1062 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1063 for (unsigned i = 0; i != NumDstElts; ++i)
1066 Value *SV = Builder.CreateShuffleVector(
1067 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1069 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1070 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1071 : Builder.CreateZExt(SV, DstTy);
1072 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1073 if (CI->getNumArgOperands() == 3)
1074 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1075 CI->getArgOperand(1));
1076 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1077 Name == "avx2.vbroadcasti128")) {
1078 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1079 Type *EltTy = CI->getType()->getVectorElementType();
1080 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1081 Type *VT = VectorType::get(EltTy, NumSrcElts);
1082 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1083 PointerType::getUnqual(VT));
1084 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1085 if (NumSrcElts == 2)
1086 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1089 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1090 { 0, 1, 2, 3, 0, 1, 2, 3 });
1091 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1092 Name.startswith("avx2.vbroadcast") ||
1093 Name.startswith("avx512.pbroadcast") ||
1094 Name.startswith("avx512.mask.broadcast.s"))) {
1095 // Replace vp?broadcasts with a vector shuffle.
1096 Value *Op = CI->getArgOperand(0);
1097 unsigned NumElts = CI->getType()->getVectorNumElements();
1098 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1099 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1100 Constant::getNullValue(MaskTy));
1102 if (CI->getNumArgOperands() == 3)
1103 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1104 CI->getArgOperand(1));
1105 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1106 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1107 CI->getArgOperand(1),
1108 CI->getArgOperand(2),
1109 CI->getArgOperand(3),
1110 CI->getArgOperand(4),
1112 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1113 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1114 CI->getArgOperand(1),
1115 CI->getArgOperand(2),
1116 CI->getArgOperand(3),
1117 CI->getArgOperand(4),
1119 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1120 Name == "avx2.psll.dq")) {
1121 // 128/256-bit shift left specified in bits.
1122 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1123 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1124 Shift / 8); // Shift is in bits.
1125 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1126 Name == "avx2.psrl.dq")) {
1127 // 128/256-bit shift right specified in bits.
1128 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1129 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1130 Shift / 8); // Shift is in bits.
1131 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1132 Name == "avx2.psll.dq.bs" ||
1133 Name == "avx512.psll.dq.512")) {
1134 // 128/256/512-bit shift left specified in bytes.
1135 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1136 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1137 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1138 Name == "avx2.psrl.dq.bs" ||
1139 Name == "avx512.psrl.dq.512")) {
1140 // 128/256/512-bit shift right specified in bytes.
1141 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1142 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1143 } else if (IsX86 && (Name == "sse41.pblendw" ||
1144 Name.startswith("sse41.blendp") ||
1145 Name.startswith("avx.blend.p") ||
1146 Name == "avx2.pblendw" ||
1147 Name.startswith("avx2.pblendd."))) {
1148 Value *Op0 = CI->getArgOperand(0);
1149 Value *Op1 = CI->getArgOperand(1);
1150 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1151 VectorType *VecTy = cast<VectorType>(CI->getType());
1152 unsigned NumElts = VecTy->getNumElements();
1154 SmallVector<uint32_t, 16> Idxs(NumElts);
1155 for (unsigned i = 0; i != NumElts; ++i)
1156 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1158 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1159 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1160 Name == "avx2.vinserti128" ||
1161 Name.startswith("avx512.mask.insert"))) {
1162 Value *Op0 = CI->getArgOperand(0);
1163 Value *Op1 = CI->getArgOperand(1);
1164 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1165 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1166 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1167 unsigned Scale = DstNumElts / SrcNumElts;
1169 // Mask off the high bits of the immediate value; hardware ignores those.
1172 // Extend the second operand into a vector the size of the destination.
1173 Value *UndefV = UndefValue::get(Op1->getType());
1174 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1175 for (unsigned i = 0; i != SrcNumElts; ++i)
1177 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1178 Idxs[i] = SrcNumElts;
1179 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1181 // Insert the second operand into the first operand.
1183 // Note that there is no guarantee that instruction lowering will actually
1184 // produce a vinsertf128 instruction for the created shuffles. In
1185 // particular, the 0 immediate case involves no lane changes, so it can
1186 // be handled as a blend.
1188 // Example of shuffle mask for 32-bit elements:
1189 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1190 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1192 // First fill with identity mask.
1193 for (unsigned i = 0; i != DstNumElts; ++i)
1195 // Then replace the elements where we need to insert.
1196 for (unsigned i = 0; i != SrcNumElts; ++i)
1197 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1198 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1200 // If the intrinsic has a mask operand, handle that.
1201 if (CI->getNumArgOperands() == 5)
1202 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1203 CI->getArgOperand(3));
1204 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1205 Name == "avx2.vextracti128" ||
1206 Name.startswith("avx512.mask.vextract"))) {
1207 Value *Op0 = CI->getArgOperand(0);
1208 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1209 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1210 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1211 unsigned Scale = SrcNumElts / DstNumElts;
1213 // Mask off the high bits of the immediate value; hardware ignores those.
1216 // Get indexes for the subvector of the input vector.
1217 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1218 for (unsigned i = 0; i != DstNumElts; ++i) {
1219 Idxs[i] = i + (Imm * DstNumElts);
1221 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1223 // If the intrinsic has a mask operand, handle that.
1224 if (CI->getNumArgOperands() == 4)
1225 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1226 CI->getArgOperand(2));
1227 } else if (!IsX86 && Name == "stackprotectorcheck") {
1229 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1230 Name.startswith("avx512.mask.perm.di."))) {
1231 Value *Op0 = CI->getArgOperand(0);
1232 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1233 VectorType *VecTy = cast<VectorType>(CI->getType());
1234 unsigned NumElts = VecTy->getNumElements();
1236 SmallVector<uint32_t, 8> Idxs(NumElts);
1237 for (unsigned i = 0; i != NumElts; ++i)
1238 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1240 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1242 if (CI->getNumArgOperands() == 4)
1243 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1244 CI->getArgOperand(2));
1245 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1246 Name == "sse2.pshuf.d" ||
1247 Name.startswith("avx512.mask.vpermil.p") ||
1248 Name.startswith("avx512.mask.pshuf.d."))) {
1249 Value *Op0 = CI->getArgOperand(0);
1250 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1251 VectorType *VecTy = cast<VectorType>(CI->getType());
1252 unsigned NumElts = VecTy->getNumElements();
1253 // Calculate the size of each index in the immediate.
1254 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1255 unsigned IdxMask = ((1 << IdxSize) - 1);
1257 SmallVector<uint32_t, 8> Idxs(NumElts);
1258 // Lookup the bits for this element, wrapping around the immediate every
1259 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1260 // to offset by the first index of each group.
1261 for (unsigned i = 0; i != NumElts; ++i)
1262 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1264 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1266 if (CI->getNumArgOperands() == 4)
1267 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1268 CI->getArgOperand(2));
1269 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1270 Name.startswith("avx512.mask.pshufl.w."))) {
1271 Value *Op0 = CI->getArgOperand(0);
1272 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1273 unsigned NumElts = CI->getType()->getVectorNumElements();
1275 SmallVector<uint32_t, 16> Idxs(NumElts);
1276 for (unsigned l = 0; l != NumElts; l += 8) {
1277 for (unsigned i = 0; i != 4; ++i)
1278 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1279 for (unsigned i = 4; i != 8; ++i)
1280 Idxs[i + l] = i + l;
1283 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1285 if (CI->getNumArgOperands() == 4)
1286 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1287 CI->getArgOperand(2));
1288 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1289 Name.startswith("avx512.mask.pshufh.w."))) {
1290 Value *Op0 = CI->getArgOperand(0);
1291 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1292 unsigned NumElts = CI->getType()->getVectorNumElements();
1294 SmallVector<uint32_t, 16> Idxs(NumElts);
1295 for (unsigned l = 0; l != NumElts; l += 8) {
1296 for (unsigned i = 0; i != 4; ++i)
1297 Idxs[i + l] = i + l;
1298 for (unsigned i = 0; i != 4; ++i)
1299 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1302 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1304 if (CI->getNumArgOperands() == 4)
1305 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1306 CI->getArgOperand(2));
1307 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1308 Value *Op0 = CI->getArgOperand(0);
1309 Value *Op1 = CI->getArgOperand(1);
1310 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1311 unsigned NumElts = CI->getType()->getVectorNumElements();
1313 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1314 unsigned HalfLaneElts = NumLaneElts / 2;
1316 SmallVector<uint32_t, 16> Idxs(NumElts);
1317 for (unsigned i = 0; i != NumElts; ++i) {
1318 // Base index is the starting element of the lane.
1319 Idxs[i] = i - (i % NumLaneElts);
1320 // If we are half way through the lane switch to the other source.
1321 if ((i % NumLaneElts) >= HalfLaneElts)
1323 // Now select the specific element. By adding HalfLaneElts bits from
1324 // the immediate. Wrapping around the immediate every 8-bits.
1325 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1328 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1330 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1331 CI->getArgOperand(3));
1332 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1333 Name.startswith("avx512.mask.movshdup") ||
1334 Name.startswith("avx512.mask.movsldup"))) {
1335 Value *Op0 = CI->getArgOperand(0);
1336 unsigned NumElts = CI->getType()->getVectorNumElements();
1337 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1339 unsigned Offset = 0;
1340 if (Name.startswith("avx512.mask.movshdup."))
1343 SmallVector<uint32_t, 16> Idxs(NumElts);
1344 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1345 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1346 Idxs[i + l + 0] = i + l + Offset;
1347 Idxs[i + l + 1] = i + l + Offset;
1350 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1352 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1353 CI->getArgOperand(1));
1354 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1355 Name.startswith("avx512.mask.unpckl."))) {
1356 Value *Op0 = CI->getArgOperand(0);
1357 Value *Op1 = CI->getArgOperand(1);
1358 int NumElts = CI->getType()->getVectorNumElements();
1359 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1361 SmallVector<uint32_t, 64> Idxs(NumElts);
1362 for (int l = 0; l != NumElts; l += NumLaneElts)
1363 for (int i = 0; i != NumLaneElts; ++i)
1364 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1366 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1368 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1369 CI->getArgOperand(2));
1370 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1371 Name.startswith("avx512.mask.unpckh."))) {
1372 Value *Op0 = CI->getArgOperand(0);
1373 Value *Op1 = CI->getArgOperand(1);
1374 int NumElts = CI->getType()->getVectorNumElements();
1375 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1377 SmallVector<uint32_t, 64> Idxs(NumElts);
1378 for (int l = 0; l != NumElts; l += NumLaneElts)
1379 for (int i = 0; i != NumLaneElts; ++i)
1380 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1382 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1384 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1385 CI->getArgOperand(2));
1386 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1387 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1388 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1389 CI->getArgOperand(2));
1390 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1391 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1392 CI->getArgOperand(1));
1393 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1394 CI->getArgOperand(2));
1395 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1396 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1397 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1398 CI->getArgOperand(2));
1399 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1400 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1401 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1402 CI->getArgOperand(2));
1403 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1404 VectorType *FTy = cast<VectorType>(CI->getType());
1405 VectorType *ITy = VectorType::getInteger(FTy);
1406 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1407 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1408 Rep = Builder.CreateBitCast(Rep, FTy);
1409 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1410 CI->getArgOperand(2));
1411 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1412 VectorType *FTy = cast<VectorType>(CI->getType());
1413 VectorType *ITy = VectorType::getInteger(FTy);
1414 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1415 Rep = Builder.CreateAnd(Rep,
1416 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1417 Rep = Builder.CreateBitCast(Rep, FTy);
1418 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1419 CI->getArgOperand(2));
1420 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1421 VectorType *FTy = cast<VectorType>(CI->getType());
1422 VectorType *ITy = VectorType::getInteger(FTy);
1423 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1424 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1425 Rep = Builder.CreateBitCast(Rep, FTy);
1426 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1427 CI->getArgOperand(2));
1428 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1429 VectorType *FTy = cast<VectorType>(CI->getType());
1430 VectorType *ITy = VectorType::getInteger(FTy);
1431 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1432 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1433 Rep = Builder.CreateBitCast(Rep, FTy);
1434 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1435 CI->getArgOperand(2));
1436 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1437 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1438 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1439 CI->getArgOperand(2));
1440 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1441 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1442 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1443 CI->getArgOperand(2));
1444 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1445 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1446 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1447 CI->getArgOperand(2));
1448 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1449 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1450 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1451 CI->getArgOperand(2));
1452 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1453 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1454 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1455 CI->getArgOperand(2));
1456 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1457 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1458 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1459 CI->getArgOperand(2));
1460 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1461 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1462 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1463 CI->getArgOperand(2));
1464 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1465 VectorType *VecTy = cast<VectorType>(CI->getType());
1467 if (VecTy->getPrimitiveSizeInBits() == 128)
1468 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1469 else if (VecTy->getPrimitiveSizeInBits() == 256)
1470 IID = Intrinsic::x86_avx2_pshuf_b;
1471 else if (VecTy->getPrimitiveSizeInBits() == 512)
1472 IID = Intrinsic::x86_avx512_pshuf_b_512;
1474 llvm_unreachable("Unexpected intrinsic");
1476 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1477 { CI->getArgOperand(0), CI->getArgOperand(1) });
1478 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1479 CI->getArgOperand(2));
1480 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1481 Name.startswith("avx512.mask.pmulu.dq."))) {
1482 bool IsUnsigned = Name[16] == 'u';
1483 VectorType *VecTy = cast<VectorType>(CI->getType());
1485 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1486 IID = Intrinsic::x86_sse41_pmuldq;
1487 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1488 IID = Intrinsic::x86_avx2_pmul_dq;
1489 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1490 IID = Intrinsic::x86_avx512_pmul_dq_512;
1491 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1492 IID = Intrinsic::x86_sse2_pmulu_dq;
1493 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1494 IID = Intrinsic::x86_avx2_pmulu_dq;
1495 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1496 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1498 llvm_unreachable("Unexpected intrinsic");
1500 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1501 { CI->getArgOperand(0), CI->getArgOperand(1) });
1502 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1503 CI->getArgOperand(2));
1504 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1505 bool IsImmediate = Name[16] == 'i' ||
1506 (Name.size() > 18 && Name[18] == 'i');
1507 bool IsVariable = Name[16] == 'v';
1508 char Size = Name[16] == '.' ? Name[17] :
1509 Name[17] == '.' ? Name[18] :
1510 Name[18] == '.' ? Name[19] :
1514 if (IsVariable && Name[17] != '.') {
1515 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1516 IID = Intrinsic::x86_avx2_psllv_q;
1517 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1518 IID = Intrinsic::x86_avx2_psllv_q_256;
1519 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1520 IID = Intrinsic::x86_avx2_psllv_d;
1521 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1522 IID = Intrinsic::x86_avx2_psllv_d_256;
1523 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1524 IID = Intrinsic::x86_avx512_psllv_w_128;
1525 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1526 IID = Intrinsic::x86_avx512_psllv_w_256;
1527 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1528 IID = Intrinsic::x86_avx512_psllv_w_512;
1530 llvm_unreachable("Unexpected size");
1531 } else if (Name.endswith(".128")) {
1532 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1533 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1534 : Intrinsic::x86_sse2_psll_d;
1535 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1536 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1537 : Intrinsic::x86_sse2_psll_q;
1538 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1539 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1540 : Intrinsic::x86_sse2_psll_w;
1542 llvm_unreachable("Unexpected size");
1543 } else if (Name.endswith(".256")) {
1544 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1545 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1546 : Intrinsic::x86_avx2_psll_d;
1547 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1548 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1549 : Intrinsic::x86_avx2_psll_q;
1550 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1551 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1552 : Intrinsic::x86_avx2_psll_w;
1554 llvm_unreachable("Unexpected size");
1556 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1557 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1558 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1559 Intrinsic::x86_avx512_psll_d_512;
1560 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1561 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1562 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1563 Intrinsic::x86_avx512_psll_q_512;
1564 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1565 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1566 : Intrinsic::x86_avx512_psll_w_512;
1568 llvm_unreachable("Unexpected size");
1571 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1572 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1573 bool IsImmediate = Name[16] == 'i' ||
1574 (Name.size() > 18 && Name[18] == 'i');
1575 bool IsVariable = Name[16] == 'v';
1576 char Size = Name[16] == '.' ? Name[17] :
1577 Name[17] == '.' ? Name[18] :
1578 Name[18] == '.' ? Name[19] :
1582 if (IsVariable && Name[17] != '.') {
1583 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1584 IID = Intrinsic::x86_avx2_psrlv_q;
1585 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1586 IID = Intrinsic::x86_avx2_psrlv_q_256;
1587 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1588 IID = Intrinsic::x86_avx2_psrlv_d;
1589 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1590 IID = Intrinsic::x86_avx2_psrlv_d_256;
1591 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1592 IID = Intrinsic::x86_avx512_psrlv_w_128;
1593 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1594 IID = Intrinsic::x86_avx512_psrlv_w_256;
1595 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1596 IID = Intrinsic::x86_avx512_psrlv_w_512;
1598 llvm_unreachable("Unexpected size");
1599 } else if (Name.endswith(".128")) {
1600 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1601 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1602 : Intrinsic::x86_sse2_psrl_d;
1603 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1604 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1605 : Intrinsic::x86_sse2_psrl_q;
1606 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1607 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1608 : Intrinsic::x86_sse2_psrl_w;
1610 llvm_unreachable("Unexpected size");
1611 } else if (Name.endswith(".256")) {
1612 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1613 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1614 : Intrinsic::x86_avx2_psrl_d;
1615 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1616 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1617 : Intrinsic::x86_avx2_psrl_q;
1618 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1619 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1620 : Intrinsic::x86_avx2_psrl_w;
1622 llvm_unreachable("Unexpected size");
1624 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1625 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1626 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1627 Intrinsic::x86_avx512_psrl_d_512;
1628 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1629 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1630 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1631 Intrinsic::x86_avx512_psrl_q_512;
1632 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1633 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1634 : Intrinsic::x86_avx512_psrl_w_512;
1636 llvm_unreachable("Unexpected size");
1639 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1640 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1641 bool IsImmediate = Name[16] == 'i' ||
1642 (Name.size() > 18 && Name[18] == 'i');
1643 bool IsVariable = Name[16] == 'v';
1644 char Size = Name[16] == '.' ? Name[17] :
1645 Name[17] == '.' ? Name[18] :
1646 Name[18] == '.' ? Name[19] :
1650 if (IsVariable && Name[17] != '.') {
1651 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1652 IID = Intrinsic::x86_avx2_psrav_d;
1653 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1654 IID = Intrinsic::x86_avx2_psrav_d_256;
1655 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1656 IID = Intrinsic::x86_avx512_psrav_w_128;
1657 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1658 IID = Intrinsic::x86_avx512_psrav_w_256;
1659 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1660 IID = Intrinsic::x86_avx512_psrav_w_512;
1662 llvm_unreachable("Unexpected size");
1663 } else if (Name.endswith(".128")) {
1664 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1665 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1666 : Intrinsic::x86_sse2_psra_d;
1667 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1668 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1669 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1670 Intrinsic::x86_avx512_psra_q_128;
1671 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1672 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1673 : Intrinsic::x86_sse2_psra_w;
1675 llvm_unreachable("Unexpected size");
1676 } else if (Name.endswith(".256")) {
1677 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1678 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1679 : Intrinsic::x86_avx2_psra_d;
1680 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1681 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1682 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1683 Intrinsic::x86_avx512_psra_q_256;
1684 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1685 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1686 : Intrinsic::x86_avx2_psra_w;
1688 llvm_unreachable("Unexpected size");
1690 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1691 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1692 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1693 Intrinsic::x86_avx512_psra_d_512;
1694 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1695 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1696 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1697 Intrinsic::x86_avx512_psra_q_512;
1698 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1699 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1700 : Intrinsic::x86_avx512_psra_w_512;
1702 llvm_unreachable("Unexpected size");
1705 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1706 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1707 Rep = upgradeMaskedMove(Builder, *CI);
1708 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1710 if (Name.endswith("ps.128"))
1711 IID = Intrinsic::x86_avx_vpermilvar_ps;
1712 else if (Name.endswith("pd.128"))
1713 IID = Intrinsic::x86_avx_vpermilvar_pd;
1714 else if (Name.endswith("ps.256"))
1715 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1716 else if (Name.endswith("pd.256"))
1717 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1718 else if (Name.endswith("ps.512"))
1719 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1720 else if (Name.endswith("pd.512"))
1721 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1723 llvm_unreachable("Unexpected vpermilvar intrinsic");
1725 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1726 Rep = Builder.CreateCall(Intrin,
1727 { CI->getArgOperand(0), CI->getArgOperand(1) });
1728 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1729 CI->getArgOperand(2));
1731 llvm_unreachable("Unknown function for CallInst upgrade.");
1735 CI->replaceAllUsesWith(Rep);
1736 CI->eraseFromParent();
1740 std::string Name = CI->getName();
1742 CI->setName(Name + ".old");
1744 switch (NewFn->getIntrinsicID()) {
1746 llvm_unreachable("Unknown function for CallInst upgrade.");
1748 case Intrinsic::arm_neon_vld1:
1749 case Intrinsic::arm_neon_vld2:
1750 case Intrinsic::arm_neon_vld3:
1751 case Intrinsic::arm_neon_vld4:
1752 case Intrinsic::arm_neon_vld2lane:
1753 case Intrinsic::arm_neon_vld3lane:
1754 case Intrinsic::arm_neon_vld4lane:
1755 case Intrinsic::arm_neon_vst1:
1756 case Intrinsic::arm_neon_vst2:
1757 case Intrinsic::arm_neon_vst3:
1758 case Intrinsic::arm_neon_vst4:
1759 case Intrinsic::arm_neon_vst2lane:
1760 case Intrinsic::arm_neon_vst3lane:
1761 case Intrinsic::arm_neon_vst4lane: {
1762 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1763 CI->arg_operands().end());
1764 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1765 CI->eraseFromParent();
1769 case Intrinsic::bitreverse:
1770 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1771 CI->eraseFromParent();
1774 case Intrinsic::ctlz:
1775 case Intrinsic::cttz:
1776 assert(CI->getNumArgOperands() == 1 &&
1777 "Mismatch between function args and call args");
1778 CI->replaceAllUsesWith(Builder.CreateCall(
1779 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
1780 CI->eraseFromParent();
1783 case Intrinsic::objectsize:
1784 CI->replaceAllUsesWith(Builder.CreateCall(
1785 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
1786 CI->eraseFromParent();
1789 case Intrinsic::ctpop: {
1790 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
1791 CI->eraseFromParent();
1795 case Intrinsic::x86_xop_vfrcz_ss:
1796 case Intrinsic::x86_xop_vfrcz_sd:
1797 CI->replaceAllUsesWith(
1798 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
1799 CI->eraseFromParent();
1802 case Intrinsic::x86_xop_vpermil2pd:
1803 case Intrinsic::x86_xop_vpermil2ps:
1804 case Intrinsic::x86_xop_vpermil2pd_256:
1805 case Intrinsic::x86_xop_vpermil2ps_256: {
1806 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1807 CI->arg_operands().end());
1808 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
1809 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
1810 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
1811 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
1812 CI->eraseFromParent();
1816 case Intrinsic::x86_sse41_ptestc:
1817 case Intrinsic::x86_sse41_ptestz:
1818 case Intrinsic::x86_sse41_ptestnzc: {
1819 // The arguments for these intrinsics used to be v4f32, and changed
1820 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
1821 // So, the only thing required is a bitcast for both arguments.
1822 // First, check the arguments have the old type.
1823 Value *Arg0 = CI->getArgOperand(0);
1824 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
1827 // Old intrinsic, add bitcasts
1828 Value *Arg1 = CI->getArgOperand(1);
1830 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1832 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
1833 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1835 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
1836 CI->replaceAllUsesWith(NewCall);
1837 CI->eraseFromParent();
1841 case Intrinsic::x86_sse41_insertps:
1842 case Intrinsic::x86_sse41_dppd:
1843 case Intrinsic::x86_sse41_dpps:
1844 case Intrinsic::x86_sse41_mpsadbw:
1845 case Intrinsic::x86_avx_dp_ps_256:
1846 case Intrinsic::x86_avx2_mpsadbw: {
1847 // Need to truncate the last argument from i32 to i8 -- this argument models
1848 // an inherently 8-bit immediate operand to these x86 instructions.
1849 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1850 CI->arg_operands().end());
1852 // Replace the last argument with a trunc.
1853 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
1855 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
1856 CI->replaceAllUsesWith(NewCall);
1857 CI->eraseFromParent();
1861 case Intrinsic::thread_pointer: {
1862 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
1863 CI->eraseFromParent();
1867 case Intrinsic::invariant_start:
1868 case Intrinsic::invariant_end:
1869 case Intrinsic::masked_load:
1870 case Intrinsic::masked_store: {
1871 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1872 CI->arg_operands().end());
1873 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1874 CI->eraseFromParent();
/// Upgrade the calls to the intrinsic declaration F. F must be non-null
/// (asserted). When UpgradeIntrinsicFunction(F, NewFn) returns true, every
/// CallInst user of F is passed to UpgradeIntrinsicCall together with NewFn,
/// and F->eraseFromParent() is called afterwards.
// NOTE(review): the embedded line numbers jump (1881 -> 1883 -> 1886, and
// nothing follows 1894), so the declaration of NewFn and the closing braces
// are not part of this listing. The erase is presumably still inside the
// `if` -- confirm against the upstream file.
1880 void llvm::UpgradeCallsToIntrinsic(Function *F) {
1881 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1883 // Check if this function should be upgraded and get the replacement function
1886 if (UpgradeIntrinsicFunction(F, NewFn)) {
1887 // Replace all users of the old function with the new function or new
1888 // instructions. This is not a range loop because the call is deleted.
// UI is post-incremented while it is dereferenced, so the iterator has
// already moved on by the time UpgradeIntrinsicCall works on the call.
// Users that are not CallInsts fail the dyn_cast and are left untouched.
1889 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
1890 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
1891 UpgradeIntrinsicCall(CI, NewFn);
1893 // Remove old function, no longer used, from the module.
1894 F->eraseFromParent();
/// Build a struct-path aware TBAA tag from the TBAA node MD.
///
/// Two shapes are constructed below:
///  - MD has exactly three operands: a new node made of operands 0 and 1 is
///    used as both the first and second element of the returned tag, followed
///    by a zero i64 offset.
///  - otherwise: MD itself is used as the first and second element, followed
///    by a zero i64 offset.
// NOTE(review): numbers 1901-1902, 1911 and 1913 are missing from this
// listing. The statement guarded by the first `if` is therefore not visible
// (presumably it returns MD unchanged), and neither is the last element of
// Elts2 (the existing comment calls it "const") -- confirm against upstream.
1898 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
1899 // Check if the tag uses struct-path aware TBAA format: its first operand is
// itself an MDNode and it has at least three operands.
1900 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
1903 auto &Context = MD.getContext();
1904 if (MD.getNumOperands() == 3) {
// Wrap the first two operands of MD in a fresh node.
1905 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
1906 MDNode *ScalarType = MDNode::get(Context, Elts);
1907 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
// The offset is the null (zero) value of the 64-bit integer type.
1908 Metadata *Elts2[] = {ScalarType, ScalarType,
1909 ConstantAsMetadata::get(
1910 Constant::getNullValue(Type::getInt64Ty(Context))),
1912 return MDNode::get(Context, Elts2);
1914 // Create a MDNode <MD, MD, offset 0>
1915 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
1916 Type::getInt64Ty(Context)))};
1917 return MDNode::get(Context, Elts);
/// Rewrite a bitcast instruction between pointers in different address
/// spaces as a ptrtoint / inttoptr pair.
///
/// Opc    - opcode of the cast being examined; only Instruction::BitCast is
///          handled.
/// V      - the value being cast.
/// DestTy - the destination type of the cast.
/// Temp   - output: receives the intermediate PtrToInt instruction.
///
/// When both the source and destination types are pointers (or vectors of
/// pointers) and their address spaces differ, the function stores a
/// PtrToInt cast of V to i64 in Temp and returns an IntToPtr cast of Temp to
/// DestTy.
// NOTE(review): numbers 1923-1925 and everything after 1936 are missing from
// this listing, so the result for a non-bitcast opcode or for matching
// address spaces is not visible here (presumably a null result) -- confirm
// against upstream.
1920 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
1921 Instruction *&Temp) {
1922 if (Opc != Instruction::BitCast)
1926 Type *SrcTy = V->getType();
1927 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1928 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1929 LLVMContext &Context = V->getContext();
1931 // We have no information about target data layout, so we assume that
1932 // the maximum pointer size is 64bit.
1933 Type *MidTy = Type::getInt64Ty(Context);
// First half of the pair: pointer -> i64, handed back through Temp.
1934 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
// Second half of the pair: i64 -> destination pointer type.
1936 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
/// Constant-expression counterpart of the bitcast upgrade: rewrite a bitcast
/// of the constant C between pointers in different address spaces as an
/// inttoptr of a ptrtoint.
///
/// Opc    - opcode of the cast being examined; only Instruction::BitCast is
///          handled.
/// C      - the constant being cast.
/// DestTy - the destination type of the cast.
///
/// When both the source and destination types are pointers (or vectors of
/// pointers) and their address spaces differ, the result is built from
/// ConstantExpr::getPtrToInt(C, i64) wrapped in ConstantExpr::getIntToPtr.
// NOTE(review): numbers 1944-1945 and everything after 1955 are missing from
// this listing. The second argument of getIntToPtr (presumably DestTy) and
// the result for the unhandled cases (presumably null) are not visible here
// -- confirm against upstream.
1942 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
1943 if (Opc != Instruction::BitCast)
1946 Type *SrcTy = C->getType();
1947 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
1948 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
1949 LLVMContext &Context = C->getContext();
1951 // We have no information about target data layout, so we assume that
1952 // the maximum pointer size is 64bit.
1953 Type *MidTy = Type::getInt64Ty(Context);
1955 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
1962 /// Check the debug info version number; if it is out-dated, drop the debug
1963 /// info. Return true if the module is modified.
// NOTE(review): numbers 1967-1968, 1970 and 1973 onwards are missing from
// this listing. The statement under the version check (presumably an early
// `return false`), any guard around the diagnostic, and the final return
// (presumably RetCode) are not visible here -- confirm against upstream.
1964 bool llvm::UpgradeDebugInfo(Module &M) {
// Read the debug metadata version recorded in the module and compare it with
// the version this build expects.
1965 unsigned Version = getDebugMetadataVersionFromModule(M);
1966 if (Version == DEBUG_METADATA_VERSION)
// Strip the debug info and remember the result reported by StripDebugInfo.
1969 bool RetCode = StripDebugInfo(M);
// Report the version that was found through the context's diagnostic handler.
1971 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
1972 M.getContext().diagnose(DiagVersion);
/// Scan the module flags of M and, when the Objective-C flag is present but
/// "Objective-C Class Properties" is not, add the latter with Override
/// behavior.
// NOTE(review): several numbers are missing from this listing (1979-1981,
// 1986, 1988-1989, 1991, 1994, 2002 onwards). Not visible here: the handling
// of a missing flags node, the statements under the operand-count and ID
// checks, the assignment that sets HasObjCFlag (presumably under the
// "Objective-C Image Info Version" comparison), the value passed to
// addModuleFlag, and the return values -- confirm against upstream.
1977 bool llvm::UpgradeModuleFlags(Module &M) {
1978 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
1982 bool HasObjCFlag = false, HasClassProperties = false;
// Walk every module flag entry; operand 1 of an entry is read as its ID.
1983 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
1984 MDNode *Op = ModFlags->getOperand(I);
// Entries with fewer than two operands have no ID operand to inspect.
1985 if (Op->getNumOperands() < 2)
1987 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
1990 if (ID->getString() == "Objective-C Image Info Version")
1992 if (ID->getString() == "Objective-C Class Properties")
1993 HasClassProperties = true;
1995 // "Objective-C Class Properties" was recently added for Objective-C. We
1996 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
1997 // flag of value 0, so we can correctly downgrade this flag when trying to
1998 // link an ObjC bitcode without this module flag with an ObjC bitcode with
1999 // this module flag.
2000 if (HasObjCFlag && !HasClassProperties) {
2001 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
/// Predicate: does MD look like a loop-metadata argument that still uses the
/// old "llvm.vectorizer." tag prefix? The visible checks require MD to be an
/// MDTuple with at least one operand whose first operand is an MDString
/// starting with "llvm.vectorizer.".
// NOTE(review): numbers 2010-2011, 2013 and 2015-2016 are missing from this
// listing. The null checks on T and S and the statements under the guards
// (presumably `return false`) are not visible here -- confirm against
// upstream.
2008 static bool isOldLoopArgument(Metadata *MD) {
// dyn_cast_or_null: MD may be null or some other kind of metadata.
2009 auto *T = dyn_cast_or_null<MDTuple>(MD);
2012 if (T->getNumOperands() < 1)
// The tag, if any, is the first operand of the tuple.
2014 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2017 return S->getString().startswith("llvm.vectorizer.");
/// Translate an old loop tag into its replacement and return it as an
/// MDString created in context C.
///
/// OldTag must start with "llvm.vectorizer." (asserted). The tag
/// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count"; any other
/// tag has the old prefix replaced by "llvm.loop.vectorize.", e.g.
/// "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width".
// NOTE(review): the end of the final expression (number 2029 onwards) is
// missing from this listing; presumably it converts the Twine to a string
// and closes the call -- confirm against upstream.
2020 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2021 StringRef OldPrefix = "llvm.vectorizer.";
2022 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
// Special case: the old unroll tag maps to a differently named new tag.
2024 if (OldTag == "llvm.vectorizer.unroll")
2025 return MDString::get(C, "llvm.loop.interleave.count");
// General case: keep the suffix after the old prefix and put the new prefix
// in front of it.
2027 return MDString::get(
2028 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
/// Upgrade one loop-metadata argument. When MD is an MDTuple whose first
/// operand is an MDString starting with "llvm.vectorizer.", a new MDTuple is
/// returned in which that tag has been replaced by the result of
/// upgradeLoopTag and all remaining operands are copied over unchanged.
// NOTE(review): numbers 2034-2035, 2037, 2039-2040 and 2042-2043 are missing
// from this listing. The null checks on T and OldTag and the statements
// under the guards (presumably returning MD untouched) are not visible here
// -- confirm against upstream.
2032 static Metadata *upgradeLoopArgument(Metadata *MD) {
2033 auto *T = dyn_cast_or_null<MDTuple>(MD);
2036 if (T->getNumOperands() < 1)
2038 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2041 if (!OldTag->getString().startswith("llvm.vectorizer."))
2044 // This has an old tag. Upgrade it.
2045 SmallVector<Metadata *, 8> Ops;
2046 Ops.reserve(T->getNumOperands());
// Operand 0 is the tag: replace it with the upgraded tag string.
2047 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Operands 1..N-1 are carried over as they are.
2048 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2049 Ops.push_back(T->getOperand(I));
2051 return MDTuple::get(T->getContext(), Ops);
/// Upgrade a loop metadata attachment N. When at least one operand of N
/// satisfies isOldLoopArgument, a new MDTuple is returned whose operands are
/// the results of upgradeLoopArgument applied to each operand of N, in order.
// NOTE(review): numbers 2056-2058, 2060-2061 and everything after 2067 are
// missing from this listing. The handling of an N that is not an MDTuple,
// the statement under the none_of guard (presumably returning N unchanged),
// and the closing brace are not visible here -- confirm against upstream.
2054 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2055 auto *T = dyn_cast<MDTuple>(&N);
// Nothing to rewrite when no operand carries an old-style tag.
2059 if (none_of(T->operands(), isOldLoopArgument))
2062 SmallVector<Metadata *, 8> Ops;
2063 Ops.reserve(T->getNumOperands());
// Every operand goes through upgradeLoopArgument, including those that do
// not carry an old tag.
2064 for (Metadata *MD : T->operands())
2065 Ops.push_back(upgradeLoopArgument(MD));
2067 return MDTuple::get(T->getContext(), Ops);