1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/CFG.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DIBuilder.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/Regex.h"
// Rename GV by appending ".old" to its current name.
35 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
37 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
38 // changed their type from v4f32 to v2i64.
// F is the declaration to inspect and IID the intrinsic it maps to; on the
// upgrade path NewFn receives IID's declaration in F's parent module.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, the statements following the checks); those lines are not
// visible in this view and are not described here.
39 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
41 // Check whether this is an old version of the function, which received
// v4f32 arguments, i.e. whose first parameter type is <4 x float>.
43 Type *Arg0Type = F->getFunctionType()->getParamType(0);
44 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
47 // Yes, it's old, replace it with new version.
49 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
54 // arguments have changed their type from i32 to i8.
// F is the declaration to inspect and IID the intrinsic it maps to.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, the statements following the checks); those lines are not
// visible in this view and are not described here.
55 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
57 // Check that the last argument is an i32.
58 Type *LastArgType = F->getFunctionType()->getParamType(
59 F->getFunctionType()->getNumParams() - 1);
// A last parameter that is not a 32-bit integer is not the old signature.
60 if (!LastArgType->isIntegerTy(32))
63 // Move this function aside and map down.
// NewFn becomes IID's declaration in F's parent module.
65 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Name-based test used by UpgradeX86IntrinsicFunction, which calls it with the
// "x86." prefix already stripped from Name. The visible condition ORs together
// exact-name (==) and prefix (startswith) matches.
// NOTE(review): in this view the condition is cut off after the "xop.vpcom"
// term, so the tail of the expression and the return statements are not shown.
69 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
70 // All of the intrinsic matches below should be marked with the llvm
71 // version that started autoupgrading them. At some point in the future we would
72 // like to use this information to remove upgrade code for some older
73 // intrinsics. It is currently undecided how we will determine that future
// point.
75 if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
76 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
77 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
78 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
79 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
80 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
81 Name == "sse.add.ss" || // Added in 4.0
82 Name == "sse2.add.sd" || // Added in 4.0
83 Name == "sse.sub.ss" || // Added in 4.0
84 Name == "sse2.sub.sd" || // Added in 4.0
85 Name == "sse.mul.ss" || // Added in 4.0
86 Name == "sse2.mul.sd" || // Added in 4.0
87 Name == "sse.div.ss" || // Added in 4.0
88 Name == "sse2.div.sd" || // Added in 4.0
89 Name == "sse41.pmaxsb" || // Added in 3.9
90 Name == "sse2.pmaxs.w" || // Added in 3.9
91 Name == "sse41.pmaxsd" || // Added in 3.9
92 Name == "sse2.pmaxu.b" || // Added in 3.9
93 Name == "sse41.pmaxuw" || // Added in 3.9
94 Name == "sse41.pmaxud" || // Added in 3.9
95 Name == "sse41.pminsb" || // Added in 3.9
96 Name == "sse2.pmins.w" || // Added in 3.9
97 Name == "sse41.pminsd" || // Added in 3.9
98 Name == "sse2.pminu.b" || // Added in 3.9
99 Name == "sse41.pminuw" || // Added in 3.9
100 Name == "sse41.pminud" || // Added in 3.9
101 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
102 Name.startswith("avx2.pmax") || // Added in 3.9
103 Name.startswith("avx2.pmin") || // Added in 3.9
104 Name.startswith("avx512.mask.pmax") || // Added in 4.0
105 Name.startswith("avx512.mask.pmin") || // Added in 4.0
106 Name.startswith("avx2.vbroadcast") || // Added in 3.8
107 Name.startswith("avx2.pbroadcast") || // Added in 3.8
108 Name.startswith("avx.vpermil.") || // Added in 3.1
109 Name.startswith("sse2.pshuf") || // Added in 3.9
110 Name.startswith("avx512.pbroadcast") || // Added in 3.9
111 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
112 Name.startswith("avx512.mask.movddup") || // Added in 3.9
113 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
114 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
115 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
116 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
117 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
118 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
119 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
120 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
121 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
122 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
123 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
124 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
125 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
126 Name.startswith("avx512.mask.pand.") || // Added in 3.9
127 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
128 Name.startswith("avx512.mask.por.") || // Added in 3.9
129 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
130 Name.startswith("avx512.mask.and.") || // Added in 3.9
131 Name.startswith("avx512.mask.andn.") || // Added in 3.9
132 Name.startswith("avx512.mask.or.") || // Added in 3.9
133 Name.startswith("avx512.mask.xor.") || // Added in 3.9
134 Name.startswith("avx512.mask.padd.") || // Added in 4.0
135 Name.startswith("avx512.mask.psub.") || // Added in 4.0
136 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
137 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
138 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
139 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
140 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
141 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
142 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
143 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
144 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
145 Name == "avx512.mask.add.pd.128" || // Added in 4.0
146 Name == "avx512.mask.add.pd.256" || // Added in 4.0
147 Name == "avx512.mask.add.ps.128" || // Added in 4.0
148 Name == "avx512.mask.add.ps.256" || // Added in 4.0
149 Name == "avx512.mask.div.pd.128" || // Added in 4.0
150 Name == "avx512.mask.div.pd.256" || // Added in 4.0
151 Name == "avx512.mask.div.ps.128" || // Added in 4.0
152 Name == "avx512.mask.div.ps.256" || // Added in 4.0
153 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
154 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
155 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
156 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
157 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
158 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
159 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
160 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
161 Name == "avx512.mask.max.pd.128" || // Added in 5.0
162 Name == "avx512.mask.max.pd.256" || // Added in 5.0
163 Name == "avx512.mask.max.ps.128" || // Added in 5.0
164 Name == "avx512.mask.max.ps.256" || // Added in 5.0
165 Name == "avx512.mask.min.pd.128" || // Added in 5.0
166 Name == "avx512.mask.min.pd.256" || // Added in 5.0
167 Name == "avx512.mask.min.ps.128" || // Added in 5.0
168 Name == "avx512.mask.min.ps.256" || // Added in 5.0
169 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
170 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
171 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
172 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
173 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
174 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
175 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
176 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
177 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
178 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
179 Name.startswith("avx512.mask.pslli") || // Added in 4.0
180 Name.startswith("avx512.mask.psrai") || // Added in 4.0
181 Name.startswith("avx512.mask.psrli") || // Added in 4.0
182 Name.startswith("avx512.mask.psllv") || // Added in 4.0
183 Name.startswith("avx512.mask.psrav") || // Added in 4.0
184 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
185 Name.startswith("sse41.pmovsx") || // Added in 3.8
186 Name.startswith("sse41.pmovzx") || // Added in 3.9
187 Name.startswith("avx2.pmovsx") || // Added in 3.9
188 Name.startswith("avx2.pmovzx") || // Added in 3.9
189 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
190 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
191 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
192 Name == "sse2.cvtdq2pd" || // Added in 3.9
193 Name == "sse2.cvtps2pd" || // Added in 3.9
194 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
195 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
196 Name.startswith("avx.vinsertf128.") || // Added in 3.7
197 Name == "avx2.vinserti128" || // Added in 3.7
198 Name.startswith("avx512.mask.insert") || // Added in 4.0
199 Name.startswith("avx.vextractf128.") || // Added in 3.7
200 Name == "avx2.vextracti128" || // Added in 3.7
201 Name.startswith("avx512.mask.vextract") || // Added in 4.0
202 Name.startswith("sse4a.movnt.") || // Added in 3.9
203 Name.startswith("avx.movnt.") || // Added in 3.2
204 Name.startswith("avx512.storent.") || // Added in 3.9
205 Name == "sse41.movntdqa" || // Added in 5.0
206 Name == "avx2.movntdqa" || // Added in 5.0
207 Name == "avx512.movntdqa" || // Added in 5.0
208 Name == "sse2.storel.dq" || // Added in 3.9
209 Name.startswith("sse.storeu.") || // Added in 3.9
210 Name.startswith("sse2.storeu.") || // Added in 3.9
211 Name.startswith("avx.storeu.") || // Added in 3.9
212 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
213 Name.startswith("avx512.mask.store.p") || // Added in 3.9
214 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
215 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
216 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
217 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
218 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
219 Name.startswith("avx512.mask.load.") || // Added in 3.9
220 Name == "sse42.crc32.64.8" || // Added in 3.4
221 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
222 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
223 Name.startswith("avx512.mask.valign.") || // Added in 4.0
224 Name.startswith("sse2.psll.dq") || // Added in 3.7
225 Name.startswith("sse2.psrl.dq") || // Added in 3.7
226 Name.startswith("avx2.psll.dq") || // Added in 3.7
227 Name.startswith("avx2.psrl.dq") || // Added in 3.7
228 Name.startswith("avx512.psll.dq") || // Added in 3.9
229 Name.startswith("avx512.psrl.dq") || // Added in 3.9
230 Name == "sse41.pblendw" || // Added in 3.7
231 Name.startswith("sse41.blendp") || // Added in 3.7
232 Name.startswith("avx.blend.p") || // Added in 3.7
233 Name == "avx2.pblendw" || // Added in 3.7
234 Name.startswith("avx2.pblendd.") || // Added in 3.7
235 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
236 Name == "avx2.vbroadcasti128" || // Added in 3.7
237 Name == "xop.vpcmov" || // Added in 3.8
238 Name == "xop.vpcmov.256" || // Added in 5.0
239 Name.startswith("avx512.mask.move.s") || // Added in 4.0
240 Name.startswith("avx512.cvtmask2") || // Added in 5.0
241 (Name.startswith("xop.vpcom") && // Added in 3.2
// Handle upgrading of x86 intrinsic declarations. Name is expected to begin
// with "x86." (the caller has already removed "llvm."); the checks below then
// operate on the name with "x86." removed as well.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, several statements and returns); those lines are not visible
// in this view and are not described here.
248 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
250 // Only handle intrinsics that start with "x86.".
251 if (!Name.startswith("x86."))
253 // Remove "x86." prefix.
254 Name = Name.substr(4);
// Names matched purely by the name test in ShouldUpgradeX86Intrinsic.
256 if (ShouldUpgradeX86Intrinsic(F, Name)) {
261 // SSE4.1 ptest functions may have an old signature.
262 if (Name.startswith("sse41.ptest")) { // Added in 3.2
// 11 is the length of "sse41.ptest"; the remaining suffix selects the variant.
263 if (Name.substr(11) == "c")
264 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
265 if (Name.substr(11) == "z")
266 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
267 if (Name.substr(11) == "nzc")
268 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
270 // Several blend and other instructions with masks used the wrong number of
// bits.
272 if (Name == "sse41.insertps") // Added in 3.6
273 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
275 if (Name == "sse41.dppd") // Added in 3.6
276 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
278 if (Name == "sse41.dpps") // Added in 3.6
279 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
281 if (Name == "sse41.mpsadbw") // Added in 3.6
282 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
284 if (Name == "avx.dp.ps.256") // Added in 3.6
285 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
287 if (Name == "avx2.mpsadbw") // Added in 3.6
288 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
291 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
// Only the two-parameter declarations are remapped.
292 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
294 NewFn = Intrinsic::getDeclaration(F->getParent(),
295 Intrinsic::x86_xop_vfrcz_ss);
298 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
300 NewFn = Intrinsic::getDeclaration(F->getParent(),
301 Intrinsic::x86_xop_vfrcz_sd);
304 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
305 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
// The index operand is the third parameter.
306 auto Idx = F->getFunctionType()->getParamType(2);
307 if (Idx->isFPOrFPVectorTy()) {
// Select the pd/ps and 128/256-bit variant from the index operand's element
// width and total width.
309 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
310 unsigned EltSize = Idx->getScalarSizeInBits();
311 Intrinsic::ID Permil2ID;
312 if (EltSize == 64 && IdxSize == 128)
313 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
314 else if (EltSize == 32 && IdxSize == 128)
315 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
316 else if (EltSize == 64 && IdxSize == 256)
317 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
// NOTE(review): presumably the fallback (else) case; the line introducing it
// is not visible in this view.
319 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
320 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
// Inspect the declaration F by name and, for the recognized old intrinsic
// forms below, set NewFn to the replacement declaration.
// NOTE(review): the embedded numbering has gaps throughout this function
// (switch/case scaffolding, rename calls, returns, closing braces); those
// lines are not visible in this view and are not described here.
328 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
329 assert(F && "Illegal to upgrade a non-existent Function.");
331 // Quickly eliminate it, if it's not a candidate.
332 StringRef Name = F->getName();
// Candidates must start with "llvm." and be longer than 8 characters.
333 if (Name.size() <= 8 || !Name.startswith("llvm."))
335 Name = Name.substr(5); // Strip off "llvm."
// ARM/AArch64 rbit maps to the generic bitreverse intrinsic, overloaded on the
// type of the first argument.
340 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
341 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
342 F->arg_begin()->getType());
345 if (Name.startswith("arm.neon.vclz")) {
347 F->arg_begin()->getType(),
348 Type::getInt1Ty(F->getContext())
350 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
351 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.* instead; 14 is the length of "arm.neon.vclz.", so substr(14)
// keeps only the type suffix.
353 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
354 NewFn = Function::Create(fType, F->getLinkage(),
355 "llvm.ctlz." + Name.substr(14), F->getParent());
// NEON vcnt maps to the generic ctpop intrinsic.
358 if (Name.startswith("arm.neon.vcnt")) {
359 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
360 F->arg_begin()->getType());
// NEON vld1-4 / vld2-4lane names that end right after the vector type suffix:
// the replacement keeps F's signature and appends ".p0i8" to the name.
363 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
364 if (vldRegex.match(Name)) {
365 auto fArgs = F->getFunctionType()->params();
366 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
367 // Can't use Intrinsic::getDeclaration here as the return types might
368 // then only be structurally equal.
369 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
370 NewFn = Function::Create(fType, F->getLinkage(),
371 "llvm." + Name + ".p0i8", F->getParent());
// NEON vst1-4 / vst2-4lane names that end right after the vector type suffix.
374 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
375 if (vstRegex.match(Name)) {
376 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
377 Intrinsic::arm_neon_vst2,
378 Intrinsic::arm_neon_vst3,
379 Intrinsic::arm_neon_vst4};
381 static const Intrinsic::ID StoreLaneInts[] = {
382 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
383 Intrinsic::arm_neon_vst4lane
386 auto fArgs = F->getFunctionType()->params();
// The new declaration is overloaded on the first two parameter types.
387 Type *Tys[] = {fArgs[0], fArgs[1]};
// The table entry is chosen from the parameter count: count - 3 indexes
// StoreInts and count - 5 indexes StoreLaneInts.
388 if (Name.find("lane") == StringRef::npos)
389 NewFn = Intrinsic::getDeclaration(F->getParent(),
390 StoreInts[fArgs.size() - 3], Tys);
392 NewFn = Intrinsic::getDeclaration(F->getParent(),
393 StoreLaneInts[fArgs.size() - 5], Tys);
// Target-specific thread pointer intrinsics map to the generic thread_pointer.
396 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
397 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
// One-parameter ctlz/cttz declarations are remapped to the ctlz/cttz
// declaration overloaded on the argument type.
404 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
406 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
407 F->arg_begin()->getType());
410 if (Name.startswith("cttz.") && F->arg_size() == 1) {
412 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
413 F->arg_begin()->getType());
// lifetime.start / invariant.start: remangle when F's name differs from the
// name derived from the type of parameter 1.
420 bool IsLifetimeStart = Name.startswith("lifetime.start");
421 if (IsLifetimeStart || Name.startswith("invariant.start")) {
422 Intrinsic::ID ID = IsLifetimeStart ?
423 Intrinsic::lifetime_start : Intrinsic::invariant_start;
424 auto Args = F->getFunctionType()->params();
425 Type* ObjectPtr[1] = {Args[1]};
426 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
428 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// lifetime.end / invariant.end: same check, using parameter 1 for lifetime.end
// and parameter 2 for invariant.end.
433 bool IsLifetimeEnd = Name.startswith("lifetime.end");
434 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
435 Intrinsic::ID ID = IsLifetimeEnd ?
436 Intrinsic::lifetime_end : Intrinsic::invariant_end;
438 auto Args = F->getFunctionType()->params();
439 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
440 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
442 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
// masked.load: the expected name is derived from the return type and the type
// of the first argument.
449 if (Name.startswith("masked.load.")) {
450 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
451 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
453 NewFn = Intrinsic::getDeclaration(F->getParent(),
454 Intrinsic::masked_load,
// masked.store: the expected name is derived from the first two parameter types.
459 if (Name.startswith("masked.store.")) {
460 auto Args = F->getFunctionType()->params();
461 Type *Tys[] = { Args[0], Args[1] };
462 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
464 NewFn = Intrinsic::getDeclaration(F->getParent(),
465 Intrinsic::masked_store,
470 // Renaming gather/scatter intrinsics with no address space overloading
471 // to the new overload which includes an address space
472 if (Name.startswith("masked.gather.")) {
473 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
474 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
476 NewFn = Intrinsic::getDeclaration(F->getParent(),
477 Intrinsic::masked_gather, Tys);
481 if (Name.startswith("masked.scatter.")) {
482 auto Args = F->getFunctionType()->params();
483 Type *Tys[] = {Args[0], Args[1]};
484 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
486 NewFn = Intrinsic::getDeclaration(F->getParent(),
487 Intrinsic::masked_scatter, Tys);
494 if (Name.startswith("nvvm.")) {
// Drop the "nvvm." prefix (5 characters) before matching.
495 Name = Name.substr(5);
497 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
498 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
499 .Cases("brev32", "brev64", Intrinsic::bitreverse)
500 .Case("clz.i", Intrinsic::ctlz)
501 .Case("popc.i", Intrinsic::ctpop)
502 .Default(Intrinsic::not_intrinsic);
// Only one-parameter declarations are remapped; the replacement is overloaded
// on F's return type.
503 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
504 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
505 {F->getReturnType()});
509 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
510 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
512 // TODO: We could add lohi.i2d.
513 bool Expand = StringSwitch<bool>(Name)
514 .Cases("abs.i", "abs.ll", true)
515 .Cases("clz.ll", "popc.ll", "h2f", true)
516 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
517 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
527 // We only need to change the name to match the mangling including the
// NOTE(review): the end of the sentence above is not visible in this view.
529 if (Name.startswith("objectsize.")) {
530 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
// Upgrade two-parameter declarations, and any whose name does not match the
// mangling derived from Tys.
531 if (F->arg_size() == 2 ||
532 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
534 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
542 if (Name == "stackprotectorcheck") {
// x86-specific names are delegated; Name here still includes "x86." and has
// only had "llvm." removed.
549 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
552 // Remangle our intrinsic since we upgrade the mangling
553 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
554 if (Result != None) {
555 NewFn = Result.getValue();
559 // This may not belong here. This function is effectively being overloaded
560 // to both detect an intrinsic which needs upgrading, and to provide the
561 // upgraded form of the intrinsic. We should perhaps have two separate
562 // functions for this.
// Public entry point: runs UpgradeIntrinsicFunction1 on F and then refreshes
// intrinsic attributes from the intrinsic ID.
// NOTE(review): the embedded numbering has gaps inside this function (e.g.
// between the comment and the attribute update, and the return); those lines
// are not visible in this view and are not described here.
566 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
568 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// An upgrade must never map a function onto itself.
569 assert(F != NewFn && "Intrinsic function upgraded to the same function");
571 // Upgrade intrinsic attributes. This does not change the function.
// Skipped when getIntrinsicID() yields 0.
574 if (Intrinsic::ID id = F->getIntrinsicID())
575 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Upgrade hook for global variables; per the comment below it currently has no
// work to do.
// NOTE(review): the return statement and closing brace are not visible in
// this view.
579 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
580 // Nothing to do yet.
584 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
// Op is the vector operand and Shift the shift amount in bytes; the result has
// Op's type.
// NOTE(review): the embedded numbering has gaps inside this function (the
// shift-range check, the Idxs declaration, some conditions and braces); those
// lines are not visible in this view and are not described here.
586 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
587 Value *Op, unsigned Shift) {
588 Type *ResultTy = Op->getType();
// Eight bytes per 64-bit element, so NumElts is the vector size in bytes.
589 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
591 // Bitcast from a 64-bit element type to a byte element type.
592 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
593 Op = Builder.CreateBitCast(Op, VecTy, "cast");
595 // We'll be shuffling in zeroes.
596 Value *Res = Constant::getNullValue(VecTy);
598 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
599 // we'll just return the zero vector.
602 // 256/512-bit version is split into 2/4 16-byte lanes.
// l is the byte offset of the current lane, i the byte within the lane.
603 for (unsigned l = 0; l != NumElts; l += 16)
604 for (unsigned i = 0; i != 16; ++i) {
605 unsigned Idx = NumElts + i - Shift;
607 Idx -= NumElts - 16; // end of lane, switch operand.
608 Idxs[l + i] = Idx + l;
// Shuffle operands are (zero vector, Op).
611 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
614 // Bitcast back to a 64-bit element type.
615 return Builder.CreateBitCast(Res, ResultTy, "cast");
618 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
// Op is the vector operand; the result has Op's type.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, the shift-range check, the Idxs declaration, some conditions
// and braces); those lines are not visible in this view and are not described
// here.
620 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
622 Type *ResultTy = Op->getType();
// Eight bytes per 64-bit element, so NumElts is the vector size in bytes.
623 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
625 // Bitcast from a 64-bit element type to a byte element type.
626 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
627 Op = Builder.CreateBitCast(Op, VecTy, "cast");
629 // We'll be shuffling in zeroes.
630 Value *Res = Constant::getNullValue(VecTy);
632 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
633 // we'll just return the zero vector.
636 // 256/512-bit version is split into 2/4 16-byte lanes.
// l is the byte offset of the current lane, i the byte within the lane.
637 for (unsigned l = 0; l != NumElts; l += 16)
638 for (unsigned i = 0; i != 16; ++i) {
639 unsigned Idx = i + Shift;
641 Idx += NumElts - 16; // end of lane, switch operand.
642 Idxs[l + i] = Idx + l;
// Shuffle operands are (Op, zero vector), the reverse of the PSLLDQ helper.
645 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
648 // Bitcast back to a 64-bit element type.
649 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Convert an integer mask value into a vector of i1 with one element per mask
// bit, then narrow it toward NumElts elements with a shuffle.
// Mask must have an integer type (cast<IntegerType> rejects anything else).
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, the narrowing condition, the Indices declaration and fill,
// the return); those lines are not visible in this view and are not described
// here.
652 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
// The i1 vector has as many elements as the integer mask has bits.
654 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
655 cast<IntegerType>(Mask->getType())->getBitWidth());
656 Mask = Builder.CreateBitCast(Mask, MaskTy);
658 // If we have less than 8 elements, then the starting mask was an i8 and
659 // we need to extract down to the right number of elements.
662 for (unsigned i = 0; i != NumElts; ++i)
664 Mask = Builder.CreateShuffleVector(Mask, Mask,
665 makeArrayRef(Indices, NumElts),
// Emit a per-element select between Op0 and Op1 controlled by an integer Mask:
// the mask is converted to an i1 vector with as many elements as Op0.
// NOTE(review): the statement executed for an all-ones constant mask is not
// visible in this view; presumably Op0 is returned without a select — confirm.
672 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
673 Value *Op0, Value *Op1) {
674 // If the mask is all ones just emit the align operation.
// (This helper has other callers besides the align upgrade, so "the align
// operation" here means the unmasked result, Op0.)
675 if (const auto *C = dyn_cast<Constant>(Mask))
676 if (C->isAllOnesValue())
679 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
680 return Builder.CreateSelect(Mask, Op0, Op1);
683 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
684 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
685 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// The alignment is emitted as a shuffle of (Op1, Op0) and then blended with
// Passthru under Mask via EmitX86Select.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature, the conditions guarding several statements, closing braces);
// those lines are not visible in this view and are not described here.
686 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
687 Value *Op1, Value *Shift,
688 Value *Passthru, Value *Mask,
// Shift must be a ConstantInt; cast<> does not accept anything else.
690 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
692 unsigned NumElts = Op0->getType()->getVectorNumElements();
693 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
694 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
695 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
697 // Mask the immediate for VALIGN.
// NumElts is a power of two (asserted above), so this is ShiftVal % NumElts.
699 ShiftVal &= (NumElts - 1);
701 // If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
704 return llvm::Constant::getNullValue(Op0->getType());
706 // If palignr is shifting the pair of input vectors more than one lane,
707 // but less than two lanes, convert to shifting in zeroes.
711 Op0 = llvm::Constant::getNullValue(Op0->getType());
// 64 entries is the largest index count this buffer can hold.
714 uint32_t Indices[64];
715 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// l is the element offset of the current 16-element lane.
716 for (unsigned l = 0; l < NumElts; l += 16) {
717 for (unsigned i = 0; i != 16; ++i) {
718 unsigned Idx = ShiftVal + i;
719 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
720 Idx += NumElts - 16; // End of lane, switch operand.
721 Indices[l + i] = Idx + l;
725 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
726 makeArrayRef(Indices, NumElts),
729 return EmitX86Select(Builder, Mask, Align, Passthru);
// Emit a store of the vector Data to Ptr under an integer Mask: a plain
// aligned store when Mask is an all-ones constant, otherwise a masked store.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature and the start of the alignment declaration); those lines are
// not visible in this view and are not described here.
732 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
733 Value *Ptr, Value *Data, Value *Mask,
735 // Cast the pointer to the right type.
736 Ptr = Builder.CreateBitCast(Ptr,
737 llvm::PointerType::getUnqual(Data->getType()));
// Alignment value: the vector's size in bytes when Aligned, otherwise 1.
739 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
741 // If the mask is all ones just emit a regular store.
742 if (const auto *C = dyn_cast<Constant>(Mask))
743 if (C->isAllOnesValue())
744 return Builder.CreateAlignedStore(Data, Ptr, Align);
746 // Convert the mask from an integer type to a vector of i1.
747 unsigned NumElts = Data->getType()->getVectorNumElements();
748 Mask = getX86MaskVec(Builder, Mask, NumElts);
749 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Emit a load from Ptr of a vector with Passthru's type under an integer Mask:
// a plain aligned load when Mask is an all-ones constant, otherwise a masked
// load that uses Passthru as its pass-through operand.
// NOTE(review): the embedded numbering has gaps inside this function (rest of
// the signature and the start of the alignment declaration); those lines are
// not visible in this view and are not described here.
752 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
753 Value *Ptr, Value *Passthru, Value *Mask,
755 // Cast the pointer to the right type.
756 Ptr = Builder.CreateBitCast(Ptr,
757 llvm::PointerType::getUnqual(Passthru->getType()));
// Alignment value: the vector's size in bytes when Aligned, otherwise 1.
759 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
761 // If the mask is all ones just emit a regular load.
762 if (const auto *C = dyn_cast<Constant>(Mask))
763 if (C->isAllOnesValue())
764 return Builder.CreateAlignedLoad(Ptr, Align);
766 // Convert the mask from an integer type to a vector of i1.
767 unsigned NumElts = Passthru->getType()->getVectorNumElements();
768 Mask = getX86MaskVec(Builder, Mask, NumElts);
769 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Expand an integer min/max intrinsic call into an icmp with predicate Pred on
// the first two operands followed by a select of the same operands.
// NOTE(review): the return statement and closing brace are not visible in
// this view.
772 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
773 ICmpInst::Predicate Pred) {
774 Value *Op0 = CI.getArgOperand(0);
775 Value *Op1 = CI.getArgOperand(1);
776 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
777 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
// Four-operand calls are the masked form: operand 3 is the mask and operand 2
// the value used where the mask is clear.
779 if (CI.getNumArgOperands() == 4)
780 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Expand a masked vector compare: icmp the first two operands with Pred, AND
// the result with the mask (operand 2) unless it is an all-ones constant, and
// return the i1 vector bitcast to an integer of max(NumElts, 8) bits.
// NOTE(review): the embedded numbering has gaps inside this function (the
// condition guarding the widening shuffle, the Indices declaration and first
// loop body, the last shuffle argument); those lines are not visible in this
// view and are not described here.
785 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
786 ICmpInst::Predicate Pred) {
787 Value *Op0 = CI.getArgOperand(0);
788 unsigned NumElts = Op0->getType()->getVectorNumElements();
789 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
791 Value *Mask = CI.getArgOperand(2);
792 const auto *C = dyn_cast<Constant>(Mask);
793 if (!C || !C->isAllOnesValue())
794 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
798 for (unsigned i = 0; i != NumElts; ++i)
// Positions NumElts..7 use indices >= NumElts, which address the second
// shuffle operand (the all-zero vector), padding the result with zero bits.
800 for (unsigned i = NumElts; i != 8; ++i)
801 Indices[i] = NumElts + i % NumElts;
802 Cmp = Builder.CreateShuffleVector(Cmp,
803 Constant::getNullValue(Cmp->getType()),
806 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
807 std::max(NumElts, 8U)));
810 // Replace a masked intrinsic with an older unmasked intrinsic.
// The unmasked intrinsic IID is called with the first two operands of CI, and
// its result is blended with operand 2 under the mask in operand 3.
// NOTE(review): the rest of the signature and the closing brace are not
// visible in this view.
811 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
813 Function *F = CI.getCalledFunction();
814 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
815 Value *Rep = Builder.CreateCall(Intrin,
816 { CI.getArgOperand(0), CI.getArgOperand(1) });
817 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
// Expand a masked scalar move: element 0 of the result is element 0 of B when
// bit 0 of the mask is set and element 0 of Src otherwise; all other elements
// come from A.
// NOTE(review): the closing brace is not visible in this view.
820 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
821 Value* A = CI.getArgOperand(0);
822 Value* B = CI.getArgOperand(1);
823 Value* Src = CI.getArgOperand(2);
824 Value* Mask = CI.getArgOperand(3);
// Test only the lowest bit of the mask, using an 8-bit constant 1.
826 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
827 Value* Cmp = Builder.CreateIsNotNull(AndNode);
828 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
829 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
830 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
831 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
// Expand a mask-to-vector conversion: operand 0 is turned into an i1 vector
// with one element per element of the call's result type, then sign-extended
// to that result type.
// NOTE(review): the closing brace is not visible in this view.
835 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
836 Value* Op = CI.getArgOperand(0);
837 Type* ReturnOp = CI.getType();
838 unsigned NumElts = CI.getType()->getVectorNumElements();
839 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
840 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
843 /// Upgrade a call to an old intrinsic. All argument and return casting must be
844 /// provided to seamlessly integrate with existing context.
845 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
846 Function *F = CI->getCalledFunction();
847 LLVMContext &C = CI->getContext();
848 IRBuilder<> Builder(C);
849 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
851 assert(F && "Intrinsic call is not direct?");
854 // Get the Function's name.
855 StringRef Name = F->getName();
857 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
858 Name = Name.substr(5);
860 bool IsX86 = Name.startswith("x86.");
862 Name = Name.substr(4);
863 bool IsNVVM = Name.startswith("nvvm.");
865 Name = Name.substr(5);
867 if (IsX86 && Name.startswith("sse4a.movnt.")) {
868 Module *M = F->getParent();
869 SmallVector<Metadata *, 1> Elts;
871 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
872 MDNode *Node = MDNode::get(C, Elts);
874 Value *Arg0 = CI->getArgOperand(0);
875 Value *Arg1 = CI->getArgOperand(1);
877 // Nontemporal (unaligned) store of the 0'th element of the float/double
879 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
880 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
881 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
883 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
885 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
886 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
889 CI->eraseFromParent();
893 if (IsX86 && (Name.startswith("avx.movnt.") ||
894 Name.startswith("avx512.storent."))) {
895 Module *M = F->getParent();
896 SmallVector<Metadata *, 1> Elts;
898 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
899 MDNode *Node = MDNode::get(C, Elts);
901 Value *Arg0 = CI->getArgOperand(0);
902 Value *Arg1 = CI->getArgOperand(1);
904 // Convert the type of the pointer to a pointer to the stored type.
905 Value *BC = Builder.CreateBitCast(Arg0,
906 PointerType::getUnqual(Arg1->getType()),
908 VectorType *VTy = cast<VectorType>(Arg1->getType());
909 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
910 VTy->getBitWidth() / 8);
911 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
914 CI->eraseFromParent();
918 if (IsX86 && Name == "sse2.storel.dq") {
919 Value *Arg0 = CI->getArgOperand(0);
920 Value *Arg1 = CI->getArgOperand(1);
922 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
923 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
924 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
925 Value *BC = Builder.CreateBitCast(Arg0,
926 PointerType::getUnqual(Elt->getType()),
928 Builder.CreateAlignedStore(Elt, BC, 1);
931 CI->eraseFromParent();
935 if (IsX86 && (Name.startswith("sse.storeu.") ||
936 Name.startswith("sse2.storeu.") ||
937 Name.startswith("avx.storeu."))) {
938 Value *Arg0 = CI->getArgOperand(0);
939 Value *Arg1 = CI->getArgOperand(1);
941 Arg0 = Builder.CreateBitCast(Arg0,
942 PointerType::getUnqual(Arg1->getType()),
944 Builder.CreateAlignedStore(Arg1, Arg0, 1);
947 CI->eraseFromParent();
951 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
952 // "avx512.mask.storeu." or "avx512.mask.store."
953 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
954 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
955 CI->getArgOperand(2), Aligned);
958 CI->eraseFromParent();
963 // Upgrade packed integer vector compare intrinsics to compare instructions.
964 if (IsX86 && (Name.startswith("sse2.pcmp") ||
965 Name.startswith("avx2.pcmp"))) {
966 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
967 bool CmpEq = Name[9] == 'e';
968 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
969 CI->getArgOperand(0), CI->getArgOperand(1));
970 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
971 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
972 Type *I32Ty = Type::getInt32Ty(C);
973 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
974 ConstantInt::get(I32Ty, 0));
975 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
976 ConstantInt::get(I32Ty, 0));
977 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
978 Builder.CreateFAdd(Elt0, Elt1),
979 ConstantInt::get(I32Ty, 0));
980 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
981 Type *I32Ty = Type::getInt32Ty(C);
982 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
983 ConstantInt::get(I32Ty, 0));
984 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
985 ConstantInt::get(I32Ty, 0));
986 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
987 Builder.CreateFSub(Elt0, Elt1),
988 ConstantInt::get(I32Ty, 0));
989 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
990 Type *I32Ty = Type::getInt32Ty(C);
991 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
992 ConstantInt::get(I32Ty, 0));
993 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
994 ConstantInt::get(I32Ty, 0));
995 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
996 Builder.CreateFMul(Elt0, Elt1),
997 ConstantInt::get(I32Ty, 0));
998 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
999 Type *I32Ty = Type::getInt32Ty(C);
1000 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1001 ConstantInt::get(I32Ty, 0));
1002 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1003 ConstantInt::get(I32Ty, 0));
1004 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1005 Builder.CreateFDiv(Elt0, Elt1),
1006 ConstantInt::get(I32Ty, 0));
1007 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1008 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1009 bool CmpEq = Name[16] == 'e';
1010 Rep = upgradeMaskedCompare(Builder, *CI,
1011 CmpEq ? ICmpInst::ICMP_EQ
1012 : ICmpInst::ICMP_SGT);
1013 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1014 Name == "sse2.pmaxs.w" ||
1015 Name == "sse41.pmaxsd" ||
1016 Name.startswith("avx2.pmaxs") ||
1017 Name.startswith("avx512.mask.pmaxs"))) {
1018 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1019 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1020 Name == "sse41.pmaxuw" ||
1021 Name == "sse41.pmaxud" ||
1022 Name.startswith("avx2.pmaxu") ||
1023 Name.startswith("avx512.mask.pmaxu"))) {
1024 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1025 } else if (IsX86 && (Name == "sse41.pminsb" ||
1026 Name == "sse2.pmins.w" ||
1027 Name == "sse41.pminsd" ||
1028 Name.startswith("avx2.pmins") ||
1029 Name.startswith("avx512.mask.pmins"))) {
1030 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1031 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1032 Name == "sse41.pminuw" ||
1033 Name == "sse41.pminud" ||
1034 Name.startswith("avx2.pminu") ||
1035 Name.startswith("avx512.mask.pminu"))) {
1036 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1037 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1038 Name == "sse2.cvtps2pd" ||
1039 Name == "avx.cvtdq2.pd.256" ||
1040 Name == "avx.cvt.ps2.pd.256" ||
1041 Name.startswith("avx512.mask.cvtdq2pd.") ||
1042 Name.startswith("avx512.mask.cvtudq2pd."))) {
1043 // Lossless i32/float to double conversion.
1044 // Extract the bottom elements if necessary and convert to double vector.
1045 Value *Src = CI->getArgOperand(0);
1046 VectorType *SrcTy = cast<VectorType>(Src->getType());
1047 VectorType *DstTy = cast<VectorType>(CI->getType());
1048 Rep = CI->getArgOperand(0);
1050 unsigned NumDstElts = DstTy->getNumElements();
1051 if (NumDstElts < SrcTy->getNumElements()) {
1052 assert(NumDstElts == 2 && "Unexpected vector size");
1053 uint32_t ShuffleMask[2] = { 0, 1 };
1054 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1058 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1059 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1061 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1062 else if (UInt2Double)
1063 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1065 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1067 if (CI->getNumArgOperands() == 3)
1068 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1069 CI->getArgOperand(1));
1070 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1071 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1072 CI->getArgOperand(1), CI->getArgOperand(2),
1074 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1075 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1076 CI->getArgOperand(1),CI->getArgOperand(2),
1078 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1079 Intrinsic::ID intID;
1080 if (Name.endswith("ub"))
1081 intID = Intrinsic::x86_xop_vpcomub;
1082 else if (Name.endswith("uw"))
1083 intID = Intrinsic::x86_xop_vpcomuw;
1084 else if (Name.endswith("ud"))
1085 intID = Intrinsic::x86_xop_vpcomud;
1086 else if (Name.endswith("uq"))
1087 intID = Intrinsic::x86_xop_vpcomuq;
1088 else if (Name.endswith("b"))
1089 intID = Intrinsic::x86_xop_vpcomb;
1090 else if (Name.endswith("w"))
1091 intID = Intrinsic::x86_xop_vpcomw;
1092 else if (Name.endswith("d"))
1093 intID = Intrinsic::x86_xop_vpcomd;
1094 else if (Name.endswith("q"))
1095 intID = Intrinsic::x86_xop_vpcomq;
1097 llvm_unreachable("Unknown suffix");
1099 Name = Name.substr(9); // strip off "xop.vpcom"
1101 if (Name.startswith("lt"))
1103 else if (Name.startswith("le"))
1105 else if (Name.startswith("gt"))
1107 else if (Name.startswith("ge"))
1109 else if (Name.startswith("eq"))
1111 else if (Name.startswith("ne"))
1113 else if (Name.startswith("false"))
1115 else if (Name.startswith("true"))
1118 llvm_unreachable("Unknown condition");
1120 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1122 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1123 Builder.getInt8(Imm)});
1124 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1125 Value *Sel = CI->getArgOperand(2);
1126 Value *NotSel = Builder.CreateNot(Sel);
1127 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1128 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1129 Rep = Builder.CreateOr(Sel0, Sel1);
1130 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1131 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1132 Intrinsic::x86_sse42_crc32_32_8);
1133 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1134 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1135 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1136 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1137 // Replace broadcasts with a series of insertelements.
1138 Type *VecTy = CI->getType();
1139 Type *EltTy = VecTy->getVectorElementType();
1140 unsigned EltNum = VecTy->getVectorNumElements();
1141 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1142 EltTy->getPointerTo());
1143 Value *Load = Builder.CreateLoad(EltTy, Cast);
1144 Type *I32Ty = Type::getInt32Ty(C);
1145 Rep = UndefValue::get(VecTy);
1146 for (unsigned I = 0; I < EltNum; ++I)
1147 Rep = Builder.CreateInsertElement(Rep, Load,
1148 ConstantInt::get(I32Ty, I));
1149 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1150 Name.startswith("sse41.pmovzx") ||
1151 Name.startswith("avx2.pmovsx") ||
1152 Name.startswith("avx2.pmovzx") ||
1153 Name.startswith("avx512.mask.pmovsx") ||
1154 Name.startswith("avx512.mask.pmovzx"))) {
1155 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1156 VectorType *DstTy = cast<VectorType>(CI->getType());
1157 unsigned NumDstElts = DstTy->getNumElements();
1159 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1160 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1161 for (unsigned i = 0; i != NumDstElts; ++i)
1164 Value *SV = Builder.CreateShuffleVector(
1165 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1167 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1168 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1169 : Builder.CreateZExt(SV, DstTy);
1170 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1171 if (CI->getNumArgOperands() == 3)
1172 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1173 CI->getArgOperand(1));
1174 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1175 Name == "avx2.vbroadcasti128")) {
1176 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1177 Type *EltTy = CI->getType()->getVectorElementType();
1178 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1179 Type *VT = VectorType::get(EltTy, NumSrcElts);
1180 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1181 PointerType::getUnqual(VT));
1182 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1183 if (NumSrcElts == 2)
1184 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1187 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1188 { 0, 1, 2, 3, 0, 1, 2, 3 });
1189 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1190 Name.startswith("avx2.vbroadcast") ||
1191 Name.startswith("avx512.pbroadcast") ||
1192 Name.startswith("avx512.mask.broadcast.s"))) {
1193 // Replace vp?broadcasts with a vector shuffle.
1194 Value *Op = CI->getArgOperand(0);
1195 unsigned NumElts = CI->getType()->getVectorNumElements();
1196 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1197 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1198 Constant::getNullValue(MaskTy));
1200 if (CI->getNumArgOperands() == 3)
1201 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1202 CI->getArgOperand(1));
1203 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1204 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1205 CI->getArgOperand(1),
1206 CI->getArgOperand(2),
1207 CI->getArgOperand(3),
1208 CI->getArgOperand(4),
1210 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1211 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1212 CI->getArgOperand(1),
1213 CI->getArgOperand(2),
1214 CI->getArgOperand(3),
1215 CI->getArgOperand(4),
1217 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1218 Name == "avx2.psll.dq")) {
1219 // 128/256-bit shift left specified in bits.
1220 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1221 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1222 Shift / 8); // Shift is in bits.
1223 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1224 Name == "avx2.psrl.dq")) {
1225 // 128/256-bit shift right specified in bits.
1226 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1227 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1228 Shift / 8); // Shift is in bits.
1229 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1230 Name == "avx2.psll.dq.bs" ||
1231 Name == "avx512.psll.dq.512")) {
1232 // 128/256/512-bit shift left specified in bytes.
1233 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1234 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1235 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1236 Name == "avx2.psrl.dq.bs" ||
1237 Name == "avx512.psrl.dq.512")) {
1238 // 128/256/512-bit shift right specified in bytes.
1239 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1240 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1241 } else if (IsX86 && (Name == "sse41.pblendw" ||
1242 Name.startswith("sse41.blendp") ||
1243 Name.startswith("avx.blend.p") ||
1244 Name == "avx2.pblendw" ||
1245 Name.startswith("avx2.pblendd."))) {
1246 Value *Op0 = CI->getArgOperand(0);
1247 Value *Op1 = CI->getArgOperand(1);
1248 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1249 VectorType *VecTy = cast<VectorType>(CI->getType());
1250 unsigned NumElts = VecTy->getNumElements();
1252 SmallVector<uint32_t, 16> Idxs(NumElts);
1253 for (unsigned i = 0; i != NumElts; ++i)
1254 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1256 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1257 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1258 Name == "avx2.vinserti128" ||
1259 Name.startswith("avx512.mask.insert"))) {
1260 Value *Op0 = CI->getArgOperand(0);
1261 Value *Op1 = CI->getArgOperand(1);
1262 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1263 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1264 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1265 unsigned Scale = DstNumElts / SrcNumElts;
1267 // Mask off the high bits of the immediate value; hardware ignores those.
1270 // Extend the second operand into a vector the size of the destination.
1271 Value *UndefV = UndefValue::get(Op1->getType());
1272 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1273 for (unsigned i = 0; i != SrcNumElts; ++i)
1275 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1276 Idxs[i] = SrcNumElts;
1277 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1279 // Insert the second operand into the first operand.
1281 // Note that there is no guarantee that instruction lowering will actually
1282 // produce a vinsertf128 instruction for the created shuffles. In
1283 // particular, the 0 immediate case involves no lane changes, so it can
1284 // be handled as a blend.
1286 // Example of shuffle mask for 32-bit elements:
1287 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1288 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1290 // First fill with identity mask.
1291 for (unsigned i = 0; i != DstNumElts; ++i)
1293 // Then replace the elements where we need to insert.
1294 for (unsigned i = 0; i != SrcNumElts; ++i)
1295 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1296 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1298 // If the intrinsic has a mask operand, handle that.
1299 if (CI->getNumArgOperands() == 5)
1300 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1301 CI->getArgOperand(3));
1302 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1303 Name == "avx2.vextracti128" ||
1304 Name.startswith("avx512.mask.vextract"))) {
1305 Value *Op0 = CI->getArgOperand(0);
1306 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1307 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1308 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1309 unsigned Scale = SrcNumElts / DstNumElts;
1311 // Mask off the high bits of the immediate value; hardware ignores those.
1314 // Get indexes for the subvector of the input vector.
1315 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1316 for (unsigned i = 0; i != DstNumElts; ++i) {
1317 Idxs[i] = i + (Imm * DstNumElts);
1319 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1321 // If the intrinsic has a mask operand, handle that.
1322 if (CI->getNumArgOperands() == 4)
1323 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1324 CI->getArgOperand(2));
1325 } else if (!IsX86 && Name == "stackprotectorcheck") {
1327 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1328 Name.startswith("avx512.mask.perm.di."))) {
1329 Value *Op0 = CI->getArgOperand(0);
1330 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1331 VectorType *VecTy = cast<VectorType>(CI->getType());
1332 unsigned NumElts = VecTy->getNumElements();
1334 SmallVector<uint32_t, 8> Idxs(NumElts);
1335 for (unsigned i = 0; i != NumElts; ++i)
1336 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1338 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1340 if (CI->getNumArgOperands() == 4)
1341 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1342 CI->getArgOperand(2));
1343 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1344 Name == "sse2.pshuf.d" ||
1345 Name.startswith("avx512.mask.vpermil.p") ||
1346 Name.startswith("avx512.mask.pshuf.d."))) {
1347 Value *Op0 = CI->getArgOperand(0);
1348 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1349 VectorType *VecTy = cast<VectorType>(CI->getType());
1350 unsigned NumElts = VecTy->getNumElements();
1351 // Calculate the size of each index in the immediate.
1352 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1353 unsigned IdxMask = ((1 << IdxSize) - 1);
1355 SmallVector<uint32_t, 8> Idxs(NumElts);
1356 // Lookup the bits for this element, wrapping around the immediate every
1357 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1358 // to offset by the first index of each group.
1359 for (unsigned i = 0; i != NumElts; ++i)
1360 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1362 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1364 if (CI->getNumArgOperands() == 4)
1365 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1366 CI->getArgOperand(2));
1367 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1368 Name.startswith("avx512.mask.pshufl.w."))) {
1369 Value *Op0 = CI->getArgOperand(0);
1370 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1371 unsigned NumElts = CI->getType()->getVectorNumElements();
1373 SmallVector<uint32_t, 16> Idxs(NumElts);
1374 for (unsigned l = 0; l != NumElts; l += 8) {
1375 for (unsigned i = 0; i != 4; ++i)
1376 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1377 for (unsigned i = 4; i != 8; ++i)
1378 Idxs[i + l] = i + l;
1381 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1383 if (CI->getNumArgOperands() == 4)
1384 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1385 CI->getArgOperand(2));
1386 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1387 Name.startswith("avx512.mask.pshufh.w."))) {
1388 Value *Op0 = CI->getArgOperand(0);
1389 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1390 unsigned NumElts = CI->getType()->getVectorNumElements();
1392 SmallVector<uint32_t, 16> Idxs(NumElts);
1393 for (unsigned l = 0; l != NumElts; l += 8) {
1394 for (unsigned i = 0; i != 4; ++i)
1395 Idxs[i + l] = i + l;
1396 for (unsigned i = 0; i != 4; ++i)
1397 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1400 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1402 if (CI->getNumArgOperands() == 4)
1403 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1404 CI->getArgOperand(2));
1405 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1406 Value *Op0 = CI->getArgOperand(0);
1407 Value *Op1 = CI->getArgOperand(1);
1408 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1409 unsigned NumElts = CI->getType()->getVectorNumElements();
1411 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1412 unsigned HalfLaneElts = NumLaneElts / 2;
1414 SmallVector<uint32_t, 16> Idxs(NumElts);
1415 for (unsigned i = 0; i != NumElts; ++i) {
1416 // Base index is the starting element of the lane.
1417 Idxs[i] = i - (i % NumLaneElts);
1418 // If we are half way through the lane switch to the other source.
1419 if ((i % NumLaneElts) >= HalfLaneElts)
1421 // Now select the specific element by adding HalfLaneElts bits from
1422 // the immediate, wrapping around the immediate every 8 bits.
1423 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1426 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1428 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1429 CI->getArgOperand(3));
1430 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1431 Name.startswith("avx512.mask.movshdup") ||
1432 Name.startswith("avx512.mask.movsldup"))) {
1433 Value *Op0 = CI->getArgOperand(0);
1434 unsigned NumElts = CI->getType()->getVectorNumElements();
1435 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1437 unsigned Offset = 0;
1438 if (Name.startswith("avx512.mask.movshdup."))
1441 SmallVector<uint32_t, 16> Idxs(NumElts);
1442 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1443 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1444 Idxs[i + l + 0] = i + l + Offset;
1445 Idxs[i + l + 1] = i + l + Offset;
1448 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1450 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1451 CI->getArgOperand(1));
1452 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1453 Name.startswith("avx512.mask.unpckl."))) {
1454 Value *Op0 = CI->getArgOperand(0);
1455 Value *Op1 = CI->getArgOperand(1);
1456 int NumElts = CI->getType()->getVectorNumElements();
1457 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1459 SmallVector<uint32_t, 64> Idxs(NumElts);
1460 for (int l = 0; l != NumElts; l += NumLaneElts)
1461 for (int i = 0; i != NumLaneElts; ++i)
1462 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1464 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1466 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1467 CI->getArgOperand(2));
1468 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1469 Name.startswith("avx512.mask.unpckh."))) {
1470 Value *Op0 = CI->getArgOperand(0);
1471 Value *Op1 = CI->getArgOperand(1);
1472 int NumElts = CI->getType()->getVectorNumElements();
1473 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1475 SmallVector<uint32_t, 64> Idxs(NumElts);
1476 for (int l = 0; l != NumElts; l += NumLaneElts)
1477 for (int i = 0; i != NumLaneElts; ++i)
1478 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1480 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1482 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1483 CI->getArgOperand(2));
1484 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1485 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1486 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1487 CI->getArgOperand(2));
1488 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1489 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1490 CI->getArgOperand(1));
1491 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1492 CI->getArgOperand(2));
1493 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1494 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1495 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1496 CI->getArgOperand(2));
1497 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1498 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1499 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1500 CI->getArgOperand(2));
1501 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1502 VectorType *FTy = cast<VectorType>(CI->getType());
1503 VectorType *ITy = VectorType::getInteger(FTy);
1504 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1505 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1506 Rep = Builder.CreateBitCast(Rep, FTy);
1507 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1508 CI->getArgOperand(2));
1509 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1510 VectorType *FTy = cast<VectorType>(CI->getType());
1511 VectorType *ITy = VectorType::getInteger(FTy);
1512 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1513 Rep = Builder.CreateAnd(Rep,
1514 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1515 Rep = Builder.CreateBitCast(Rep, FTy);
1516 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1517 CI->getArgOperand(2));
1518 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1519 VectorType *FTy = cast<VectorType>(CI->getType());
1520 VectorType *ITy = VectorType::getInteger(FTy);
1521 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1522 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1523 Rep = Builder.CreateBitCast(Rep, FTy);
1524 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1525 CI->getArgOperand(2));
1526 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1527 VectorType *FTy = cast<VectorType>(CI->getType());
1528 VectorType *ITy = VectorType::getInteger(FTy);
1529 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1530 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1531 Rep = Builder.CreateBitCast(Rep, FTy);
1532 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1533 CI->getArgOperand(2));
1534 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1535 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1536 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1537 CI->getArgOperand(2));
1538 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1539 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1540 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1541 CI->getArgOperand(2));
1542 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1543 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1544 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1545 CI->getArgOperand(2));
1546 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1547 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1548 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1549 CI->getArgOperand(2));
1550 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1551 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1552 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1553 CI->getArgOperand(2));
1554 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1555 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1556 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1557 CI->getArgOperand(2));
1558 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1559 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1560 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1561 CI->getArgOperand(2));
1562 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1563 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1566 { CI->getArgOperand(0), Builder.getInt1(false) });
1567 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1568 CI->getArgOperand(1));
1569 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1570 Name.startswith("avx512.mask.min.p"))) {
1571 bool IsMin = Name[13] == 'i';
1572 VectorType *VecTy = cast<VectorType>(CI->getType());
1573 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1574 unsigned EltWidth = VecTy->getScalarSizeInBits();
1576 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1577 IID = Intrinsic::x86_sse_max_ps;
1578 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1579 IID = Intrinsic::x86_sse2_max_pd;
1580 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1581 IID = Intrinsic::x86_avx_max_ps_256;
1582 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1583 IID = Intrinsic::x86_avx_max_pd_256;
1584 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1585 IID = Intrinsic::x86_sse_min_ps;
1586 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1587 IID = Intrinsic::x86_sse2_min_pd;
1588 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1589 IID = Intrinsic::x86_avx_min_ps_256;
1590 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1591 IID = Intrinsic::x86_avx_min_pd_256;
1593 llvm_unreachable("Unexpected intrinsic");
1595 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1596 { CI->getArgOperand(0), CI->getArgOperand(1) });
1597 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1598 CI->getArgOperand(2));
1599 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1600 VectorType *VecTy = cast<VectorType>(CI->getType());
1602 if (VecTy->getPrimitiveSizeInBits() == 128)
1603 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1604 else if (VecTy->getPrimitiveSizeInBits() == 256)
1605 IID = Intrinsic::x86_avx2_pshuf_b;
1606 else if (VecTy->getPrimitiveSizeInBits() == 512)
1607 IID = Intrinsic::x86_avx512_pshuf_b_512;
1609 llvm_unreachable("Unexpected intrinsic");
1611 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1612 { CI->getArgOperand(0), CI->getArgOperand(1) });
1613 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1614 CI->getArgOperand(2));
1615 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1616 Name.startswith("avx512.mask.pmulu.dq."))) {
1617 bool IsUnsigned = Name[16] == 'u';
1618 VectorType *VecTy = cast<VectorType>(CI->getType());
1620 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1621 IID = Intrinsic::x86_sse41_pmuldq;
1622 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1623 IID = Intrinsic::x86_avx2_pmul_dq;
1624 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1625 IID = Intrinsic::x86_avx512_pmul_dq_512;
1626 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1627 IID = Intrinsic::x86_sse2_pmulu_dq;
1628 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1629 IID = Intrinsic::x86_avx2_pmulu_dq;
1630 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1631 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1633 llvm_unreachable("Unexpected intrinsic");
1635 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1636 { CI->getArgOperand(0), CI->getArgOperand(1) });
1637 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1638 CI->getArgOperand(2));
1639 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1640 bool IsUnsigned = Name[16] == 'u';
1641 bool IsDW = Name[18] == 'd';
1642 VectorType *VecTy = cast<VectorType>(CI->getType());
1644 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1645 IID = Intrinsic::x86_sse2_packsswb_128;
1646 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1647 IID = Intrinsic::x86_avx2_packsswb;
1648 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1649 IID = Intrinsic::x86_avx512_packsswb_512;
1650 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1651 IID = Intrinsic::x86_sse2_packssdw_128;
1652 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1653 IID = Intrinsic::x86_avx2_packssdw;
1654 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1655 IID = Intrinsic::x86_avx512_packssdw_512;
1656 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1657 IID = Intrinsic::x86_sse2_packuswb_128;
1658 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1659 IID = Intrinsic::x86_avx2_packuswb;
1660 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1661 IID = Intrinsic::x86_avx512_packuswb_512;
1662 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1663 IID = Intrinsic::x86_sse41_packusdw;
1664 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1665 IID = Intrinsic::x86_avx2_packusdw;
1666 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1667 IID = Intrinsic::x86_avx512_packusdw_512;
1669 llvm_unreachable("Unexpected intrinsic");
1671 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1672 { CI->getArgOperand(0), CI->getArgOperand(1) });
1673 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1674 CI->getArgOperand(2));
1675 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1676 bool IsImmediate = Name[16] == 'i' ||
1677 (Name.size() > 18 && Name[18] == 'i');
1678 bool IsVariable = Name[16] == 'v';
1679 char Size = Name[16] == '.' ? Name[17] :
1680 Name[17] == '.' ? Name[18] :
1681 Name[18] == '.' ? Name[19] :
1685 if (IsVariable && Name[17] != '.') {
1686 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1687 IID = Intrinsic::x86_avx2_psllv_q;
1688 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1689 IID = Intrinsic::x86_avx2_psllv_q_256;
1690 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1691 IID = Intrinsic::x86_avx2_psllv_d;
1692 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1693 IID = Intrinsic::x86_avx2_psllv_d_256;
1694 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1695 IID = Intrinsic::x86_avx512_psllv_w_128;
1696 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1697 IID = Intrinsic::x86_avx512_psllv_w_256;
1698 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1699 IID = Intrinsic::x86_avx512_psllv_w_512;
1701 llvm_unreachable("Unexpected size");
1702 } else if (Name.endswith(".128")) {
1703 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1704 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1705 : Intrinsic::x86_sse2_psll_d;
1706 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1707 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1708 : Intrinsic::x86_sse2_psll_q;
1709 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1710 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1711 : Intrinsic::x86_sse2_psll_w;
1713 llvm_unreachable("Unexpected size");
1714 } else if (Name.endswith(".256")) {
1715 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1716 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1717 : Intrinsic::x86_avx2_psll_d;
1718 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1719 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1720 : Intrinsic::x86_avx2_psll_q;
1721 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1722 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1723 : Intrinsic::x86_avx2_psll_w;
1725 llvm_unreachable("Unexpected size");
1727 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1728 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1729 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1730 Intrinsic::x86_avx512_psll_d_512;
1731 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1732 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1733 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1734 Intrinsic::x86_avx512_psll_q_512;
1735 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1736 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1737 : Intrinsic::x86_avx512_psll_w_512;
1739 llvm_unreachable("Unexpected size");
1742 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1743 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1744 bool IsImmediate = Name[16] == 'i' ||
1745 (Name.size() > 18 && Name[18] == 'i');
1746 bool IsVariable = Name[16] == 'v';
1747 char Size = Name[16] == '.' ? Name[17] :
1748 Name[17] == '.' ? Name[18] :
1749 Name[18] == '.' ? Name[19] :
1753 if (IsVariable && Name[17] != '.') {
1754 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1755 IID = Intrinsic::x86_avx2_psrlv_q;
1756 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1757 IID = Intrinsic::x86_avx2_psrlv_q_256;
1758 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1759 IID = Intrinsic::x86_avx2_psrlv_d;
1760 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1761 IID = Intrinsic::x86_avx2_psrlv_d_256;
1762 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1763 IID = Intrinsic::x86_avx512_psrlv_w_128;
1764 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1765 IID = Intrinsic::x86_avx512_psrlv_w_256;
1766 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1767 IID = Intrinsic::x86_avx512_psrlv_w_512;
1769 llvm_unreachable("Unexpected size");
1770 } else if (Name.endswith(".128")) {
1771 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1772 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1773 : Intrinsic::x86_sse2_psrl_d;
1774 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1775 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1776 : Intrinsic::x86_sse2_psrl_q;
1777 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1778 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1779 : Intrinsic::x86_sse2_psrl_w;
1781 llvm_unreachable("Unexpected size");
1782 } else if (Name.endswith(".256")) {
1783 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1784 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1785 : Intrinsic::x86_avx2_psrl_d;
1786 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1787 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1788 : Intrinsic::x86_avx2_psrl_q;
1789 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1790 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1791 : Intrinsic::x86_avx2_psrl_w;
1793 llvm_unreachable("Unexpected size");
1795 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1796 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1797 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1798 Intrinsic::x86_avx512_psrl_d_512;
1799 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1800 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1801 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1802 Intrinsic::x86_avx512_psrl_q_512;
1803 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
1804 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1805 : Intrinsic::x86_avx512_psrl_w_512;
1807 llvm_unreachable("Unexpected size");
1810 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1811 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1812 bool IsImmediate = Name[16] == 'i' ||
1813 (Name.size() > 18 && Name[18] == 'i');
1814 bool IsVariable = Name[16] == 'v';
1815 char Size = Name[16] == '.' ? Name[17] :
1816 Name[17] == '.' ? Name[18] :
1817 Name[18] == '.' ? Name[19] :
1821 if (IsVariable && Name[17] != '.') {
1822 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1823 IID = Intrinsic::x86_avx2_psrav_d;
1824 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1825 IID = Intrinsic::x86_avx2_psrav_d_256;
1826 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1827 IID = Intrinsic::x86_avx512_psrav_w_128;
1828 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1829 IID = Intrinsic::x86_avx512_psrav_w_256;
1830 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1831 IID = Intrinsic::x86_avx512_psrav_w_512;
1833 llvm_unreachable("Unexpected size");
1834 } else if (Name.endswith(".128")) {
1835 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1836 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1837 : Intrinsic::x86_sse2_psra_d;
1838 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1839 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1840 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1841 Intrinsic::x86_avx512_psra_q_128;
1842 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1843 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1844 : Intrinsic::x86_sse2_psra_w;
1846 llvm_unreachable("Unexpected size");
1847 } else if (Name.endswith(".256")) {
1848 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1849 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1850 : Intrinsic::x86_avx2_psra_d;
1851 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1852 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1853 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1854 Intrinsic::x86_avx512_psra_q_256;
1855 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1856 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1857 : Intrinsic::x86_avx2_psra_w;
1859 llvm_unreachable("Unexpected size");
1861 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1862 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1863 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1864 Intrinsic::x86_avx512_psra_d_512;
1865 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
1866 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1867 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1868 Intrinsic::x86_avx512_psra_q_512;
1869 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1870 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1871 : Intrinsic::x86_avx512_psra_w_512;
1873 llvm_unreachable("Unexpected size");
1876 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1877 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1878 Rep = upgradeMaskedMove(Builder, *CI);
1879 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
1880 Rep = UpgradeMaskToInt(Builder, *CI);
1881 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1883 if (Name.endswith("ps.128"))
1884 IID = Intrinsic::x86_avx_vpermilvar_ps;
1885 else if (Name.endswith("pd.128"))
1886 IID = Intrinsic::x86_avx_vpermilvar_pd;
1887 else if (Name.endswith("ps.256"))
1888 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1889 else if (Name.endswith("pd.256"))
1890 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1891 else if (Name.endswith("ps.512"))
1892 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1893 else if (Name.endswith("pd.512"))
1894 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1896 llvm_unreachable("Unexpected vpermilvar intrinsic");
1898 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1899 Rep = Builder.CreateCall(Intrin,
1900 { CI->getArgOperand(0), CI->getArgOperand(1) });
1901 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1902 CI->getArgOperand(2));
1903 } else if (IsX86 && Name.endswith(".movntdqa")) {
1904 Module *M = F->getParent();
1905 MDNode *Node = MDNode::get(
1906 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1908 Value *Ptr = CI->getArgOperand(0);
1909 VectorType *VTy = cast<VectorType>(CI->getType());
1911 // Convert the type of the pointer to a pointer to the stored type.
1913 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
1914 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
1915 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
1917 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
1918 Value *Arg = CI->getArgOperand(0);
1919 Value *Neg = Builder.CreateNeg(Arg, "neg");
1920 Value *Cmp = Builder.CreateICmpSGE(
1921 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
1922 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
1923 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
1924 Name == "max.ui" || Name == "max.ull")) {
1925 Value *Arg0 = CI->getArgOperand(0);
1926 Value *Arg1 = CI->getArgOperand(1);
1927 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1928 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
1929 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
1930 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
1931 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
1932 Name == "min.ui" || Name == "min.ull")) {
1933 Value *Arg0 = CI->getArgOperand(0);
1934 Value *Arg1 = CI->getArgOperand(1);
1935 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1936 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
1937 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
1938 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
1939 } else if (IsNVVM && Name == "clz.ll") {
1940 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
1941 Value *Arg = CI->getArgOperand(0);
1942 Value *Ctlz = Builder.CreateCall(
1943 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
1945 {Arg, Builder.getFalse()}, "ctlz");
1946 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
1947 } else if (IsNVVM && Name == "popc.ll") {
1948 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
1950 Value *Arg = CI->getArgOperand(0);
1951 Value *Popc = Builder.CreateCall(
1952 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
1955 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
1956 } else if (IsNVVM && Name == "h2f") {
1957 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
1958 F->getParent(), Intrinsic::convert_from_fp16,
1959 {Builder.getFloatTy()}),
1960 CI->getArgOperand(0), "h2f");
1962 llvm_unreachable("Unknown function for CallInst upgrade.");
1966 CI->replaceAllUsesWith(Rep);
1967 CI->eraseFromParent();
1971 CallInst *NewCall = nullptr;
1972 switch (NewFn->getIntrinsicID()) {
1974 // Handle generic mangling change, but nothing else
1976 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
1977 "Unknown function for CallInst upgrade and isn't just a name change");
1978 CI->setCalledFunction(NewFn);
1982 case Intrinsic::arm_neon_vld1:
1983 case Intrinsic::arm_neon_vld2:
1984 case Intrinsic::arm_neon_vld3:
1985 case Intrinsic::arm_neon_vld4:
1986 case Intrinsic::arm_neon_vld2lane:
1987 case Intrinsic::arm_neon_vld3lane:
1988 case Intrinsic::arm_neon_vld4lane:
1989 case Intrinsic::arm_neon_vst1:
1990 case Intrinsic::arm_neon_vst2:
1991 case Intrinsic::arm_neon_vst3:
1992 case Intrinsic::arm_neon_vst4:
1993 case Intrinsic::arm_neon_vst2lane:
1994 case Intrinsic::arm_neon_vst3lane:
1995 case Intrinsic::arm_neon_vst4lane: {
1996 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1997 CI->arg_operands().end());
1998 NewCall = Builder.CreateCall(NewFn, Args);
2002 case Intrinsic::bitreverse:
2003 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2006 case Intrinsic::ctlz:
2007 case Intrinsic::cttz:
2008 assert(CI->getNumArgOperands() == 1 &&
2009 "Mismatch between function args and call args");
2011 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
2014 case Intrinsic::objectsize: {
2015 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2016 ? Builder.getFalse()
2017 : CI->getArgOperand(2);
2018 NewCall = Builder.CreateCall(
2019 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2023 case Intrinsic::ctpop:
2024 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2027 case Intrinsic::convert_from_fp16:
2028 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2031 case Intrinsic::x86_xop_vfrcz_ss:
2032 case Intrinsic::x86_xop_vfrcz_sd:
2033 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2036 case Intrinsic::x86_xop_vpermil2pd:
2037 case Intrinsic::x86_xop_vpermil2ps:
2038 case Intrinsic::x86_xop_vpermil2pd_256:
2039 case Intrinsic::x86_xop_vpermil2ps_256: {
2040 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2041 CI->arg_operands().end());
2042 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2043 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2044 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2045 NewCall = Builder.CreateCall(NewFn, Args);
2049 case Intrinsic::x86_sse41_ptestc:
2050 case Intrinsic::x86_sse41_ptestz:
2051 case Intrinsic::x86_sse41_ptestnzc: {
2052 // The arguments for these intrinsics used to be v4f32, and changed
2053 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
2054 // So, the only thing required is a bitcast for both arguments.
2055 // First, check the arguments have the old type.
2056 Value *Arg0 = CI->getArgOperand(0);
2057 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2060 // Old intrinsic, add bitcasts
2061 Value *Arg1 = CI->getArgOperand(1);
2063 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2065 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2066 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2068 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2072 case Intrinsic::x86_sse41_insertps:
2073 case Intrinsic::x86_sse41_dppd:
2074 case Intrinsic::x86_sse41_dpps:
2075 case Intrinsic::x86_sse41_mpsadbw:
2076 case Intrinsic::x86_avx_dp_ps_256:
2077 case Intrinsic::x86_avx2_mpsadbw: {
2078 // Need to truncate the last argument from i32 to i8 -- this argument models
2079 // an inherently 8-bit immediate operand to these x86 instructions.
2080 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2081 CI->arg_operands().end());
2083 // Replace the last argument with a trunc.
2084 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2085 NewCall = Builder.CreateCall(NewFn, Args);
2089 case Intrinsic::thread_pointer: {
2090 NewCall = Builder.CreateCall(NewFn, {});
2094 case Intrinsic::invariant_start:
2095 case Intrinsic::invariant_end:
2096 case Intrinsic::masked_load:
2097 case Intrinsic::masked_store:
2098 case Intrinsic::masked_gather:
2099 case Intrinsic::masked_scatter: {
2100 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2101 CI->arg_operands().end());
2102 NewCall = Builder.CreateCall(NewFn, Args);
2106 assert(NewCall && "Should have either set this variable or returned through "
2107 "the default case");
2108 std::string Name = CI->getName();
2109 if (!Name.empty()) {
2110 CI->setName(Name + ".old");
2111 NewCall->setName(Name);
2113 CI->replaceAllUsesWith(NewCall);
2114 CI->eraseFromParent();
/// Upgrade the calls to the intrinsic function \p F and then delete \p F.
///
/// UpgradeIntrinsicFunction() decides whether \p F needs upgrading and supplies
/// the replacement through NewFn. When it reports true, every user of \p F that
/// is a CallInst is handed to UpgradeIntrinsicCall(); users of any other kind
/// are skipped by the dyn_cast. \p F is then erased from its parent.
///
/// \param F the function to examine; must not be null (asserted).
2117 void llvm::UpgradeCallsToIntrinsic(Function *F) {
2118 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
2120 // Check if this function should be upgraded and get the replacement function
// NOTE(review): the declaration of NewFn is not visible at this point;
// presumably it is a local Function* filled in by UpgradeIntrinsicFunction --
// confirm.
2123 if (UpgradeIntrinsicFunction(F, NewFn)) {
2124 // Replace all users of the old function with the new function or new
2125 // instructions. This is not a range loop because the call is deleted.
// The iterator is advanced (*UI++) before the call is processed, so it no
// longer refers to the user that UpgradeIntrinsicCall is about to remove.
2126 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
2127 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
2128 UpgradeIntrinsicCall(CI, NewFn);
2130 // Remove old function, no longer used, from the module.
2131 F->eraseFromParent();
/// Build a struct-path style TBAA tag from the tag \p MD.
///
/// Two conversions are visible below:
///  - a tag with exactly three operands: a new node is made from operands 0
///    and 1 and used as both of the first two fields of the result, followed by
///    an i64 zero offset;
///  - any other tag: \p MD itself is used as both of the first two fields,
///    followed by an i64 zero offset.
///
/// \param MD the TBAA tag to examine.
/// \returns the newly created tag node on the two conversion paths.
2135 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
2136 // Check if the tag uses struct-path aware TBAA format.
// That format is recognised by operand 0 being an MDNode together with at
// least three operands.
// NOTE(review): the statement guarded by this check is not visible here;
// presumably MD is returned unchanged -- confirm.
2137 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
2140 auto &Context = MD.getContext();
2141 if (MD.getNumOperands() == 3) {
// Operands 0 and 1 of the old tag become a standalone node of their own.
2142 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
2143 MDNode *ScalarType = MDNode::get(Context, Elts);
2144 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
// The offset is the i64 null value. NOTE(review): the final ("const")
// initializer of Elts2 is not visible here -- presumably it is taken from
// MD's third operand; confirm.
2145 Metadata *Elts2[] = {ScalarType, ScalarType,
2146 ConstantAsMetadata::get(
2147 Constant::getNullValue(Type::getInt64Ty(Context))),
2149 return MDNode::get(Context, Elts2);
2151 // Create a MDNode <MD, MD, offset 0>
2152 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
2153 Type::getInt64Ty(Context)))};
2154 return MDNode::get(Context, Elts);
/// Upgrade a bitcast instruction between pointers of different address spaces.
///
/// When \p Opc is Instruction::BitCast and both the type of \p V and \p DestTy
/// are pointers (or vectors of pointers) whose address spaces differ, the cast
/// is rebuilt as a ptrtoint to i64 followed by an inttoptr to \p DestTy.
///
/// \param Opc    opcode of the cast.
/// \param V      the value being cast.
/// \param DestTy the destination type of the cast.
/// \param Temp   [out] set to the intermediate ptrtoint instruction on the
///               upgrade path.
/// \returns the inttoptr instruction on the upgrade path.
///
/// NOTE(review): neither CastInst::Create call is given an insertion position,
/// so the caller presumably inserts both instructions. The results for the
/// non-upgrade paths are not visible here (presumably nullptr) -- confirm.
2157 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
2158 Instruction *&Temp) {
// Only bitcasts are candidates for this upgrade.
2159 if (Opc != Instruction::BitCast)
2163 Type *SrcTy = V->getType();
// Pointer-to-pointer cast whose source and destination address spaces differ.
2164 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2165 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2166 LLVMContext &Context = V->getContext();
2168 // We have no information about target data layout, so we assume that
2169 // the maximum pointer size is 64bit.
2170 Type *MidTy = Type::getInt64Ty(Context);
// First half of the replacement: pointer -> i64.
2171 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
// Second half: i64 -> pointer of the destination type.
2173 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
/// Constant-expression counterpart of the bitcast upgrade.
///
/// When \p Opc is Instruction::BitCast and both the type of \p C and \p DestTy
/// are pointers (or vectors of pointers) whose address spaces differ, the
/// result is an inttoptr constant expression wrapped around a ptrtoint of \p C
/// to i64.
///
/// \param Opc    opcode of the cast expression.
/// \param C      the constant being cast.
/// \param DestTy the destination type of the cast.
/// \returns the replacement constant expression on the upgrade path.
///
/// NOTE(review): the trailing argument of the final call and the results for
/// the non-upgrade paths are not visible here (presumably DestTy and nullptr
/// respectively) -- confirm.
2179 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
// Only bitcasts are candidates for this upgrade.
2180 if (Opc != Instruction::BitCast)
2183 Type *SrcTy = C->getType();
// Pointer-to-pointer cast whose source and destination address spaces differ.
2184 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2185 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2186 LLVMContext &Context = C->getContext();
2188 // We have no information about target data layout, so we assume that
2189 // the maximum pointer size is 64bit.
2190 Type *MidTy = Type::getInt64Ty(Context);
// ptrtoint to i64 nested inside an inttoptr.
2192 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2199 /// Check the debug info version number, if it is out-dated, drop the debug
2200 /// info. Return true if module is modified.
///
/// The version read from \p M is compared for equality with
/// DEBUG_METADATA_VERSION. On the path past that comparison the debug info is
/// removed with StripDebugInfo() and a DiagnosticInfoDebugMetadataVersion
/// carrying the version that was found is reported through the module's
/// context.
2201 bool llvm::UpgradeDebugInfo(Module &M) {
2202 unsigned Version = getDebugMetadataVersionFromModule(M);
// NOTE(review): the statement guarded by this check is not visible here;
// presumably an early "nothing to do" return -- confirm.
2203 if (Version == DEBUG_METADATA_VERSION)
// RetCode records the result of StripDebugInfo().
2206 bool RetCode = StripDebugInfo(M);
// Report the version that was found through the context's diagnostic handler.
// NOTE(review): whether this report is conditional on RetCode is not visible
// here -- confirm.
2208 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2209 M.getContext().diagnose(DiagVersion);
/// Upgrade the module-flags metadata of \p M for Objective-C bitcode.
///
/// Walks the module flags, reading operand 1 of each flag node as the flag's
/// name, and notes whether "Objective-C Image Info Version" and
/// "Objective-C Class Properties" are present. A module identified as
/// Objective-C that lacks the class-properties flag gets one added with
/// Module::Override behavior.
///
/// NOTE(review): the return statements are not visible here; presumably the
/// result is true exactly when a flag was added -- confirm.
2214 bool llvm::UpgradeModuleFlags(Module &M) {
// NOTE(review): any handling of a module with no flags metadata (ModFlags
// null) is not visible here -- confirm there is an early exit before the loop.
2215 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2219 bool HasObjCFlag = false, HasClassProperties = false;
2220 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2221 MDNode *Op = ModFlags->getOperand(I);
// A flag node needs at least two operands for operand 1 (the name) to exist.
2222 if (Op->getNumOperands() < 2)
// Operand 1 is read as the flag name; dyn_cast_or_null yields null when that
// operand is absent or is not an MDString.
2224 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
// NOTE(review): the statement guarded by this comparison is not visible here;
// presumably it sets HasObjCFlag -- confirm.
2227 if (ID->getString() == "Objective-C Image Info Version")
2229 if (ID->getString() == "Objective-C Class Properties")
2230 HasClassProperties = true;
2232 // "Objective-C Class Properties" is recently added for Objective-C. We
2233 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2234 // flag of value 0, so we can correctly downgrade this flag when trying to
2235 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2236 // this module flag.
2237 if (HasObjCFlag && !HasClassProperties) {
2238 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
/// Test whether \p MD is a loop-metadata argument that still uses the old
/// "llvm.vectorizer." tag prefix: an MDTuple with at least one operand whose
/// first operand is an MDString starting with that prefix.
///
/// NOTE(review): the results returned when one of the checks below fails are
/// not visible here; presumably false -- confirm.
2245 static bool isOldLoopArgument(Metadata *MD) {
// dyn_cast_or_null tolerates a null MD.
2246 auto *T = dyn_cast_or_null<MDTuple>(MD);
// An empty tuple has no tag operand to inspect.
2249 if (T->getNumOperands() < 1)
// The tag is expected in operand 0.
2251 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2254 return S->getString().startswith("llvm.vectorizer.");
/// Map an old "llvm.vectorizer.*" loop tag to its replacement MDString.
///
/// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count". Any other
/// tag has its "llvm.vectorizer." prefix replaced by "llvm.loop.vectorize."
/// (for example "llvm.vectorizer.width" -> "llvm.loop.vectorize.width").
///
/// \param C      context the MDString is created in.
/// \param OldTag the old tag; asserted to start with "llvm.vectorizer.".
/// \returns the MDString holding the new tag.
2257 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2258 StringRef OldPrefix = "llvm.vectorizer.";
2259 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
// The unroll tag is the one case that is renamed rather than re-prefixed.
2261 if (OldTag == "llvm.vectorizer.unroll")
2262 return MDString::get(C, "llvm.loop.interleave.count");
// Everything else keeps its suffix and only swaps the prefix.
2264 return MDString::get(
2265 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
/// Upgrade a single loop-metadata argument that carries an old
/// "llvm.vectorizer." tag.
///
/// For an MDTuple whose first operand is an MDString with that prefix, a new
/// MDTuple is built: operand 0 is the tag translated by upgradeLoopTag() and
/// the remaining operands are copied over in order.
///
/// NOTE(review): the results returned when one of the checks below fails are
/// not visible here; presumably \p MD is handed back unchanged -- confirm.
2269 static Metadata *upgradeLoopArgument(Metadata *MD) {
// dyn_cast_or_null tolerates a null MD.
2270 auto *T = dyn_cast_or_null<MDTuple>(MD);
// An empty tuple has no tag operand to inspect.
2273 if (T->getNumOperands() < 1)
// The tag is expected in operand 0.
2275 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2278 if (!OldTag->getString().startswith("llvm.vectorizer."))
2281 // This has an old tag. Upgrade it.
2282 SmallVector<Metadata *, 8> Ops;
// One slot per operand of the original tuple.
2283 Ops.reserve(T->getNumOperands());
2284 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Operands after the tag are carried over as they are.
2285 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2286 Ops.push_back(T->getOperand(I));
2288 return MDTuple::get(T->getContext(), Ops);
/// Upgrade a loop-metadata attachment whose arguments use old
/// "llvm.vectorizer." tags.
///
/// When \p N is an MDTuple with at least one operand accepted by
/// isOldLoopArgument(), a new MDTuple is built in which every operand has been
/// passed through upgradeLoopArgument().
///
/// \param N the attachment node to examine.
/// \returns the rebuilt tuple on the upgrade path.
///
/// NOTE(review): the results for the two early-exit checks below are not
/// visible here; presumably \p N is returned unchanged -- confirm.
2291 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2292 auto *T = dyn_cast<MDTuple>(&N);
// Check first whether any operand carries an old tag at all.
2296 if (none_of(T->operands(), isOldLoopArgument))
2299 SmallVector<Metadata *, 8> Ops;
// One slot per operand of the original tuple.
2300 Ops.reserve(T->getNumOperands());
2301 for (Metadata *MD : T->operands())
2302 Ops.push_back(upgradeLoopArgument(MD));
2304 return MDTuple::get(T->getContext(), Ops);