//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
39 // Check whether this is an old version of the function, which received
41 Type *Arg0Type = F->getFunctionType()->getParamType(0);
42 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
45 // Yes, it's old, replace it with new version.
47 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
55 // Check that the last argument is an i32.
56 Type *LastArgType = F->getFunctionType()->getParamType(
57 F->getFunctionType()->getNumParams() - 1);
58 if (!LastArgType->isIntegerTy(32))
61 // Move this function aside and map down.
63 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
68 // All of the intrinsics matches below should be marked with which llvm
69 // version started autoupgrading them. At some point in the future we would
70 // like to use this information to remove upgrade code for some older
71 // intrinsics. It is currently undecided how we will determine that future
73 if (Name=="ssse3.pabs.b.128" || // Added in 6.0
74 Name=="ssse3.pabs.w.128" || // Added in 6.0
75 Name=="ssse3.pabs.d.128" || // Added in 6.0
76 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
77 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
78 Name.startswith("avx512.kunpck") || //added in 6.0
79 Name.startswith("avx2.pabs.") || // Added in 6.0
80 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
81 Name.startswith("avx512.broadcastm") || // Added in 6.0
82 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
83 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
84 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
85 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
86 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
87 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
88 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
89 Name.startswith("avx.vperm2f128.") || // Added in 6.0
90 Name == "avx2.vperm2i128" || // Added in 6.0
91 Name == "sse.add.ss" || // Added in 4.0
92 Name == "sse2.add.sd" || // Added in 4.0
93 Name == "sse.sub.ss" || // Added in 4.0
94 Name == "sse2.sub.sd" || // Added in 4.0
95 Name == "sse.mul.ss" || // Added in 4.0
96 Name == "sse2.mul.sd" || // Added in 4.0
97 Name == "sse.div.ss" || // Added in 4.0
98 Name == "sse2.div.sd" || // Added in 4.0
99 Name == "sse41.pmaxsb" || // Added in 3.9
100 Name == "sse2.pmaxs.w" || // Added in 3.9
101 Name == "sse41.pmaxsd" || // Added in 3.9
102 Name == "sse2.pmaxu.b" || // Added in 3.9
103 Name == "sse41.pmaxuw" || // Added in 3.9
104 Name == "sse41.pmaxud" || // Added in 3.9
105 Name == "sse41.pminsb" || // Added in 3.9
106 Name == "sse2.pmins.w" || // Added in 3.9
107 Name == "sse41.pminsd" || // Added in 3.9
108 Name == "sse2.pminu.b" || // Added in 3.9
109 Name == "sse41.pminuw" || // Added in 3.9
110 Name == "sse41.pminud" || // Added in 3.9
111 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
112 Name.startswith("avx2.pmax") || // Added in 3.9
113 Name.startswith("avx2.pmin") || // Added in 3.9
114 Name.startswith("avx512.mask.pmax") || // Added in 4.0
115 Name.startswith("avx512.mask.pmin") || // Added in 4.0
116 Name.startswith("avx2.vbroadcast") || // Added in 3.8
117 Name.startswith("avx2.pbroadcast") || // Added in 3.8
118 Name.startswith("avx.vpermil.") || // Added in 3.1
119 Name.startswith("sse2.pshuf") || // Added in 3.9
120 Name.startswith("avx512.pbroadcast") || // Added in 3.9
121 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
122 Name.startswith("avx512.mask.movddup") || // Added in 3.9
123 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
124 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
125 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
126 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
127 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
128 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
129 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
130 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
131 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
132 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
133 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
134 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
135 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
136 Name.startswith("avx512.mask.pand.") || // Added in 3.9
137 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
138 Name.startswith("avx512.mask.por.") || // Added in 3.9
139 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
140 Name.startswith("avx512.mask.and.") || // Added in 3.9
141 Name.startswith("avx512.mask.andn.") || // Added in 3.9
142 Name.startswith("avx512.mask.or.") || // Added in 3.9
143 Name.startswith("avx512.mask.xor.") || // Added in 3.9
144 Name.startswith("avx512.mask.padd.") || // Added in 4.0
145 Name.startswith("avx512.mask.psub.") || // Added in 4.0
146 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
147 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
148 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
149 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
150 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
151 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
152 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
153 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
154 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
155 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
156 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
157 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
158 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
159 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
160 Name == "avx512.mask.add.pd.128" || // Added in 4.0
161 Name == "avx512.mask.add.pd.256" || // Added in 4.0
162 Name == "avx512.mask.add.ps.128" || // Added in 4.0
163 Name == "avx512.mask.add.ps.256" || // Added in 4.0
164 Name == "avx512.mask.div.pd.128" || // Added in 4.0
165 Name == "avx512.mask.div.pd.256" || // Added in 4.0
166 Name == "avx512.mask.div.ps.128" || // Added in 4.0
167 Name == "avx512.mask.div.ps.256" || // Added in 4.0
168 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
169 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
170 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
171 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
172 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
173 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
174 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
175 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
176 Name == "avx512.mask.max.pd.128" || // Added in 5.0
177 Name == "avx512.mask.max.pd.256" || // Added in 5.0
178 Name == "avx512.mask.max.ps.128" || // Added in 5.0
179 Name == "avx512.mask.max.ps.256" || // Added in 5.0
180 Name == "avx512.mask.min.pd.128" || // Added in 5.0
181 Name == "avx512.mask.min.pd.256" || // Added in 5.0
182 Name == "avx512.mask.min.ps.128" || // Added in 5.0
183 Name == "avx512.mask.min.ps.256" || // Added in 5.0
184 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
185 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
186 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
187 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
188 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
189 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
190 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
191 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
192 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
193 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
194 Name.startswith("avx512.mask.pslli") || // Added in 4.0
195 Name.startswith("avx512.mask.psrai") || // Added in 4.0
196 Name.startswith("avx512.mask.psrli") || // Added in 4.0
197 Name.startswith("avx512.mask.psllv") || // Added in 4.0
198 Name.startswith("avx512.mask.psrav") || // Added in 4.0
199 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
200 Name.startswith("sse41.pmovsx") || // Added in 3.8
201 Name.startswith("sse41.pmovzx") || // Added in 3.9
202 Name.startswith("avx2.pmovsx") || // Added in 3.9
203 Name.startswith("avx2.pmovzx") || // Added in 3.9
204 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
205 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
206 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
207 Name == "sse2.cvtdq2pd" || // Added in 3.9
208 Name == "sse2.cvtps2pd" || // Added in 3.9
209 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
210 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
211 Name.startswith("avx.vinsertf128.") || // Added in 3.7
212 Name == "avx2.vinserti128" || // Added in 3.7
213 Name.startswith("avx512.mask.insert") || // Added in 4.0
214 Name.startswith("avx.vextractf128.") || // Added in 3.7
215 Name == "avx2.vextracti128" || // Added in 3.7
216 Name.startswith("avx512.mask.vextract") || // Added in 4.0
217 Name.startswith("sse4a.movnt.") || // Added in 3.9
218 Name.startswith("avx.movnt.") || // Added in 3.2
219 Name.startswith("avx512.storent.") || // Added in 3.9
220 Name == "sse41.movntdqa" || // Added in 5.0
221 Name == "avx2.movntdqa" || // Added in 5.0
222 Name == "avx512.movntdqa" || // Added in 5.0
223 Name == "sse2.storel.dq" || // Added in 3.9
224 Name.startswith("sse.storeu.") || // Added in 3.9
225 Name.startswith("sse2.storeu.") || // Added in 3.9
226 Name.startswith("avx.storeu.") || // Added in 3.9
227 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
228 Name.startswith("avx512.mask.store.p") || // Added in 3.9
229 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
230 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
231 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
232 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
233 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
234 Name.startswith("avx512.mask.load.") || // Added in 3.9
235 Name == "sse42.crc32.64.8" || // Added in 3.4
236 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
237 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
238 Name.startswith("avx512.mask.valign.") || // Added in 4.0
239 Name.startswith("sse2.psll.dq") || // Added in 3.7
240 Name.startswith("sse2.psrl.dq") || // Added in 3.7
241 Name.startswith("avx2.psll.dq") || // Added in 3.7
242 Name.startswith("avx2.psrl.dq") || // Added in 3.7
243 Name.startswith("avx512.psll.dq") || // Added in 3.9
244 Name.startswith("avx512.psrl.dq") || // Added in 3.9
245 Name == "sse41.pblendw" || // Added in 3.7
246 Name.startswith("sse41.blendp") || // Added in 3.7
247 Name.startswith("avx.blend.p") || // Added in 3.7
248 Name == "avx2.pblendw" || // Added in 3.7
249 Name.startswith("avx2.pblendd.") || // Added in 3.7
250 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
251 Name == "avx2.vbroadcasti128" || // Added in 3.7
252 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
253 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
254 Name == "xop.vpcmov" || // Added in 3.8
255 Name == "xop.vpcmov.256" || // Added in 5.0
256 Name.startswith("avx512.mask.move.s") || // Added in 4.0
257 Name.startswith("avx512.cvtmask2") || // Added in 5.0
258 (Name.startswith("xop.vpcom") && // Added in 3.2
259 F->arg_size() == 2) ||
260 Name.startswith("avx512.ptestm") || //Added in 6.0
261 Name.startswith("avx512.ptestnm") || //Added in 6.0
262 Name.startswith("sse2.pavg") || // Added in 6.0
263 Name.startswith("avx2.pavg") || // Added in 6.0
264 Name.startswith("avx512.mask.pavg")) // Added in 6.0
270 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
272 // Only handle intrinsics that start with "x86.".
273 if (!Name.startswith("x86."))
275 // Remove "x86." prefix.
276 Name = Name.substr(4);
278 if (ShouldUpgradeX86Intrinsic(F, Name)) {
283 // SSE4.1 ptest functions may have an old signature.
284 if (Name.startswith("sse41.ptest")) { // Added in 3.2
285 if (Name.substr(11) == "c")
286 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
287 if (Name.substr(11) == "z")
288 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
289 if (Name.substr(11) == "nzc")
290 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
292 // Several blend and other instructions with masks used the wrong number of
294 if (Name == "sse41.insertps") // Added in 3.6
295 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
297 if (Name == "sse41.dppd") // Added in 3.6
298 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
300 if (Name == "sse41.dpps") // Added in 3.6
301 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
303 if (Name == "sse41.mpsadbw") // Added in 3.6
304 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
306 if (Name == "avx.dp.ps.256") // Added in 3.6
307 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
309 if (Name == "avx2.mpsadbw") // Added in 3.6
310 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
313 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
314 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
316 NewFn = Intrinsic::getDeclaration(F->getParent(),
317 Intrinsic::x86_xop_vfrcz_ss);
320 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
322 NewFn = Intrinsic::getDeclaration(F->getParent(),
323 Intrinsic::x86_xop_vfrcz_sd);
326 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
327 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
328 auto Idx = F->getFunctionType()->getParamType(2);
329 if (Idx->isFPOrFPVectorTy()) {
331 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
332 unsigned EltSize = Idx->getScalarSizeInBits();
333 Intrinsic::ID Permil2ID;
334 if (EltSize == 64 && IdxSize == 128)
335 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
336 else if (EltSize == 32 && IdxSize == 128)
337 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
338 else if (EltSize == 64 && IdxSize == 256)
339 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
341 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
342 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
350 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
351 assert(F && "Illegal to upgrade a non-existent Function.");
353 // Quickly eliminate it, if it's not a candidate.
354 StringRef Name = F->getName();
355 if (Name.size() <= 8 || !Name.startswith("llvm."))
357 Name = Name.substr(5); // Strip off "llvm."
362 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
363 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
364 F->arg_begin()->getType());
367 if (Name.startswith("arm.neon.vclz")) {
369 F->arg_begin()->getType(),
370 Type::getInt1Ty(F->getContext())
372 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
373 // the end of the name. Change name from llvm.arm.neon.vclz.* to
375 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
376 NewFn = Function::Create(fType, F->getLinkage(),
377 "llvm.ctlz." + Name.substr(14), F->getParent());
380 if (Name.startswith("arm.neon.vcnt")) {
381 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
382 F->arg_begin()->getType());
385 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
386 if (vldRegex.match(Name)) {
387 auto fArgs = F->getFunctionType()->params();
388 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
389 // Can't use Intrinsic::getDeclaration here as the return types might
390 // then only be structurally equal.
391 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
392 NewFn = Function::Create(fType, F->getLinkage(),
393 "llvm." + Name + ".p0i8", F->getParent());
396 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
397 if (vstRegex.match(Name)) {
398 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
399 Intrinsic::arm_neon_vst2,
400 Intrinsic::arm_neon_vst3,
401 Intrinsic::arm_neon_vst4};
403 static const Intrinsic::ID StoreLaneInts[] = {
404 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
405 Intrinsic::arm_neon_vst4lane
408 auto fArgs = F->getFunctionType()->params();
409 Type *Tys[] = {fArgs[0], fArgs[1]};
410 if (Name.find("lane") == StringRef::npos)
411 NewFn = Intrinsic::getDeclaration(F->getParent(),
412 StoreInts[fArgs.size() - 3], Tys);
414 NewFn = Intrinsic::getDeclaration(F->getParent(),
415 StoreLaneInts[fArgs.size() - 5], Tys);
418 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
419 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
426 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
428 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
429 F->arg_begin()->getType());
432 if (Name.startswith("cttz.") && F->arg_size() == 1) {
434 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
435 F->arg_begin()->getType());
441 if (Name == "dbg.value" && F->arg_size() == 4) {
443 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
450 bool IsLifetimeStart = Name.startswith("lifetime.start");
451 if (IsLifetimeStart || Name.startswith("invariant.start")) {
452 Intrinsic::ID ID = IsLifetimeStart ?
453 Intrinsic::lifetime_start : Intrinsic::invariant_start;
454 auto Args = F->getFunctionType()->params();
455 Type* ObjectPtr[1] = {Args[1]};
456 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
458 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
463 bool IsLifetimeEnd = Name.startswith("lifetime.end");
464 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
465 Intrinsic::ID ID = IsLifetimeEnd ?
466 Intrinsic::lifetime_end : Intrinsic::invariant_end;
468 auto Args = F->getFunctionType()->params();
469 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
470 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
472 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
479 if (Name.startswith("masked.load.")) {
480 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
481 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
483 NewFn = Intrinsic::getDeclaration(F->getParent(),
484 Intrinsic::masked_load,
489 if (Name.startswith("masked.store.")) {
490 auto Args = F->getFunctionType()->params();
491 Type *Tys[] = { Args[0], Args[1] };
492 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
494 NewFn = Intrinsic::getDeclaration(F->getParent(),
495 Intrinsic::masked_store,
500 // Renaming gather/scatter intrinsics with no address space overloading
501 // to the new overload which includes an address space
502 if (Name.startswith("masked.gather.")) {
503 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
504 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
506 NewFn = Intrinsic::getDeclaration(F->getParent(),
507 Intrinsic::masked_gather, Tys);
511 if (Name.startswith("masked.scatter.")) {
512 auto Args = F->getFunctionType()->params();
513 Type *Tys[] = {Args[0], Args[1]};
514 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
516 NewFn = Intrinsic::getDeclaration(F->getParent(),
517 Intrinsic::masked_scatter, Tys);
524 if (Name.startswith("nvvm.")) {
525 Name = Name.substr(5);
527 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
528 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
529 .Cases("brev32", "brev64", Intrinsic::bitreverse)
530 .Case("clz.i", Intrinsic::ctlz)
531 .Case("popc.i", Intrinsic::ctpop)
532 .Default(Intrinsic::not_intrinsic);
533 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
534 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
535 {F->getReturnType()});
539 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
540 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
542 // TODO: We could add lohi.i2d.
543 bool Expand = StringSwitch<bool>(Name)
544 .Cases("abs.i", "abs.ll", true)
545 .Cases("clz.ll", "popc.ll", "h2f", true)
546 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
547 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
557 // We only need to change the name to match the mangling including the
559 if (Name.startswith("objectsize.")) {
560 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
561 if (F->arg_size() == 2 ||
562 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
564 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
572 if (Name == "stackprotectorcheck") {
579 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
582 // Remangle our intrinsic since we upgrade the mangling
583 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
584 if (Result != None) {
585 NewFn = Result.getValue();
589 // This may not belong here. This function is effectively being overloaded
590 // to both detect an intrinsic which needs upgrading, and to provide the
591 // upgraded form of the intrinsic. We should perhaps have two separate
592 // functions for this.
596 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
598 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
599 assert(F != NewFn && "Intrinsic function upgraded to the same function");
601 // Upgrade intrinsic attributes. This does not change the function.
604 if (Intrinsic::ID id = F->getIntrinsicID())
605 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
609 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
610 // Nothing to do yet.
614 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
616 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
617 Value *Op, unsigned Shift) {
618 Type *ResultTy = Op->getType();
619 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
621 // Bitcast from a 64-bit element type to a byte element type.
622 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
623 Op = Builder.CreateBitCast(Op, VecTy, "cast");
625 // We'll be shuffling in zeroes.
626 Value *Res = Constant::getNullValue(VecTy);
628 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
629 // we'll just return the zero vector.
632 // 256/512-bit version is split into 2/4 16-byte lanes.
633 for (unsigned l = 0; l != NumElts; l += 16)
634 for (unsigned i = 0; i != 16; ++i) {
635 unsigned Idx = NumElts + i - Shift;
637 Idx -= NumElts - 16; // end of lane, switch operand.
638 Idxs[l + i] = Idx + l;
641 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
644 // Bitcast back to a 64-bit element type.
645 return Builder.CreateBitCast(Res, ResultTy, "cast");
648 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
650 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
652 Type *ResultTy = Op->getType();
653 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
655 // Bitcast from a 64-bit element type to a byte element type.
656 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
657 Op = Builder.CreateBitCast(Op, VecTy, "cast");
659 // We'll be shuffling in zeroes.
660 Value *Res = Constant::getNullValue(VecTy);
662 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
663 // we'll just return the zero vector.
666 // 256/512-bit version is split into 2/4 16-byte lanes.
667 for (unsigned l = 0; l != NumElts; l += 16)
668 for (unsigned i = 0; i != 16; ++i) {
669 unsigned Idx = i + Shift;
671 Idx += NumElts - 16; // end of lane, switch operand.
672 Idxs[l + i] = Idx + l;
675 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
678 // Bitcast back to a 64-bit element type.
679 return Builder.CreateBitCast(Res, ResultTy, "cast");
682 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
684 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
685 cast<IntegerType>(Mask->getType())->getBitWidth());
686 Mask = Builder.CreateBitCast(Mask, MaskTy);
688 // If we have less than 8 elements, then the starting mask was an i8 and
689 // we need to extract down to the right number of elements.
692 for (unsigned i = 0; i != NumElts; ++i)
694 Mask = Builder.CreateShuffleVector(Mask, Mask,
695 makeArrayRef(Indices, NumElts),
702 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
703 Value *Op0, Value *Op1) {
704 // If the mask is all ones just emit the align operation.
705 if (const auto *C = dyn_cast<Constant>(Mask))
706 if (C->isAllOnesValue())
709 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
710 return Builder.CreateSelect(Mask, Op0, Op1);
713 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
714 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
715 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
716 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
717 Value *Op1, Value *Shift,
718 Value *Passthru, Value *Mask,
720 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
722 unsigned NumElts = Op0->getType()->getVectorNumElements();
723 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
724 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
725 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
727 // Mask the immediate for VALIGN.
729 ShiftVal &= (NumElts - 1);
731 // If palignr is shifting the pair of vectors more than the size of two
734 return llvm::Constant::getNullValue(Op0->getType());
736 // If palignr is shifting the pair of input vectors more than one lane,
737 // but less than two lanes, convert to shifting in zeroes.
741 Op0 = llvm::Constant::getNullValue(Op0->getType());
744 uint32_t Indices[64];
745 // 256-bit palignr operates on 128-bit lanes so we need to handle that
746 for (unsigned l = 0; l < NumElts; l += 16) {
747 for (unsigned i = 0; i != 16; ++i) {
748 unsigned Idx = ShiftVal + i;
749 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
750 Idx += NumElts - 16; // End of lane, switch operand.
751 Indices[l + i] = Idx + l;
755 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
756 makeArrayRef(Indices, NumElts),
759 return EmitX86Select(Builder, Mask, Align, Passthru);
762 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
763 Value *Ptr, Value *Data, Value *Mask,
765 // Cast the pointer to the right type.
766 Ptr = Builder.CreateBitCast(Ptr,
767 llvm::PointerType::getUnqual(Data->getType()));
769 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
771 // If the mask is all ones just emit a regular store.
772 if (const auto *C = dyn_cast<Constant>(Mask))
773 if (C->isAllOnesValue())
774 return Builder.CreateAlignedStore(Data, Ptr, Align);
776 // Convert the mask from an integer type to a vector of i1.
777 unsigned NumElts = Data->getType()->getVectorNumElements();
778 Mask = getX86MaskVec(Builder, Mask, NumElts);
779 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
782 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
783 Value *Ptr, Value *Passthru, Value *Mask,
785 // Cast the pointer to the right type.
786 Ptr = Builder.CreateBitCast(Ptr,
787 llvm::PointerType::getUnqual(Passthru->getType()));
789 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
791 // If the mask is all ones just emit a regular store.
792 if (const auto *C = dyn_cast<Constant>(Mask))
793 if (C->isAllOnesValue())
794 return Builder.CreateAlignedLoad(Ptr, Align);
796 // Convert the mask from an integer type to a vector of i1.
797 unsigned NumElts = Passthru->getType()->getVectorNumElements();
798 Mask = getX86MaskVec(Builder, Mask, NumElts);
799 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
802 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
803 Value *Op0 = CI.getArgOperand(0);
804 llvm::Type *Ty = Op0->getType();
805 Value *Zero = llvm::Constant::getNullValue(Ty);
806 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
807 Value *Neg = Builder.CreateNeg(Op0);
808 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
810 if (CI.getNumArgOperands() == 3)
811 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
816 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
817 ICmpInst::Predicate Pred) {
818 Value *Op0 = CI.getArgOperand(0);
819 Value *Op1 = CI.getArgOperand(1);
820 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
821 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
823 if (CI.getNumArgOperands() == 4)
824 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
829 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
830 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
832 const auto *C = dyn_cast<Constant>(Mask);
833 if (!C || !C->isAllOnesValue())
834 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
838 for (unsigned i = 0; i != NumElts; ++i)
840 for (unsigned i = NumElts; i != 8; ++i)
841 Indices[i] = NumElts + i % NumElts;
842 Vec = Builder.CreateShuffleVector(Vec,
843 Constant::getNullValue(Vec->getType()),
846 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
849 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
850 unsigned CC, bool Signed) {
851 Value *Op0 = CI.getArgOperand(0);
852 unsigned NumElts = Op0->getType()->getVectorNumElements();
856 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
857 } else if (CC == 7) {
858 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
860 ICmpInst::Predicate Pred;
862 default: llvm_unreachable("Unknown condition code");
863 case 0: Pred = ICmpInst::ICMP_EQ; break;
864 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
865 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
866 case 4: Pred = ICmpInst::ICMP_NE; break;
867 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
868 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
870 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
873 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
875 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
878 // Replace a masked intrinsic with an older unmasked intrinsic.
879 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
881 Function *F = CI.getCalledFunction();
882 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
883 Value *Rep = Builder.CreateCall(Intrin,
884 { CI.getArgOperand(0), CI.getArgOperand(1) });
885 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
888 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
889 Value* A = CI.getArgOperand(0);
890 Value* B = CI.getArgOperand(1);
891 Value* Src = CI.getArgOperand(2);
892 Value* Mask = CI.getArgOperand(3);
894 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
895 Value* Cmp = Builder.CreateIsNotNull(AndNode);
896 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
897 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
898 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
899 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
903 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
904 Value* Op = CI.getArgOperand(0);
905 Type* ReturnOp = CI.getType();
906 unsigned NumElts = CI.getType()->getVectorNumElements();
907 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
908 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
911 /// Upgrade a call to an old intrinsic. All argument and return casting must be
912 /// provided to seamlessly integrate with existing context.
913 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
914 Function *F = CI->getCalledFunction();
915 LLVMContext &C = CI->getContext();
916 IRBuilder<> Builder(C);
917 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
919 assert(F && "Intrinsic call is not direct?");
922 // Get the Function's name.
923 StringRef Name = F->getName();
925 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
926 Name = Name.substr(5);
928 bool IsX86 = Name.startswith("x86.");
930 Name = Name.substr(4);
931 bool IsNVVM = Name.startswith("nvvm.");
933 Name = Name.substr(5);
935 if (IsX86 && Name.startswith("sse4a.movnt.")) {
936 Module *M = F->getParent();
937 SmallVector<Metadata *, 1> Elts;
939 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
940 MDNode *Node = MDNode::get(C, Elts);
942 Value *Arg0 = CI->getArgOperand(0);
943 Value *Arg1 = CI->getArgOperand(1);
945 // Nontemporal (unaligned) store of the 0'th element of the float/double
947 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
948 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
949 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
951 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
953 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
954 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
957 CI->eraseFromParent();
961 if (IsX86 && (Name.startswith("avx.movnt.") ||
962 Name.startswith("avx512.storent."))) {
963 Module *M = F->getParent();
964 SmallVector<Metadata *, 1> Elts;
966 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
967 MDNode *Node = MDNode::get(C, Elts);
969 Value *Arg0 = CI->getArgOperand(0);
970 Value *Arg1 = CI->getArgOperand(1);
972 // Convert the type of the pointer to a pointer to the stored type.
973 Value *BC = Builder.CreateBitCast(Arg0,
974 PointerType::getUnqual(Arg1->getType()),
976 VectorType *VTy = cast<VectorType>(Arg1->getType());
977 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
978 VTy->getBitWidth() / 8);
979 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
982 CI->eraseFromParent();
986 if (IsX86 && Name == "sse2.storel.dq") {
987 Value *Arg0 = CI->getArgOperand(0);
988 Value *Arg1 = CI->getArgOperand(1);
990 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
991 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
992 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
993 Value *BC = Builder.CreateBitCast(Arg0,
994 PointerType::getUnqual(Elt->getType()),
996 Builder.CreateAlignedStore(Elt, BC, 1);
999 CI->eraseFromParent();
1003 if (IsX86 && (Name.startswith("sse.storeu.") ||
1004 Name.startswith("sse2.storeu.") ||
1005 Name.startswith("avx.storeu."))) {
1006 Value *Arg0 = CI->getArgOperand(0);
1007 Value *Arg1 = CI->getArgOperand(1);
1009 Arg0 = Builder.CreateBitCast(Arg0,
1010 PointerType::getUnqual(Arg1->getType()),
1012 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1014 // Remove intrinsic.
1015 CI->eraseFromParent();
1019 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1020 // "avx512.mask.storeu." or "avx512.mask.store."
1021 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1022 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1023 CI->getArgOperand(2), Aligned);
1025 // Remove intrinsic.
1026 CI->eraseFromParent();
1031 // Upgrade packed integer vector compare intrinsics to compare instructions.
1032 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1033 Name.startswith("avx2.pcmp"))) {
1034 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1035 bool CmpEq = Name[9] == 'e';
1036 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1037 CI->getArgOperand(0), CI->getArgOperand(1));
1038 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1039 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1040 Type *ExtTy = Type::getInt32Ty(C);
1041 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1042 ExtTy = Type::getInt64Ty(C);
1043 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1044 ExtTy->getPrimitiveSizeInBits();
1045 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1046 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1047 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1048 Name.startswith("avx512.ptestnm"))) {
1049 Value *Op0 = CI->getArgOperand(0);
1050 Value *Op1 = CI->getArgOperand(1);
1051 Value *Mask = CI->getArgOperand(2);
1052 Rep = Builder.CreateAnd(Op0, Op1);
1053 llvm::Type *Ty = Op0->getType();
1054 Value *Zero = llvm::Constant::getNullValue(Ty);
1055 ICmpInst::Predicate Pred =
1056 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1057 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1058 unsigned NumElts = Op0->getType()->getVectorNumElements();
1059 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
1060 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1062 CI->getArgOperand(1)->getType()->getVectorNumElements();
1063 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1064 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1065 CI->getArgOperand(1));
1066 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1067 uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
1068 uint64_t And = (1ULL << Shift) - 1;
1069 Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And);
1070 Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift);
1071 Rep = Builder.CreateOr(LowBits, HighBits);
1072 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
1073 Type *I32Ty = Type::getInt32Ty(C);
1074 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1075 ConstantInt::get(I32Ty, 0));
1076 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1077 ConstantInt::get(I32Ty, 0));
1078 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1079 Builder.CreateFAdd(Elt0, Elt1),
1080 ConstantInt::get(I32Ty, 0));
1081 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1082 Type *I32Ty = Type::getInt32Ty(C);
1083 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1084 ConstantInt::get(I32Ty, 0));
1085 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1086 ConstantInt::get(I32Ty, 0));
1087 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1088 Builder.CreateFSub(Elt0, Elt1),
1089 ConstantInt::get(I32Ty, 0));
1090 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1091 Type *I32Ty = Type::getInt32Ty(C);
1092 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1093 ConstantInt::get(I32Ty, 0));
1094 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1095 ConstantInt::get(I32Ty, 0));
1096 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1097 Builder.CreateFMul(Elt0, Elt1),
1098 ConstantInt::get(I32Ty, 0));
1099 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1100 Type *I32Ty = Type::getInt32Ty(C);
1101 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1102 ConstantInt::get(I32Ty, 0));
1103 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1104 ConstantInt::get(I32Ty, 0));
1105 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1106 Builder.CreateFDiv(Elt0, Elt1),
1107 ConstantInt::get(I32Ty, 0));
1108 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1109 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1110 bool CmpEq = Name[16] == 'e';
1111 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1112 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
1113 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1114 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1115 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
1116 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1117 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1118 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1119 Name == "ssse3.pabs.w.128" ||
1120 Name == "ssse3.pabs.d.128" ||
1121 Name.startswith("avx2.pabs") ||
1122 Name.startswith("avx512.mask.pabs"))) {
1123 Rep = upgradeAbs(Builder, *CI);
1124 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1125 Name == "sse2.pmaxs.w" ||
1126 Name == "sse41.pmaxsd" ||
1127 Name.startswith("avx2.pmaxs") ||
1128 Name.startswith("avx512.mask.pmaxs"))) {
1129 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1130 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1131 Name == "sse41.pmaxuw" ||
1132 Name == "sse41.pmaxud" ||
1133 Name.startswith("avx2.pmaxu") ||
1134 Name.startswith("avx512.mask.pmaxu"))) {
1135 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1136 } else if (IsX86 && (Name == "sse41.pminsb" ||
1137 Name == "sse2.pmins.w" ||
1138 Name == "sse41.pminsd" ||
1139 Name.startswith("avx2.pmins") ||
1140 Name.startswith("avx512.mask.pmins"))) {
1141 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1142 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1143 Name == "sse41.pminuw" ||
1144 Name == "sse41.pminud" ||
1145 Name.startswith("avx2.pminu") ||
1146 Name.startswith("avx512.mask.pminu"))) {
1147 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1148 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1149 Name == "sse2.cvtps2pd" ||
1150 Name == "avx.cvtdq2.pd.256" ||
1151 Name == "avx.cvt.ps2.pd.256" ||
1152 Name.startswith("avx512.mask.cvtdq2pd.") ||
1153 Name.startswith("avx512.mask.cvtudq2pd."))) {
1154 // Lossless i32/float to double conversion.
1155 // Extract the bottom elements if necessary and convert to double vector.
1156 Value *Src = CI->getArgOperand(0);
1157 VectorType *SrcTy = cast<VectorType>(Src->getType());
1158 VectorType *DstTy = cast<VectorType>(CI->getType());
1159 Rep = CI->getArgOperand(0);
1161 unsigned NumDstElts = DstTy->getNumElements();
1162 if (NumDstElts < SrcTy->getNumElements()) {
1163 assert(NumDstElts == 2 && "Unexpected vector size");
1164 uint32_t ShuffleMask[2] = { 0, 1 };
1165 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1169 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1170 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1172 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1173 else if (UInt2Double)
1174 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1176 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1178 if (CI->getNumArgOperands() == 3)
1179 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1180 CI->getArgOperand(1));
1181 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1182 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1183 CI->getArgOperand(1), CI->getArgOperand(2),
1185 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1186 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1187 CI->getArgOperand(1),CI->getArgOperand(2),
1189 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1190 Intrinsic::ID intID;
1191 if (Name.endswith("ub"))
1192 intID = Intrinsic::x86_xop_vpcomub;
1193 else if (Name.endswith("uw"))
1194 intID = Intrinsic::x86_xop_vpcomuw;
1195 else if (Name.endswith("ud"))
1196 intID = Intrinsic::x86_xop_vpcomud;
1197 else if (Name.endswith("uq"))
1198 intID = Intrinsic::x86_xop_vpcomuq;
1199 else if (Name.endswith("b"))
1200 intID = Intrinsic::x86_xop_vpcomb;
1201 else if (Name.endswith("w"))
1202 intID = Intrinsic::x86_xop_vpcomw;
1203 else if (Name.endswith("d"))
1204 intID = Intrinsic::x86_xop_vpcomd;
1205 else if (Name.endswith("q"))
1206 intID = Intrinsic::x86_xop_vpcomq;
1208 llvm_unreachable("Unknown suffix");
1210 Name = Name.substr(9); // strip off "xop.vpcom"
1212 if (Name.startswith("lt"))
1214 else if (Name.startswith("le"))
1216 else if (Name.startswith("gt"))
1218 else if (Name.startswith("ge"))
1220 else if (Name.startswith("eq"))
1222 else if (Name.startswith("ne"))
1224 else if (Name.startswith("false"))
1226 else if (Name.startswith("true"))
1229 llvm_unreachable("Unknown condition");
1231 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1233 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1234 Builder.getInt8(Imm)});
1235 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1236 Value *Sel = CI->getArgOperand(2);
1237 Value *NotSel = Builder.CreateNot(Sel);
1238 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1239 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1240 Rep = Builder.CreateOr(Sel0, Sel1);
1241 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1242 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1243 Intrinsic::x86_sse42_crc32_32_8);
1244 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1245 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1246 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1247 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1248 // Replace broadcasts with a series of insertelements.
1249 Type *VecTy = CI->getType();
1250 Type *EltTy = VecTy->getVectorElementType();
1251 unsigned EltNum = VecTy->getVectorNumElements();
1252 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1253 EltTy->getPointerTo());
1254 Value *Load = Builder.CreateLoad(EltTy, Cast);
1255 Type *I32Ty = Type::getInt32Ty(C);
1256 Rep = UndefValue::get(VecTy);
1257 for (unsigned I = 0; I < EltNum; ++I)
1258 Rep = Builder.CreateInsertElement(Rep, Load,
1259 ConstantInt::get(I32Ty, I));
1260 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1261 Name.startswith("sse41.pmovzx") ||
1262 Name.startswith("avx2.pmovsx") ||
1263 Name.startswith("avx2.pmovzx") ||
1264 Name.startswith("avx512.mask.pmovsx") ||
1265 Name.startswith("avx512.mask.pmovzx"))) {
1266 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1267 VectorType *DstTy = cast<VectorType>(CI->getType());
1268 unsigned NumDstElts = DstTy->getNumElements();
1270 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1271 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1272 for (unsigned i = 0; i != NumDstElts; ++i)
1275 Value *SV = Builder.CreateShuffleVector(
1276 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1278 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1279 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1280 : Builder.CreateZExt(SV, DstTy);
1281 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1282 if (CI->getNumArgOperands() == 3)
1283 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1284 CI->getArgOperand(1));
1285 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1286 Name == "avx2.vbroadcasti128")) {
1287 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1288 Type *EltTy = CI->getType()->getVectorElementType();
1289 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1290 Type *VT = VectorType::get(EltTy, NumSrcElts);
1291 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1292 PointerType::getUnqual(VT));
1293 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1294 if (NumSrcElts == 2)
1295 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1298 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1299 { 0, 1, 2, 3, 0, 1, 2, 3 });
1300 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
1301 Name.startswith("avx512.mask.shuf.f"))) {
1302 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1303 Type *VT = CI->getType();
1304 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
1305 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
1306 unsigned ControlBitsMask = NumLanes - 1;
1307 unsigned NumControlBits = NumLanes / 2;
1308 SmallVector<uint32_t, 8> ShuffleMask(0);
1310 for (unsigned l = 0; l != NumLanes; ++l) {
1311 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
1312 // We actually need the other source.
1313 if (l >= NumLanes / 2)
1314 LaneMask += NumLanes;
1315 for (unsigned i = 0; i != NumElementsInLane; ++i)
1316 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
1318 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
1319 CI->getArgOperand(1), ShuffleMask);
1320 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1321 CI->getArgOperand(3));
1322 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
1323 Name.startswith("avx512.mask.broadcasti"))) {
1324 unsigned NumSrcElts =
1325 CI->getArgOperand(0)->getType()->getVectorNumElements();
1326 unsigned NumDstElts = CI->getType()->getVectorNumElements();
1328 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1329 for (unsigned i = 0; i != NumDstElts; ++i)
1330 ShuffleMask[i] = i % NumSrcElts;
1332 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
1333 CI->getArgOperand(0),
1335 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1336 CI->getArgOperand(1));
1337 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1338 Name.startswith("avx2.vbroadcast") ||
1339 Name.startswith("avx512.pbroadcast") ||
1340 Name.startswith("avx512.mask.broadcast.s"))) {
1341 // Replace vp?broadcasts with a vector shuffle.
1342 Value *Op = CI->getArgOperand(0);
1343 unsigned NumElts = CI->getType()->getVectorNumElements();
1344 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1345 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1346 Constant::getNullValue(MaskTy));
1348 if (CI->getNumArgOperands() == 3)
1349 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1350 CI->getArgOperand(1));
1351 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1352 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1353 CI->getArgOperand(1),
1354 CI->getArgOperand(2),
1355 CI->getArgOperand(3),
1356 CI->getArgOperand(4),
1358 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1359 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1360 CI->getArgOperand(1),
1361 CI->getArgOperand(2),
1362 CI->getArgOperand(3),
1363 CI->getArgOperand(4),
1365 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1366 Name == "avx2.psll.dq")) {
1367 // 128/256-bit shift left specified in bits.
1368 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1369 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1370 Shift / 8); // Shift is in bits.
1371 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1372 Name == "avx2.psrl.dq")) {
1373 // 128/256-bit shift right specified in bits.
1374 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1375 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1376 Shift / 8); // Shift is in bits.
1377 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1378 Name == "avx2.psll.dq.bs" ||
1379 Name == "avx512.psll.dq.512")) {
1380 // 128/256/512-bit shift left specified in bytes.
1381 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1382 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1383 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1384 Name == "avx2.psrl.dq.bs" ||
1385 Name == "avx512.psrl.dq.512")) {
1386 // 128/256/512-bit shift right specified in bytes.
1387 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1388 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1389 } else if (IsX86 && (Name == "sse41.pblendw" ||
1390 Name.startswith("sse41.blendp") ||
1391 Name.startswith("avx.blend.p") ||
1392 Name == "avx2.pblendw" ||
1393 Name.startswith("avx2.pblendd."))) {
1394 Value *Op0 = CI->getArgOperand(0);
1395 Value *Op1 = CI->getArgOperand(1);
1396 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1397 VectorType *VecTy = cast<VectorType>(CI->getType());
1398 unsigned NumElts = VecTy->getNumElements();
1400 SmallVector<uint32_t, 16> Idxs(NumElts);
1401 for (unsigned i = 0; i != NumElts; ++i)
1402 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1404 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1405 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1406 Name == "avx2.vinserti128" ||
1407 Name.startswith("avx512.mask.insert"))) {
1408 Value *Op0 = CI->getArgOperand(0);
1409 Value *Op1 = CI->getArgOperand(1);
1410 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1411 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1412 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1413 unsigned Scale = DstNumElts / SrcNumElts;
1415 // Mask off the high bits of the immediate value; hardware ignores those.
1418 // Extend the second operand into a vector the size of the destination.
1419 Value *UndefV = UndefValue::get(Op1->getType());
1420 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1421 for (unsigned i = 0; i != SrcNumElts; ++i)
1423 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1424 Idxs[i] = SrcNumElts;
1425 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1427 // Insert the second operand into the first operand.
1429 // Note that there is no guarantee that instruction lowering will actually
1430 // produce a vinsertf128 instruction for the created shuffles. In
1431 // particular, the 0 immediate case involves no lane changes, so it can
1432 // be handled as a blend.
1434 // Example of shuffle mask for 32-bit elements:
1435 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1436 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1438 // First fill with identify mask.
1439 for (unsigned i = 0; i != DstNumElts; ++i)
1441 // Then replace the elements where we need to insert.
1442 for (unsigned i = 0; i != SrcNumElts; ++i)
1443 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1444 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1446 // If the intrinsic has a mask operand, handle that.
1447 if (CI->getNumArgOperands() == 5)
1448 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1449 CI->getArgOperand(3));
1450 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1451 Name == "avx2.vextracti128" ||
1452 Name.startswith("avx512.mask.vextract"))) {
1453 Value *Op0 = CI->getArgOperand(0);
1454 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1455 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1456 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1457 unsigned Scale = SrcNumElts / DstNumElts;
1459 // Mask off the high bits of the immediate value; hardware ignores those.
1462 // Get indexes for the subvector of the input vector.
1463 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1464 for (unsigned i = 0; i != DstNumElts; ++i) {
1465 Idxs[i] = i + (Imm * DstNumElts);
1467 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1469 // If the intrinsic has a mask operand, handle that.
1470 if (CI->getNumArgOperands() == 4)
1471 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1472 CI->getArgOperand(2));
1473 } else if (!IsX86 && Name == "stackprotectorcheck") {
1475 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1476 Name.startswith("avx512.mask.perm.di."))) {
1477 Value *Op0 = CI->getArgOperand(0);
1478 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1479 VectorType *VecTy = cast<VectorType>(CI->getType());
1480 unsigned NumElts = VecTy->getNumElements();
1482 SmallVector<uint32_t, 8> Idxs(NumElts);
1483 for (unsigned i = 0; i != NumElts; ++i)
1484 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1486 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1488 if (CI->getNumArgOperands() == 4)
1489 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1490 CI->getArgOperand(2));
1491 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
1492 Name == "avx2.vperm2i128")) {
1493 // The immediate permute control byte looks like this:
1494 // [1:0] - select 128 bits from sources for low half of destination
1496 // [3] - zero low half of destination
1497 // [5:4] - select 128 bits from sources for high half of destination
1499 // [7] - zero high half of destination
1501 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1503 unsigned NumElts = CI->getType()->getVectorNumElements();
1504 unsigned HalfSize = NumElts / 2;
1505 SmallVector<uint32_t, 8> ShuffleMask(NumElts);
1507 // Determine which operand(s) are actually in use for this instruction.
1508 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
1509 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
1511 // If needed, replace operands based on zero mask.
1512 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
1513 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
1515 // Permute low half of result.
1516 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
1517 for (unsigned i = 0; i < HalfSize; ++i)
1518 ShuffleMask[i] = StartIndex + i;
1520 // Permute high half of result.
1521 StartIndex = (Imm & 0x10) ? HalfSize : 0;
1522 for (unsigned i = 0; i < HalfSize; ++i)
1523 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
1525 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
1527 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1528 Name == "sse2.pshuf.d" ||
1529 Name.startswith("avx512.mask.vpermil.p") ||
1530 Name.startswith("avx512.mask.pshuf.d."))) {
1531 Value *Op0 = CI->getArgOperand(0);
1532 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1533 VectorType *VecTy = cast<VectorType>(CI->getType());
1534 unsigned NumElts = VecTy->getNumElements();
1535 // Calculate the size of each index in the immediate.
1536 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1537 unsigned IdxMask = ((1 << IdxSize) - 1);
1539 SmallVector<uint32_t, 8> Idxs(NumElts);
1540 // Lookup the bits for this element, wrapping around the immediate every
1541 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1542 // to offset by the first index of each group.
1543 for (unsigned i = 0; i != NumElts; ++i)
1544 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1546 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1548 if (CI->getNumArgOperands() == 4)
1549 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1550 CI->getArgOperand(2));
1551 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1552 Name.startswith("avx512.mask.pshufl.w."))) {
1553 Value *Op0 = CI->getArgOperand(0);
1554 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1555 unsigned NumElts = CI->getType()->getVectorNumElements();
1557 SmallVector<uint32_t, 16> Idxs(NumElts);
1558 for (unsigned l = 0; l != NumElts; l += 8) {
1559 for (unsigned i = 0; i != 4; ++i)
1560 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1561 for (unsigned i = 4; i != 8; ++i)
1562 Idxs[i + l] = i + l;
1565 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1567 if (CI->getNumArgOperands() == 4)
1568 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1569 CI->getArgOperand(2));
1570 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1571 Name.startswith("avx512.mask.pshufh.w."))) {
1572 Value *Op0 = CI->getArgOperand(0);
1573 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1574 unsigned NumElts = CI->getType()->getVectorNumElements();
1576 SmallVector<uint32_t, 16> Idxs(NumElts);
1577 for (unsigned l = 0; l != NumElts; l += 8) {
1578 for (unsigned i = 0; i != 4; ++i)
1579 Idxs[i + l] = i + l;
1580 for (unsigned i = 0; i != 4; ++i)
1581 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1584 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1586 if (CI->getNumArgOperands() == 4)
1587 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1588 CI->getArgOperand(2));
1589 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1590 Value *Op0 = CI->getArgOperand(0);
1591 Value *Op1 = CI->getArgOperand(1);
1592 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1593 unsigned NumElts = CI->getType()->getVectorNumElements();
1595 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1596 unsigned HalfLaneElts = NumLaneElts / 2;
1598 SmallVector<uint32_t, 16> Idxs(NumElts);
1599 for (unsigned i = 0; i != NumElts; ++i) {
1600 // Base index is the starting element of the lane.
1601 Idxs[i] = i - (i % NumLaneElts);
1602 // If we are half way through the lane switch to the other source.
1603 if ((i % NumLaneElts) >= HalfLaneElts)
1605 // Now select the specific element. By adding HalfLaneElts bits from
1606 // the immediate. Wrapping around the immediate every 8-bits.
1607 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1610 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1612 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1613 CI->getArgOperand(3));
1614 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1615 Name.startswith("avx512.mask.movshdup") ||
1616 Name.startswith("avx512.mask.movsldup"))) {
1617 Value *Op0 = CI->getArgOperand(0);
1618 unsigned NumElts = CI->getType()->getVectorNumElements();
1619 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1621 unsigned Offset = 0;
1622 if (Name.startswith("avx512.mask.movshdup."))
1625 SmallVector<uint32_t, 16> Idxs(NumElts);
1626 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1627 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1628 Idxs[i + l + 0] = i + l + Offset;
1629 Idxs[i + l + 1] = i + l + Offset;
1632 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1634 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1635 CI->getArgOperand(1));
1636 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1637 Name.startswith("avx512.mask.unpckl."))) {
1638 Value *Op0 = CI->getArgOperand(0);
1639 Value *Op1 = CI->getArgOperand(1);
1640 int NumElts = CI->getType()->getVectorNumElements();
1641 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1643 SmallVector<uint32_t, 64> Idxs(NumElts);
1644 for (int l = 0; l != NumElts; l += NumLaneElts)
1645 for (int i = 0; i != NumLaneElts; ++i)
1646 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1648 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1650 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1651 CI->getArgOperand(2));
1652 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1653 Name.startswith("avx512.mask.unpckh."))) {
1654 Value *Op0 = CI->getArgOperand(0);
1655 Value *Op1 = CI->getArgOperand(1);
1656 int NumElts = CI->getType()->getVectorNumElements();
1657 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1659 SmallVector<uint32_t, 64> Idxs(NumElts);
1660 for (int l = 0; l != NumElts; l += NumLaneElts)
1661 for (int i = 0; i != NumLaneElts; ++i)
1662 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1664 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1666 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1667 CI->getArgOperand(2));
1668 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1669 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1670 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1671 CI->getArgOperand(2));
1672 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1673 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1674 CI->getArgOperand(1));
1675 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1676 CI->getArgOperand(2));
1677 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1678 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1679 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1680 CI->getArgOperand(2));
1681 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1682 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1683 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1684 CI->getArgOperand(2));
1685 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1686 VectorType *FTy = cast<VectorType>(CI->getType());
1687 VectorType *ITy = VectorType::getInteger(FTy);
1688 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1689 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1690 Rep = Builder.CreateBitCast(Rep, FTy);
1691 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1692 CI->getArgOperand(2));
1693 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1694 VectorType *FTy = cast<VectorType>(CI->getType());
1695 VectorType *ITy = VectorType::getInteger(FTy);
1696 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1697 Rep = Builder.CreateAnd(Rep,
1698 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1699 Rep = Builder.CreateBitCast(Rep, FTy);
1700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1701 CI->getArgOperand(2));
1702 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1703 VectorType *FTy = cast<VectorType>(CI->getType());
1704 VectorType *ITy = VectorType::getInteger(FTy);
1705 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1706 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1707 Rep = Builder.CreateBitCast(Rep, FTy);
1708 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1709 CI->getArgOperand(2));
1710 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1711 VectorType *FTy = cast<VectorType>(CI->getType());
1712 VectorType *ITy = VectorType::getInteger(FTy);
1713 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1714 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1715 Rep = Builder.CreateBitCast(Rep, FTy);
1716 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1717 CI->getArgOperand(2));
1718 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1719 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1720 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1721 CI->getArgOperand(2));
1722 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1723 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1724 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1725 CI->getArgOperand(2));
1726 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1727 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1728 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1729 CI->getArgOperand(2));
1730 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1731 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1732 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1733 CI->getArgOperand(2));
1734 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1735 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1736 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1737 CI->getArgOperand(2));
1738 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1739 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1740 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1741 CI->getArgOperand(2));
1742 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1743 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1744 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1745 CI->getArgOperand(2));
1746 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1747 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1750 { CI->getArgOperand(0), Builder.getInt1(false) });
1751 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1752 CI->getArgOperand(1));
1753 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1754 Name.startswith("avx512.mask.min.p"))) {
1755 bool IsMin = Name[13] == 'i';
1756 VectorType *VecTy = cast<VectorType>(CI->getType());
1757 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1758 unsigned EltWidth = VecTy->getScalarSizeInBits();
1760 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1761 IID = Intrinsic::x86_sse_max_ps;
1762 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1763 IID = Intrinsic::x86_sse2_max_pd;
1764 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1765 IID = Intrinsic::x86_avx_max_ps_256;
1766 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1767 IID = Intrinsic::x86_avx_max_pd_256;
1768 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1769 IID = Intrinsic::x86_sse_min_ps;
1770 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1771 IID = Intrinsic::x86_sse2_min_pd;
1772 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1773 IID = Intrinsic::x86_avx_min_ps_256;
1774 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1775 IID = Intrinsic::x86_avx_min_pd_256;
1777 llvm_unreachable("Unexpected intrinsic");
1779 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1780 { CI->getArgOperand(0), CI->getArgOperand(1) });
1781 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1782 CI->getArgOperand(2));
1783 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1784 VectorType *VecTy = cast<VectorType>(CI->getType());
1786 if (VecTy->getPrimitiveSizeInBits() == 128)
1787 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1788 else if (VecTy->getPrimitiveSizeInBits() == 256)
1789 IID = Intrinsic::x86_avx2_pshuf_b;
1790 else if (VecTy->getPrimitiveSizeInBits() == 512)
1791 IID = Intrinsic::x86_avx512_pshuf_b_512;
1793 llvm_unreachable("Unexpected intrinsic");
1795 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1796 { CI->getArgOperand(0), CI->getArgOperand(1) });
1797 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1798 CI->getArgOperand(2));
1799 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1800 Name.startswith("avx512.mask.pmulu.dq."))) {
1801 bool IsUnsigned = Name[16] == 'u';
1802 VectorType *VecTy = cast<VectorType>(CI->getType());
1804 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1805 IID = Intrinsic::x86_sse41_pmuldq;
1806 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1807 IID = Intrinsic::x86_avx2_pmul_dq;
1808 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1809 IID = Intrinsic::x86_avx512_pmul_dq_512;
1810 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1811 IID = Intrinsic::x86_sse2_pmulu_dq;
1812 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1813 IID = Intrinsic::x86_avx2_pmulu_dq;
1814 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1815 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1817 llvm_unreachable("Unexpected intrinsic");
1819 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1820 { CI->getArgOperand(0), CI->getArgOperand(1) });
1821 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1822 CI->getArgOperand(2));
1823 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1824 bool IsUnsigned = Name[16] == 'u';
1825 bool IsDW = Name[18] == 'd';
1826 VectorType *VecTy = cast<VectorType>(CI->getType());
1828 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1829 IID = Intrinsic::x86_sse2_packsswb_128;
1830 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1831 IID = Intrinsic::x86_avx2_packsswb;
1832 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1833 IID = Intrinsic::x86_avx512_packsswb_512;
1834 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1835 IID = Intrinsic::x86_sse2_packssdw_128;
1836 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1837 IID = Intrinsic::x86_avx2_packssdw;
1838 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1839 IID = Intrinsic::x86_avx512_packssdw_512;
1840 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1841 IID = Intrinsic::x86_sse2_packuswb_128;
1842 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1843 IID = Intrinsic::x86_avx2_packuswb;
1844 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1845 IID = Intrinsic::x86_avx512_packuswb_512;
1846 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1847 IID = Intrinsic::x86_sse41_packusdw;
1848 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1849 IID = Intrinsic::x86_avx2_packusdw;
1850 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1851 IID = Intrinsic::x86_avx512_packusdw_512;
1853 llvm_unreachable("Unexpected intrinsic");
1855 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1856 { CI->getArgOperand(0), CI->getArgOperand(1) });
1857 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1858 CI->getArgOperand(2));
1859 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1860 bool IsImmediate = Name[16] == 'i' ||
1861 (Name.size() > 18 && Name[18] == 'i');
1862 bool IsVariable = Name[16] == 'v';
1863 char Size = Name[16] == '.' ? Name[17] :
1864 Name[17] == '.' ? Name[18] :
1865 Name[18] == '.' ? Name[19] :
1869 if (IsVariable && Name[17] != '.') {
1870 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1871 IID = Intrinsic::x86_avx2_psllv_q;
1872 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1873 IID = Intrinsic::x86_avx2_psllv_q_256;
1874 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1875 IID = Intrinsic::x86_avx2_psllv_d;
1876 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1877 IID = Intrinsic::x86_avx2_psllv_d_256;
1878 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1879 IID = Intrinsic::x86_avx512_psllv_w_128;
1880 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1881 IID = Intrinsic::x86_avx512_psllv_w_256;
1882 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1883 IID = Intrinsic::x86_avx512_psllv_w_512;
1885 llvm_unreachable("Unexpected size");
1886 } else if (Name.endswith(".128")) {
1887 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1888 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1889 : Intrinsic::x86_sse2_psll_d;
1890 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1891 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1892 : Intrinsic::x86_sse2_psll_q;
1893 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1894 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1895 : Intrinsic::x86_sse2_psll_w;
1897 llvm_unreachable("Unexpected size");
1898 } else if (Name.endswith(".256")) {
1899 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1900 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1901 : Intrinsic::x86_avx2_psll_d;
1902 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1903 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1904 : Intrinsic::x86_avx2_psll_q;
1905 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1906 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1907 : Intrinsic::x86_avx2_psll_w;
1909 llvm_unreachable("Unexpected size");
1911 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1912 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1913 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1914 Intrinsic::x86_avx512_psll_d_512;
1915 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1916 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1917 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1918 Intrinsic::x86_avx512_psll_q_512;
1919 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1920 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1921 : Intrinsic::x86_avx512_psll_w_512;
1923 llvm_unreachable("Unexpected size");
1926 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1927 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1928 bool IsImmediate = Name[16] == 'i' ||
1929 (Name.size() > 18 && Name[18] == 'i');
1930 bool IsVariable = Name[16] == 'v';
1931 char Size = Name[16] == '.' ? Name[17] :
1932 Name[17] == '.' ? Name[18] :
1933 Name[18] == '.' ? Name[19] :
1937 if (IsVariable && Name[17] != '.') {
1938 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1939 IID = Intrinsic::x86_avx2_psrlv_q;
1940 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1941 IID = Intrinsic::x86_avx2_psrlv_q_256;
1942 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1943 IID = Intrinsic::x86_avx2_psrlv_d;
1944 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1945 IID = Intrinsic::x86_avx2_psrlv_d_256;
1946 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1947 IID = Intrinsic::x86_avx512_psrlv_w_128;
1948 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1949 IID = Intrinsic::x86_avx512_psrlv_w_256;
1950 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1951 IID = Intrinsic::x86_avx512_psrlv_w_512;
1953 llvm_unreachable("Unexpected size");
1954 } else if (Name.endswith(".128")) {
1955 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1956 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1957 : Intrinsic::x86_sse2_psrl_d;
1958 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1959 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1960 : Intrinsic::x86_sse2_psrl_q;
1961 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1962 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1963 : Intrinsic::x86_sse2_psrl_w;
1965 llvm_unreachable("Unexpected size");
1966 } else if (Name.endswith(".256")) {
1967 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1968 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1969 : Intrinsic::x86_avx2_psrl_d;
1970 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1971 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1972 : Intrinsic::x86_avx2_psrl_q;
1973 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1974 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1975 : Intrinsic::x86_avx2_psrl_w;
1977 llvm_unreachable("Unexpected size");
1979 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
1980 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1981 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1982 Intrinsic::x86_avx512_psrl_d_512;
1983 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
1984 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1985 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1986 Intrinsic::x86_avx512_psrl_q_512;
    else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
1988 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1989 : Intrinsic::x86_avx512_psrl_w_512;
1991 llvm_unreachable("Unexpected size");
1994 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1995 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1996 bool IsImmediate = Name[16] == 'i' ||
1997 (Name.size() > 18 && Name[18] == 'i');
1998 bool IsVariable = Name[16] == 'v';
1999 char Size = Name[16] == '.' ? Name[17] :
2000 Name[17] == '.' ? Name[18] :
2001 Name[18] == '.' ? Name[19] :
2005 if (IsVariable && Name[17] != '.') {
2006 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2007 IID = Intrinsic::x86_avx2_psrav_d;
2008 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2009 IID = Intrinsic::x86_avx2_psrav_d_256;
2010 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2011 IID = Intrinsic::x86_avx512_psrav_w_128;
2012 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2013 IID = Intrinsic::x86_avx512_psrav_w_256;
2014 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2015 IID = Intrinsic::x86_avx512_psrav_w_512;
2017 llvm_unreachable("Unexpected size");
2018 } else if (Name.endswith(".128")) {
2019 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2020 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2021 : Intrinsic::x86_sse2_psra_d;
2022 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2023 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2024 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2025 Intrinsic::x86_avx512_psra_q_128;
2026 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2027 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2028 : Intrinsic::x86_sse2_psra_w;
2030 llvm_unreachable("Unexpected size");
2031 } else if (Name.endswith(".256")) {
2032 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2033 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2034 : Intrinsic::x86_avx2_psra_d;
2035 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2036 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2037 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2038 Intrinsic::x86_avx512_psra_q_256;
2039 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2040 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2041 : Intrinsic::x86_avx2_psra_w;
2043 llvm_unreachable("Unexpected size");
2045 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2046 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2047 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2048 Intrinsic::x86_avx512_psra_d_512;
2049 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2050 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2051 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2052 Intrinsic::x86_avx512_psra_q_512;
2053 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2054 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2055 : Intrinsic::x86_avx512_psra_w_512;
2057 llvm_unreachable("Unexpected size");
2060 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2061 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2062 Rep = upgradeMaskedMove(Builder, *CI);
2063 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2064 Rep = UpgradeMaskToInt(Builder, *CI);
2065 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
2067 if (Name.endswith("ps.128"))
2068 IID = Intrinsic::x86_avx_vpermilvar_ps;
2069 else if (Name.endswith("pd.128"))
2070 IID = Intrinsic::x86_avx_vpermilvar_pd;
2071 else if (Name.endswith("ps.256"))
2072 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2073 else if (Name.endswith("pd.256"))
2074 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2075 else if (Name.endswith("ps.512"))
2076 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2077 else if (Name.endswith("pd.512"))
2078 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2080 llvm_unreachable("Unexpected vpermilvar intrinsic");
2082 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
2083 Rep = Builder.CreateCall(Intrin,
2084 { CI->getArgOperand(0), CI->getArgOperand(1) });
2085 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2086 CI->getArgOperand(2));
2087 } else if (IsX86 && Name.endswith(".movntdqa")) {
2088 Module *M = F->getParent();
2089 MDNode *Node = MDNode::get(
2090 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2092 Value *Ptr = CI->getArgOperand(0);
2093 VectorType *VTy = cast<VectorType>(CI->getType());
2095 // Convert the type of the pointer to a pointer to the stored type.
2097 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2098 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2099 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2102 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2103 Name.startswith("avx512.mask.pavg"))) {
2104 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2105 // llvm.x86.avx512.mask.pavg.b/w
2106 Value *A = CI->getArgOperand(0);
2107 Value *B = CI->getArgOperand(1);
2108 VectorType *ZextType = VectorType::getExtendedElementVectorType(
2109 cast<VectorType>(A->getType()));
2110 Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2111 Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2112 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2113 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2114 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2115 Rep = Builder.CreateTrunc(ShiftR, A->getType());
2116 if (CI->getNumArgOperands() > 2) {
2117 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2118 CI->getArgOperand(2));
2120 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
2121 Value *Arg = CI->getArgOperand(0);
2122 Value *Neg = Builder.CreateNeg(Arg, "neg");
2123 Value *Cmp = Builder.CreateICmpSGE(
2124 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2125 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2126 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
2127 Name == "max.ui" || Name == "max.ull")) {
2128 Value *Arg0 = CI->getArgOperand(0);
2129 Value *Arg1 = CI->getArgOperand(1);
2130 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
2131 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2132 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2133 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2134 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
2135 Name == "min.ui" || Name == "min.ull")) {
2136 Value *Arg0 = CI->getArgOperand(0);
2137 Value *Arg1 = CI->getArgOperand(1);
2138 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
2139 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2140 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2141 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2142 } else if (IsNVVM && Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2144 Value *Arg = CI->getArgOperand(0);
2145 Value *Ctlz = Builder.CreateCall(
2146 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
2148 {Arg, Builder.getFalse()}, "ctlz");
2149 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2150 } else if (IsNVVM && Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2153 Value *Arg = CI->getArgOperand(0);
2154 Value *Popc = Builder.CreateCall(
2155 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
2158 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2159 } else if (IsNVVM && Name == "h2f") {
2160 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
2161 F->getParent(), Intrinsic::convert_from_fp16,
2162 {Builder.getFloatTy()}),
2163 CI->getArgOperand(0), "h2f");
2165 llvm_unreachable("Unknown function for CallInst upgrade.");
2169 CI->replaceAllUsesWith(Rep);
2170 CI->eraseFromParent();
2174 CallInst *NewCall = nullptr;
2175 switch (NewFn->getIntrinsicID()) {
2177 // Handle generic mangling change, but nothing else
2179 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
2180 "Unknown function for CallInst upgrade and isn't just a name change");
2181 CI->setCalledFunction(NewFn);
2185 case Intrinsic::arm_neon_vld1:
2186 case Intrinsic::arm_neon_vld2:
2187 case Intrinsic::arm_neon_vld3:
2188 case Intrinsic::arm_neon_vld4:
2189 case Intrinsic::arm_neon_vld2lane:
2190 case Intrinsic::arm_neon_vld3lane:
2191 case Intrinsic::arm_neon_vld4lane:
2192 case Intrinsic::arm_neon_vst1:
2193 case Intrinsic::arm_neon_vst2:
2194 case Intrinsic::arm_neon_vst3:
2195 case Intrinsic::arm_neon_vst4:
2196 case Intrinsic::arm_neon_vst2lane:
2197 case Intrinsic::arm_neon_vst3lane:
2198 case Intrinsic::arm_neon_vst4lane: {
2199 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2200 CI->arg_operands().end());
2201 NewCall = Builder.CreateCall(NewFn, Args);
2205 case Intrinsic::bitreverse:
2206 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2209 case Intrinsic::ctlz:
2210 case Intrinsic::cttz:
2211 assert(CI->getNumArgOperands() == 1 &&
2212 "Mismatch between function args and call args");
2214 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
2217 case Intrinsic::objectsize: {
2218 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2219 ? Builder.getFalse()
2220 : CI->getArgOperand(2);
2221 NewCall = Builder.CreateCall(
2222 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2226 case Intrinsic::ctpop:
2227 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2230 case Intrinsic::convert_from_fp16:
2231 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2234 case Intrinsic::dbg_value:
2235 // Upgrade from the old version that had an extra offset argument.
2236 assert(CI->getNumArgOperands() == 4);
2237 // Drop nonzero offsets instead of attempting to upgrade them.
2238 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
2239 if (Offset->isZeroValue()) {
2240 NewCall = Builder.CreateCall(
2242 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
2245 CI->eraseFromParent();
2248 case Intrinsic::x86_xop_vfrcz_ss:
2249 case Intrinsic::x86_xop_vfrcz_sd:
2250 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2253 case Intrinsic::x86_xop_vpermil2pd:
2254 case Intrinsic::x86_xop_vpermil2ps:
2255 case Intrinsic::x86_xop_vpermil2pd_256:
2256 case Intrinsic::x86_xop_vpermil2ps_256: {
2257 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2258 CI->arg_operands().end());
2259 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2260 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2261 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2262 NewCall = Builder.CreateCall(NewFn, Args);
2266 case Intrinsic::x86_sse41_ptestc:
2267 case Intrinsic::x86_sse41_ptestz:
2268 case Intrinsic::x86_sse41_ptestnzc: {
2269 // The arguments for these intrinsics used to be v4f32, and changed
2270 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
2271 // So, the only thing required is a bitcast for both arguments.
2272 // First, check the arguments have the old type.
2273 Value *Arg0 = CI->getArgOperand(0);
2274 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2277 // Old intrinsic, add bitcasts
2278 Value *Arg1 = CI->getArgOperand(1);
2280 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2282 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2283 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2285 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2289 case Intrinsic::x86_sse41_insertps:
2290 case Intrinsic::x86_sse41_dppd:
2291 case Intrinsic::x86_sse41_dpps:
2292 case Intrinsic::x86_sse41_mpsadbw:
2293 case Intrinsic::x86_avx_dp_ps_256:
2294 case Intrinsic::x86_avx2_mpsadbw: {
2295 // Need to truncate the last argument from i32 to i8 -- this argument models
2296 // an inherently 8-bit immediate operand to these x86 instructions.
2297 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2298 CI->arg_operands().end());
2300 // Replace the last argument with a trunc.
2301 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2302 NewCall = Builder.CreateCall(NewFn, Args);
2306 case Intrinsic::thread_pointer: {
2307 NewCall = Builder.CreateCall(NewFn, {});
2311 case Intrinsic::invariant_start:
2312 case Intrinsic::invariant_end:
2313 case Intrinsic::masked_load:
2314 case Intrinsic::masked_store:
2315 case Intrinsic::masked_gather:
2316 case Intrinsic::masked_scatter: {
2317 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2318 CI->arg_operands().end());
2319 NewCall = Builder.CreateCall(NewFn, Args);
2323 assert(NewCall && "Should have either set this variable or returned through "
2324 "the default case");
2325 std::string Name = CI->getName();
2326 if (!Name.empty()) {
2327 CI->setName(Name + ".old");
2328 NewCall->setName(Name);
2330 CI->replaceAllUsesWith(NewCall);
2331 CI->eraseFromParent();
/// Upgrade every old-style call to intrinsic \p F. If the declaration needs
/// upgrading, each CallInst user is rewritten against the replacement
/// declaration and the obsolete declaration is erased from the module.
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // (NewFn is the out-parameter filled in by UpgradeIntrinsicFunction).
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    // Only CallInst users are upgraded; other users (e.g. constant
    // expressions) are left untouched here.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
/// Upgrade an old-style scalar TBAA tag to the struct-path aware access-tag
/// format <BaseTy, AccessTy, offset, [const]>, so later passes only ever see
/// one TBAA encoding. Struct-path tags are returned unchanged.
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  // Such tags already have an MDNode (not an MDString) as operand 0 and at
  // least three operands, and need no upgrade.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    // Old scalar tag carrying a third ("const") operand: wrap the first two
    // operands into a scalar type node, then build the four-operand access tag.
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
    return MDNode::get(Context, Elts2);

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
/// Upgrade a bitcast between pointers of different address spaces, which is
/// no longer expressible as a single bitcast: lower it to ptrtoint followed
/// by inttoptr. \p Temp receives the intermediate PtrToInt instruction, which
/// the caller is responsible for inserting; the returned IntToPtr consumes it.
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  // Only bitcasts are candidates for this upgrade.
  if (Opc != Instruction::BitCast)

  Type *SrcTy = V->getType();
  // An address-space-changing pointer(-vector) bitcast must be split into a
  // round trip through an integer type.
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
/// Constant-expression counterpart of UpgradeBitCastInst: upgrade a bitcast
/// between pointers of different address spaces into a folded
/// ptrtoint + inttoptr constant-expression pair.
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  // Only bitcasts are candidates for this upgrade.
  if (Opc != Instruction::BitCast)

  Type *SrcTy = C->getType();
  // An address-space-changing pointer(-vector) bitcast must be split into a
  // round trip through an integer type.
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2416 /// Check the debug info version number, if it is out-dated, drop the debug
2417 /// info. Return true if module is modified.
2418 bool llvm::UpgradeDebugInfo(Module &M) {
2419 unsigned Version = getDebugMetadataVersionFromModule(M);
2420 if (Version == DEBUG_METADATA_VERSION) {
2421 bool BrokenDebugInfo = false;
2422 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
2423 report_fatal_error("Broken module found, compilation aborted!");
2424 if (!BrokenDebugInfo)
2425 // Everything is ok.
2428 // Diagnose malformed debug info.
2429 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
2430 M.getContext().diagnose(Diag);
2433 bool Modified = StripDebugInfo(M);
2434 if (Modified && Version != DEBUG_METADATA_VERSION) {
2435 // Diagnose a version mismatch.
2436 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2437 M.getContext().diagnose(DiagVersion);
2442 bool llvm::UpgradeModuleFlags(Module &M) {
2443 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2447 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
2448 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2449 MDNode *Op = ModFlags->getOperand(I);
2450 if (Op->getNumOperands() != 3)
2452 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2455 if (ID->getString() == "Objective-C Image Info Version")
2457 if (ID->getString() == "Objective-C Class Properties")
2458 HasClassProperties = true;
2459 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
2460 // field was Error and now they are Max.
2461 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
2462 if (auto *Behavior =
2463 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
2464 if (Behavior->getLimitedValue() == Module::Error) {
2465 Type *Int32Ty = Type::getInt32Ty(M.getContext());
2466 Metadata *Ops[3] = {
2467 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
2468 MDString::get(M.getContext(), ID->getString()),
2470 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2475 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
2476 // section name so that llvm-lto will not complain about mismatching
2477 // module flags that is functionally the same.
2478 if (ID->getString() == "Objective-C Image Info Section") {
2479 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
2480 SmallVector<StringRef, 4> ValueComp;
2481 Value->getString().split(ValueComp, " ");
2482 if (ValueComp.size() != 1) {
2483 std::string NewValue;
2484 for (auto &S : ValueComp)
2485 NewValue += S.str();
2486 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
2487 MDString::get(M.getContext(), NewValue)};
2488 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2495 // "Objective-C Class Properties" is recently added for Objective-C. We
2496 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2497 // flag of value 0, so we can correclty downgrade this flag when trying to
2498 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2499 // this module flag.
2500 if (HasObjCFlag && !HasClassProperties) {
2501 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
2509 void llvm::UpgradeSectionAttributes(Module &M) {
2510 auto TrimSpaces = [](StringRef Section) -> std::string {
2511 SmallVector<StringRef, 5> Components;
2512 Section.split(Components, ',');
2514 SmallString<32> Buffer;
2515 raw_svector_ostream OS(Buffer);
2517 for (auto Component : Components)
2518 OS << ',' << Component.trim();
2520 return OS.str().substr(1);
2523 for (auto &GV : M.globals()) {
2524 if (!GV.hasSection())
2527 StringRef Section = GV.getSection();
2529 if (!Section.startswith("__DATA, __objc_catlist"))
2532 // __DATA, __objc_catlist, regular, no_dead_strip
2533 // __DATA,__objc_catlist,regular,no_dead_strip
2534 GV.setSection(TrimSpaces(Section));
2538 static bool isOldLoopArgument(Metadata *MD) {
2539 auto *T = dyn_cast_or_null<MDTuple>(MD);
2542 if (T->getNumOperands() < 1)
2544 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2547 return S->getString().startswith("llvm.vectorizer.");
2550 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2551 StringRef OldPrefix = "llvm.vectorizer.";
2552 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2554 if (OldTag == "llvm.vectorizer.unroll")
2555 return MDString::get(C, "llvm.loop.interleave.count");
2557 return MDString::get(
2558 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2562 static Metadata *upgradeLoopArgument(Metadata *MD) {
2563 auto *T = dyn_cast_or_null<MDTuple>(MD);
2566 if (T->getNumOperands() < 1)
2568 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2571 if (!OldTag->getString().startswith("llvm.vectorizer."))
2574 // This has an old tag. Upgrade it.
2575 SmallVector<Metadata *, 8> Ops;
2576 Ops.reserve(T->getNumOperands());
2577 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2578 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2579 Ops.push_back(T->getOperand(I));
2581 return MDTuple::get(T->getContext(), Ops);
2584 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2585 auto *T = dyn_cast<MDTuple>(&N);
2589 if (none_of(T->operands(), isOldLoopArgument))
2592 SmallVector<Metadata *, 8> Ops;
2593 Ops.reserve(T->getNumOperands());
2594 for (Metadata *MD : T->operands())
2595 Ops.push_back(upgradeLoopArgument(MD));
2597 return MDTuple::get(T->getContext(), Ops);