//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
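// Note: most upgrades below follow a two-step pattern: the stale declaration
// is renamed out of the way with a ".old" suffix, a declaration with the new
// signature takes its place, and UpgradeIntrinsicCall rewrites each call.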
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
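// For example, llvm.x86.sse41.insertps originally declared its immediate as
// i32; current declarations use i8, so an old declaration is recognized by
// its i32 last parameter and remapped to the new signature.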
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name == "avx512.mask.add.pd.128" || // Added in 4.0
      Name == "avx512.mask.add.pd.256" || // Added in 4.0
      Name == "avx512.mask.add.ps.128" || // Added in 4.0
      Name == "avx512.mask.add.ps.256" || // Added in 4.0
      Name == "avx512.mask.div.pd.128" || // Added in 4.0
      Name == "avx512.mask.div.pd.256" || // Added in 4.0
      Name == "avx512.mask.div.ps.128" || // Added in 4.0
      Name == "avx512.mask.div.ps.256" || // Added in 4.0
      Name == "avx512.mask.mul.pd.128" || // Added in 4.0
      Name == "avx512.mask.mul.pd.256" || // Added in 4.0
      Name == "avx512.mask.mul.ps.128" || // Added in 4.0
      Name == "avx512.mask.mul.ps.256" || // Added in 4.0
      Name == "avx512.mask.sub.pd.128" || // Added in 4.0
      Name == "avx512.mask.sub.pd.256" || // Added in 4.0
      Name == "avx512.mask.sub.ps.128" || // Added in 4.0
      Name == "avx512.mask.sub.ps.256" || // Added in 4.0
      Name == "avx512.mask.max.pd.128" || // Added in 5.0
      Name == "avx512.mask.max.pd.256" || // Added in 5.0
      Name == "avx512.mask.max.ps.128" || // Added in 5.0
      Name == "avx512.mask.max.ps.256" || // Added in 5.0
      Name == "avx512.mask.min.pd.128" || // Added in 5.0
      Name == "avx512.mask.min.pd.256" || // Added in 5.0
      Name == "avx512.mask.min.ps.128" || // Added in 5.0
      Name == "avx512.mask.min.ps.256" || // Added in 5.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      (Name.startswith("xop.vpcom") && // Added in 3.2
       F->arg_size() == 2) ||
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("sse2.pavg") || // Added in 6.0
      Name.startswith("avx2.pavg") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}
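// Note the convention used above: when an upgrade cannot be expressed as a
// simple redeclaration, NewFn is left null and true is returned;
// UpgradeIntrinsicCall then expands the old call inline.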
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }
425 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
427 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
428 F->arg_begin()->getType());
431 if (Name.startswith("cttz.") && F->arg_size() == 1) {
433 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
434 F->arg_begin()->getType());
440 if (Name == "dbg.value" && F->arg_size() == 4) {
442 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    break;
  }
  case 'm': {
478 if (Name.startswith("masked.load.")) {
479 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
480 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
482 NewFn = Intrinsic::getDeclaration(F->getParent(),
483 Intrinsic::masked_load,
488 if (Name.startswith("masked.store.")) {
489 auto Args = F->getFunctionType()->params();
490 Type *Tys[] = { Args[0], Args[1] };
491 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
493 NewFn = Intrinsic::getDeclaration(F->getParent(),
494 Intrinsic::masked_store,
499 // Renaming gather/scatter intrinsics with no address space overloading
500 // to the new overload which includes an address space
501 if (Name.startswith("masked.gather.")) {
502 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
503 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
505 NewFn = Intrinsic::getDeclaration(F->getParent(),
506 Intrinsic::masked_gather, Tys);
510 if (Name.startswith("masked.scatter.")) {
511 auto Args = F->getFunctionType()->params();
512 Type *Tys[] = {Args[0], Args[1]};
513 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
515 NewFn = Intrinsic::getDeclaration(F->getParent(),
516 Intrinsic::masked_scatter, Tys);
523 if (Name.startswith("nvvm.")) {
524 Name = Name.substr(5);
526 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
527 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
528 .Cases("brev32", "brev64", Intrinsic::bitreverse)
529 .Case("clz.i", Intrinsic::ctlz)
530 .Case("popc.i", Intrinsic::ctpop)
531 .Default(Intrinsic::not_intrinsic);
532 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
533 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
534 {F->getReturnType()});
538 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
539 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
541 // TODO: We could add lohi.i2d.
542 bool Expand = StringSwitch<bool>(Name)
543 .Cases("abs.i", "abs.ll", true)
544 .Cases("clz.ll", "popc.ll", "h2f", true)
545 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
546 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
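// For example, a 128-bit psll.dq by 4 bytes selects byte indices
// <12,13,14,15,16,...,27> from the pair (zero vector, Op): four zero bytes
// followed by the low 12 bytes of Op, i.e. a left shift by 4 bytes.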
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
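// Emit a select between Op0 and Op1 controlled by an integer bitmask, which
// is first converted to a vector of i1 via getX86MaskVec.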
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first operand.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate so we need to handle both cases. VALIGN also doesn't have 128-bit
// lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
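// Upgrade the avx512 masked store intrinsics to the generic llvm.masked.store
// intrinsic, or to a plain store when the mask is known to be all ones.
// Aligned selects between the vector's natural alignment and an alignment of 1.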
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
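// Likewise, upgrade the avx512 masked load intrinsics to llvm.masked.load,
// or to a plain load when the mask is known to be all ones.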
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
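// Upgrade the [v]pabs* intrinsics: |x| is emitted as select(x > 0, x, -x).
// For the masked avx512 forms the last operand is the write mask and the
// second operand the passthru value, handled via EmitX86Select.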
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}
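// Upgrade the packed integer min/max intrinsics (e.g. sse41.pmaxsb) to an
// icmp+select idiom; Pred is sgt/ugt for max and slt/ult for min. The
// four-operand avx512 forms additionally carry a passthru and a write mask.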
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask, unsigned NumElts) {
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
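// Upgrade avx512.mask.[u]cmp.* to an icmp plus mask. The 3-bit condition
// code CC maps as: 0=eq, 1=lt, 2=le, 3=false, 4=ne, 5=ge, 6=gt, 7=true;
// Signed selects signed vs. unsigned predicates. The last call operand is
// the write mask.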
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *F = CI.getCalledFunction();
  Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
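// Upgrade avx512.mask.move.s*: bit 0 of the mask chooses between the low
// element of B and the low element of Src, and the chosen scalar is inserted
// into the low element of A.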
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
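// Upgrade avx512.cvtmask2*: turn an integer bitmask into a vector of i1 and
// sign-extend it to the destination integer vector type.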
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);
934 if (IsX86 && Name.startswith("sse4a.movnt.")) {
935 Module *M = F->getParent();
936 SmallVector<Metadata *, 1> Elts;
938 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
939 MDNode *Node = MDNode::get(C, Elts);
941 Value *Arg0 = CI->getArgOperand(0);
942 Value *Arg1 = CI->getArgOperand(1);
944 // Nontemporal (unaligned) store of the 0'th element of the float/double
946 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
947 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
948 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
950 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
952 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
953 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
956 CI->eraseFromParent();
960 if (IsX86 && (Name.startswith("avx.movnt.") ||
961 Name.startswith("avx512.storent."))) {
962 Module *M = F->getParent();
963 SmallVector<Metadata *, 1> Elts;
965 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
966 MDNode *Node = MDNode::get(C, Elts);
968 Value *Arg0 = CI->getArgOperand(0);
969 Value *Arg1 = CI->getArgOperand(1);
971 // Convert the type of the pointer to a pointer to the stored type.
972 Value *BC = Builder.CreateBitCast(Arg0,
973 PointerType::getUnqual(Arg1->getType()),
975 VectorType *VTy = cast<VectorType>(Arg1->getType());
976 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
977 VTy->getBitWidth() / 8);
978 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
981 CI->eraseFromParent();
985 if (IsX86 && Name == "sse2.storel.dq") {
986 Value *Arg0 = CI->getArgOperand(0);
987 Value *Arg1 = CI->getArgOperand(1);
989 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
990 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
991 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
992 Value *BC = Builder.CreateBitCast(Arg0,
993 PointerType::getUnqual(Elt->getType()),
995 Builder.CreateAlignedStore(Elt, BC, 1);
998 CI->eraseFromParent();
1002 if (IsX86 && (Name.startswith("sse.storeu.") ||
1003 Name.startswith("sse2.storeu.") ||
1004 Name.startswith("avx.storeu."))) {
1005 Value *Arg0 = CI->getArgOperand(0);
1006 Value *Arg1 = CI->getArgOperand(1);
1008 Arg0 = Builder.CreateBitCast(Arg0,
1009 PointerType::getUnqual(Arg1->getType()),
1011 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1013 // Remove intrinsic.
1014 CI->eraseFromParent();
1018 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1019 // "avx512.mask.storeu." or "avx512.mask.store."
1020 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1021 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1022 CI->getArgOperand(2), Aligned);
1024 // Remove intrinsic.
1025 CI->eraseFromParent();
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      unsigned NumElts = Op0->getType()->getVectorNumElements();
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts =
          CI->getArgOperand(1)->getType()->getVectorNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
1065 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
1066 Type *I32Ty = Type::getInt32Ty(C);
1067 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1068 ConstantInt::get(I32Ty, 0));
1069 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1070 ConstantInt::get(I32Ty, 0));
1071 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1072 Builder.CreateFAdd(Elt0, Elt1),
1073 ConstantInt::get(I32Ty, 0));
1074 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1075 Type *I32Ty = Type::getInt32Ty(C);
1076 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1077 ConstantInt::get(I32Ty, 0));
1078 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1079 ConstantInt::get(I32Ty, 0));
1080 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1081 Builder.CreateFSub(Elt0, Elt1),
1082 ConstantInt::get(I32Ty, 0));
1083 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1084 Type *I32Ty = Type::getInt32Ty(C);
1085 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1086 ConstantInt::get(I32Ty, 0));
1087 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1088 ConstantInt::get(I32Ty, 0));
1089 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1090 Builder.CreateFMul(Elt0, Elt1),
1091 ConstantInt::get(I32Ty, 0));
1092 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1093 Type *I32Ty = Type::getInt32Ty(C);
1094 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1095 ConstantInt::get(I32Ty, 0));
1096 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1097 ConstantInt::get(I32Ty, 0));
1098 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1099 Builder.CreateFDiv(Elt0, Elt1),
1100 ConstantInt::get(I32Ty, 0));
1101 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1102 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1103 bool CmpEq = Name[16] == 'e';
1104 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1105 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
1106 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1107 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1108 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
1109 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1110 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1111 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1112 Name == "ssse3.pabs.w.128" ||
1113 Name == "ssse3.pabs.d.128" ||
1114 Name.startswith("avx2.pabs") ||
1115 Name.startswith("avx512.mask.pabs"))) {
1116 Rep = upgradeAbs(Builder, *CI);
1117 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1118 Name == "sse2.pmaxs.w" ||
1119 Name == "sse41.pmaxsd" ||
1120 Name.startswith("avx2.pmaxs") ||
1121 Name.startswith("avx512.mask.pmaxs"))) {
1122 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1123 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1124 Name == "sse41.pmaxuw" ||
1125 Name == "sse41.pmaxud" ||
1126 Name.startswith("avx2.pmaxu") ||
1127 Name.startswith("avx512.mask.pmaxu"))) {
1128 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1129 } else if (IsX86 && (Name == "sse41.pminsb" ||
1130 Name == "sse2.pmins.w" ||
1131 Name == "sse41.pminsd" ||
1132 Name.startswith("avx2.pmins") ||
1133 Name.startswith("avx512.mask.pmins"))) {
1134 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1135 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1136 Name == "sse41.pminuw" ||
1137 Name == "sse41.pminud" ||
1138 Name.startswith("avx2.pminu") ||
1139 Name.startswith("avx512.mask.pminu"))) {
1140 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1141 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1142 Name == "sse2.cvtps2pd" ||
1143 Name == "avx.cvtdq2.pd.256" ||
1144 Name == "avx.cvt.ps2.pd.256" ||
1145 Name.startswith("avx512.mask.cvtdq2pd.") ||
1146 Name.startswith("avx512.mask.cvtudq2pd."))) {
1147 // Lossless i32/float to double conversion.
1148 // Extract the bottom elements if necessary and convert to double vector.
1149 Value *Src = CI->getArgOperand(0);
1150 VectorType *SrcTy = cast<VectorType>(Src->getType());
1151 VectorType *DstTy = cast<VectorType>(CI->getType());
1152 Rep = CI->getArgOperand(0);
1154 unsigned NumDstElts = DstTy->getNumElements();
1155 if (NumDstElts < SrcTy->getNumElements()) {
1156 assert(NumDstElts == 2 && "Unexpected vector size");
1157 uint32_t ShuffleMask[2] = { 0, 1 };
1158 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1162 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1163 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1165 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1166 else if (UInt2Double)
1167 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1169 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1171 if (CI->getNumArgOperands() == 3)
1172 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1173 CI->getArgOperand(1));
1174 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1175 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1176 CI->getArgOperand(1), CI->getArgOperand(2),
1178 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1179 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1180 CI->getArgOperand(1),CI->getArgOperand(2),
1182 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1183 Intrinsic::ID intID;
1184 if (Name.endswith("ub"))
1185 intID = Intrinsic::x86_xop_vpcomub;
1186 else if (Name.endswith("uw"))
1187 intID = Intrinsic::x86_xop_vpcomuw;
1188 else if (Name.endswith("ud"))
1189 intID = Intrinsic::x86_xop_vpcomud;
1190 else if (Name.endswith("uq"))
1191 intID = Intrinsic::x86_xop_vpcomuq;
1192 else if (Name.endswith("b"))
1193 intID = Intrinsic::x86_xop_vpcomb;
1194 else if (Name.endswith("w"))
1195 intID = Intrinsic::x86_xop_vpcomw;
1196 else if (Name.endswith("d"))
1197 intID = Intrinsic::x86_xop_vpcomd;
1198 else if (Name.endswith("q"))
1199 intID = Intrinsic::x86_xop_vpcomq;
1201 llvm_unreachable("Unknown suffix");
1203 Name = Name.substr(9); // strip off "xop.vpcom"
1205 if (Name.startswith("lt"))
1207 else if (Name.startswith("le"))
1209 else if (Name.startswith("gt"))
1211 else if (Name.startswith("ge"))
1213 else if (Name.startswith("eq"))
1215 else if (Name.startswith("ne"))
1217 else if (Name.startswith("false"))
1219 else if (Name.startswith("true"))
1222 llvm_unreachable("Unknown condition");
1224 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1226 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1227 Builder.getInt8(Imm)});
1228 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1229 Value *Sel = CI->getArgOperand(2);
1230 Value *NotSel = Builder.CreateNot(Sel);
1231 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1232 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1233 Rep = Builder.CreateOr(Sel0, Sel1);
1234 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1235 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1236 Intrinsic::x86_sse42_crc32_32_8);
1237 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1238 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1239 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1240 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1241 // Replace broadcasts with a series of insertelements.
1242 Type *VecTy = CI->getType();
1243 Type *EltTy = VecTy->getVectorElementType();
1244 unsigned EltNum = VecTy->getVectorNumElements();
1245 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1246 EltTy->getPointerTo());
1247 Value *Load = Builder.CreateLoad(EltTy, Cast);
1248 Type *I32Ty = Type::getInt32Ty(C);
1249 Rep = UndefValue::get(VecTy);
1250 for (unsigned I = 0; I < EltNum; ++I)
1251 Rep = Builder.CreateInsertElement(Rep, Load,
1252 ConstantInt::get(I32Ty, I));
1253 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1254 Name.startswith("sse41.pmovzx") ||
1255 Name.startswith("avx2.pmovsx") ||
1256 Name.startswith("avx2.pmovzx") ||
1257 Name.startswith("avx512.mask.pmovsx") ||
1258 Name.startswith("avx512.mask.pmovzx"))) {
1259 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1260 VectorType *DstTy = cast<VectorType>(CI->getType());
1261 unsigned NumDstElts = DstTy->getNumElements();
1263 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1264 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1265 for (unsigned i = 0; i != NumDstElts; ++i)
1268 Value *SV = Builder.CreateShuffleVector(
1269 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1271 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1272 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1273 : Builder.CreateZExt(SV, DstTy);
1274 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1275 if (CI->getNumArgOperands() == 3)
1276 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1277 CI->getArgOperand(1));
1278 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1279 Name == "avx2.vbroadcasti128")) {
1280 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1281 Type *EltTy = CI->getType()->getVectorElementType();
1282 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1283 Type *VT = VectorType::get(EltTy, NumSrcElts);
1284 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1285 PointerType::getUnqual(VT));
1286 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1287 if (NumSrcElts == 2)
1288 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1291 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1292 { 0, 1, 2, 3, 0, 1, 2, 3 });
1293 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
1294 Name.startswith("avx512.mask.shuf.f"))) {
1295 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1296 Type *VT = CI->getType();
1297 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
1298 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
1299 unsigned ControlBitsMask = NumLanes - 1;
1300 unsigned NumControlBits = NumLanes / 2;
1301 SmallVector<uint32_t, 8> ShuffleMask(0);
1303 for (unsigned l = 0; l != NumLanes; ++l) {
1304 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
1305 // We actually need the other source.
1306 if (l >= NumLanes / 2)
1307 LaneMask += NumLanes;
1308 for (unsigned i = 0; i != NumElementsInLane; ++i)
1309 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
1311 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
1312 CI->getArgOperand(1), ShuffleMask);
1313 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1314 CI->getArgOperand(3));
1315 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
1316 Name.startswith("avx512.mask.broadcasti"))) {
1317 unsigned NumSrcElts =
1318 CI->getArgOperand(0)->getType()->getVectorNumElements();
1319 unsigned NumDstElts = CI->getType()->getVectorNumElements();
1321 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1322 for (unsigned i = 0; i != NumDstElts; ++i)
1323 ShuffleMask[i] = i % NumSrcElts;
1325 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
1326 CI->getArgOperand(0),
1328 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1329 CI->getArgOperand(1));
1330 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1331 Name.startswith("avx2.vbroadcast") ||
1332 Name.startswith("avx512.pbroadcast") ||
1333 Name.startswith("avx512.mask.broadcast.s"))) {
1334 // Replace vp?broadcasts with a vector shuffle.
1335 Value *Op = CI->getArgOperand(0);
1336 unsigned NumElts = CI->getType()->getVectorNumElements();
1337 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1338 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1339 Constant::getNullValue(MaskTy));
1341 if (CI->getNumArgOperands() == 3)
1342 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1343 CI->getArgOperand(1));
1344 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1345 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1346 CI->getArgOperand(1),
1347 CI->getArgOperand(2),
1348 CI->getArgOperand(3),
1349 CI->getArgOperand(4),
1351 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1352 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1353 CI->getArgOperand(1),
1354 CI->getArgOperand(2),
1355 CI->getArgOperand(3),
1356 CI->getArgOperand(4),
1358 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1359 Name == "avx2.psll.dq")) {
1360 // 128/256-bit shift left specified in bits.
1361 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1362 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1363 Shift / 8); // Shift is in bits.
1364 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1365 Name == "avx2.psrl.dq")) {
1366 // 128/256-bit shift right specified in bits.
1367 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1368 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1369 Shift / 8); // Shift is in bits.
1370 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1371 Name == "avx2.psll.dq.bs" ||
1372 Name == "avx512.psll.dq.512")) {
1373 // 128/256/512-bit shift left specified in bytes.
1374 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1375 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1376 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1377 Name == "avx2.psrl.dq.bs" ||
1378 Name == "avx512.psrl.dq.512")) {
1379 // 128/256/512-bit shift right specified in bytes.
1380 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1381 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1382 } else if (IsX86 && (Name == "sse41.pblendw" ||
1383 Name.startswith("sse41.blendp") ||
1384 Name.startswith("avx.blend.p") ||
1385 Name == "avx2.pblendw" ||
1386 Name.startswith("avx2.pblendd."))) {
1387 Value *Op0 = CI->getArgOperand(0);
1388 Value *Op1 = CI->getArgOperand(1);
1389 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1390 VectorType *VecTy = cast<VectorType>(CI->getType());
1391 unsigned NumElts = VecTy->getNumElements();
1393 SmallVector<uint32_t, 16> Idxs(NumElts);
1394 for (unsigned i = 0; i != NumElts; ++i)
1395 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1397 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1398 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1399 Name == "avx2.vinserti128" ||
1400 Name.startswith("avx512.mask.insert"))) {
1401 Value *Op0 = CI->getArgOperand(0);
1402 Value *Op1 = CI->getArgOperand(1);
1403 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1404 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1405 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1406 unsigned Scale = DstNumElts / SrcNumElts;
1408 // Mask off the high bits of the immediate value; hardware ignores those.
1411 // Extend the second operand into a vector the size of the destination.
1412 Value *UndefV = UndefValue::get(Op1->getType());
1413 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1414 for (unsigned i = 0; i != SrcNumElts; ++i)
1416 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1417 Idxs[i] = SrcNumElts;
1418 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1420 // Insert the second operand into the first operand.
1422 // Note that there is no guarantee that instruction lowering will actually
1423 // produce a vinsertf128 instruction for the created shuffles. In
1424 // particular, the 0 immediate case involves no lane changes, so it can
1425 // be handled as a blend.
1427 // Example of shuffle mask for 32-bit elements:
1428 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1429 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1431 // First fill with identify mask.
1432 for (unsigned i = 0; i != DstNumElts; ++i)
1434 // Then replace the elements where we need to insert.
1435 for (unsigned i = 0; i != SrcNumElts; ++i)
1436 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1437 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1439 // If the intrinsic has a mask operand, handle that.
1440 if (CI->getNumArgOperands() == 5)
1441 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1442 CI->getArgOperand(3));
1443 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1444 Name == "avx2.vextracti128" ||
1445 Name.startswith("avx512.mask.vextract"))) {
1446 Value *Op0 = CI->getArgOperand(0);
1447 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1448 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1449 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1450 unsigned Scale = SrcNumElts / DstNumElts;
1452 // Mask off the high bits of the immediate value; hardware ignores those.
1455 // Get indexes for the subvector of the input vector.
1456 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1457 for (unsigned i = 0; i != DstNumElts; ++i) {
1458 Idxs[i] = i + (Imm * DstNumElts);
1460 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1462 // If the intrinsic has a mask operand, handle that.
1463 if (CI->getNumArgOperands() == 4)
1464 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1465 CI->getArgOperand(2));
1466 } else if (!IsX86 && Name == "stackprotectorcheck") {
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
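      // Each 2-bit immediate field selects an element within a 4-element
      // group, so Imm = 0x1B (0b00011011) on a v4i64 yields the reversal
      // mask <3, 2, 1, 0>.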
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      // [1:0] - select 128 bits from sources for low half of destination
      // [2]   - ignore
      // [3]   - zero low half of destination
      // [5:4] - select 128 bits from sources for high half of destination
      // [6]   - ignore
      // [7]   - zero high half of destination
      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<uint32_t, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
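      // For example, with two v8f32 sources, Imm = 0x21 takes the high lane
      // of the first source and the low lane of the second, giving the mask
      // <4, 5, 6, 7, 8, 9, 10, 11>.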
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Look up the bits for this element, wrapping around the immediate every
      // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
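      // For sse2.pshuf.d the indices are two bits each, so Imm = 0x1B
      // (0b00011011) on a v4i32 produces the reversal mask <3, 2, 1, 0>.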
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
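      // Only the low four words of each 128-bit lane are shuffled; e.g.
      // Imm = 0x1B on a v8i16 gives <3, 2, 1, 0, 4, 5, 6, 7>.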
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
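      // This is the mirror image of pshufl.w: the same Imm = 0x1B on a v8i16
      // gives <0, 1, 2, 3, 7, 6, 5, 4>.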
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane, switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
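      // For a v4f32 shufps, Imm = 0x4E (0b01001110) picks elements 2 and 3 of
      // the first source and 0 and 1 of the second: mask <2, 3, 4, 5>.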
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
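      // On a v4f32, movsldup duplicates the even elements (<0, 0, 2, 2>) and
      // movshdup the odd ones (<1, 1, 3, 3>); movddup duplicates each even
      // 64-bit element.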
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
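      // Unpack-low interleaves the low halves of each 128-bit lane; on a
      // v4i32 the mask is <0, 4, 1, 5>.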
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
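      // Unpack-high interleaves the high halves instead; on a v4i32 the mask
      // is <2, 6, 3, 7>.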
    } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
      Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
      Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
                              CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
      Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
      Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p"))) {
      bool IsMin = Name[13] == 'i';
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
      unsigned EltWidth = VecTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (!IsMin && VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_sse_max_ps;
      else if (!IsMin && VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_sse2_max_pd;
      else if (!IsMin && VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx_max_ps_256;
      else if (!IsMin && VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx_max_pd_256;
      else if (IsMin && VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_sse_min_ps;
      else if (IsMin && VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_sse2_min_pd;
      else if (IsMin && VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx_min_ps_256;
      else if (IsMin && VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx_min_pd_256;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_ssse3_pshuf_b_128;
      else if (VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pshuf_b;
      else if (VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pshuf_b_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      bool IsUnsigned = Name[16] == 'u';
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse41_pmuldq;
      else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pmul_dq;
      else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pmul_dq_512;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_pmulu_dq;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pmulu_dq;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pmulu_dq_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
      bool IsUnsigned = Name[16] == 'u';
      bool IsDW = Name[18] == 'd';
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packsswb_128;
      else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packsswb;
      else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packsswb_512;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packssdw_128;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packssdw;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packssdw_512;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packuswb_128;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packuswb;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packuswb_512;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse41_packusdw;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packusdw;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packusdw_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;
      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;
      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;
      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
      Intrinsic::ID IID;
      if (Name.endswith("ps.128"))
        IID = Intrinsic::x86_avx_vpermilvar_ps;
      else if (Name.endswith("pd.128"))
        IID = Intrinsic::x86_avx_vpermilvar_pd;
      else if (Name.endswith("ps.256"))
        IID = Intrinsic::x86_avx_vpermilvar_ps_256;
      else if (Name.endswith("pd.256"))
        IID = Intrinsic::x86_avx_vpermilvar_pd_256;
      else if (Name.endswith("ps.512"))
        IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
      else if (Name.endswith("pd.512"))
        IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
      else
        llvm_unreachable("Unexpected vpermilvar intrinsic");

      Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
      Rep = Builder.CreateCall(Intrin,
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
    } else if (IsX86 &&
               (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
                Name.startswith("avx512.mask.pavg"))) {
      // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
      // llvm.x86.avx512.mask.pavg.b/w
      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      VectorType *ZextType = VectorType::getExtendedElementVectorType(
          cast<VectorType>(A->getType()));
      Value *ExtendedA = Builder.CreateZExt(A, ZextType);
      Value *ExtendedB = Builder.CreateZExt(B, ZextType);
      Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
      Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
      Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
      Rep = Builder.CreateTrunc(ShiftR, A->getType());
      if (CI->getNumArgOperands() > 2) {
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
      }
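      // The sequence above computes the rounding average (a + b + 1) >> 1 in
      // a widened element type so the add cannot overflow; e.g.
      // pavg.b(1, 2) == 2.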
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
    return;
  }

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                   ? Builder.getFalse()
                                   : CI->getArgOperand(2);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value:
    // Upgrade from the old version that had an extra offset argument.
    assert(CI->getNumArgOperands() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  std::string Name = CI->getName();
  if (!Name.empty()) {
    CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
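  // For example, the old two-operand scalar tag !{!"int", !1} becomes
  // !{!0, !0, i64 0}, where !0 refers to the original tag node.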
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
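    // For example, a bitcast from i8* to i8 addrspace(1)* cannot remain a
    // bitcast; it is expressed as a ptrtoint to i64 followed by an inttoptr
    // into the destination address space.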
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number; if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}

bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE module flags. The behavior for these two flags used to
    // be Error and is now Max.
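    // For instance, !{i32 1, !"PIC Level", i32 2} (behavior Error == 1) is
    // rewritten as !{i32 7, !"PIC Level", i32 2} (behavior Max == 7).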
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" was recently added for Objective-C. We
  // upgrade ObjC bitcode to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}

void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return OS.str().substr(1);
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // -> __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

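  // For example, "llvm.vectorizer.width" is renamed to
  // "llvm.loop.vectorize.width", while the old unroll tag below becomes
  // "llvm.loop.interleave.count".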
  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}