//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
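
// Note: upgrades that change a signature first rename the old declaration
// (e.g. @llvm.x86.sse41.ptestc becomes @llvm.x86.sse41.ptestc.old) so that a
// fresh declaration can be created under the original name.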
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
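
// For example (illustrative IR), an old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to @llvm.x86.sse41.ptestc.old, and NewFn points at the current
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// so that UpgradeIntrinsicCall can rewrite each call site.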
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
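
// For example (a sketch), the old
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is mapped to the current declaration whose immediate operand is i8; the call
// upgrade then truncates the mask argument to match the new signature.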
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
75 if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
76 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
77 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
78 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
79 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
80 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
81 Name == "sse.add.ss" || // Added in 4.0
82 Name == "sse2.add.sd" || // Added in 4.0
83 Name == "sse.sub.ss" || // Added in 4.0
84 Name == "sse2.sub.sd" || // Added in 4.0
85 Name == "sse.mul.ss" || // Added in 4.0
86 Name == "sse2.mul.sd" || // Added in 4.0
87 Name == "sse.div.ss" || // Added in 4.0
88 Name == "sse2.div.sd" || // Added in 4.0
89 Name == "sse41.pmaxsb" || // Added in 3.9
90 Name == "sse2.pmaxs.w" || // Added in 3.9
91 Name == "sse41.pmaxsd" || // Added in 3.9
92 Name == "sse2.pmaxu.b" || // Added in 3.9
93 Name == "sse41.pmaxuw" || // Added in 3.9
94 Name == "sse41.pmaxud" || // Added in 3.9
95 Name == "sse41.pminsb" || // Added in 3.9
96 Name == "sse2.pmins.w" || // Added in 3.9
97 Name == "sse41.pminsd" || // Added in 3.9
98 Name == "sse2.pminu.b" || // Added in 3.9
99 Name == "sse41.pminuw" || // Added in 3.9
100 Name == "sse41.pminud" || // Added in 3.9
101 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
102 Name.startswith("avx2.pmax") || // Added in 3.9
103 Name.startswith("avx2.pmin") || // Added in 3.9
104 Name.startswith("avx512.mask.pmax") || // Added in 4.0
105 Name.startswith("avx512.mask.pmin") || // Added in 4.0
106 Name.startswith("avx2.vbroadcast") || // Added in 3.8
107 Name.startswith("avx2.pbroadcast") || // Added in 3.8
108 Name.startswith("avx.vpermil.") || // Added in 3.1
109 Name.startswith("sse2.pshuf") || // Added in 3.9
110 Name.startswith("avx512.pbroadcast") || // Added in 3.9
111 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
112 Name.startswith("avx512.mask.movddup") || // Added in 3.9
113 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
114 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
115 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
116 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
117 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
118 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
119 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
120 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
121 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
122 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
123 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
124 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
125 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
126 Name.startswith("avx512.mask.pand.") || // Added in 3.9
127 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
128 Name.startswith("avx512.mask.por.") || // Added in 3.9
129 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
130 Name.startswith("avx512.mask.and.") || // Added in 3.9
131 Name.startswith("avx512.mask.andn.") || // Added in 3.9
132 Name.startswith("avx512.mask.or.") || // Added in 3.9
133 Name.startswith("avx512.mask.xor.") || // Added in 3.9
134 Name.startswith("avx512.mask.padd.") || // Added in 4.0
135 Name.startswith("avx512.mask.psub.") || // Added in 4.0
136 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
137 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
138 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
139 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
140 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
141 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
142 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
143 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
144 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
145 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
146 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
147 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
148 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
149 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
150 Name == "avx512.mask.add.pd.128" || // Added in 4.0
151 Name == "avx512.mask.add.pd.256" || // Added in 4.0
152 Name == "avx512.mask.add.ps.128" || // Added in 4.0
153 Name == "avx512.mask.add.ps.256" || // Added in 4.0
154 Name == "avx512.mask.div.pd.128" || // Added in 4.0
155 Name == "avx512.mask.div.pd.256" || // Added in 4.0
156 Name == "avx512.mask.div.ps.128" || // Added in 4.0
157 Name == "avx512.mask.div.ps.256" || // Added in 4.0
158 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
159 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
160 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
161 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
162 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
163 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
164 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
165 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
166 Name == "avx512.mask.max.pd.128" || // Added in 5.0
167 Name == "avx512.mask.max.pd.256" || // Added in 5.0
168 Name == "avx512.mask.max.ps.128" || // Added in 5.0
169 Name == "avx512.mask.max.ps.256" || // Added in 5.0
170 Name == "avx512.mask.min.pd.128" || // Added in 5.0
171 Name == "avx512.mask.min.pd.256" || // Added in 5.0
172 Name == "avx512.mask.min.ps.128" || // Added in 5.0
173 Name == "avx512.mask.min.ps.256" || // Added in 5.0
174 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
175 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
176 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
177 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
178 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
179 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
180 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
181 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
182 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
183 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
184 Name.startswith("avx512.mask.pslli") || // Added in 4.0
185 Name.startswith("avx512.mask.psrai") || // Added in 4.0
186 Name.startswith("avx512.mask.psrli") || // Added in 4.0
187 Name.startswith("avx512.mask.psllv") || // Added in 4.0
188 Name.startswith("avx512.mask.psrav") || // Added in 4.0
189 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
190 Name.startswith("sse41.pmovsx") || // Added in 3.8
191 Name.startswith("sse41.pmovzx") || // Added in 3.9
192 Name.startswith("avx2.pmovsx") || // Added in 3.9
193 Name.startswith("avx2.pmovzx") || // Added in 3.9
194 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
195 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
196 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
197 Name == "sse2.cvtdq2pd" || // Added in 3.9
198 Name == "sse2.cvtps2pd" || // Added in 3.9
199 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
200 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
201 Name.startswith("avx.vinsertf128.") || // Added in 3.7
202 Name == "avx2.vinserti128" || // Added in 3.7
203 Name.startswith("avx512.mask.insert") || // Added in 4.0
204 Name.startswith("avx.vextractf128.") || // Added in 3.7
205 Name == "avx2.vextracti128" || // Added in 3.7
206 Name.startswith("avx512.mask.vextract") || // Added in 4.0
207 Name.startswith("sse4a.movnt.") || // Added in 3.9
208 Name.startswith("avx.movnt.") || // Added in 3.2
209 Name.startswith("avx512.storent.") || // Added in 3.9
210 Name == "sse41.movntdqa" || // Added in 5.0
211 Name == "avx2.movntdqa" || // Added in 5.0
212 Name == "avx512.movntdqa" || // Added in 5.0
213 Name == "sse2.storel.dq" || // Added in 3.9
214 Name.startswith("sse.storeu.") || // Added in 3.9
215 Name.startswith("sse2.storeu.") || // Added in 3.9
216 Name.startswith("avx.storeu.") || // Added in 3.9
217 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
218 Name.startswith("avx512.mask.store.p") || // Added in 3.9
219 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
220 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
221 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
222 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
223 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
224 Name.startswith("avx512.mask.load.") || // Added in 3.9
225 Name == "sse42.crc32.64.8" || // Added in 3.4
226 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
227 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
228 Name.startswith("avx512.mask.valign.") || // Added in 4.0
229 Name.startswith("sse2.psll.dq") || // Added in 3.7
230 Name.startswith("sse2.psrl.dq") || // Added in 3.7
231 Name.startswith("avx2.psll.dq") || // Added in 3.7
232 Name.startswith("avx2.psrl.dq") || // Added in 3.7
233 Name.startswith("avx512.psll.dq") || // Added in 3.9
234 Name.startswith("avx512.psrl.dq") || // Added in 3.9
235 Name == "sse41.pblendw" || // Added in 3.7
236 Name.startswith("sse41.blendp") || // Added in 3.7
237 Name.startswith("avx.blend.p") || // Added in 3.7
238 Name == "avx2.pblendw" || // Added in 3.7
239 Name.startswith("avx2.pblendd.") || // Added in 3.7
240 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
241 Name == "avx2.vbroadcasti128" || // Added in 3.7
242 Name == "xop.vpcmov" || // Added in 3.8
243 Name == "xop.vpcmov.256" || // Added in 5.0
244 Name.startswith("avx512.mask.move.s") || // Added in 4.0
245 Name.startswith("avx512.cvtmask2") || // Added in 5.0
246 (Name.startswith("xop.vpcom") && // Added in 3.2
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}
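
// Example (illustrative): a vpermil2pd declaration whose index operand is
// still <2 x double> is renamed and re-declared with the current <2 x i64>
// index type; since the index is interpreted bit-for-bit, the call upgrade
// only needs to bitcast that operand.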
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."
345 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
346 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
347 F->arg_begin()->getType());
350 if (Name.startswith("arm.neon.vclz")) {
352 F->arg_begin()->getType(),
353 Type::getInt1Ty(F->getContext())
355 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
356 // the end of the name. Change name from llvm.arm.neon.vclz.* to
358 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
359 NewFn = Function::Create(fType, F->getLinkage(),
360 "llvm.ctlz." + Name.substr(14), F->getParent());
363 if (Name.startswith("arm.neon.vcnt")) {
364 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
365 F->arg_begin()->getType());
368 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
369 if (vldRegex.match(Name)) {
370 auto fArgs = F->getFunctionType()->params();
371 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
372 // Can't use Intrinsic::getDeclaration here as the return types might
373 // then only be structurally equal.
374 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
375 NewFn = Function::Create(fType, F->getLinkage(),
376 "llvm." + Name + ".p0i8", F->getParent());
  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vstRegex.match(Name)) {
    static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                              Intrinsic::arm_neon_vst2,
                                              Intrinsic::arm_neon_vst3,
                                              Intrinsic::arm_neon_vst4};

    static const Intrinsic::ID StoreLaneInts[] = {
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane
    };

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
    else
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);
    return true;
  }
401 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
402 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
409 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
411 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
412 F->arg_begin()->getType());
415 if (Name.startswith("cttz.") && F->arg_size() == 1) {
417 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
418 F->arg_begin()->getType());
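
  // Example: a one-argument declaration such as
  //   declare i32 @llvm.ctlz.i32(i32)
  // is renamed to @llvm.ctlz.i32.old and re-declared in the current
  // two-argument form (value, i1 is_zero_undef); the call upgrade supplies
  // the extra flag argument.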
  bool IsLifetimeStart = Name.startswith("lifetime.start");
  if (IsLifetimeStart || Name.startswith("invariant.start")) {
    Intrinsic::ID ID = IsLifetimeStart ?
      Intrinsic::lifetime_start : Intrinsic::invariant_start;
    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[1]};
    if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
      return true;
    }
  }
  bool IsLifetimeEnd = Name.startswith("lifetime.end");
  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
    Intrinsic::ID ID = IsLifetimeEnd ?
      Intrinsic::lifetime_end : Intrinsic::invariant_end;

    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
    if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
      return true;
    }
  }
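
  // Example (a sketch): a lifetime/invariant declaration that predates
  // pointer-type mangling no longer matches Intrinsic::getName for its
  // pointee type and is re-declared under the mangled name, e.g.
  //   @llvm.invariant.start.p0i8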
454 if (Name.startswith("masked.load.")) {
455 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
456 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
458 NewFn = Intrinsic::getDeclaration(F->getParent(),
459 Intrinsic::masked_load,
464 if (Name.startswith("masked.store.")) {
465 auto Args = F->getFunctionType()->params();
466 Type *Tys[] = { Args[0], Args[1] };
467 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
469 NewFn = Intrinsic::getDeclaration(F->getParent(),
470 Intrinsic::masked_store,
  // Renaming gather/scatter intrinsics with no address space overloading
  // to the new overload which includes an address space.
  if (Name.startswith("masked.gather.")) {
    Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_gather, Tys);
      return true;
    }
  }
  if (Name.startswith("masked.scatter.")) {
    auto Args = F->getFunctionType()->params();
    Type *Tys[] = {Args[0], Args[1]};
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_scatter, Tys);
      return true;
    }
  }
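
  // Example: @llvm.masked.gather.v2f64, mangled only on the data type,
  // becomes @llvm.masked.gather.v2f64.v2p0f64 once the vector-of-pointers
  // type participates in the mangling.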
499 if (Name.startswith("nvvm.")) {
500 Name = Name.substr(5);
502 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
503 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
504 .Cases("brev32", "brev64", Intrinsic::bitreverse)
505 .Case("clz.i", Intrinsic::ctlz)
506 .Case("popc.i", Intrinsic::ctpop)
507 .Default(Intrinsic::not_intrinsic);
508 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
509 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
510 {F->getReturnType()});
514 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
515 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
517 // TODO: We could add lohi.i2d.
518 bool Expand = StringSwitch<bool>(Name)
519 .Cases("abs.i", "abs.ll", true)
520 .Cases("clz.ll", "popc.ll", "h2f", true)
521 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
522 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
  // We only need to change the name to match the mangling including the
  // address space.
  if (Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->arg_size() == 2 ||
        F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                        Tys);
      return true;
    }
  }
547 if (Name == "stackprotectorcheck") {
554 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
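
// Typical driver loop (a sketch; the real one lives in callers such as the
// bitcode reader): for each Function F in the module, call
// UpgradeIntrinsicFunction(F, NewFn) and, if it returns true, rewrite every
// CallInst that uses F via UpgradeIntrinsicCall(CI, NewFn), where NewFn may
// be null for call-only upgrades.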
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
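
// Worked example (128-bit, Shift = 4): NumElts is 16 bytes, so the mask is
// <12, 13, 14, 15, 16, 17, ..., 27>, selecting four zero bytes from Res
// followed by the low twelve bytes of Op -- a byte-wise shift left by four.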
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first (true) operand.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}
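
// For example (illustrative IR), with an i8 mask and <8 x i32> operands this
// emits roughly:
//   %m = bitcast i8 %mask to <8 x i1>
//   %r = select <8 x i1> %m, <8 x i32> %op0, <8 x i32> %op1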
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that here.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
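
// Worked example (128-bit PALIGNR, ShiftVal = 4): Indices are <4, 5, ..., 19>,
// selecting bytes 4..15 of Op1 followed by bytes 0..3 of Op0 -- the
// concatenation Op0:Op1 shifted right by four bytes.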
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
// Apply a mask to a vector of i1s and widen the result to an i8/i16 mask.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Cmp = Builder.CreateShuffleVector(Cmp,
                                      Constant::getNullValue(Cmp->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
}
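
// Example (a sketch): @llvm.x86.avx512.mask.pcmpeq.d.128 becomes an
//   icmp eq <4 x i32>
// ANDed with the incoming k-mask; since only 4 bits are live, the <4 x i1>
// result is widened to <8 x i1> before the final bitcast to i8.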
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *F = CI.getCalledFunction();
  Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
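
// Example: @llvm.x86.avx512.cvtmask2b.128(i16 %k) becomes, roughly,
//   sext (bitcast i16 %k to <16 x i1>) to <16 x i8>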
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);
890 if (IsX86 && Name.startswith("sse4a.movnt.")) {
891 Module *M = F->getParent();
892 SmallVector<Metadata *, 1> Elts;
894 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
895 MDNode *Node = MDNode::get(C, Elts);
897 Value *Arg0 = CI->getArgOperand(0);
898 Value *Arg1 = CI->getArgOperand(1);
900 // Nontemporal (unaligned) store of the 0'th element of the float/double
902 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
903 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
904 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
906 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
908 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
909 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
912 CI->eraseFromParent();
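
    // The replacement IR for the case above is roughly (names illustrative):
    //   %e = extractelement <2 x double> %v, i64 0
    //   store double %e, double* %p, align 1, !nontemporal !0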
916 if (IsX86 && (Name.startswith("avx.movnt.") ||
917 Name.startswith("avx512.storent."))) {
918 Module *M = F->getParent();
919 SmallVector<Metadata *, 1> Elts;
921 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
922 MDNode *Node = MDNode::get(C, Elts);
924 Value *Arg0 = CI->getArgOperand(0);
925 Value *Arg1 = CI->getArgOperand(1);
927 // Convert the type of the pointer to a pointer to the stored type.
928 Value *BC = Builder.CreateBitCast(Arg0,
929 PointerType::getUnqual(Arg1->getType()),
931 VectorType *VTy = cast<VectorType>(Arg1->getType());
932 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
933 VTy->getBitWidth() / 8);
934 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
937 CI->eraseFromParent();
941 if (IsX86 && Name == "sse2.storel.dq") {
942 Value *Arg0 = CI->getArgOperand(0);
943 Value *Arg1 = CI->getArgOperand(1);
945 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
946 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
947 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
948 Value *BC = Builder.CreateBitCast(Arg0,
949 PointerType::getUnqual(Elt->getType()),
951 Builder.CreateAlignedStore(Elt, BC, 1);
954 CI->eraseFromParent();
958 if (IsX86 && (Name.startswith("sse.storeu.") ||
959 Name.startswith("sse2.storeu.") ||
960 Name.startswith("avx.storeu."))) {
961 Value *Arg0 = CI->getArgOperand(0);
962 Value *Arg1 = CI->getArgOperand(1);
964 Arg0 = Builder.CreateBitCast(Arg0,
965 PointerType::getUnqual(Arg1->getType()),
967 Builder.CreateAlignedStore(Arg1, Arg0, 1);
970 CI->eraseFromParent();
974 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
975 // "avx512.mask.storeu." or "avx512.mask.store."
976 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
977 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
978 CI->getArgOperand(2), Aligned);
981 CI->eraseFromParent();
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
994 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
995 Type *I32Ty = Type::getInt32Ty(C);
996 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
997 ConstantInt::get(I32Ty, 0));
998 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
999 ConstantInt::get(I32Ty, 0));
1000 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1001 Builder.CreateFAdd(Elt0, Elt1),
1002 ConstantInt::get(I32Ty, 0));
1003 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1004 Type *I32Ty = Type::getInt32Ty(C);
1005 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1006 ConstantInt::get(I32Ty, 0));
1007 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1008 ConstantInt::get(I32Ty, 0));
1009 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1010 Builder.CreateFSub(Elt0, Elt1),
1011 ConstantInt::get(I32Ty, 0));
1012 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1013 Type *I32Ty = Type::getInt32Ty(C);
1014 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1015 ConstantInt::get(I32Ty, 0));
1016 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1017 ConstantInt::get(I32Ty, 0));
1018 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1019 Builder.CreateFMul(Elt0, Elt1),
1020 ConstantInt::get(I32Ty, 0));
1021 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1022 Type *I32Ty = Type::getInt32Ty(C);
1023 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1024 ConstantInt::get(I32Ty, 0));
1025 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1026 ConstantInt::get(I32Ty, 0));
1027 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1028 Builder.CreateFDiv(Elt0, Elt1),
1029 ConstantInt::get(I32Ty, 0));
1030 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1031 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1032 bool CmpEq = Name[16] == 'e';
1033 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1034 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
1035 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1036 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1037 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
1038 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1039 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1040 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1041 Name == "sse2.pmaxs.w" ||
1042 Name == "sse41.pmaxsd" ||
1043 Name.startswith("avx2.pmaxs") ||
1044 Name.startswith("avx512.mask.pmaxs"))) {
1045 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1046 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1047 Name == "sse41.pmaxuw" ||
1048 Name == "sse41.pmaxud" ||
1049 Name.startswith("avx2.pmaxu") ||
1050 Name.startswith("avx512.mask.pmaxu"))) {
1051 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1052 } else if (IsX86 && (Name == "sse41.pminsb" ||
1053 Name == "sse2.pmins.w" ||
1054 Name == "sse41.pminsd" ||
1055 Name.startswith("avx2.pmins") ||
1056 Name.startswith("avx512.mask.pmins"))) {
1057 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1058 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1059 Name == "sse41.pminuw" ||
1060 Name == "sse41.pminud" ||
1061 Name.startswith("avx2.pminu") ||
1062 Name.startswith("avx512.mask.pminu"))) {
1063 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1064 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1065 Name == "sse2.cvtps2pd" ||
1066 Name == "avx.cvtdq2.pd.256" ||
1067 Name == "avx.cvt.ps2.pd.256" ||
1068 Name.startswith("avx512.mask.cvtdq2pd.") ||
1069 Name.startswith("avx512.mask.cvtudq2pd."))) {
1070 // Lossless i32/float to double conversion.
1071 // Extract the bottom elements if necessary and convert to double vector.
1072 Value *Src = CI->getArgOperand(0);
1073 VectorType *SrcTy = cast<VectorType>(Src->getType());
1074 VectorType *DstTy = cast<VectorType>(CI->getType());
1075 Rep = CI->getArgOperand(0);
1077 unsigned NumDstElts = DstTy->getNumElements();
1078 if (NumDstElts < SrcTy->getNumElements()) {
1079 assert(NumDstElts == 2 && "Unexpected vector size");
1080 uint32_t ShuffleMask[2] = { 0, 1 };
1081 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1085 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1086 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1088 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1089 else if (UInt2Double)
1090 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1092 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1094 if (CI->getNumArgOperands() == 3)
1095 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1096 CI->getArgOperand(1));
1097 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1098 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1099 CI->getArgOperand(1), CI->getArgOperand(2),
1101 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1102 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1103 CI->getArgOperand(1),CI->getArgOperand(2),
1105 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1106 Intrinsic::ID intID;
1107 if (Name.endswith("ub"))
1108 intID = Intrinsic::x86_xop_vpcomub;
1109 else if (Name.endswith("uw"))
1110 intID = Intrinsic::x86_xop_vpcomuw;
1111 else if (Name.endswith("ud"))
1112 intID = Intrinsic::x86_xop_vpcomud;
1113 else if (Name.endswith("uq"))
1114 intID = Intrinsic::x86_xop_vpcomuq;
1115 else if (Name.endswith("b"))
1116 intID = Intrinsic::x86_xop_vpcomb;
1117 else if (Name.endswith("w"))
1118 intID = Intrinsic::x86_xop_vpcomw;
1119 else if (Name.endswith("d"))
1120 intID = Intrinsic::x86_xop_vpcomd;
1121 else if (Name.endswith("q"))
1122 intID = Intrinsic::x86_xop_vpcomq;
1124 llvm_unreachable("Unknown suffix");
1126 Name = Name.substr(9); // strip off "xop.vpcom"
1128 if (Name.startswith("lt"))
1130 else if (Name.startswith("le"))
1132 else if (Name.startswith("gt"))
1134 else if (Name.startswith("ge"))
1136 else if (Name.startswith("eq"))
1138 else if (Name.startswith("ne"))
1140 else if (Name.startswith("false"))
1142 else if (Name.startswith("true"))
1145 llvm_unreachable("Unknown condition");
1147 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1149 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1150 Builder.getInt8(Imm)});
1151 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1152 Value *Sel = CI->getArgOperand(2);
1153 Value *NotSel = Builder.CreateNot(Sel);
1154 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1155 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1156 Rep = Builder.CreateOr(Sel0, Sel1);
1157 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1158 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1159 Intrinsic::x86_sse42_crc32_32_8);
1160 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1161 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1162 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1163 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1164 // Replace broadcasts with a series of insertelements.
1165 Type *VecTy = CI->getType();
1166 Type *EltTy = VecTy->getVectorElementType();
1167 unsigned EltNum = VecTy->getVectorNumElements();
1168 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1169 EltTy->getPointerTo());
1170 Value *Load = Builder.CreateLoad(EltTy, Cast);
1171 Type *I32Ty = Type::getInt32Ty(C);
1172 Rep = UndefValue::get(VecTy);
1173 for (unsigned I = 0; I < EltNum; ++I)
1174 Rep = Builder.CreateInsertElement(Rep, Load,
1175 ConstantInt::get(I32Ty, I));
1176 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1177 Name.startswith("sse41.pmovzx") ||
1178 Name.startswith("avx2.pmovsx") ||
1179 Name.startswith("avx2.pmovzx") ||
1180 Name.startswith("avx512.mask.pmovsx") ||
1181 Name.startswith("avx512.mask.pmovzx"))) {
1182 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1183 VectorType *DstTy = cast<VectorType>(CI->getType());
1184 unsigned NumDstElts = DstTy->getNumElements();
1186 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1187 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1188 for (unsigned i = 0; i != NumDstElts; ++i)
1191 Value *SV = Builder.CreateShuffleVector(
1192 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1194 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1195 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1196 : Builder.CreateZExt(SV, DstTy);
1197 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1198 if (CI->getNumArgOperands() == 3)
1199 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1200 CI->getArgOperand(1));
1201 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1202 Name == "avx2.vbroadcasti128")) {
1203 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1204 Type *EltTy = CI->getType()->getVectorElementType();
1205 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1206 Type *VT = VectorType::get(EltTy, NumSrcElts);
1207 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1208 PointerType::getUnqual(VT));
1209 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1210 if (NumSrcElts == 2)
1211 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1214 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1215 { 0, 1, 2, 3, 0, 1, 2, 3 });
1216 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1217 Name.startswith("avx2.vbroadcast") ||
1218 Name.startswith("avx512.pbroadcast") ||
1219 Name.startswith("avx512.mask.broadcast.s"))) {
1220 // Replace vp?broadcasts with a vector shuffle.
1221 Value *Op = CI->getArgOperand(0);
1222 unsigned NumElts = CI->getType()->getVectorNumElements();
1223 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1224 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1225 Constant::getNullValue(MaskTy));
1227 if (CI->getNumArgOperands() == 3)
1228 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1229 CI->getArgOperand(1));
1230 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1231 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1232 CI->getArgOperand(1),
1233 CI->getArgOperand(2),
1234 CI->getArgOperand(3),
1235 CI->getArgOperand(4),
1237 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1238 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1239 CI->getArgOperand(1),
1240 CI->getArgOperand(2),
1241 CI->getArgOperand(3),
1242 CI->getArgOperand(4),
1244 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1245 Name == "avx2.psll.dq")) {
1246 // 128/256-bit shift left specified in bits.
1247 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1248 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1249 Shift / 8); // Shift is in bits.
1250 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1251 Name == "avx2.psrl.dq")) {
1252 // 128/256-bit shift right specified in bits.
1253 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1254 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1255 Shift / 8); // Shift is in bits.
1256 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1257 Name == "avx2.psll.dq.bs" ||
1258 Name == "avx512.psll.dq.512")) {
1259 // 128/256/512-bit shift left specified in bytes.
1260 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1261 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1262 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1263 Name == "avx2.psrl.dq.bs" ||
1264 Name == "avx512.psrl.dq.512")) {
1265 // 128/256/512-bit shift right specified in bytes.
1266 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1267 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1268 } else if (IsX86 && (Name == "sse41.pblendw" ||
1269 Name.startswith("sse41.blendp") ||
1270 Name.startswith("avx.blend.p") ||
1271 Name == "avx2.pblendw" ||
1272 Name.startswith("avx2.pblendd."))) {
1273 Value *Op0 = CI->getArgOperand(0);
1274 Value *Op1 = CI->getArgOperand(1);
1275 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1276 VectorType *VecTy = cast<VectorType>(CI->getType());
1277 unsigned NumElts = VecTy->getNumElements();
1279 SmallVector<uint32_t, 16> Idxs(NumElts);
1280 for (unsigned i = 0; i != NumElts; ++i)
1281 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1283 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
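
      // Example: blendps with Imm = 0x5 on <4 x float> produces the shuffle
      // mask <4, 1, 6, 3>; set bits of the immediate select elements from Op1.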
1284 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1285 Name == "avx2.vinserti128" ||
1286 Name.startswith("avx512.mask.insert"))) {
1287 Value *Op0 = CI->getArgOperand(0);
1288 Value *Op1 = CI->getArgOperand(1);
1289 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1290 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1291 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1292 unsigned Scale = DstNumElts / SrcNumElts;
1294 // Mask off the high bits of the immediate value; hardware ignores those.
1297 // Extend the second operand into a vector the size of the destination.
1298 Value *UndefV = UndefValue::get(Op1->getType());
1299 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1300 for (unsigned i = 0; i != SrcNumElts; ++i)
1302 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1303 Idxs[i] = SrcNumElts;
1304 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1306 // Insert the second operand into the first operand.
1308 // Note that there is no guarantee that instruction lowering will actually
1309 // produce a vinsertf128 instruction for the created shuffles. In
1310 // particular, the 0 immediate case involves no lane changes, so it can
1311 // be handled as a blend.
1313 // Example of shuffle mask for 32-bit elements:
1314 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1315 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1317 // First fill with identify mask.
1318 for (unsigned i = 0; i != DstNumElts; ++i)
1320 // Then replace the elements where we need to insert.
1321 for (unsigned i = 0; i != SrcNumElts; ++i)
1322 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1323 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1325 // If the intrinsic has a mask operand, handle that.
1326 if (CI->getNumArgOperands() == 5)
1327 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1328 CI->getArgOperand(3));
1329 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1330 Name == "avx2.vextracti128" ||
1331 Name.startswith("avx512.mask.vextract"))) {
1332 Value *Op0 = CI->getArgOperand(0);
1333 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1334 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1335 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1336 unsigned Scale = SrcNumElts / DstNumElts;
1338 // Mask off the high bits of the immediate value; hardware ignores those.
1341 // Get indexes for the subvector of the input vector.
1342 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1343 for (unsigned i = 0; i != DstNumElts; ++i) {
1344 Idxs[i] = i + (Imm * DstNumElts);
1346 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1348 // If the intrinsic has a mask operand, handle that.
1349 if (CI->getNumArgOperands() == 4)
1350 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1351 CI->getArgOperand(2));
1352 } else if (!IsX86 && Name == "stackprotectorcheck") {
1354 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1355 Name.startswith("avx512.mask.perm.di."))) {
1356 Value *Op0 = CI->getArgOperand(0);
1357 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1358 VectorType *VecTy = cast<VectorType>(CI->getType());
1359 unsigned NumElts = VecTy->getNumElements();
1361 SmallVector<uint32_t, 8> Idxs(NumElts);
1362 for (unsigned i = 0; i != NumElts; ++i)
1363 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1365 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1367 if (CI->getNumArgOperands() == 4)
1368 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1369 CI->getArgOperand(2));
1370 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1371 Name == "sse2.pshuf.d" ||
1372 Name.startswith("avx512.mask.vpermil.p") ||
1373 Name.startswith("avx512.mask.pshuf.d."))) {
1374 Value *Op0 = CI->getArgOperand(0);
1375 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1376 VectorType *VecTy = cast<VectorType>(CI->getType());
1377 unsigned NumElts = VecTy->getNumElements();
1378 // Calculate the size of each index in the immediate.
1379 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1380 unsigned IdxMask = ((1 << IdxSize) - 1);
1382 SmallVector<uint32_t, 8> Idxs(NumElts);
1383 // Lookup the bits for this element, wrapping around the immediate every
1384 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
1385 // to offset by the first index of each group.
1386 for (unsigned i = 0; i != NumElts; ++i)
1387 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1389 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1391 if (CI->getNumArgOperands() == 4)
1392 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1393 CI->getArgOperand(2));
1394 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1395 Name.startswith("avx512.mask.pshufl.w."))) {
1396 Value *Op0 = CI->getArgOperand(0);
1397 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1398 unsigned NumElts = CI->getType()->getVectorNumElements();
1400 SmallVector<uint32_t, 16> Idxs(NumElts);
1401 for (unsigned l = 0; l != NumElts; l += 8) {
1402 for (unsigned i = 0; i != 4; ++i)
1403 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1404 for (unsigned i = 4; i != 8; ++i)
1405 Idxs[i + l] = i + l;
1408 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1410 if (CI->getNumArgOperands() == 4)
1411 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1412 CI->getArgOperand(2));
1413 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1414 Name.startswith("avx512.mask.pshufh.w."))) {
1415 Value *Op0 = CI->getArgOperand(0);
1416 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1417 unsigned NumElts = CI->getType()->getVectorNumElements();
1419 SmallVector<uint32_t, 16> Idxs(NumElts);
1420 for (unsigned l = 0; l != NumElts; l += 8) {
1421 for (unsigned i = 0; i != 4; ++i)
1422 Idxs[i + l] = i + l;
1423 for (unsigned i = 0; i != 4; ++i)
1424 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1429 if (CI->getNumArgOperands() == 4)
1430 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1431 CI->getArgOperand(2));
1432 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1433 Value *Op0 = CI->getArgOperand(0);
1434 Value *Op1 = CI->getArgOperand(1);
1435 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1436 unsigned NumElts = CI->getType()->getVectorNumElements();
1438 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1439 unsigned HalfLaneElts = NumLaneElts / 2;
1441 SmallVector<uint32_t, 16> Idxs(NumElts);
1442 for (unsigned i = 0; i != NumElts; ++i) {
1443 // Base index is the starting element of the lane.
1444 Idxs[i] = i - (i % NumLaneElts);
1445 // If we are half way through the lane switch to the other source.
1446 if ((i % NumLaneElts) >= HalfLaneElts)
1448 // Now select the specific element. By adding HalfLaneElts bits from
1449 // the immediate. Wrapping around the immediate every 8-bits.
1450 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1453 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1455 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1456 CI->getArgOperand(3));
1457 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1458 Name.startswith("avx512.mask.movshdup") ||
1459 Name.startswith("avx512.mask.movsldup"))) {
1460 Value *Op0 = CI->getArgOperand(0);
1461 unsigned NumElts = CI->getType()->getVectorNumElements();
1462 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1464 unsigned Offset = 0;
1465 if (Name.startswith("avx512.mask.movshdup."))
1468 SmallVector<uint32_t, 16> Idxs(NumElts);
1469 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1470 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1471 Idxs[i + l + 0] = i + l + Offset;
1472 Idxs[i + l + 1] = i + l + Offset;
1475 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1477 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1478 CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
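
      // For example (illustrative): unpcklps on v4f32 interleaves the low
      // halves of the two sources, Idxs = {0, 4, 1, 5}.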
      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
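
      // For example (illustrative): unpckhps on v4f32 interleaves the high
      // halves of the two sources, Idxs = {2, 6, 3, 7}.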
      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
      Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
      Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
                              CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
      Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
      Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
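      // Note: the i1 false operand requests ctlz semantics that are defined
      // for a zero input, matching the vplzcnt instructions replaced here.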
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p"))) {
      bool IsMin = Name[13] == 'i';
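      // Name[13] is the second letter of "max"/"min" ('a' vs. 'i'), e.g.
      // "avx512.mask.min.ps.512"[13] == 'i'.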
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
      unsigned EltWidth = VecTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (!IsMin && VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_sse_max_ps;
      else if (!IsMin && VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_sse2_max_pd;
      else if (!IsMin && VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx_max_ps_256;
      else if (!IsMin && VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx_max_pd_256;
      else if (IsMin && VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_sse_min_ps;
      else if (IsMin && VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_sse2_min_pd;
      else if (IsMin && VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx_min_ps_256;
      else if (IsMin && VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx_min_pd_256;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_ssse3_pshuf_b_128;
      else if (VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pshuf_b;
      else if (VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pshuf_b_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      bool IsUnsigned = Name[16] == 'u';
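      // Name[16] is the character after "avx512.mask.pmul": 'u' for the
      // unsigned form, '.' otherwise.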
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse41_pmuldq;
      else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pmul_dq;
      else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pmul_dq_512;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_pmulu_dq;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_pmulu_dq;
      else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_pmulu_dq_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
      bool IsUnsigned = Name[16] == 'u';
      bool IsDW = Name[18] == 'd';
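      // Name is "avx512.mask.pack{ss,us}{wb,dw}.*": Name[16] is 's'/'u' and
      // Name[18] is 'w'/'d', e.g. "avx512.mask.packssdw.128" is the signed
      // dword-to-word form.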
      VectorType *VecTy = cast<VectorType>(CI->getType());
      Intrinsic::ID IID;
      if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packsswb_128;
      else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packsswb;
      else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packsswb_512;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packssdw_128;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packssdw;
      else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packssdw_512;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse2_packuswb_128;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packuswb;
      else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packuswb_512;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
        IID = Intrinsic::x86_sse41_packusdw;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
        IID = Intrinsic::x86_avx2_packusdw;
      else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
        IID = Intrinsic::x86_avx512_packusdw_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];
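
      // Size is the element-type letter after the next '.', e.g.
      // (illustrative): "avx512.mask.psll.d.128" -> 'd',
      // "avx512.mask.pslli.q" -> 'q', "avx512.mask.psllv8.hi" -> 'h'.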
      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
      Intrinsic::ID IID;
      if (Name.endswith("ps.128"))
        IID = Intrinsic::x86_avx_vpermilvar_ps;
      else if (Name.endswith("pd.128"))
        IID = Intrinsic::x86_avx_vpermilvar_pd;
      else if (Name.endswith("ps.256"))
        IID = Intrinsic::x86_avx_vpermilvar_ps_256;
      else if (Name.endswith("pd.256"))
        IID = Intrinsic::x86_avx_vpermilvar_pd_256;
      else if (Name.endswith("ps.512"))
        IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
      else if (Name.endswith("pd.512"))
        IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
      else
        llvm_unreachable("Unexpected vpermilvar intrinsic");

      Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
      Rep = Builder.CreateCall(Intrin,
                               { CI->getArgOperand(0), CI->getArgOperand(1) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
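      // The upgraded form is a plain aligned load tagged as nontemporal, e.g.
      // (illustrative) for the SSE4.1 variant:
      //   %r = load <2 x i64>, <2 x i64>* %cast, align 16, !nontemporal !0
      // with !0 = !{i32 1}.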
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
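      // Expands to a compare-and-select, e.g. (illustrative) for i32:
      //   %abs.cond = icmp sge i32 %a, 0
      //   %abs = select i1 %abs.cond, i32 %a, i32 %neg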
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
    return;
  }

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                   ? Builder.getFalse()
                                   : CI->getArgOperand(2);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument
    // models an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  std::string Name = CI->getName();
  if (!Name.empty()) {
    CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
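
// For example (illustrative): an old two-operand scalar TBAA tag
// !1 = !{!"int", !0} is wrapped into the struct-path access tag
// !{!1, !1, i64 0}, reusing the old node as the scalar type node.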
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
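
// For both helpers above, e.g. (illustrative): a historical
//   bitcast i8* %p to i8 addrspace(1)*
// is rewritten as the pair
//   %1 = ptrtoint i8* %p to i64
//   %2 = inttoptr i64 %1 to i8 addrspace(1)*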

/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION)
    return false;

  bool RetCode = StripDebugInfo(M);
  if (RetCode) {
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return RetCode;
}

bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE module flags. The behavior for these two flags used to
    // be Error and now it is Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }
2276 // "Objective-C Class Properties" is recently added for Objective-C. We
2277 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2278 // flag of value 0, so we can correclty downgrade this flag when trying to
2279 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2280 // this module flag.
2281 if (HasObjCFlag && !HasClassProperties) {
2282 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
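
// Translate an old "llvm.vectorizer.*" loop hint tag to its modern
// "llvm.loop.*" spelling, e.g. (illustrative):
//   "llvm.vectorizer.width"  -> "llvm.loop.vectorize.width"
//   "llvm.vectorizer.unroll" -> "llvm.loop.interleave.count"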
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
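
// For example (illustrative): a loop attachment
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.vectorizer.enable", i1 true}
// is rewritten so that !1 becomes
//   !1 = !{!"llvm.loop.vectorize.enable", i1 true}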