1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Verifier.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
36 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
37 // changed their type from v4f32 to v2i64.
38 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
40 // Check whether this is an old version of the function, which received
42 Type *Arg0Type = F->getFunctionType()->getParamType(0);
43 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
46 // Yes, it's old, replace it with new version.
48 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
56 // Check that the last argument is an i32.
57 Type *LastArgType = F->getFunctionType()->getParamType(
58 F->getFunctionType()->getNumParams() - 1);
59 if (!LastArgType->isIntegerTy(32))
62 // Move this function aside and map down.
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
68 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
69 // All of the intrinsics matches below should be marked with which llvm
70 // version started autoupgrading them. At some point in the future we would
71 // like to use this information to remove upgrade code for some older
72 // intrinsics. It is currently undecided how we will determine that future
74 if (Name == "addcarryx.u32" || // Added in 8.0
75 Name == "addcarryx.u64" || // Added in 8.0
76 Name == "addcarry.u32" || // Added in 8.0
77 Name == "addcarry.u64" || // Added in 8.0
78 Name == "subborrow.u32" || // Added in 8.0
79 Name == "subborrow.u64" || // Added in 8.0
80 Name.startswith("sse2.padds.") || // Added in 8.0
81 Name.startswith("sse2.psubs.") || // Added in 8.0
82 Name.startswith("sse2.paddus.") || // Added in 8.0
83 Name.startswith("sse2.psubus.") || // Added in 8.0
84 Name.startswith("avx2.padds.") || // Added in 8.0
85 Name.startswith("avx2.psubs.") || // Added in 8.0
86 Name.startswith("avx2.paddus.") || // Added in 8.0
87 Name.startswith("avx2.psubus.") || // Added in 8.0
88 Name.startswith("avx512.padds.") || // Added in 8.0
89 Name.startswith("avx512.psubs.") || // Added in 8.0
90 Name.startswith("avx512.mask.padds.") || // Added in 8.0
91 Name.startswith("avx512.mask.psubs.") || // Added in 8.0
92 Name.startswith("avx512.mask.paddus.") || // Added in 8.0
93 Name.startswith("avx512.mask.psubus.") || // Added in 8.0
94 Name=="ssse3.pabs.b.128" || // Added in 6.0
95 Name=="ssse3.pabs.w.128" || // Added in 6.0
96 Name=="ssse3.pabs.d.128" || // Added in 6.0
97 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
98 Name.startswith("fma.vfmadd.") || // Added in 7.0
99 Name.startswith("fma.vfmsub.") || // Added in 7.0
100 Name.startswith("fma.vfmaddsub.") || // Added in 7.0
101 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
102 Name.startswith("fma.vfnmadd.") || // Added in 7.0
103 Name.startswith("fma.vfnmsub.") || // Added in 7.0
104 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
105 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
106 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
107 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
108 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
109 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
110 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
111 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
112 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
113 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
114 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
115 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
116 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
117 Name.startswith("avx512.kunpck") || //added in 6.0
118 Name.startswith("avx2.pabs.") || // Added in 6.0
119 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
120 Name.startswith("avx512.broadcastm") || // Added in 6.0
121 Name == "sse.sqrt.ss" || // Added in 7.0
122 Name == "sse2.sqrt.sd" || // Added in 7.0
123 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
124 Name.startswith("avx.sqrt.p") || // Added in 7.0
125 Name.startswith("sse2.sqrt.p") || // Added in 7.0
126 Name.startswith("sse.sqrt.p") || // Added in 7.0
127 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
128 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
129 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
130 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
131 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
132 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
133 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
134 Name.startswith("avx.vperm2f128.") || // Added in 6.0
135 Name == "avx2.vperm2i128" || // Added in 6.0
136 Name == "sse.add.ss" || // Added in 4.0
137 Name == "sse2.add.sd" || // Added in 4.0
138 Name == "sse.sub.ss" || // Added in 4.0
139 Name == "sse2.sub.sd" || // Added in 4.0
140 Name == "sse.mul.ss" || // Added in 4.0
141 Name == "sse2.mul.sd" || // Added in 4.0
142 Name == "sse.div.ss" || // Added in 4.0
143 Name == "sse2.div.sd" || // Added in 4.0
144 Name == "sse41.pmaxsb" || // Added in 3.9
145 Name == "sse2.pmaxs.w" || // Added in 3.9
146 Name == "sse41.pmaxsd" || // Added in 3.9
147 Name == "sse2.pmaxu.b" || // Added in 3.9
148 Name == "sse41.pmaxuw" || // Added in 3.9
149 Name == "sse41.pmaxud" || // Added in 3.9
150 Name == "sse41.pminsb" || // Added in 3.9
151 Name == "sse2.pmins.w" || // Added in 3.9
152 Name == "sse41.pminsd" || // Added in 3.9
153 Name == "sse2.pminu.b" || // Added in 3.9
154 Name == "sse41.pminuw" || // Added in 3.9
155 Name == "sse41.pminud" || // Added in 3.9
156 Name == "avx512.kand.w" || // Added in 7.0
157 Name == "avx512.kandn.w" || // Added in 7.0
158 Name == "avx512.knot.w" || // Added in 7.0
159 Name == "avx512.kor.w" || // Added in 7.0
160 Name == "avx512.kxor.w" || // Added in 7.0
161 Name == "avx512.kxnor.w" || // Added in 7.0
162 Name == "avx512.kortestc.w" || // Added in 7.0
163 Name == "avx512.kortestz.w" || // Added in 7.0
164 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
165 Name.startswith("avx2.pmax") || // Added in 3.9
166 Name.startswith("avx2.pmin") || // Added in 3.9
167 Name.startswith("avx512.mask.pmax") || // Added in 4.0
168 Name.startswith("avx512.mask.pmin") || // Added in 4.0
169 Name.startswith("avx2.vbroadcast") || // Added in 3.8
170 Name.startswith("avx2.pbroadcast") || // Added in 3.8
171 Name.startswith("avx.vpermil.") || // Added in 3.1
172 Name.startswith("sse2.pshuf") || // Added in 3.9
173 Name.startswith("avx512.pbroadcast") || // Added in 3.9
174 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
175 Name.startswith("avx512.mask.movddup") || // Added in 3.9
176 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
177 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
178 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
179 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
180 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
181 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
182 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
183 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
184 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
185 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
186 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
187 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
188 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
189 Name.startswith("avx512.mask.pand.") || // Added in 3.9
190 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
191 Name.startswith("avx512.mask.por.") || // Added in 3.9
192 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
193 Name.startswith("avx512.mask.and.") || // Added in 3.9
194 Name.startswith("avx512.mask.andn.") || // Added in 3.9
195 Name.startswith("avx512.mask.or.") || // Added in 3.9
196 Name.startswith("avx512.mask.xor.") || // Added in 3.9
197 Name.startswith("avx512.mask.padd.") || // Added in 4.0
198 Name.startswith("avx512.mask.psub.") || // Added in 4.0
199 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
200 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
201 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
202 Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
203 Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
204 Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
205 Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
206 Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
207 Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
208 Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
209 Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
210 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
211 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
212 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
213 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
214 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
215 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
216 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
217 Name == "avx512.cvtusi2sd" || // Added in 7.0
218 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
219 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
220 Name == "sse2.pmulu.dq" || // Added in 7.0
221 Name == "sse41.pmuldq" || // Added in 7.0
222 Name == "avx2.pmulu.dq" || // Added in 7.0
223 Name == "avx2.pmul.dq" || // Added in 7.0
224 Name == "avx512.pmulu.dq.512" || // Added in 7.0
225 Name == "avx512.pmul.dq.512" || // Added in 7.0
226 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
227 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
228 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
229 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
230 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
231 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
232 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
233 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
234 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
235 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
236 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
237 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
238 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
239 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
240 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
241 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
242 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
243 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
244 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
245 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
246 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
247 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
248 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
249 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
250 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
251 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
252 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
253 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
254 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
255 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
256 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
257 Name.startswith("avx512.mask.pslli") || // Added in 4.0
258 Name.startswith("avx512.mask.psrai") || // Added in 4.0
259 Name.startswith("avx512.mask.psrli") || // Added in 4.0
260 Name.startswith("avx512.mask.psllv") || // Added in 4.0
261 Name.startswith("avx512.mask.psrav") || // Added in 4.0
262 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
263 Name.startswith("sse41.pmovsx") || // Added in 3.8
264 Name.startswith("sse41.pmovzx") || // Added in 3.9
265 Name.startswith("avx2.pmovsx") || // Added in 3.9
266 Name.startswith("avx2.pmovzx") || // Added in 3.9
267 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
268 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
269 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
270 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
271 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
272 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
273 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
274 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
275 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
276 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
277 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
278 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
279 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
280 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
281 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
282 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
283 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
284 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
285 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
286 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
287 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
288 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
289 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
290 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
291 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
292 Name.startswith("avx512.vpshld.") || // Added in 8.0
293 Name.startswith("avx512.vpshrd.") || // Added in 8.0
294 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
295 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
296 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
297 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
298 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
299 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
300 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
301 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
302 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
303 Name == "sse.cvtsi2ss" || // Added in 7.0
304 Name == "sse.cvtsi642ss" || // Added in 7.0
305 Name == "sse2.cvtsi2sd" || // Added in 7.0
306 Name == "sse2.cvtsi642sd" || // Added in 7.0
307 Name == "sse2.cvtss2sd" || // Added in 7.0
308 Name == "sse2.cvtdq2pd" || // Added in 3.9
309 Name == "sse2.cvtdq2ps" || // Added in 7.0
310 Name == "sse2.cvtps2pd" || // Added in 3.9
311 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
312 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
313 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
314 Name.startswith("avx.vinsertf128.") || // Added in 3.7
315 Name == "avx2.vinserti128" || // Added in 3.7
316 Name.startswith("avx512.mask.insert") || // Added in 4.0
317 Name.startswith("avx.vextractf128.") || // Added in 3.7
318 Name == "avx2.vextracti128" || // Added in 3.7
319 Name.startswith("avx512.mask.vextract") || // Added in 4.0
320 Name.startswith("sse4a.movnt.") || // Added in 3.9
321 Name.startswith("avx.movnt.") || // Added in 3.2
322 Name.startswith("avx512.storent.") || // Added in 3.9
323 Name == "sse41.movntdqa" || // Added in 5.0
324 Name == "avx2.movntdqa" || // Added in 5.0
325 Name == "avx512.movntdqa" || // Added in 5.0
326 Name == "sse2.storel.dq" || // Added in 3.9
327 Name.startswith("sse.storeu.") || // Added in 3.9
328 Name.startswith("sse2.storeu.") || // Added in 3.9
329 Name.startswith("avx.storeu.") || // Added in 3.9
330 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
331 Name.startswith("avx512.mask.store.p") || // Added in 3.9
332 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
333 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
334 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
335 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
336 Name == "avx512.mask.store.ss" || // Added in 7.0
337 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
338 Name.startswith("avx512.mask.load.") || // Added in 3.9
339 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
340 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
341 Name == "sse42.crc32.64.8" || // Added in 3.4
342 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
343 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
344 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
345 Name.startswith("avx512.mask.valign.") || // Added in 4.0
346 Name.startswith("sse2.psll.dq") || // Added in 3.7
347 Name.startswith("sse2.psrl.dq") || // Added in 3.7
348 Name.startswith("avx2.psll.dq") || // Added in 3.7
349 Name.startswith("avx2.psrl.dq") || // Added in 3.7
350 Name.startswith("avx512.psll.dq") || // Added in 3.9
351 Name.startswith("avx512.psrl.dq") || // Added in 3.9
352 Name == "sse41.pblendw" || // Added in 3.7
353 Name.startswith("sse41.blendp") || // Added in 3.7
354 Name.startswith("avx.blend.p") || // Added in 3.7
355 Name == "avx2.pblendw" || // Added in 3.7
356 Name.startswith("avx2.pblendd.") || // Added in 3.7
357 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
358 Name == "avx2.vbroadcasti128" || // Added in 3.7
359 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
360 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
361 Name == "xop.vpcmov" || // Added in 3.8
362 Name == "xop.vpcmov.256" || // Added in 5.0
363 Name.startswith("avx512.mask.move.s") || // Added in 4.0
364 Name.startswith("avx512.cvtmask2") || // Added in 5.0
365 (Name.startswith("xop.vpcom") && // Added in 3.2
366 F->arg_size() == 2) ||
367 Name.startswith("xop.vprot") || // Added in 8.0
368 Name.startswith("avx512.prol") || // Added in 8.0
369 Name.startswith("avx512.pror") || // Added in 8.0
370 Name.startswith("avx512.mask.prorv.") || // Added in 8.0
371 Name.startswith("avx512.mask.pror.") || // Added in 8.0
372 Name.startswith("avx512.mask.prolv.") || // Added in 8.0
373 Name.startswith("avx512.mask.prol.") || // Added in 8.0
374 Name.startswith("avx512.ptestm") || //Added in 6.0
375 Name.startswith("avx512.ptestnm") || //Added in 6.0
376 Name.startswith("sse2.pavg") || // Added in 6.0
377 Name.startswith("avx2.pavg") || // Added in 6.0
378 Name.startswith("avx512.mask.pavg")) // Added in 6.0
384 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
386 // Only handle intrinsics that start with "x86.".
387 if (!Name.startswith("x86."))
389 // Remove "x86." prefix.
390 Name = Name.substr(4);
392 if (ShouldUpgradeX86Intrinsic(F, Name)) {
397 if (Name == "rdtscp") { // Added in 8.0
398 // If this intrinsic has 0 operands, it's the new version.
399 if (F->getFunctionType()->getNumParams() == 0)
403 NewFn = Intrinsic::getDeclaration(F->getParent(),
404 Intrinsic::x86_rdtscp);
408 // SSE4.1 ptest functions may have an old signature.
409 if (Name.startswith("sse41.ptest")) { // Added in 3.2
410 if (Name.substr(11) == "c")
411 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
412 if (Name.substr(11) == "z")
413 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
414 if (Name.substr(11) == "nzc")
415 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
417 // Several blend and other instructions with masks used the wrong number of
419 if (Name == "sse41.insertps") // Added in 3.6
420 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
422 if (Name == "sse41.dppd") // Added in 3.6
423 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
425 if (Name == "sse41.dpps") // Added in 3.6
426 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
428 if (Name == "sse41.mpsadbw") // Added in 3.6
429 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
431 if (Name == "avx.dp.ps.256") // Added in 3.6
432 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
434 if (Name == "avx2.mpsadbw") // Added in 3.6
435 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
438 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
439 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
441 NewFn = Intrinsic::getDeclaration(F->getParent(),
442 Intrinsic::x86_xop_vfrcz_ss);
445 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
447 NewFn = Intrinsic::getDeclaration(F->getParent(),
448 Intrinsic::x86_xop_vfrcz_sd);
451 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
452 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
453 auto Idx = F->getFunctionType()->getParamType(2);
454 if (Idx->isFPOrFPVectorTy()) {
456 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
457 unsigned EltSize = Idx->getScalarSizeInBits();
458 Intrinsic::ID Permil2ID;
459 if (EltSize == 64 && IdxSize == 128)
460 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
461 else if (EltSize == 32 && IdxSize == 128)
462 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
463 else if (EltSize == 64 && IdxSize == 256)
464 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
466 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
467 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
472 if (Name == "seh.recoverfp") {
473 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
// Core per-intrinsic upgrade check: inspects a declaration's name (and
// sometimes its signature) and, when an upgrade applies, either sets NewFn
// to the replacement declaration or leaves it null for intrinsics whose
// calls are rewritten later.
//
// NOTE(review): this chunk appears to be an elided extraction — early
// returns, closing braces, a dispatch scaffold, and the tail of the nvvm
// handling are missing relative to a complete definition. The code below is
// preserved as found; confirm against the original file before building.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");
  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."
  // ARM/AArch64 rbit maps directly onto the generic bitreverse intrinsic,
  // overloaded on the argument type.
  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
  // NEON vclz becomes llvm.ctlz with an explicit is_zero_undef i1 operand.
  if (Name.startswith("arm.neon.vclz")) {
      F->arg_begin()->getType(),
      Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());
  // NEON vcnt is a popcount on each vector element.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());
  // NEON vld* declarations gain an explicit pointer-type suffix (".p0i8").
  Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vldRegex.match(Name)) {
    auto fArgs = F->getFunctionType()->params();
    SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
    // Can't use Intrinsic::getDeclaration here as the return types might
    // then only be structurally equal.
    FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                             "llvm." + Name + ".p0i8", F->getParent());
  // NEON vst*: pick vst1-4 (or the lane variants) from the argument count.
  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vstRegex.match(Name)) {
    static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                              Intrinsic::arm_neon_vst2,
                                              Intrinsic::arm_neon_vst3,
                                              Intrinsic::arm_neon_vst4};
    static const Intrinsic::ID StoreLaneInts[] = {
      Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
      Intrinsic::arm_neon_vst4lane
    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    // vstN takes N+2 args, vstNlane N+4; index the tables accordingly.
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);
  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
  // Single-operand ctlz/cttz gain the explicit is_zero_undef flag.
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());
  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());
  // Old 4-operand dbg.value is mapped to the current 3-operand form.
  if (Name == "dbg.value" && F->arg_size() == 4) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
  // lifetime/invariant start-end markers are overloaded on the pointer type
  // of the object operand; re-declare them if the mangling changed.
  bool IsLifetimeStart = Name.startswith("lifetime.start");
  if (IsLifetimeStart || Name.startswith("invariant.start")) {
    Intrinsic::ID ID = IsLifetimeStart ?
      Intrinsic::lifetime_start : Intrinsic::invariant_start;
    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[1]};
    if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
  bool IsLifetimeEnd = Name.startswith("lifetime.end");
  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
    Intrinsic::ID ID = IsLifetimeEnd ?
      Intrinsic::lifetime_end : Intrinsic::invariant_end;
    auto Args = F->getFunctionType()->params();
    // invariant.end carries the object pointer at index 2, lifetime.end at 1.
    Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
    if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
  if (Name.startswith("invariant.group.barrier")) {
    // Rename invariant.group.barrier to launder.invariant.group
    auto Args = F->getFunctionType()->params();
    Type* ObjectPtr[1] = {Args[0]};
    NewFn = Intrinsic::getDeclaration(F->getParent(),
        Intrinsic::launder_invariant_group, ObjectPtr);
  // masked.load/store: re-mangle to the overload that includes the pointer
  // type of the memory operand.
  if (Name.startswith("masked.load.")) {
    Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_load,
  if (Name.startswith("masked.store.")) {
    auto Args = F->getFunctionType()->params();
    Type *Tys[] = { Args[0], Args[1] };
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_store,
  // Renaming gather/scatter intrinsics with no address space overloading
  // to the new overload which includes an address space
  if (Name.startswith("masked.gather.")) {
    Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_gather, Tys);
  if (Name.startswith("masked.scatter.")) {
    auto Args = F->getFunctionType()->params();
    Type *Tys[] = {Args[0], Args[1]};
    if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::masked_scatter, Tys);
  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
  // alignment parameter to embedding the alignment as an attribute of
  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
    // Get the types of dest, src, and len
    ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
  if (Name.startswith("memmove.") && F->arg_size() == 5) {
    // Get the types of dest, src, and len
    ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
  if (Name.startswith("memset.") && F->arg_size() == 5) {
    // Get the types of dest, and len
    const auto *FT = F->getFunctionType();
    Type *ParamTypes[2] = {
      FT->getParamType(0), // Dest
      FT->getParamType(2) // len
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
  if (Name.startswith("nvvm.")) {
    Name = Name.substr(5);
    // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
    Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
        .Cases("brev32", "brev64", Intrinsic::bitreverse)
        .Case("clz.i", Intrinsic::ctlz)
        .Case("popc.i", Intrinsic::ctpop)
        .Default(Intrinsic::not_intrinsic);
    if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                        {F->getReturnType()});
    // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
    // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
    // TODO: We could add lohi.i2d.
    // NOTE(review): the use of Expand (the .Default(...) terminator and the
    // branch consuming it) is elided in this extraction.
    bool Expand = StringSwitch<bool>(Name)
        .Cases("abs.i", "abs.ll", true)
        .Cases("clz.ll", "popc.ll", "h2f", true)
        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
  // We only need to change the name to match the mangling including the
  if (Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->arg_size() == 2 ||
        F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
  if (Name == "stackprotectorcheck") {
  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
768 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
770 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
771 assert(F != NewFn && "Intrinsic function upgraded to the same function");
773 // Upgrade intrinsic attributes. This does not change the function.
776 if (Intrinsic::ID id = F->getIntrinsicID())
777 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
781 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
782 // Nothing to do yet.
786 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
788 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
789 Value *Op, unsigned Shift) {
790 Type *ResultTy = Op->getType();
791 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
793 // Bitcast from a 64-bit element type to a byte element type.
794 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
795 Op = Builder.CreateBitCast(Op, VecTy, "cast");
797 // We'll be shuffling in zeroes.
798 Value *Res = Constant::getNullValue(VecTy);
800 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
801 // we'll just return the zero vector.
804 // 256/512-bit version is split into 2/4 16-byte lanes.
805 for (unsigned l = 0; l != NumElts; l += 16)
806 for (unsigned i = 0; i != 16; ++i) {
807 unsigned Idx = NumElts + i - Shift;
809 Idx -= NumElts - 16; // end of lane, switch operand.
810 Idxs[l + i] = Idx + l;
813 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
816 // Bitcast back to a 64-bit element type.
817 return Builder.CreateBitCast(Res, ResultTy, "cast");
820 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
822 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
824 Type *ResultTy = Op->getType();
825 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
827 // Bitcast from a 64-bit element type to a byte element type.
828 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
829 Op = Builder.CreateBitCast(Op, VecTy, "cast");
831 // We'll be shuffling in zeroes.
832 Value *Res = Constant::getNullValue(VecTy);
834 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
835 // we'll just return the zero vector.
838 // 256/512-bit version is split into 2/4 16-byte lanes.
839 for (unsigned l = 0; l != NumElts; l += 16)
840 for (unsigned i = 0; i != 16; ++i) {
841 unsigned Idx = i + Shift;
843 Idx += NumElts - 16; // end of lane, switch operand.
844 Idxs[l + i] = Idx + l;
847 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
850 // Bitcast back to a 64-bit element type.
851 return Builder.CreateBitCast(Res, ResultTy, "cast");
854 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
856 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
857 cast<IntegerType>(Mask->getType())->getBitWidth());
858 Mask = Builder.CreateBitCast(Mask, MaskTy);
860 // If we have less than 8 elements, then the starting mask was an i8 and
861 // we need to extract down to the right number of elements.
864 for (unsigned i = 0; i != NumElts; ++i)
866 Mask = Builder.CreateShuffleVector(Mask, Mask,
867 makeArrayRef(Indices, NumElts),
874 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
875 Value *Op0, Value *Op1) {
876 // If the mask is all ones just emit the first operation.
877 if (const auto *C = dyn_cast<Constant>(Mask))
878 if (C->isAllOnesValue())
881 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
882 return Builder.CreateSelect(Mask, Op0, Op1);
885 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
886 Value *Op0, Value *Op1) {
887 // If the mask is all ones just emit the first operation.
888 if (const auto *C = dyn_cast<Constant>(Mask))
889 if (C->isAllOnesValue())
892 llvm::VectorType *MaskTy =
893 llvm::VectorType::get(Builder.getInt1Ty(),
894 Mask->getType()->getIntegerBitWidth());
895 Mask = Builder.CreateBitCast(Mask, MaskTy);
896 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
897 return Builder.CreateSelect(Mask, Op0, Op1);
900 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
901 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
902 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
903 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
904 Value *Op1, Value *Shift,
905 Value *Passthru, Value *Mask,
907 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
909 unsigned NumElts = Op0->getType()->getVectorNumElements();
910 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
911 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
912 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
914 // Mask the immediate for VALIGN.
916 ShiftVal &= (NumElts - 1);
918 // If palignr is shifting the pair of vectors more than the size of two
921 return llvm::Constant::getNullValue(Op0->getType());
923 // If palignr is shifting the pair of input vectors more than one lane,
924 // but less than two lanes, convert to shifting in zeroes.
928 Op0 = llvm::Constant::getNullValue(Op0->getType());
931 uint32_t Indices[64];
932 // 256-bit palignr operates on 128-bit lanes so we need to handle that
933 for (unsigned l = 0; l < NumElts; l += 16) {
934 for (unsigned i = 0; i != 16; ++i) {
935 unsigned Idx = ShiftVal + i;
936 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
937 Idx += NumElts - 16; // End of lane, switch operand.
938 Indices[l + i] = Idx + l;
942 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
943 makeArrayRef(Indices, NumElts),
946 return EmitX86Select(Builder, Mask, Align, Passthru);
949 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
950 bool ZeroMask, bool IndexForm) {
951 Type *Ty = CI.getType();
952 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
953 unsigned EltWidth = Ty->getScalarSizeInBits();
954 bool IsFloat = Ty->isFPOrFPVectorTy();
956 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
957 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
958 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
959 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
960 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
961 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
962 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
963 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
964 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
965 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
966 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
967 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
968 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
969 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
970 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
971 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
972 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
973 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
974 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
975 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
976 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
977 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
978 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
979 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
980 else if (VecWidth == 128 && EltWidth == 16)
981 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
982 else if (VecWidth == 256 && EltWidth == 16)
983 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
984 else if (VecWidth == 512 && EltWidth == 16)
985 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
986 else if (VecWidth == 128 && EltWidth == 8)
987 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
988 else if (VecWidth == 256 && EltWidth == 8)
989 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
990 else if (VecWidth == 512 && EltWidth == 8)
991 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
993 llvm_unreachable("Unexpected intrinsic");
995 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
996 CI.getArgOperand(2) };
998 // If this isn't index form we need to swap operand 0 and 1.
1000 std::swap(Args[0], Args[1]);
1002 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1004 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1005 : Builder.CreateBitCast(CI.getArgOperand(1),
1007 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1010 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1011 bool IsSigned, bool IsAddition) {
1012 Type *Ty = CI.getType();
1013 Value *Op0 = CI.getOperand(0);
1014 Value *Op1 = CI.getOperand(1);
1017 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1018 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1019 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1020 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1022 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1023 Value *VecSrc = CI.getOperand(2);
1024 Value *Mask = CI.getOperand(3);
1025 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1030 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1031 bool IsRotateRight) {
1032 Type *Ty = CI.getType();
1033 Value *Src = CI.getArgOperand(0);
1034 Value *Amt = CI.getArgOperand(1);
1036 // Amount may be scalar immediate, in which case create a splat vector.
1037 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1038 // we only care about the lowest log2 bits anyway.
1039 if (Amt->getType() != Ty) {
1040 unsigned NumElts = Ty->getVectorNumElements();
1041 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1042 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1045 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1046 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1047 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1049 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1050 Value *VecSrc = CI.getOperand(2);
1051 Value *Mask = CI.getOperand(3);
1052 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1057 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1058 bool IsShiftRight, bool ZeroMask) {
1059 Type *Ty = CI.getType();
1060 Value *Op0 = CI.getArgOperand(0);
1061 Value *Op1 = CI.getArgOperand(1);
1062 Value *Amt = CI.getArgOperand(2);
1065 std::swap(Op0, Op1);
1067 // Amount may be scalar immediate, in which case create a splat vector.
1068 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1069 // we only care about the lowest log2 bits anyway.
1070 if (Amt->getType() != Ty) {
1071 unsigned NumElts = Ty->getVectorNumElements();
1072 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1073 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1076 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1077 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1078 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1080 unsigned NumArgs = CI.getNumArgOperands();
1081 if (NumArgs >= 4) { // For masked intrinsics.
1082 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1083 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1084 CI.getArgOperand(0);
1085 Value *Mask = CI.getOperand(NumArgs - 1);
1086 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1091 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1092 Value *Ptr, Value *Data, Value *Mask,
1094 // Cast the pointer to the right type.
1095 Ptr = Builder.CreateBitCast(Ptr,
1096 llvm::PointerType::getUnqual(Data->getType()));
1098 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1100 // If the mask is all ones just emit a regular store.
1101 if (const auto *C = dyn_cast<Constant>(Mask))
1102 if (C->isAllOnesValue())
1103 return Builder.CreateAlignedStore(Data, Ptr, Align);
1105 // Convert the mask from an integer type to a vector of i1.
1106 unsigned NumElts = Data->getType()->getVectorNumElements();
1107 Mask = getX86MaskVec(Builder, Mask, NumElts);
1108 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1111 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1112 Value *Ptr, Value *Passthru, Value *Mask,
1114 // Cast the pointer to the right type.
1115 Ptr = Builder.CreateBitCast(Ptr,
1116 llvm::PointerType::getUnqual(Passthru->getType()));
1118 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1120 // If the mask is all ones just emit a regular store.
1121 if (const auto *C = dyn_cast<Constant>(Mask))
1122 if (C->isAllOnesValue())
1123 return Builder.CreateAlignedLoad(Ptr, Align);
1125 // Convert the mask from an integer type to a vector of i1.
1126 unsigned NumElts = Passthru->getType()->getVectorNumElements();
1127 Mask = getX86MaskVec(Builder, Mask, NumElts);
1128 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1131 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1132 Value *Op0 = CI.getArgOperand(0);
1133 llvm::Type *Ty = Op0->getType();
1134 Value *Zero = llvm::Constant::getNullValue(Ty);
1135 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1136 Value *Neg = Builder.CreateNeg(Op0);
1137 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1139 if (CI.getNumArgOperands() == 3)
1140 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1145 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1146 ICmpInst::Predicate Pred) {
1147 Value *Op0 = CI.getArgOperand(0);
1148 Value *Op1 = CI.getArgOperand(1);
1149 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1150 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1152 if (CI.getNumArgOperands() == 4)
1153 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1158 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1159 Type *Ty = CI.getType();
1161 // Arguments have a vXi32 type so cast to vXi64.
1162 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1163 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1166 // Shift left then arithmetic shift right.
1167 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1168 LHS = Builder.CreateShl(LHS, ShiftAmt);
1169 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1170 RHS = Builder.CreateShl(RHS, ShiftAmt);
1171 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1173 // Clear the upper bits.
1174 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1175 LHS = Builder.CreateAnd(LHS, Mask);
1176 RHS = Builder.CreateAnd(RHS, Mask);
1179 Value *Res = Builder.CreateMul(LHS, RHS);
1181 if (CI.getNumArgOperands() == 4)
1182 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1187 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1188 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1190 unsigned NumElts = Vec->getType()->getVectorNumElements();
1192 const auto *C = dyn_cast<Constant>(Mask);
1193 if (!C || !C->isAllOnesValue())
1194 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1198 uint32_t Indices[8];
1199 for (unsigned i = 0; i != NumElts; ++i)
1201 for (unsigned i = NumElts; i != 8; ++i)
1202 Indices[i] = NumElts + i % NumElts;
1203 Vec = Builder.CreateShuffleVector(Vec,
1204 Constant::getNullValue(Vec->getType()),
1207 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1210 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1211 unsigned CC, bool Signed) {
1212 Value *Op0 = CI.getArgOperand(0);
1213 unsigned NumElts = Op0->getType()->getVectorNumElements();
1217 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1218 } else if (CC == 7) {
1219 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1221 ICmpInst::Predicate Pred;
1223 default: llvm_unreachable("Unknown condition code");
1224 case 0: Pred = ICmpInst::ICMP_EQ; break;
1225 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1226 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1227 case 4: Pred = ICmpInst::ICMP_NE; break;
1228 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1229 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1231 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1234 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1236 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1239 // Replace a masked intrinsic with an older unmasked intrinsic.
1240 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1241 Intrinsic::ID IID) {
1242 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1243 Value *Rep = Builder.CreateCall(Intrin,
1244 { CI.getArgOperand(0), CI.getArgOperand(1) });
1245 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1248 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1249 Value* A = CI.getArgOperand(0);
1250 Value* B = CI.getArgOperand(1);
1251 Value* Src = CI.getArgOperand(2);
1252 Value* Mask = CI.getArgOperand(3);
1254 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1255 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1256 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1257 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1258 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1259 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1263 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1264 Value* Op = CI.getArgOperand(0);
1265 Type* ReturnOp = CI.getType();
1266 unsigned NumElts = CI.getType()->getVectorNumElements();
1267 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1268 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1271 // Replace intrinsic with unmasked version and a select.
1272 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1273 CallInst &CI, Value *&Rep) {
1274 Name = Name.substr(12); // Remove avx512.mask.
1276 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1277 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1279 if (Name.startswith("max.p")) {
1280 if (VecWidth == 128 && EltWidth == 32)
1281 IID = Intrinsic::x86_sse_max_ps;
1282 else if (VecWidth == 128 && EltWidth == 64)
1283 IID = Intrinsic::x86_sse2_max_pd;
1284 else if (VecWidth == 256 && EltWidth == 32)
1285 IID = Intrinsic::x86_avx_max_ps_256;
1286 else if (VecWidth == 256 && EltWidth == 64)
1287 IID = Intrinsic::x86_avx_max_pd_256;
1289 llvm_unreachable("Unexpected intrinsic");
1290 } else if (Name.startswith("min.p")) {
1291 if (VecWidth == 128 && EltWidth == 32)
1292 IID = Intrinsic::x86_sse_min_ps;
1293 else if (VecWidth == 128 && EltWidth == 64)
1294 IID = Intrinsic::x86_sse2_min_pd;
1295 else if (VecWidth == 256 && EltWidth == 32)
1296 IID = Intrinsic::x86_avx_min_ps_256;
1297 else if (VecWidth == 256 && EltWidth == 64)
1298 IID = Intrinsic::x86_avx_min_pd_256;
1300 llvm_unreachable("Unexpected intrinsic");
1301 } else if (Name.startswith("pshuf.b.")) {
1302 if (VecWidth == 128)
1303 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1304 else if (VecWidth == 256)
1305 IID = Intrinsic::x86_avx2_pshuf_b;
1306 else if (VecWidth == 512)
1307 IID = Intrinsic::x86_avx512_pshuf_b_512;
1309 llvm_unreachable("Unexpected intrinsic");
1310 } else if (Name.startswith("pmul.hr.sw.")) {
1311 if (VecWidth == 128)
1312 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1313 else if (VecWidth == 256)
1314 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1315 else if (VecWidth == 512)
1316 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1318 llvm_unreachable("Unexpected intrinsic");
1319 } else if (Name.startswith("pmulh.w.")) {
1320 if (VecWidth == 128)
1321 IID = Intrinsic::x86_sse2_pmulh_w;
1322 else if (VecWidth == 256)
1323 IID = Intrinsic::x86_avx2_pmulh_w;
1324 else if (VecWidth == 512)
1325 IID = Intrinsic::x86_avx512_pmulh_w_512;
1327 llvm_unreachable("Unexpected intrinsic");
1328 } else if (Name.startswith("pmulhu.w.")) {
1329 if (VecWidth == 128)
1330 IID = Intrinsic::x86_sse2_pmulhu_w;
1331 else if (VecWidth == 256)
1332 IID = Intrinsic::x86_avx2_pmulhu_w;
1333 else if (VecWidth == 512)
1334 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1336 llvm_unreachable("Unexpected intrinsic");
1337 } else if (Name.startswith("pmaddw.d.")) {
1338 if (VecWidth == 128)
1339 IID = Intrinsic::x86_sse2_pmadd_wd;
1340 else if (VecWidth == 256)
1341 IID = Intrinsic::x86_avx2_pmadd_wd;
1342 else if (VecWidth == 512)
1343 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1345 llvm_unreachable("Unexpected intrinsic");
1346 } else if (Name.startswith("pmaddubs.w.")) {
1347 if (VecWidth == 128)
1348 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1349 else if (VecWidth == 256)
1350 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1351 else if (VecWidth == 512)
1352 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1354 llvm_unreachable("Unexpected intrinsic");
1355 } else if (Name.startswith("packsswb.")) {
1356 if (VecWidth == 128)
1357 IID = Intrinsic::x86_sse2_packsswb_128;
1358 else if (VecWidth == 256)
1359 IID = Intrinsic::x86_avx2_packsswb;
1360 else if (VecWidth == 512)
1361 IID = Intrinsic::x86_avx512_packsswb_512;
1363 llvm_unreachable("Unexpected intrinsic");
1364 } else if (Name.startswith("packssdw.")) {
1365 if (VecWidth == 128)
1366 IID = Intrinsic::x86_sse2_packssdw_128;
1367 else if (VecWidth == 256)
1368 IID = Intrinsic::x86_avx2_packssdw;
1369 else if (VecWidth == 512)
1370 IID = Intrinsic::x86_avx512_packssdw_512;
1372 llvm_unreachable("Unexpected intrinsic");
1373 } else if (Name.startswith("packuswb.")) {
1374 if (VecWidth == 128)
1375 IID = Intrinsic::x86_sse2_packuswb_128;
1376 else if (VecWidth == 256)
1377 IID = Intrinsic::x86_avx2_packuswb;
1378 else if (VecWidth == 512)
1379 IID = Intrinsic::x86_avx512_packuswb_512;
1381 llvm_unreachable("Unexpected intrinsic");
1382 } else if (Name.startswith("packusdw.")) {
1383 if (VecWidth == 128)
1384 IID = Intrinsic::x86_sse41_packusdw;
1385 else if (VecWidth == 256)
1386 IID = Intrinsic::x86_avx2_packusdw;
1387 else if (VecWidth == 512)
1388 IID = Intrinsic::x86_avx512_packusdw_512;
1390 llvm_unreachable("Unexpected intrinsic");
1391 } else if (Name.startswith("vpermilvar.")) {
1392 if (VecWidth == 128 && EltWidth == 32)
1393 IID = Intrinsic::x86_avx_vpermilvar_ps;
1394 else if (VecWidth == 128 && EltWidth == 64)
1395 IID = Intrinsic::x86_avx_vpermilvar_pd;
1396 else if (VecWidth == 256 && EltWidth == 32)
1397 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1398 else if (VecWidth == 256 && EltWidth == 64)
1399 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1400 else if (VecWidth == 512 && EltWidth == 32)
1401 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1402 else if (VecWidth == 512 && EltWidth == 64)
1403 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1405 llvm_unreachable("Unexpected intrinsic");
1406 } else if (Name == "cvtpd2dq.256") {
1407 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1408 } else if (Name == "cvtpd2ps.256") {
1409 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1410 } else if (Name == "cvttpd2dq.256") {
1411 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1412 } else if (Name == "cvttps2dq.128") {
1413 IID = Intrinsic::x86_sse2_cvttps2dq;
1414 } else if (Name == "cvttps2dq.256") {
1415 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1416 } else if (Name.startswith("permvar.")) {
1417 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1418 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1419 IID = Intrinsic::x86_avx2_permps;
1420 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1421 IID = Intrinsic::x86_avx2_permd;
1422 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1423 IID = Intrinsic::x86_avx512_permvar_df_256;
1424 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1425 IID = Intrinsic::x86_avx512_permvar_di_256;
1426 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1427 IID = Intrinsic::x86_avx512_permvar_sf_512;
1428 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1429 IID = Intrinsic::x86_avx512_permvar_si_512;
1430 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1431 IID = Intrinsic::x86_avx512_permvar_df_512;
1432 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1433 IID = Intrinsic::x86_avx512_permvar_di_512;
1434 else if (VecWidth == 128 && EltWidth == 16)
1435 IID = Intrinsic::x86_avx512_permvar_hi_128;
1436 else if (VecWidth == 256 && EltWidth == 16)
1437 IID = Intrinsic::x86_avx512_permvar_hi_256;
1438 else if (VecWidth == 512 && EltWidth == 16)
1439 IID = Intrinsic::x86_avx512_permvar_hi_512;
1440 else if (VecWidth == 128 && EltWidth == 8)
1441 IID = Intrinsic::x86_avx512_permvar_qi_128;
1442 else if (VecWidth == 256 && EltWidth == 8)
1443 IID = Intrinsic::x86_avx512_permvar_qi_256;
1444 else if (VecWidth == 512 && EltWidth == 8)
1445 IID = Intrinsic::x86_avx512_permvar_qi_512;
1447 llvm_unreachable("Unexpected intrinsic");
1448 } else if (Name.startswith("dbpsadbw.")) {
1449 if (VecWidth == 128)
1450 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1451 else if (VecWidth == 256)
1452 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1453 else if (VecWidth == 512)
1454 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1456 llvm_unreachable("Unexpected intrinsic");
1457 } else if (Name.startswith("pmultishift.qb.")) {
1458 if (VecWidth == 128)
1459 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1460 else if (VecWidth == 256)
1461 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1462 else if (VecWidth == 512)
1463 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1465 llvm_unreachable("Unexpected intrinsic");
1469 SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1470 CI.arg_operands().end());
1473 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1475 unsigned NumArgs = CI.getNumArgOperands();
1476 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1477 CI.getArgOperand(NumArgs - 2));
1481 /// Upgrade comment in call to inline asm that represents an objc retain release
1483 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1485 if (AsmStr->find("mov\tfp") == 0 &&
1486 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1487 (Pos = AsmStr->find("# marker")) != std::string::npos) {
1488 AsmStr->replace(Pos, 1, ";");
1493 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1494 /// provided to seamlessly integrate with existing context.
1495 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1496 Function *F = CI->getCalledFunction();
1497 LLVMContext &C = CI->getContext();
1498 IRBuilder<> Builder(C);
1499 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1501 assert(F && "Intrinsic call is not direct?");
1504 // Get the Function's name.
1505 StringRef Name = F->getName();
1507 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1508 Name = Name.substr(5);
1510 bool IsX86 = Name.startswith("x86.");
1512 Name = Name.substr(4);
1513 bool IsNVVM = Name.startswith("nvvm.");
1515 Name = Name.substr(5);
1517 if (IsX86 && Name.startswith("sse4a.movnt.")) {
1518 Module *M = F->getParent();
1519 SmallVector<Metadata *, 1> Elts;
1521 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1522 MDNode *Node = MDNode::get(C, Elts);
1524 Value *Arg0 = CI->getArgOperand(0);
1525 Value *Arg1 = CI->getArgOperand(1);
1527 // Nontemporal (unaligned) store of the 0'th element of the float/double
1529 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1530 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1531 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1533 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1535 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1536 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1538 // Remove intrinsic.
1539 CI->eraseFromParent();
1543 if (IsX86 && (Name.startswith("avx.movnt.") ||
1544 Name.startswith("avx512.storent."))) {
1545 Module *M = F->getParent();
1546 SmallVector<Metadata *, 1> Elts;
1548 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1549 MDNode *Node = MDNode::get(C, Elts);
1551 Value *Arg0 = CI->getArgOperand(0);
1552 Value *Arg1 = CI->getArgOperand(1);
1554 // Convert the type of the pointer to a pointer to the stored type.
1555 Value *BC = Builder.CreateBitCast(Arg0,
1556 PointerType::getUnqual(Arg1->getType()),
1558 VectorType *VTy = cast<VectorType>(Arg1->getType());
1559 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1560 VTy->getBitWidth() / 8);
1561 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1563 // Remove intrinsic.
1564 CI->eraseFromParent();
1568 if (IsX86 && Name == "sse2.storel.dq") {
1569 Value *Arg0 = CI->getArgOperand(0);
1570 Value *Arg1 = CI->getArgOperand(1);
1572 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1573 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1574 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1575 Value *BC = Builder.CreateBitCast(Arg0,
1576 PointerType::getUnqual(Elt->getType()),
1578 Builder.CreateAlignedStore(Elt, BC, 1);
1580 // Remove intrinsic.
1581 CI->eraseFromParent();
1585 if (IsX86 && (Name.startswith("sse.storeu.") ||
1586 Name.startswith("sse2.storeu.") ||
1587 Name.startswith("avx.storeu."))) {
1588 Value *Arg0 = CI->getArgOperand(0);
1589 Value *Arg1 = CI->getArgOperand(1);
1591 Arg0 = Builder.CreateBitCast(Arg0,
1592 PointerType::getUnqual(Arg1->getType()),
1594 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1596 // Remove intrinsic.
1597 CI->eraseFromParent();
1601 if (IsX86 && Name == "avx512.mask.store.ss") {
1602 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1603 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1606 // Remove intrinsic.
1607 CI->eraseFromParent();
1611 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1612 // "avx512.mask.storeu." or "avx512.mask.store."
1613 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1614 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1615 CI->getArgOperand(2), Aligned);
1617 // Remove intrinsic.
1618 CI->eraseFromParent();
1623 // Upgrade packed integer vector compare intrinsics to compare instructions.
1624 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1625 Name.startswith("avx2.pcmp"))) {
1626 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1627 bool CmpEq = Name[9] == 'e';
1628 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1629 CI->getArgOperand(0), CI->getArgOperand(1));
1630 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1631 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1632 Type *ExtTy = Type::getInt32Ty(C);
1633 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1634 ExtTy = Type::getInt64Ty(C);
1635 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1636 ExtTy->getPrimitiveSizeInBits();
1637 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1638 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1639 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1640 Name == "sse2.sqrt.sd")) {
1641 Value *Vec = CI->getArgOperand(0);
1642 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1643 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1644 Intrinsic::sqrt, Elt0->getType());
1645 Elt0 = Builder.CreateCall(Intr, Elt0);
1646 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1647 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1648 Name.startswith("sse2.sqrt.p") ||
1649 Name.startswith("sse.sqrt.p"))) {
1650 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1653 {CI->getArgOperand(0)});
1654 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1655 if (CI->getNumArgOperands() == 4 &&
1656 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1657 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1658 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1659 : Intrinsic::x86_avx512_sqrt_pd_512;
1661 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1662 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1665 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1668 {CI->getArgOperand(0)});
1670 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1671 CI->getArgOperand(1));
1672 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1673 Name.startswith("avx512.ptestnm"))) {
1674 Value *Op0 = CI->getArgOperand(0);
1675 Value *Op1 = CI->getArgOperand(1);
1676 Value *Mask = CI->getArgOperand(2);
1677 Rep = Builder.CreateAnd(Op0, Op1);
1678 llvm::Type *Ty = Op0->getType();
1679 Value *Zero = llvm::Constant::getNullValue(Ty);
1680 ICmpInst::Predicate Pred =
1681 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1682 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1683 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1684 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1686 CI->getArgOperand(1)->getType()->getVectorNumElements();
1687 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1688 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1689 CI->getArgOperand(1));
1690 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1691 unsigned NumElts = CI->getType()->getScalarSizeInBits();
1692 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1693 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1694 uint32_t Indices[64];
1695 for (unsigned i = 0; i != NumElts; ++i)
1698 // First extract half of each vector. This gives better codegen than
1699 // doing it in a single shuffle.
1700 LHS = Builder.CreateShuffleVector(LHS, LHS,
1701 makeArrayRef(Indices, NumElts / 2));
1702 RHS = Builder.CreateShuffleVector(RHS, RHS,
1703 makeArrayRef(Indices, NumElts / 2));
1704 // Concat the vectors.
1705 // NOTE: Operands have to be swapped to match intrinsic definition.
1706 Rep = Builder.CreateShuffleVector(RHS, LHS,
1707 makeArrayRef(Indices, NumElts));
1708 Rep = Builder.CreateBitCast(Rep, CI->getType());
1709 } else if (IsX86 && Name == "avx512.kand.w") {
1710 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1711 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1712 Rep = Builder.CreateAnd(LHS, RHS);
1713 Rep = Builder.CreateBitCast(Rep, CI->getType());
1714 } else if (IsX86 && Name == "avx512.kandn.w") {
1715 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1716 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1717 LHS = Builder.CreateNot(LHS);
1718 Rep = Builder.CreateAnd(LHS, RHS);
1719 Rep = Builder.CreateBitCast(Rep, CI->getType());
1720 } else if (IsX86 && Name == "avx512.kor.w") {
1721 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1722 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1723 Rep = Builder.CreateOr(LHS, RHS);
1724 Rep = Builder.CreateBitCast(Rep, CI->getType());
1725 } else if (IsX86 && Name == "avx512.kxor.w") {
1726 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1727 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1728 Rep = Builder.CreateXor(LHS, RHS);
1729 Rep = Builder.CreateBitCast(Rep, CI->getType());
1730 } else if (IsX86 && Name == "avx512.kxnor.w") {
1731 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1732 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1733 LHS = Builder.CreateNot(LHS);
1734 Rep = Builder.CreateXor(LHS, RHS);
1735 Rep = Builder.CreateBitCast(Rep, CI->getType());
1736 } else if (IsX86 && Name == "avx512.knot.w") {
1737 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1738 Rep = Builder.CreateNot(Rep);
1739 Rep = Builder.CreateBitCast(Rep, CI->getType());
1741 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1742 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1743 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1744 Rep = Builder.CreateOr(LHS, RHS);
1745 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1747 if (Name[14] == 'c')
1748 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1750 C = ConstantInt::getNullValue(Builder.getInt16Ty());
1751 Rep = Builder.CreateICmpEQ(Rep, C);
1752 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1753 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1754 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1755 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1756 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1757 Type *I32Ty = Type::getInt32Ty(C);
1758 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1759 ConstantInt::get(I32Ty, 0));
1760 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1761 ConstantInt::get(I32Ty, 0));
1763 if (Name.contains(".add."))
1764 EltOp = Builder.CreateFAdd(Elt0, Elt1);
1765 else if (Name.contains(".sub."))
1766 EltOp = Builder.CreateFSub(Elt0, Elt1);
1767 else if (Name.contains(".mul."))
1768 EltOp = Builder.CreateFMul(Elt0, Elt1);
1770 EltOp = Builder.CreateFDiv(Elt0, Elt1);
1771 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1772 ConstantInt::get(I32Ty, 0));
1773 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1774 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1775 bool CmpEq = Name[16] == 'e';
1776 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1777 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1778 Type *OpTy = CI->getArgOperand(0)->getType();
1779 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1782 default: llvm_unreachable("Unexpected intrinsic");
1783 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1784 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1785 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1788 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1789 { CI->getOperand(0), CI->getArgOperand(1) });
1790 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1791 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1792 Type *OpTy = CI->getArgOperand(0)->getType();
1793 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1794 unsigned EltWidth = OpTy->getScalarSizeInBits();
1796 if (VecWidth == 128 && EltWidth == 32)
1797 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1798 else if (VecWidth == 256 && EltWidth == 32)
1799 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1800 else if (VecWidth == 512 && EltWidth == 32)
1801 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1802 else if (VecWidth == 128 && EltWidth == 64)
1803 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1804 else if (VecWidth == 256 && EltWidth == 64)
1805 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1806 else if (VecWidth == 512 && EltWidth == 64)
1807 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1809 llvm_unreachable("Unexpected intrinsic");
1811 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1812 { CI->getOperand(0), CI->getArgOperand(1) });
1813 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1814 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1815 Type *OpTy = CI->getArgOperand(0)->getType();
1816 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1817 unsigned EltWidth = OpTy->getScalarSizeInBits();
1819 if (VecWidth == 128 && EltWidth == 32)
1820 IID = Intrinsic::x86_avx512_cmp_ps_128;
1821 else if (VecWidth == 256 && EltWidth == 32)
1822 IID = Intrinsic::x86_avx512_cmp_ps_256;
1823 else if (VecWidth == 512 && EltWidth == 32)
1824 IID = Intrinsic::x86_avx512_cmp_ps_512;
1825 else if (VecWidth == 128 && EltWidth == 64)
1826 IID = Intrinsic::x86_avx512_cmp_pd_128;
1827 else if (VecWidth == 256 && EltWidth == 64)
1828 IID = Intrinsic::x86_avx512_cmp_pd_256;
1829 else if (VecWidth == 512 && EltWidth == 64)
1830 IID = Intrinsic::x86_avx512_cmp_pd_512;
1832 llvm_unreachable("Unexpected intrinsic");
1834 SmallVector<Value *, 4> Args;
1835 Args.push_back(CI->getArgOperand(0));
1836 Args.push_back(CI->getArgOperand(1));
1837 Args.push_back(CI->getArgOperand(2));
1838 if (CI->getNumArgOperands() == 5)
1839 Args.push_back(CI->getArgOperand(4));
1841 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1843 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1844 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1846 // Integer compare intrinsics.
1847 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1848 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1849 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1850 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1851 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1852 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1853 Name.startswith("avx512.cvtw2mask.") ||
1854 Name.startswith("avx512.cvtd2mask.") ||
1855 Name.startswith("avx512.cvtq2mask."))) {
1856 Value *Op = CI->getArgOperand(0);
1857 Value *Zero = llvm::Constant::getNullValue(Op->getType());
1858 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1859 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1860 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1861 Name == "ssse3.pabs.w.128" ||
1862 Name == "ssse3.pabs.d.128" ||
1863 Name.startswith("avx2.pabs") ||
1864 Name.startswith("avx512.mask.pabs"))) {
1865 Rep = upgradeAbs(Builder, *CI);
1866 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1867 Name == "sse2.pmaxs.w" ||
1868 Name == "sse41.pmaxsd" ||
1869 Name.startswith("avx2.pmaxs") ||
1870 Name.startswith("avx512.mask.pmaxs"))) {
1871 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1872 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1873 Name == "sse41.pmaxuw" ||
1874 Name == "sse41.pmaxud" ||
1875 Name.startswith("avx2.pmaxu") ||
1876 Name.startswith("avx512.mask.pmaxu"))) {
1877 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1878 } else if (IsX86 && (Name == "sse41.pminsb" ||
1879 Name == "sse2.pmins.w" ||
1880 Name == "sse41.pminsd" ||
1881 Name.startswith("avx2.pmins") ||
1882 Name.startswith("avx512.mask.pmins"))) {
1883 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1884 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1885 Name == "sse41.pminuw" ||
1886 Name == "sse41.pminud" ||
1887 Name.startswith("avx2.pminu") ||
1888 Name.startswith("avx512.mask.pminu"))) {
1889 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1890 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1891 Name == "avx2.pmulu.dq" ||
1892 Name == "avx512.pmulu.dq.512" ||
1893 Name.startswith("avx512.mask.pmulu.dq."))) {
1894 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1895 } else if (IsX86 && (Name == "sse41.pmuldq" ||
1896 Name == "avx2.pmul.dq" ||
1897 Name == "avx512.pmul.dq.512" ||
1898 Name.startswith("avx512.mask.pmul.dq."))) {
1899 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1900 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1901 Name == "sse2.cvtsi2sd" ||
1902 Name == "sse.cvtsi642ss" ||
1903 Name == "sse2.cvtsi642sd")) {
1904 Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1905 CI->getType()->getVectorElementType());
1906 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1907 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1908 Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1909 CI->getType()->getVectorElementType());
1910 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1911 } else if (IsX86 && Name == "sse2.cvtss2sd") {
1912 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1913 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1914 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1915 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1916 Name == "sse2.cvtdq2ps" ||
1917 Name == "avx.cvtdq2.pd.256" ||
1918 Name == "avx.cvtdq2.ps.256" ||
1919 Name.startswith("avx512.mask.cvtdq2pd.") ||
1920 Name.startswith("avx512.mask.cvtudq2pd.") ||
1921 Name == "avx512.mask.cvtdq2ps.128" ||
1922 Name == "avx512.mask.cvtdq2ps.256" ||
1923 Name == "avx512.mask.cvtudq2ps.128" ||
1924 Name == "avx512.mask.cvtudq2ps.256" ||
1925 Name == "avx512.mask.cvtqq2pd.128" ||
1926 Name == "avx512.mask.cvtqq2pd.256" ||
1927 Name == "avx512.mask.cvtuqq2pd.128" ||
1928 Name == "avx512.mask.cvtuqq2pd.256" ||
1929 Name == "sse2.cvtps2pd" ||
1930 Name == "avx.cvt.ps2.pd.256" ||
1931 Name == "avx512.mask.cvtps2pd.128" ||
1932 Name == "avx512.mask.cvtps2pd.256")) {
1933 Type *DstTy = CI->getType();
1934 Rep = CI->getArgOperand(0);
1936 unsigned NumDstElts = DstTy->getVectorNumElements();
1937 if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1938 assert(NumDstElts == 2 && "Unexpected vector size");
1939 uint32_t ShuffleMask[2] = { 0, 1 };
1940 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1943 bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1944 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1946 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1947 else if (IsUnsigned)
1948 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1950 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1952 if (CI->getNumArgOperands() == 3)
1953 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1954 CI->getArgOperand(1));
1955 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1956 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1957 CI->getArgOperand(1), CI->getArgOperand(2),
1959 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1960 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1961 CI->getArgOperand(1),CI->getArgOperand(2),
1963 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1964 Type *ResultTy = CI->getType();
1965 Type *PtrTy = ResultTy->getVectorElementType();
1967 // Cast the pointer to element type.
1968 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1969 llvm::PointerType::getUnqual(PtrTy));
1971 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1972 ResultTy->getVectorNumElements());
1974 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
1975 Intrinsic::masked_expandload,
1977 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1978 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1979 Type *ResultTy = CI->getArgOperand(1)->getType();
1980 Type *PtrTy = ResultTy->getVectorElementType();
1982 // Cast the pointer to element type.
1983 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1984 llvm::PointerType::getUnqual(PtrTy));
1986 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1987 ResultTy->getVectorNumElements());
1989 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
1990 Intrinsic::masked_compressstore,
1992 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1993 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1994 Intrinsic::ID intID;
1995 if (Name.endswith("ub"))
1996 intID = Intrinsic::x86_xop_vpcomub;
1997 else if (Name.endswith("uw"))
1998 intID = Intrinsic::x86_xop_vpcomuw;
1999 else if (Name.endswith("ud"))
2000 intID = Intrinsic::x86_xop_vpcomud;
2001 else if (Name.endswith("uq"))
2002 intID = Intrinsic::x86_xop_vpcomuq;
2003 else if (Name.endswith("b"))
2004 intID = Intrinsic::x86_xop_vpcomb;
2005 else if (Name.endswith("w"))
2006 intID = Intrinsic::x86_xop_vpcomw;
2007 else if (Name.endswith("d"))
2008 intID = Intrinsic::x86_xop_vpcomd;
2009 else if (Name.endswith("q"))
2010 intID = Intrinsic::x86_xop_vpcomq;
2012 llvm_unreachable("Unknown suffix");
2014 Name = Name.substr(9); // strip off "xop.vpcom"
2016 if (Name.startswith("lt"))
2018 else if (Name.startswith("le"))
2020 else if (Name.startswith("gt"))
2022 else if (Name.startswith("ge"))
2024 else if (Name.startswith("eq"))
2026 else if (Name.startswith("ne"))
2028 else if (Name.startswith("false"))
2030 else if (Name.startswith("true"))
2033 llvm_unreachable("Unknown condition");
2035 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
2037 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
2038 Builder.getInt8(Imm)});
2039 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2040 Value *Sel = CI->getArgOperand(2);
2041 Value *NotSel = Builder.CreateNot(Sel);
2042 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2043 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2044 Rep = Builder.CreateOr(Sel0, Sel1);
2045 } else if (IsX86 && (Name.startswith("xop.vprot") ||
2046 Name.startswith("avx512.prol") ||
2047 Name.startswith("avx512.mask.prol"))) {
2048 Rep = upgradeX86Rotate(Builder, *CI, false);
2049 } else if (IsX86 && (Name.startswith("avx512.pror") ||
2050 Name.startswith("avx512.mask.pror"))) {
2051 Rep = upgradeX86Rotate(Builder, *CI, true);
2052 } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2053 Name.startswith("avx512.mask.vpshld") ||
2054 Name.startswith("avx512.maskz.vpshld"))) {
2055 bool ZeroMask = Name[11] == 'z';
2056 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2057 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2058 Name.startswith("avx512.mask.vpshrd") ||
2059 Name.startswith("avx512.maskz.vpshrd"))) {
2060 bool ZeroMask = Name[11] == 'z';
2061 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2062 } else if (IsX86 && Name == "sse42.crc32.64.8") {
2063 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2064 Intrinsic::x86_sse42_crc32_32_8);
2065 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2066 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2067 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2068 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2069 Name.startswith("avx512.vbroadcast.s"))) {
2070 // Replace broadcasts with a series of insertelements.
2071 Type *VecTy = CI->getType();
2072 Type *EltTy = VecTy->getVectorElementType();
2073 unsigned EltNum = VecTy->getVectorNumElements();
2074 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2075 EltTy->getPointerTo());
2076 Value *Load = Builder.CreateLoad(EltTy, Cast);
2077 Type *I32Ty = Type::getInt32Ty(C);
2078 Rep = UndefValue::get(VecTy);
2079 for (unsigned I = 0; I < EltNum; ++I)
2080 Rep = Builder.CreateInsertElement(Rep, Load,
2081 ConstantInt::get(I32Ty, I));
2082 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2083 Name.startswith("sse41.pmovzx") ||
2084 Name.startswith("avx2.pmovsx") ||
2085 Name.startswith("avx2.pmovzx") ||
2086 Name.startswith("avx512.mask.pmovsx") ||
2087 Name.startswith("avx512.mask.pmovzx"))) {
2088 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2089 VectorType *DstTy = cast<VectorType>(CI->getType());
2090 unsigned NumDstElts = DstTy->getNumElements();
2092 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2093 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2094 for (unsigned i = 0; i != NumDstElts; ++i)
2097 Value *SV = Builder.CreateShuffleVector(
2098 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2100 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2101 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2102 : Builder.CreateZExt(SV, DstTy);
2103 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2104 if (CI->getNumArgOperands() == 3)
2105 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2106 CI->getArgOperand(1));
2107 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2108 Name == "avx2.vbroadcasti128")) {
2109 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2110 Type *EltTy = CI->getType()->getVectorElementType();
2111 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2112 Type *VT = VectorType::get(EltTy, NumSrcElts);
2113 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2114 PointerType::getUnqual(VT));
2115 Value *Load = Builder.CreateAlignedLoad(Op, 1);
2116 if (NumSrcElts == 2)
2117 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2120 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2121 { 0, 1, 2, 3, 0, 1, 2, 3 });
2122 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2123 Name.startswith("avx512.mask.shuf.f"))) {
2124 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2125 Type *VT = CI->getType();
2126 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2127 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2128 unsigned ControlBitsMask = NumLanes - 1;
2129 unsigned NumControlBits = NumLanes / 2;
2130 SmallVector<uint32_t, 8> ShuffleMask(0);
2132 for (unsigned l = 0; l != NumLanes; ++l) {
2133 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2134 // We actually need the other source.
2135 if (l >= NumLanes / 2)
2136 LaneMask += NumLanes;
2137 for (unsigned i = 0; i != NumElementsInLane; ++i)
2138 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2140 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2141 CI->getArgOperand(1), ShuffleMask);
2142 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2143 CI->getArgOperand(3));
2144 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2145 Name.startswith("avx512.mask.broadcasti"))) {
2146 unsigned NumSrcElts =
2147 CI->getArgOperand(0)->getType()->getVectorNumElements();
2148 unsigned NumDstElts = CI->getType()->getVectorNumElements();
2150 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2151 for (unsigned i = 0; i != NumDstElts; ++i)
2152 ShuffleMask[i] = i % NumSrcElts;
2154 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2155 CI->getArgOperand(0),
2157 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2158 CI->getArgOperand(1));
2159 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2160 Name.startswith("avx2.vbroadcast") ||
2161 Name.startswith("avx512.pbroadcast") ||
2162 Name.startswith("avx512.mask.broadcast.s"))) {
2163 // Replace vp?broadcasts with a vector shuffle.
2164 Value *Op = CI->getArgOperand(0);
2165 unsigned NumElts = CI->getType()->getVectorNumElements();
2166 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2167 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2168 Constant::getNullValue(MaskTy));
2170 if (CI->getNumArgOperands() == 3)
2171 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2172 CI->getArgOperand(1));
2173 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2174 Name.startswith("sse2.psubs.") ||
2175 Name.startswith("avx2.padds.") ||
2176 Name.startswith("avx2.psubs.") ||
2177 Name.startswith("avx512.padds.") ||
2178 Name.startswith("avx512.psubs.") ||
2179 Name.startswith("avx512.mask.padds.") ||
2180 Name.startswith("avx512.mask.psubs."))) {
2181 bool IsAdd = Name.contains(".padds");
2182 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2183 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2184 Name.startswith("sse2.psubus.") ||
2185 Name.startswith("avx2.paddus.") ||
2186 Name.startswith("avx2.psubus.") ||
2187 Name.startswith("avx512.mask.paddus.") ||
2188 Name.startswith("avx512.mask.psubus."))) {
2189 bool IsAdd = Name.contains(".paddus");
2190 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2191 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2192 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2193 CI->getArgOperand(1),
2194 CI->getArgOperand(2),
2195 CI->getArgOperand(3),
2196 CI->getArgOperand(4),
2198 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2199 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2200 CI->getArgOperand(1),
2201 CI->getArgOperand(2),
2202 CI->getArgOperand(3),
2203 CI->getArgOperand(4),
2205 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2206 Name == "avx2.psll.dq")) {
2207 // 128/256-bit shift left specified in bits.
2208 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2209 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2210 Shift / 8); // Shift is in bits.
2211 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2212 Name == "avx2.psrl.dq")) {
2213 // 128/256-bit shift right specified in bits.
2214 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2215 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2216 Shift / 8); // Shift is in bits.
2217 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2218 Name == "avx2.psll.dq.bs" ||
2219 Name == "avx512.psll.dq.512")) {
2220 // 128/256/512-bit shift left specified in bytes.
2221 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2222 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2223 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2224 Name == "avx2.psrl.dq.bs" ||
2225 Name == "avx512.psrl.dq.512")) {
2226 // 128/256/512-bit shift right specified in bytes.
2227 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2228 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2229 } else if (IsX86 && (Name == "sse41.pblendw" ||
2230 Name.startswith("sse41.blendp") ||
2231 Name.startswith("avx.blend.p") ||
2232 Name == "avx2.pblendw" ||
2233 Name.startswith("avx2.pblendd."))) {
2234 Value *Op0 = CI->getArgOperand(0);
2235 Value *Op1 = CI->getArgOperand(1);
2236 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2237 VectorType *VecTy = cast<VectorType>(CI->getType());
2238 unsigned NumElts = VecTy->getNumElements();
2240 SmallVector<uint32_t, 16> Idxs(NumElts);
2241 for (unsigned i = 0; i != NumElts; ++i)
2242 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2244 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2245 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2246 Name == "avx2.vinserti128" ||
2247 Name.startswith("avx512.mask.insert"))) {
2248 Value *Op0 = CI->getArgOperand(0);
2249 Value *Op1 = CI->getArgOperand(1);
2250 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2251 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2252 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2253 unsigned Scale = DstNumElts / SrcNumElts;
2255 // Mask off the high bits of the immediate value; hardware ignores those.
2258 // Extend the second operand into a vector the size of the destination.
2259 Value *UndefV = UndefValue::get(Op1->getType());
2260 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2261 for (unsigned i = 0; i != SrcNumElts; ++i)
2263 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2264 Idxs[i] = SrcNumElts;
2265 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2267 // Insert the second operand into the first operand.
2269 // Note that there is no guarantee that instruction lowering will actually
2270 // produce a vinsertf128 instruction for the created shuffles. In
2271 // particular, the 0 immediate case involves no lane changes, so it can
2272 // be handled as a blend.
2274 // Example of shuffle mask for 32-bit elements:
2275 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2276 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2278 // First fill with identify mask.
2279 for (unsigned i = 0; i != DstNumElts; ++i)
2281 // Then replace the elements where we need to insert.
2282 for (unsigned i = 0; i != SrcNumElts; ++i)
2283 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2284 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2286 // If the intrinsic has a mask operand, handle that.
2287 if (CI->getNumArgOperands() == 5)
2288 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2289 CI->getArgOperand(3));
2290 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2291 Name == "avx2.vextracti128" ||
2292 Name.startswith("avx512.mask.vextract"))) {
2293 Value *Op0 = CI->getArgOperand(0);
2294 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2295 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2296 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2297 unsigned Scale = SrcNumElts / DstNumElts;
2299 // Mask off the high bits of the immediate value; hardware ignores those.
2302 // Get indexes for the subvector of the input vector.
2303 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2304 for (unsigned i = 0; i != DstNumElts; ++i) {
2305 Idxs[i] = i + (Imm * DstNumElts);
2307 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2309 // If the intrinsic has a mask operand, handle that.
2310 if (CI->getNumArgOperands() == 4)
2311 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2312 CI->getArgOperand(2));
2313 } else if (!IsX86 && Name == "stackprotectorcheck") {
2315 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2316 Name.startswith("avx512.mask.perm.di."))) {
2317 Value *Op0 = CI->getArgOperand(0);
2318 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2319 VectorType *VecTy = cast<VectorType>(CI->getType());
2320 unsigned NumElts = VecTy->getNumElements();
2322 SmallVector<uint32_t, 8> Idxs(NumElts);
2323 for (unsigned i = 0; i != NumElts; ++i)
2324 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2326 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2328 if (CI->getNumArgOperands() == 4)
2329 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2330 CI->getArgOperand(2));
2331 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2332 Name == "avx2.vperm2i128")) {
2333 // The immediate permute control byte looks like this:
2334 // [1:0] - select 128 bits from sources for low half of destination
2336 // [3] - zero low half of destination
2337 // [5:4] - select 128 bits from sources for high half of destination
2339 // [7] - zero high half of destination
2341 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2343 unsigned NumElts = CI->getType()->getVectorNumElements();
2344 unsigned HalfSize = NumElts / 2;
2345 SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2347 // Determine which operand(s) are actually in use for this instruction.
2348 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2349 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2351 // If needed, replace operands based on zero mask.
2352 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2353 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2355 // Permute low half of result.
2356 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2357 for (unsigned i = 0; i < HalfSize; ++i)
2358 ShuffleMask[i] = StartIndex + i;
2360 // Permute high half of result.
2361 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2362 for (unsigned i = 0; i < HalfSize; ++i)
2363 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2365 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2367 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2368 Name == "sse2.pshuf.d" ||
2369 Name.startswith("avx512.mask.vpermil.p") ||
2370 Name.startswith("avx512.mask.pshuf.d."))) {
2371 Value *Op0 = CI->getArgOperand(0);
2372 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2373 VectorType *VecTy = cast<VectorType>(CI->getType());
2374 unsigned NumElts = VecTy->getNumElements();
2375 // Calculate the size of each index in the immediate.
2376 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2377 unsigned IdxMask = ((1 << IdxSize) - 1);
2379 SmallVector<uint32_t, 8> Idxs(NumElts);
2380 // Lookup the bits for this element, wrapping around the immediate every
2381 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2382 // to offset by the first index of each group.
2383 for (unsigned i = 0; i != NumElts; ++i)
2384 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2386 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2388 if (CI->getNumArgOperands() == 4)
2389 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2390 CI->getArgOperand(2));
2391 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2392 Name.startswith("avx512.mask.pshufl.w."))) {
2393 Value *Op0 = CI->getArgOperand(0);
2394 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2395 unsigned NumElts = CI->getType()->getVectorNumElements();
2397 SmallVector<uint32_t, 16> Idxs(NumElts);
2398 for (unsigned l = 0; l != NumElts; l += 8) {
2399 for (unsigned i = 0; i != 4; ++i)
2400 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2401 for (unsigned i = 4; i != 8; ++i)
2402 Idxs[i + l] = i + l;
2405 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2407 if (CI->getNumArgOperands() == 4)
2408 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2409 CI->getArgOperand(2));
2410 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2411 Name.startswith("avx512.mask.pshufh.w."))) {
2412 Value *Op0 = CI->getArgOperand(0);
2413 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2414 unsigned NumElts = CI->getType()->getVectorNumElements();
2416 SmallVector<uint32_t, 16> Idxs(NumElts);
2417 for (unsigned l = 0; l != NumElts; l += 8) {
2418 for (unsigned i = 0; i != 4; ++i)
2419 Idxs[i + l] = i + l;
2420 for (unsigned i = 0; i != 4; ++i)
2421 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2424 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2426 if (CI->getNumArgOperands() == 4)
2427 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2428 CI->getArgOperand(2));
2429 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2430 Value *Op0 = CI->getArgOperand(0);
2431 Value *Op1 = CI->getArgOperand(1);
2432 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2433 unsigned NumElts = CI->getType()->getVectorNumElements();
2435 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2436 unsigned HalfLaneElts = NumLaneElts / 2;
2438 SmallVector<uint32_t, 16> Idxs(NumElts);
2439 for (unsigned i = 0; i != NumElts; ++i) {
2440 // Base index is the starting element of the lane.
2441 Idxs[i] = i - (i % NumLaneElts);
2442 // If we are half way through the lane switch to the other source.
2443 if ((i % NumLaneElts) >= HalfLaneElts)
2445 // Now select the specific element. By adding HalfLaneElts bits from
2446 // the immediate. Wrapping around the immediate every 8-bits.
2447 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2450 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2452 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2453 CI->getArgOperand(3));
2454 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2455 Name.startswith("avx512.mask.movshdup") ||
2456 Name.startswith("avx512.mask.movsldup"))) {
2457 Value *Op0 = CI->getArgOperand(0);
2458 unsigned NumElts = CI->getType()->getVectorNumElements();
2459 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2461 unsigned Offset = 0;
2462 if (Name.startswith("avx512.mask.movshdup."))
2465 SmallVector<uint32_t, 16> Idxs(NumElts);
2466 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2467 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2468 Idxs[i + l + 0] = i + l + Offset;
2469 Idxs[i + l + 1] = i + l + Offset;
2472 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2474 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2475 CI->getArgOperand(1));
2476 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2477 Name.startswith("avx512.mask.unpckl."))) {
2478 Value *Op0 = CI->getArgOperand(0);
2479 Value *Op1 = CI->getArgOperand(1);
2480 int NumElts = CI->getType()->getVectorNumElements();
2481 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2483 SmallVector<uint32_t, 64> Idxs(NumElts);
2484 for (int l = 0; l != NumElts; l += NumLaneElts)
2485 for (int i = 0; i != NumLaneElts; ++i)
2486 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2488 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2490 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2491 CI->getArgOperand(2));
2492 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2493 Name.startswith("avx512.mask.unpckh."))) {
2494 Value *Op0 = CI->getArgOperand(0);
2495 Value *Op1 = CI->getArgOperand(1);
2496 int NumElts = CI->getType()->getVectorNumElements();
2497 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2499 SmallVector<uint32_t, 64> Idxs(NumElts);
2500 for (int l = 0; l != NumElts; l += NumLaneElts)
2501 for (int i = 0; i != NumLaneElts; ++i)
2502 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2504 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2506 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2507 CI->getArgOperand(2));
2508 } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2509 Name.startswith("avx512.mask.pand."))) {
2510 VectorType *FTy = cast<VectorType>(CI->getType());
2511 VectorType *ITy = VectorType::getInteger(FTy);
2512 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2513 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2514 Rep = Builder.CreateBitCast(Rep, FTy);
2515 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2516 CI->getArgOperand(2));
2517 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2518 Name.startswith("avx512.mask.pandn."))) {
2519 VectorType *FTy = cast<VectorType>(CI->getType());
2520 VectorType *ITy = VectorType::getInteger(FTy);
2521 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2522 Rep = Builder.CreateAnd(Rep,
2523 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2524 Rep = Builder.CreateBitCast(Rep, FTy);
2525 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2526 CI->getArgOperand(2));
2527 } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2528 Name.startswith("avx512.mask.por."))) {
2529 VectorType *FTy = cast<VectorType>(CI->getType());
2530 VectorType *ITy = VectorType::getInteger(FTy);
2531 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2532 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2533 Rep = Builder.CreateBitCast(Rep, FTy);
2534 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2535 CI->getArgOperand(2));
2536 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2537 Name.startswith("avx512.mask.pxor."))) {
2538 VectorType *FTy = cast<VectorType>(CI->getType());
2539 VectorType *ITy = VectorType::getInteger(FTy);
2540 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2541 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2542 Rep = Builder.CreateBitCast(Rep, FTy);
2543 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2544 CI->getArgOperand(2));
2545 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2546 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2547 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2548 CI->getArgOperand(2));
2549 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2550 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2551 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2552 CI->getArgOperand(2));
2553 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2554 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2555 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2556 CI->getArgOperand(2));
2557 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2558 if (Name.endswith(".512")) {
2560 if (Name[17] == 's')
2561 IID = Intrinsic::x86_avx512_add_ps_512;
2563 IID = Intrinsic::x86_avx512_add_pd_512;
2565 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2566 { CI->getArgOperand(0), CI->getArgOperand(1),
2567 CI->getArgOperand(4) });
2569 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2571 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2572 CI->getArgOperand(2));
2573 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2574 if (Name.endswith(".512")) {
2576 if (Name[17] == 's')
2577 IID = Intrinsic::x86_avx512_div_ps_512;
2579 IID = Intrinsic::x86_avx512_div_pd_512;
2581 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2582 { CI->getArgOperand(0), CI->getArgOperand(1),
2583 CI->getArgOperand(4) });
2585 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2587 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2588 CI->getArgOperand(2));
2589 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2590 if (Name.endswith(".512")) {
2592 if (Name[17] == 's')
2593 IID = Intrinsic::x86_avx512_mul_ps_512;
2595 IID = Intrinsic::x86_avx512_mul_pd_512;
2597 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2598 { CI->getArgOperand(0), CI->getArgOperand(1),
2599 CI->getArgOperand(4) });
2601 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2603 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2604 CI->getArgOperand(2));
2605 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2606 if (Name.endswith(".512")) {
2608 if (Name[17] == 's')
2609 IID = Intrinsic::x86_avx512_sub_ps_512;
2611 IID = Intrinsic::x86_avx512_sub_pd_512;
2613 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2614 { CI->getArgOperand(0), CI->getArgOperand(1),
2615 CI->getArgOperand(4) });
2617 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2619 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2620 CI->getArgOperand(2));
2621 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2622 Name.startswith("avx512.mask.min.p")) &&
2623 Name.drop_front(18) == ".512") {
2624 bool IsDouble = Name[17] == 'd';
2625 bool IsMin = Name[13] == 'i';
2626 static const Intrinsic::ID MinMaxTbl[2][2] = {
2627 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2628 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2630 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2632 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2633 { CI->getArgOperand(0), CI->getArgOperand(1),
2634 CI->getArgOperand(4) });
2635 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2636 CI->getArgOperand(2));
2637 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2638 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2641 { CI->getArgOperand(0), Builder.getInt1(false) });
2642 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2643 CI->getArgOperand(1));
2644 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2645 bool IsImmediate = Name[16] == 'i' ||
2646 (Name.size() > 18 && Name[18] == 'i');
2647 bool IsVariable = Name[16] == 'v';
2648 char Size = Name[16] == '.' ? Name[17] :
2649 Name[17] == '.' ? Name[18] :
2650 Name[18] == '.' ? Name[19] :
2654 if (IsVariable && Name[17] != '.') {
2655 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2656 IID = Intrinsic::x86_avx2_psllv_q;
2657 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2658 IID = Intrinsic::x86_avx2_psllv_q_256;
2659 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2660 IID = Intrinsic::x86_avx2_psllv_d;
2661 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2662 IID = Intrinsic::x86_avx2_psllv_d_256;
2663 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2664 IID = Intrinsic::x86_avx512_psllv_w_128;
2665 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2666 IID = Intrinsic::x86_avx512_psllv_w_256;
2667 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2668 IID = Intrinsic::x86_avx512_psllv_w_512;
2670 llvm_unreachable("Unexpected size");
2671 } else if (Name.endswith(".128")) {
2672 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2673 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2674 : Intrinsic::x86_sse2_psll_d;
2675 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2676 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2677 : Intrinsic::x86_sse2_psll_q;
2678 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2679 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2680 : Intrinsic::x86_sse2_psll_w;
2682 llvm_unreachable("Unexpected size");
2683 } else if (Name.endswith(".256")) {
2684 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2685 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2686 : Intrinsic::x86_avx2_psll_d;
2687 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2688 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2689 : Intrinsic::x86_avx2_psll_q;
2690 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2691 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2692 : Intrinsic::x86_avx2_psll_w;
2694 llvm_unreachable("Unexpected size");
2696 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2697 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2698 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2699 Intrinsic::x86_avx512_psll_d_512;
2700 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2701 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2702 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2703 Intrinsic::x86_avx512_psll_q_512;
2704 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2705 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2706 : Intrinsic::x86_avx512_psll_w_512;
2708 llvm_unreachable("Unexpected size");
2711 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2712 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2713 bool IsImmediate = Name[16] == 'i' ||
2714 (Name.size() > 18 && Name[18] == 'i');
2715 bool IsVariable = Name[16] == 'v';
2716 char Size = Name[16] == '.' ? Name[17] :
2717 Name[17] == '.' ? Name[18] :
2718 Name[18] == '.' ? Name[19] :
2722 if (IsVariable && Name[17] != '.') {
2723 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2724 IID = Intrinsic::x86_avx2_psrlv_q;
2725 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2726 IID = Intrinsic::x86_avx2_psrlv_q_256;
2727 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2728 IID = Intrinsic::x86_avx2_psrlv_d;
2729 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2730 IID = Intrinsic::x86_avx2_psrlv_d_256;
2731 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2732 IID = Intrinsic::x86_avx512_psrlv_w_128;
2733 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2734 IID = Intrinsic::x86_avx512_psrlv_w_256;
2735 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2736 IID = Intrinsic::x86_avx512_psrlv_w_512;
2738 llvm_unreachable("Unexpected size");
2739 } else if (Name.endswith(".128")) {
2740 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2741 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2742 : Intrinsic::x86_sse2_psrl_d;
2743 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2744 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2745 : Intrinsic::x86_sse2_psrl_q;
2746 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2747 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2748 : Intrinsic::x86_sse2_psrl_w;
2750 llvm_unreachable("Unexpected size");
2751 } else if (Name.endswith(".256")) {
2752 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2753 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2754 : Intrinsic::x86_avx2_psrl_d;
2755 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2756 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2757 : Intrinsic::x86_avx2_psrl_q;
2758 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2759 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2760 : Intrinsic::x86_avx2_psrl_w;
2762 llvm_unreachable("Unexpected size");
2764 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2765 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2766 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2767 Intrinsic::x86_avx512_psrl_d_512;
2768 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2769 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2770 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2771 Intrinsic::x86_avx512_psrl_q_512;
2772 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2773 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2774 : Intrinsic::x86_avx512_psrl_w_512;
2776 llvm_unreachable("Unexpected size");
2779 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2780 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2781 bool IsImmediate = Name[16] == 'i' ||
2782 (Name.size() > 18 && Name[18] == 'i');
2783 bool IsVariable = Name[16] == 'v';
2784 char Size = Name[16] == '.' ? Name[17] :
2785 Name[17] == '.' ? Name[18] :
2786 Name[18] == '.' ? Name[19] :
2790 if (IsVariable && Name[17] != '.') {
2791 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2792 IID = Intrinsic::x86_avx2_psrav_d;
2793 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2794 IID = Intrinsic::x86_avx2_psrav_d_256;
2795 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2796 IID = Intrinsic::x86_avx512_psrav_w_128;
2797 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2798 IID = Intrinsic::x86_avx512_psrav_w_256;
2799 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2800 IID = Intrinsic::x86_avx512_psrav_w_512;
2802 llvm_unreachable("Unexpected size");
2803 } else if (Name.endswith(".128")) {
2804 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2805 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2806 : Intrinsic::x86_sse2_psra_d;
2807 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2808 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2809 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2810 Intrinsic::x86_avx512_psra_q_128;
2811 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2812 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2813 : Intrinsic::x86_sse2_psra_w;
2815 llvm_unreachable("Unexpected size");
2816 } else if (Name.endswith(".256")) {
2817 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2818 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2819 : Intrinsic::x86_avx2_psra_d;
2820 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2821 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2822 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2823 Intrinsic::x86_avx512_psra_q_256;
2824 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2825 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2826 : Intrinsic::x86_avx2_psra_w;
2828 llvm_unreachable("Unexpected size");
2830 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2831 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2832 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2833 Intrinsic::x86_avx512_psra_d_512;
2834 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2835 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2836 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2837 Intrinsic::x86_avx512_psra_q_512;
2838 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2839 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2840 : Intrinsic::x86_avx512_psra_w_512;
2842 llvm_unreachable("Unexpected size");
2845 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2846 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2847 Rep = upgradeMaskedMove(Builder, *CI);
2848 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2849 Rep = UpgradeMaskToInt(Builder, *CI);
2850 } else if (IsX86 && Name.endswith(".movntdqa")) {
2851 Module *M = F->getParent();
2852 MDNode *Node = MDNode::get(
2853 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2855 Value *Ptr = CI->getArgOperand(0);
2856 VectorType *VTy = cast<VectorType>(CI->getType());
2858 // Convert the type of the pointer to a pointer to the stored type.
2860 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2861 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2862 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2865 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2866 Name.startswith("avx512.mask.pavg"))) {
2867 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2868 // llvm.x86.avx512.mask.pavg.b/w
2869 Value *A = CI->getArgOperand(0);
2870 Value *B = CI->getArgOperand(1);
2871 VectorType *ZextType = VectorType::getExtendedElementVectorType(
2872 cast<VectorType>(A->getType()));
2873 Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2874 Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2875 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2876 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2877 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2878 Rep = Builder.CreateTrunc(ShiftR, A->getType());
2879 if (CI->getNumArgOperands() > 2) {
2880 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2881 CI->getArgOperand(2));
2883 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2884 Name.startswith("fma.vfmsub.") ||
2885 Name.startswith("fma.vfnmadd.") ||
2886 Name.startswith("fma.vfnmsub."))) {
2887 bool NegMul = Name[6] == 'n';
2888 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2889 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
2891 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2892 CI->getArgOperand(2) };
2895 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2896 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2897 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2900 if (NegMul && !IsScalar)
2901 Ops[0] = Builder.CreateFNeg(Ops[0]);
2902 if (NegMul && IsScalar)
2903 Ops[1] = Builder.CreateFNeg(Ops[1]);
2905 Ops[2] = Builder.CreateFNeg(Ops[2]);
2907 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2913 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2915 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2916 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2917 CI->getArgOperand(2) };
2919 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2920 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2921 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2923 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2928 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
2930 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2931 Name.startswith("avx512.maskz.vfmadd.s") ||
2932 Name.startswith("avx512.mask3.vfmadd.s") ||
2933 Name.startswith("avx512.mask3.vfmsub.s") ||
2934 Name.startswith("avx512.mask3.vfnmsub.s"))) {
2935 bool IsMask3 = Name[11] == '3';
2936 bool IsMaskZ = Name[11] == 'z';
2937 // Drop the "avx512.mask." to make it easier.
2938 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2939 bool NegMul = Name[2] == 'n';
2940 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2942 Value *A = CI->getArgOperand(0);
2943 Value *B = CI->getArgOperand(1);
2944 Value *C = CI->getArgOperand(2);
2946 if (NegMul && (IsMask3 || IsMaskZ))
2947 A = Builder.CreateFNeg(A);
2948 if (NegMul && !(IsMask3 || IsMaskZ))
2949 B = Builder.CreateFNeg(B);
2951 C = Builder.CreateFNeg(C);
2953 A = Builder.CreateExtractElement(A, (uint64_t)0);
2954 B = Builder.CreateExtractElement(B, (uint64_t)0);
2955 C = Builder.CreateExtractElement(C, (uint64_t)0);
2957 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2958 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2959 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
2962 if (Name.back() == 'd')
2963 IID = Intrinsic::x86_avx512_vfmadd_f64;
2965 IID = Intrinsic::x86_avx512_vfmadd_f32;
2966 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
2967 Rep = Builder.CreateCall(FMA, Ops);
2969 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
2972 Rep = Builder.CreateCall(FMA, { A, B, C });
2975 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
2978 // For Mask3 with NegAcc, we need to create a new extractelement that
2979 // avoids the negation above.
2980 if (NegAcc && IsMask3)
2981 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
2984 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
2986 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
2988 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2989 Name.startswith("avx512.mask.vfnmadd.p") ||
2990 Name.startswith("avx512.mask.vfnmsub.p") ||
2991 Name.startswith("avx512.mask3.vfmadd.p") ||
2992 Name.startswith("avx512.mask3.vfmsub.p") ||
2993 Name.startswith("avx512.mask3.vfnmsub.p") ||
2994 Name.startswith("avx512.maskz.vfmadd.p"))) {
2995 bool IsMask3 = Name[11] == '3';
2996 bool IsMaskZ = Name[11] == 'z';
2997 // Drop the "avx512.mask." to make it easier.
2998 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2999 bool NegMul = Name[2] == 'n';
3000 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3002 Value *A = CI->getArgOperand(0);
3003 Value *B = CI->getArgOperand(1);
3004 Value *C = CI->getArgOperand(2);
3006 if (NegMul && (IsMask3 || IsMaskZ))
3007 A = Builder.CreateFNeg(A);
3008 if (NegMul && !(IsMask3 || IsMaskZ))
3009 B = Builder.CreateFNeg(B);
3011 C = Builder.CreateFNeg(C);
3013 if (CI->getNumArgOperands() == 5 &&
3014 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3015 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3017 // Check the character before ".512" in string.
3018 if (Name[Name.size()-5] == 's')
3019 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3021 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3023 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3024 { A, B, C, CI->getArgOperand(4) });
3026 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3029 Rep = Builder.CreateCall(FMA, { A, B, C });
3032 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3033 IsMask3 ? CI->getArgOperand(2) :
3034 CI->getArgOperand(0);
3036 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3037 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3038 Name.startswith("fma.vfmsubadd.p"))) {
3039 bool IsSubAdd = Name[7] == 's';
3040 int NumElts = CI->getType()->getVectorNumElements();
3042 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3043 CI->getArgOperand(2) };
3045 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3047 Value *Odd = Builder.CreateCall(FMA, Ops);
3048 Ops[2] = Builder.CreateFNeg(Ops[2]);
3049 Value *Even = Builder.CreateCall(FMA, Ops);
3052 std::swap(Even, Odd);
3054 SmallVector<uint32_t, 32> Idxs(NumElts);
3055 for (int i = 0; i != NumElts; ++i)
3056 Idxs[i] = i + (i % 2) * NumElts;
3058 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3059 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3060 Name.startswith("avx512.mask3.vfmaddsub.p") ||
3061 Name.startswith("avx512.maskz.vfmaddsub.p") ||
3062 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3063 bool IsMask3 = Name[11] == '3';
3064 bool IsMaskZ = Name[11] == 'z';
3065 // Drop the "avx512.mask." to make it easier.
3066 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3067 bool IsSubAdd = Name[3] == 's';
3068 if (CI->getNumArgOperands() == 5 &&
3069 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3070 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3072 // Check the character before ".512" in string.
3073 if (Name[Name.size()-5] == 's')
3074 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3076 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3078 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3079 CI->getArgOperand(2), CI->getArgOperand(4) };
3081 Ops[2] = Builder.CreateFNeg(Ops[2]);
3083 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3084 {CI->getArgOperand(0), CI->getArgOperand(1),
3085 CI->getArgOperand(2), CI->getArgOperand(4)});
3087 int NumElts = CI->getType()->getVectorNumElements();
3089 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3090 CI->getArgOperand(2) };
3092 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3094 Value *Odd = Builder.CreateCall(FMA, Ops);
3095 Ops[2] = Builder.CreateFNeg(Ops[2]);
3096 Value *Even = Builder.CreateCall(FMA, Ops);
3099 std::swap(Even, Odd);
3101 SmallVector<uint32_t, 32> Idxs(NumElts);
3102 for (int i = 0; i != NumElts; ++i)
3103 Idxs[i] = i + (i % 2) * NumElts;
3105 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3108 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3109 IsMask3 ? CI->getArgOperand(2) :
3110 CI->getArgOperand(0);
3112 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3113 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3114 Name.startswith("avx512.maskz.pternlog."))) {
3115 bool ZeroMask = Name[11] == 'z';
3116 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3117 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3119 if (VecWidth == 128 && EltWidth == 32)
3120 IID = Intrinsic::x86_avx512_pternlog_d_128;
3121 else if (VecWidth == 256 && EltWidth == 32)
3122 IID = Intrinsic::x86_avx512_pternlog_d_256;
3123 else if (VecWidth == 512 && EltWidth == 32)
3124 IID = Intrinsic::x86_avx512_pternlog_d_512;
3125 else if (VecWidth == 128 && EltWidth == 64)
3126 IID = Intrinsic::x86_avx512_pternlog_q_128;
3127 else if (VecWidth == 256 && EltWidth == 64)
3128 IID = Intrinsic::x86_avx512_pternlog_q_256;
3129 else if (VecWidth == 512 && EltWidth == 64)
3130 IID = Intrinsic::x86_avx512_pternlog_q_512;
3132 llvm_unreachable("Unexpected intrinsic");
3134 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3135 CI->getArgOperand(2), CI->getArgOperand(3) };
3136 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3138 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3139 : CI->getArgOperand(0);
3140 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3141 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3142 Name.startswith("avx512.maskz.vpmadd52"))) {
3143 bool ZeroMask = Name[11] == 'z';
3144 bool High = Name[20] == 'h' || Name[21] == 'h';
3145 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3147 if (VecWidth == 128 && !High)
3148 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3149 else if (VecWidth == 256 && !High)
3150 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3151 else if (VecWidth == 512 && !High)
3152 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3153 else if (VecWidth == 128 && High)
3154 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3155 else if (VecWidth == 256 && High)
3156 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3157 else if (VecWidth == 512 && High)
3158 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3160 llvm_unreachable("Unexpected intrinsic");
3162 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3163 CI->getArgOperand(2) };
3164 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3166 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3167 : CI->getArgOperand(0);
3168 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3169 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3170 Name.startswith("avx512.mask.vpermt2var.") ||
3171 Name.startswith("avx512.maskz.vpermt2var."))) {
3172 bool ZeroMask = Name[11] == 'z';
3173 bool IndexForm = Name[17] == 'i';
3174 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3175 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3176 Name.startswith("avx512.maskz.vpdpbusd.") ||
3177 Name.startswith("avx512.mask.vpdpbusds.") ||
3178 Name.startswith("avx512.maskz.vpdpbusds."))) {
3179 bool ZeroMask = Name[11] == 'z';
3180 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3181 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3183 if (VecWidth == 128 && !IsSaturating)
3184 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3185 else if (VecWidth == 256 && !IsSaturating)
3186 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3187 else if (VecWidth == 512 && !IsSaturating)
3188 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3189 else if (VecWidth == 128 && IsSaturating)
3190 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3191 else if (VecWidth == 256 && IsSaturating)
3192 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3193 else if (VecWidth == 512 && IsSaturating)
3194 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3196 llvm_unreachable("Unexpected intrinsic");
3198 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3199 CI->getArgOperand(2) };
3200 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3202 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3203 : CI->getArgOperand(0);
3204 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3205 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3206 Name.startswith("avx512.maskz.vpdpwssd.") ||
3207 Name.startswith("avx512.mask.vpdpwssds.") ||
3208 Name.startswith("avx512.maskz.vpdpwssds."))) {
3209 bool ZeroMask = Name[11] == 'z';
3210 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3211 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3213 if (VecWidth == 128 && !IsSaturating)
3214 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3215 else if (VecWidth == 256 && !IsSaturating)
3216 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3217 else if (VecWidth == 512 && !IsSaturating)
3218 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3219 else if (VecWidth == 128 && IsSaturating)
3220 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3221 else if (VecWidth == 256 && IsSaturating)
3222 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3223 else if (VecWidth == 512 && IsSaturating)
3224 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3226 llvm_unreachable("Unexpected intrinsic");
3228 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3229 CI->getArgOperand(2) };
3230 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3232 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3233 : CI->getArgOperand(0);
3234 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3235 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3236 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3237 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3239 if (Name[0] == 'a' && Name.back() == '2')
3240 IID = Intrinsic::x86_addcarry_32;
3241 else if (Name[0] == 'a' && Name.back() == '4')
3242 IID = Intrinsic::x86_addcarry_64;
3243 else if (Name[0] == 's' && Name.back() == '2')
3244 IID = Intrinsic::x86_subborrow_32;
3245 else if (Name[0] == 's' && Name.back() == '4')
3246 IID = Intrinsic::x86_subborrow_64;
3248 llvm_unreachable("Unexpected intrinsic");
3250 // Make a call with 3 operands.
3251 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3252 CI->getArgOperand(2)};
3253 Value *NewCall = Builder.CreateCall(
3254 Intrinsic::getDeclaration(CI->getModule(), IID),
3257 // Extract the second result and store it.
3258 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3259 // Cast the pointer to the right type.
3260 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3261 llvm::PointerType::getUnqual(Data->getType()));
3262 Builder.CreateAlignedStore(Data, Ptr, 1);
3263 // Replace the original call result with the first result of the new call.
3264 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3266 CI->replaceAllUsesWith(CF);
3268 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3269 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3270 // Rep will be updated by the call in the condition.
3271 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3272 Value *Arg = CI->getArgOperand(0);
3273 Value *Neg = Builder.CreateNeg(Arg, "neg");
3274 Value *Cmp = Builder.CreateICmpSGE(
3275 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3276 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3277 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3278 Name == "max.ui" || Name == "max.ull")) {
3279 Value *Arg0 = CI->getArgOperand(0);
3280 Value *Arg1 = CI->getArgOperand(1);
3281 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3282 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3283 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3284 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3285 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3286 Name == "min.ui" || Name == "min.ull")) {
3287 Value *Arg0 = CI->getArgOperand(0);
3288 Value *Arg1 = CI->getArgOperand(1);
3289 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3290 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3291 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3292 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3293 } else if (IsNVVM && Name == "clz.ll") {
3294 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3295 Value *Arg = CI->getArgOperand(0);
3296 Value *Ctlz = Builder.CreateCall(
3297 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3299 {Arg, Builder.getFalse()}, "ctlz");
3300 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3301 } else if (IsNVVM && Name == "popc.ll") {
3302 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3304 Value *Arg = CI->getArgOperand(0);
3305 Value *Popc = Builder.CreateCall(
3306 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3309 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3310 } else if (IsNVVM && Name == "h2f") {
3311 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3312 F->getParent(), Intrinsic::convert_from_fp16,
3313 {Builder.getFloatTy()}),
3314 CI->getArgOperand(0), "h2f");
3316 llvm_unreachable("Unknown function for CallInst upgrade.");
3320 CI->replaceAllUsesWith(Rep);
3321 CI->eraseFromParent();
3325 const auto &DefaultCase = [&NewFn, &CI]() -> void {
3326 // Handle generic mangling change, but nothing else
3328 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3329 "Unknown function for CallInst upgrade and isn't just a name change");
3330 CI->setCalledFunction(NewFn);
3332 CallInst *NewCall = nullptr;
3333 switch (NewFn->getIntrinsicID()) {
3339 case Intrinsic::arm_neon_vld1:
3340 case Intrinsic::arm_neon_vld2:
3341 case Intrinsic::arm_neon_vld3:
3342 case Intrinsic::arm_neon_vld4:
3343 case Intrinsic::arm_neon_vld2lane:
3344 case Intrinsic::arm_neon_vld3lane:
3345 case Intrinsic::arm_neon_vld4lane:
3346 case Intrinsic::arm_neon_vst1:
3347 case Intrinsic::arm_neon_vst2:
3348 case Intrinsic::arm_neon_vst3:
3349 case Intrinsic::arm_neon_vst4:
3350 case Intrinsic::arm_neon_vst2lane:
3351 case Intrinsic::arm_neon_vst3lane:
3352 case Intrinsic::arm_neon_vst4lane: {
3353 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3354 CI->arg_operands().end());
3355 NewCall = Builder.CreateCall(NewFn, Args);
3359 case Intrinsic::bitreverse:
3360 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3363 case Intrinsic::ctlz:
3364 case Intrinsic::cttz:
3365 assert(CI->getNumArgOperands() == 1 &&
3366 "Mismatch between function args and call args");
3368 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3371 case Intrinsic::objectsize: {
3372 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3373 ? Builder.getFalse()
3374 : CI->getArgOperand(2);
3375 NewCall = Builder.CreateCall(
3376 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
3380 case Intrinsic::ctpop:
3381 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3384 case Intrinsic::convert_from_fp16:
3385 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3388 case Intrinsic::dbg_value:
3389 // Upgrade from the old version that had an extra offset argument.
3390 assert(CI->getNumArgOperands() == 4);
3391 // Drop nonzero offsets instead of attempting to upgrade them.
3392 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3393 if (Offset->isZeroValue()) {
3394 NewCall = Builder.CreateCall(
3396 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3399 CI->eraseFromParent();
3402 case Intrinsic::x86_xop_vfrcz_ss:
3403 case Intrinsic::x86_xop_vfrcz_sd:
3404 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3407 case Intrinsic::x86_xop_vpermil2pd:
3408 case Intrinsic::x86_xop_vpermil2ps:
3409 case Intrinsic::x86_xop_vpermil2pd_256:
3410 case Intrinsic::x86_xop_vpermil2ps_256: {
3411 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3412 CI->arg_operands().end());
3413 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3414 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3415 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3416 NewCall = Builder.CreateCall(NewFn, Args);
3420 case Intrinsic::x86_sse41_ptestc:
3421 case Intrinsic::x86_sse41_ptestz:
3422 case Intrinsic::x86_sse41_ptestnzc: {
3423 // The arguments for these intrinsics used to be v4f32, and changed
3424 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3425 // So, the only thing required is a bitcast for both arguments.
3426 // First, check the arguments have the old type.
3427 Value *Arg0 = CI->getArgOperand(0);
3428 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3431 // Old intrinsic, add bitcasts
3432 Value *Arg1 = CI->getArgOperand(1);
3434 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3436 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3437 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3439 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3443 case Intrinsic::x86_rdtscp: {
3444 // This used to take 1 arguments. If we have no arguments, it is already
3446 if (CI->getNumOperands() == 0)
3449 NewCall = Builder.CreateCall(NewFn);
3450 // Extract the second result and store it.
3451 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3452 // Cast the pointer to the right type.
3453 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3454 llvm::PointerType::getUnqual(Data->getType()));
3455 Builder.CreateAlignedStore(Data, Ptr, 1);
3456 // Replace the original call result with the first result of the new call.
3457 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3459 std::string Name = CI->getName();
3460 if (!Name.empty()) {
3461 CI->setName(Name + ".old");
3462 NewCall->setName(Name);
3464 CI->replaceAllUsesWith(TSC);
3465 CI->eraseFromParent();
3469 case Intrinsic::x86_sse41_insertps:
3470 case Intrinsic::x86_sse41_dppd:
3471 case Intrinsic::x86_sse41_dpps:
3472 case Intrinsic::x86_sse41_mpsadbw:
3473 case Intrinsic::x86_avx_dp_ps_256:
3474 case Intrinsic::x86_avx2_mpsadbw: {
3475 // Need to truncate the last argument from i32 to i8 -- this argument models
3476 // an inherently 8-bit immediate operand to these x86 instructions.
3477 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3478 CI->arg_operands().end());
3480 // Replace the last argument with a trunc.
3481 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3482 NewCall = Builder.CreateCall(NewFn, Args);
3486 case Intrinsic::thread_pointer: {
3487 NewCall = Builder.CreateCall(NewFn, {});
3491 case Intrinsic::invariant_start:
3492 case Intrinsic::invariant_end:
3493 case Intrinsic::masked_load:
3494 case Intrinsic::masked_store:
3495 case Intrinsic::masked_gather:
3496 case Intrinsic::masked_scatter: {
3497 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3498 CI->arg_operands().end());
3499 NewCall = Builder.CreateCall(NewFn, Args);
3503 case Intrinsic::memcpy:
3504 case Intrinsic::memmove:
3505 case Intrinsic::memset: {
3506 // We have to make sure that the call signature is what we're expecting.
3507 // We only want to change the old signatures by removing the alignment arg:
3508 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3509 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3510 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3511 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3512 // Note: i8*'s in the above can be any pointer type
3513 if (CI->getNumArgOperands() != 5) {
3517 // Remove alignment argument (3), and add alignment attributes to the
3518 // dest/src pointers.
3519 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3520 CI->getArgOperand(2), CI->getArgOperand(4)};
3521 NewCall = Builder.CreateCall(NewFn, Args);
3522 auto *MemCI = cast<MemIntrinsic>(NewCall);
3523 // All mem intrinsics support dest alignment.
3524 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3525 MemCI->setDestAlignment(Align->getZExtValue());
3526 // Memcpy/Memmove also support source alignment.
3527 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3528 MTI->setSourceAlignment(Align->getZExtValue());
3532 assert(NewCall && "Should have either set this variable or returned through "
3533 "the default case");
3534 std::string Name = CI->getName();
3535 if (!Name.empty()) {
3536 CI->setName(Name + ".old");
3537 NewCall->setName(Name);
3539 CI->replaceAllUsesWith(NewCall);
3540 CI->eraseFromParent();
3543 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3544 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3546 // Check if this function should be upgraded and get the replacement function
3549 if (UpgradeIntrinsicFunction(F, NewFn)) {
3550 // Replace all users of the old function with the new function or new
3551 // instructions. This is not a range loop because the call is deleted.
3552 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3553 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3554 UpgradeIntrinsicCall(CI, NewFn);
3556 // Remove old function, no longer used, from the module.
3557 F->eraseFromParent();
3561 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3562 // Check if the tag uses struct-path aware TBAA format.
3563 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3566 auto &Context = MD.getContext();
3567 if (MD.getNumOperands() == 3) {
3568 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3569 MDNode *ScalarType = MDNode::get(Context, Elts);
3570 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3571 Metadata *Elts2[] = {ScalarType, ScalarType,
3572 ConstantAsMetadata::get(
3573 Constant::getNullValue(Type::getInt64Ty(Context))),
3575 return MDNode::get(Context, Elts2);
3577 // Create a MDNode <MD, MD, offset 0>
3578 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3579 Type::getInt64Ty(Context)))};
3580 return MDNode::get(Context, Elts);
3583 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3584 Instruction *&Temp) {
3585 if (Opc != Instruction::BitCast)
3589 Type *SrcTy = V->getType();
3590 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3591 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3592 LLVMContext &Context = V->getContext();
3594 // We have no information about target data layout, so we assume that
3595 // the maximum pointer size is 64bit.
3596 Type *MidTy = Type::getInt64Ty(Context);
3597 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3599 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3605 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3606 if (Opc != Instruction::BitCast)
3609 Type *SrcTy = C->getType();
3610 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3611 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3612 LLVMContext &Context = C->getContext();
3614 // We have no information about target data layout, so we assume that
3615 // the maximum pointer size is 64bit.
3616 Type *MidTy = Type::getInt64Ty(Context);
3618 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3625 /// Check the debug info version number, if it is out-dated, drop the debug
3626 /// info. Return true if module is modified.
3627 bool llvm::UpgradeDebugInfo(Module &M) {
3628 unsigned Version = getDebugMetadataVersionFromModule(M);
3629 if (Version == DEBUG_METADATA_VERSION) {
3630 bool BrokenDebugInfo = false;
3631 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3632 report_fatal_error("Broken module found, compilation aborted!");
3633 if (!BrokenDebugInfo)
3634 // Everything is ok.
3637 // Diagnose malformed debug info.
3638 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3639 M.getContext().diagnose(Diag);
3642 bool Modified = StripDebugInfo(M);
3643 if (Modified && Version != DEBUG_METADATA_VERSION) {
3644 // Diagnose a version mismatch.
3645 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3646 M.getContext().diagnose(DiagVersion);
3651 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3652 bool Changed = false;
3653 NamedMDNode *ModRetainReleaseMarker =
3654 M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3655 if (ModRetainReleaseMarker) {
3656 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3658 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3660 SmallVector<StringRef, 4> ValueComp;
3661 ID->getString().split(ValueComp, "#");
3662 if (ValueComp.size() == 2) {
3663 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3664 Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3665 ModRetainReleaseMarker->setOperand(0,
3666 MDNode::get(M.getContext(), Ops));
3675 bool llvm::UpgradeModuleFlags(Module &M) {
3676 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3680 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3681 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3682 MDNode *Op = ModFlags->getOperand(I);
3683 if (Op->getNumOperands() != 3)
3685 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3688 if (ID->getString() == "Objective-C Image Info Version")
3690 if (ID->getString() == "Objective-C Class Properties")
3691 HasClassProperties = true;
3692 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3693 // field was Error and now they are Max.
3694 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3695 if (auto *Behavior =
3696 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3697 if (Behavior->getLimitedValue() == Module::Error) {
3698 Type *Int32Ty = Type::getInt32Ty(M.getContext());
3699 Metadata *Ops[3] = {
3700 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3701 MDString::get(M.getContext(), ID->getString()),
3703 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3708 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3709 // section name so that llvm-lto will not complain about mismatching
3710 // module flags that is functionally the same.
3711 if (ID->getString() == "Objective-C Image Info Section") {
3712 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3713 SmallVector<StringRef, 4> ValueComp;
3714 Value->getString().split(ValueComp, " ");
3715 if (ValueComp.size() != 1) {
3716 std::string NewValue;
3717 for (auto &S : ValueComp)
3718 NewValue += S.str();
3719 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3720 MDString::get(M.getContext(), NewValue)};
3721 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3728 // "Objective-C Class Properties" is recently added for Objective-C. We
3729 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
3730 // flag of value 0, so we can correclty downgrade this flag when trying to
3731 // link an ObjC bitcode without this module flag with an ObjC bitcode with
3732 // this module flag.
3733 if (HasObjCFlag && !HasClassProperties) {
3734 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3742 void llvm::UpgradeSectionAttributes(Module &M) {
3743 auto TrimSpaces = [](StringRef Section) -> std::string {
3744 SmallVector<StringRef, 5> Components;
3745 Section.split(Components, ',');
3747 SmallString<32> Buffer;
3748 raw_svector_ostream OS(Buffer);
3750 for (auto Component : Components)
3751 OS << ',' << Component.trim();
3753 return OS.str().substr(1);
3756 for (auto &GV : M.globals()) {
3757 if (!GV.hasSection())
3760 StringRef Section = GV.getSection();
3762 if (!Section.startswith("__DATA, __objc_catlist"))
3765 // __DATA, __objc_catlist, regular, no_dead_strip
3766 // __DATA,__objc_catlist,regular,no_dead_strip
3767 GV.setSection(TrimSpaces(Section));
3771 static bool isOldLoopArgument(Metadata *MD) {
3772 auto *T = dyn_cast_or_null<MDTuple>(MD);
3775 if (T->getNumOperands() < 1)
3777 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3780 return S->getString().startswith("llvm.vectorizer.");
3783 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3784 StringRef OldPrefix = "llvm.vectorizer.";
3785 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3787 if (OldTag == "llvm.vectorizer.unroll")
3788 return MDString::get(C, "llvm.loop.interleave.count");
3790 return MDString::get(
3791 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3795 static Metadata *upgradeLoopArgument(Metadata *MD) {
3796 auto *T = dyn_cast_or_null<MDTuple>(MD);
3799 if (T->getNumOperands() < 1)
3801 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3804 if (!OldTag->getString().startswith("llvm.vectorizer."))
3807 // This has an old tag. Upgrade it.
3808 SmallVector<Metadata *, 8> Ops;
3809 Ops.reserve(T->getNumOperands());
3810 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3811 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3812 Ops.push_back(T->getOperand(I));
3814 return MDTuple::get(T->getContext(), Ops);
3817 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3818 auto *T = dyn_cast<MDTuple>(&N);
3822 if (none_of(T->operands(), isOldLoopArgument))
3825 SmallVector<Metadata *, 8> Ops;
3826 Ops.reserve(T->getNumOperands());
3827 for (Metadata *MD : T->operands())
3828 Ops.push_back(upgradeLoopArgument(MD));
3830 return MDTuple::get(T->getContext(), Ops);