//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
37 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
40 // changed their type from v4f32 to v2i64.
41 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
43 // Check whether this is an old version of the function, which received
45 Type *Arg0Type = F->getFunctionType()->getParamType(0);
46 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
49 // Yes, it's old, replace it with new version.
51 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
55 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
56 // arguments have changed their type from i32 to i8.
57 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
59 // Check that the last argument is an i32.
60 Type *LastArgType = F->getFunctionType()->getParamType(
61 F->getFunctionType()->getNumParams() - 1);
62 if (!LastArgType->isIntegerTy(32))
65 // Move this function aside and map down.
67 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
71 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
72 // All of the intrinsics matches below should be marked with which llvm
73 // version started autoupgrading them. At some point in the future we would
74 // like to use this information to remove upgrade code for some older
75 // intrinsics. It is currently undecided how we will determine that future
77 if (Name == "addcarryx.u32" || // Added in 8.0
78 Name == "addcarryx.u64" || // Added in 8.0
79 Name == "addcarry.u32" || // Added in 8.0
80 Name == "addcarry.u64" || // Added in 8.0
81 Name == "subborrow.u32" || // Added in 8.0
82 Name == "subborrow.u64" || // Added in 8.0
83 Name.startswith("sse2.padds.") || // Added in 8.0
84 Name.startswith("sse2.psubs.") || // Added in 8.0
85 Name.startswith("sse2.paddus.") || // Added in 8.0
86 Name.startswith("sse2.psubus.") || // Added in 8.0
87 Name.startswith("avx2.padds.") || // Added in 8.0
88 Name.startswith("avx2.psubs.") || // Added in 8.0
89 Name.startswith("avx2.paddus.") || // Added in 8.0
90 Name.startswith("avx2.psubus.") || // Added in 8.0
91 Name.startswith("avx512.padds.") || // Added in 8.0
92 Name.startswith("avx512.psubs.") || // Added in 8.0
93 Name.startswith("avx512.mask.padds.") || // Added in 8.0
94 Name.startswith("avx512.mask.psubs.") || // Added in 8.0
95 Name.startswith("avx512.mask.paddus.") || // Added in 8.0
96 Name.startswith("avx512.mask.psubus.") || // Added in 8.0
97 Name=="ssse3.pabs.b.128" || // Added in 6.0
98 Name=="ssse3.pabs.w.128" || // Added in 6.0
99 Name=="ssse3.pabs.d.128" || // Added in 6.0
100 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
101 Name.startswith("fma.vfmadd.") || // Added in 7.0
102 Name.startswith("fma.vfmsub.") || // Added in 7.0
103 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
104 Name.startswith("fma.vfnmadd.") || // Added in 7.0
105 Name.startswith("fma.vfnmsub.") || // Added in 7.0
106 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
107 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
108 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
109 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
110 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
111 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
112 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
113 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
114 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
115 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
116 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
117 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
118 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
119 Name.startswith("avx512.kunpck") || //added in 6.0
120 Name.startswith("avx2.pabs.") || // Added in 6.0
121 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
122 Name.startswith("avx512.broadcastm") || // Added in 6.0
123 Name == "sse.sqrt.ss" || // Added in 7.0
124 Name == "sse2.sqrt.sd" || // Added in 7.0
125 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
126 Name.startswith("avx.sqrt.p") || // Added in 7.0
127 Name.startswith("sse2.sqrt.p") || // Added in 7.0
128 Name.startswith("sse.sqrt.p") || // Added in 7.0
129 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
130 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
131 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
132 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
133 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
134 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
135 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
136 Name.startswith("avx.vperm2f128.") || // Added in 6.0
137 Name == "avx2.vperm2i128" || // Added in 6.0
138 Name == "sse.add.ss" || // Added in 4.0
139 Name == "sse2.add.sd" || // Added in 4.0
140 Name == "sse.sub.ss" || // Added in 4.0
141 Name == "sse2.sub.sd" || // Added in 4.0
142 Name == "sse.mul.ss" || // Added in 4.0
143 Name == "sse2.mul.sd" || // Added in 4.0
144 Name == "sse.div.ss" || // Added in 4.0
145 Name == "sse2.div.sd" || // Added in 4.0
146 Name == "sse41.pmaxsb" || // Added in 3.9
147 Name == "sse2.pmaxs.w" || // Added in 3.9
148 Name == "sse41.pmaxsd" || // Added in 3.9
149 Name == "sse2.pmaxu.b" || // Added in 3.9
150 Name == "sse41.pmaxuw" || // Added in 3.9
151 Name == "sse41.pmaxud" || // Added in 3.9
152 Name == "sse41.pminsb" || // Added in 3.9
153 Name == "sse2.pmins.w" || // Added in 3.9
154 Name == "sse41.pminsd" || // Added in 3.9
155 Name == "sse2.pminu.b" || // Added in 3.9
156 Name == "sse41.pminuw" || // Added in 3.9
157 Name == "sse41.pminud" || // Added in 3.9
158 Name == "avx512.kand.w" || // Added in 7.0
159 Name == "avx512.kandn.w" || // Added in 7.0
160 Name == "avx512.knot.w" || // Added in 7.0
161 Name == "avx512.kor.w" || // Added in 7.0
162 Name == "avx512.kxor.w" || // Added in 7.0
163 Name == "avx512.kxnor.w" || // Added in 7.0
164 Name == "avx512.kortestc.w" || // Added in 7.0
165 Name == "avx512.kortestz.w" || // Added in 7.0
166 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
167 Name.startswith("avx2.pmax") || // Added in 3.9
168 Name.startswith("avx2.pmin") || // Added in 3.9
169 Name.startswith("avx512.mask.pmax") || // Added in 4.0
170 Name.startswith("avx512.mask.pmin") || // Added in 4.0
171 Name.startswith("avx2.vbroadcast") || // Added in 3.8
172 Name.startswith("avx2.pbroadcast") || // Added in 3.8
173 Name.startswith("avx.vpermil.") || // Added in 3.1
174 Name.startswith("sse2.pshuf") || // Added in 3.9
175 Name.startswith("avx512.pbroadcast") || // Added in 3.9
176 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
177 Name.startswith("avx512.mask.movddup") || // Added in 3.9
178 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
179 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
180 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
181 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
182 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
183 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
184 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
185 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
186 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
187 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
188 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
189 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
190 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
191 Name.startswith("avx512.mask.pand.") || // Added in 3.9
192 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
193 Name.startswith("avx512.mask.por.") || // Added in 3.9
194 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
195 Name.startswith("avx512.mask.and.") || // Added in 3.9
196 Name.startswith("avx512.mask.andn.") || // Added in 3.9
197 Name.startswith("avx512.mask.or.") || // Added in 3.9
198 Name.startswith("avx512.mask.xor.") || // Added in 3.9
199 Name.startswith("avx512.mask.padd.") || // Added in 4.0
200 Name.startswith("avx512.mask.psub.") || // Added in 4.0
201 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
202 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
203 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
204 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
205 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
206 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
207 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
208 Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
209 Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
210 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
211 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
212 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
213 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
214 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
215 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
216 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
217 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
218 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
219 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
220 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
221 Name == "avx512.cvtusi2sd" || // Added in 7.0
222 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
223 Name == "sse2.pmulu.dq" || // Added in 7.0
224 Name == "sse41.pmuldq" || // Added in 7.0
225 Name == "avx2.pmulu.dq" || // Added in 7.0
226 Name == "avx2.pmul.dq" || // Added in 7.0
227 Name == "avx512.pmulu.dq.512" || // Added in 7.0
228 Name == "avx512.pmul.dq.512" || // Added in 7.0
229 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
230 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
231 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
232 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
233 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
234 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
235 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
236 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
237 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
238 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
239 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
240 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
241 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
242 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
243 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
244 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
245 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
246 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
247 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
248 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
249 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
250 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
251 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
252 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
253 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
254 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
255 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
256 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
257 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
258 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
259 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
260 Name.startswith("avx512.mask.pslli") || // Added in 4.0
261 Name.startswith("avx512.mask.psrai") || // Added in 4.0
262 Name.startswith("avx512.mask.psrli") || // Added in 4.0
263 Name.startswith("avx512.mask.psllv") || // Added in 4.0
264 Name.startswith("avx512.mask.psrav") || // Added in 4.0
265 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
266 Name.startswith("sse41.pmovsx") || // Added in 3.8
267 Name.startswith("sse41.pmovzx") || // Added in 3.9
268 Name.startswith("avx2.pmovsx") || // Added in 3.9
269 Name.startswith("avx2.pmovzx") || // Added in 3.9
270 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
271 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
272 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
273 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
274 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
275 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
276 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
277 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
278 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
279 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
280 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
281 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
282 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
283 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
284 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
285 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
286 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
287 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
288 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
289 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
290 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
291 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
292 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
293 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
294 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
295 Name.startswith("avx512.vpshld.") || // Added in 8.0
296 Name.startswith("avx512.vpshrd.") || // Added in 8.0
297 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
298 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
299 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
300 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
301 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
302 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
303 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
304 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
305 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
306 Name.startswith("avx512.mask.conflict.") || // Added in 9.0
307 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
308 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
309 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
310 Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
311 Name == "sse.cvtsi2ss" || // Added in 7.0
312 Name == "sse.cvtsi642ss" || // Added in 7.0
313 Name == "sse2.cvtsi2sd" || // Added in 7.0
314 Name == "sse2.cvtsi642sd" || // Added in 7.0
315 Name == "sse2.cvtss2sd" || // Added in 7.0
316 Name == "sse2.cvtdq2pd" || // Added in 3.9
317 Name == "sse2.cvtdq2ps" || // Added in 7.0
318 Name == "sse2.cvtps2pd" || // Added in 3.9
319 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
320 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
321 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
322 Name.startswith("vcvtph2ps.") || // Added in 11.0
323 Name.startswith("avx.vinsertf128.") || // Added in 3.7
324 Name == "avx2.vinserti128" || // Added in 3.7
325 Name.startswith("avx512.mask.insert") || // Added in 4.0
326 Name.startswith("avx.vextractf128.") || // Added in 3.7
327 Name == "avx2.vextracti128" || // Added in 3.7
328 Name.startswith("avx512.mask.vextract") || // Added in 4.0
329 Name.startswith("sse4a.movnt.") || // Added in 3.9
330 Name.startswith("avx.movnt.") || // Added in 3.2
331 Name.startswith("avx512.storent.") || // Added in 3.9
332 Name == "sse41.movntdqa" || // Added in 5.0
333 Name == "avx2.movntdqa" || // Added in 5.0
334 Name == "avx512.movntdqa" || // Added in 5.0
335 Name == "sse2.storel.dq" || // Added in 3.9
336 Name.startswith("sse.storeu.") || // Added in 3.9
337 Name.startswith("sse2.storeu.") || // Added in 3.9
338 Name.startswith("avx.storeu.") || // Added in 3.9
339 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
340 Name.startswith("avx512.mask.store.p") || // Added in 3.9
341 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
342 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
343 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
344 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
345 Name == "avx512.mask.store.ss" || // Added in 7.0
346 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
347 Name.startswith("avx512.mask.load.") || // Added in 3.9
348 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
349 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
350 Name.startswith("avx512.mask.expand.b") || // Added in 9.0
351 Name.startswith("avx512.mask.expand.w") || // Added in 9.0
352 Name.startswith("avx512.mask.expand.d") || // Added in 9.0
353 Name.startswith("avx512.mask.expand.q") || // Added in 9.0
354 Name.startswith("avx512.mask.expand.p") || // Added in 9.0
355 Name.startswith("avx512.mask.compress.b") || // Added in 9.0
356 Name.startswith("avx512.mask.compress.w") || // Added in 9.0
357 Name.startswith("avx512.mask.compress.d") || // Added in 9.0
358 Name.startswith("avx512.mask.compress.q") || // Added in 9.0
359 Name.startswith("avx512.mask.compress.p") || // Added in 9.0
360 Name == "sse42.crc32.64.8" || // Added in 3.4
361 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
362 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
363 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
364 Name.startswith("avx512.mask.valign.") || // Added in 4.0
365 Name.startswith("sse2.psll.dq") || // Added in 3.7
366 Name.startswith("sse2.psrl.dq") || // Added in 3.7
367 Name.startswith("avx2.psll.dq") || // Added in 3.7
368 Name.startswith("avx2.psrl.dq") || // Added in 3.7
369 Name.startswith("avx512.psll.dq") || // Added in 3.9
370 Name.startswith("avx512.psrl.dq") || // Added in 3.9
371 Name == "sse41.pblendw" || // Added in 3.7
372 Name.startswith("sse41.blendp") || // Added in 3.7
373 Name.startswith("avx.blend.p") || // Added in 3.7
374 Name == "avx2.pblendw" || // Added in 3.7
375 Name.startswith("avx2.pblendd.") || // Added in 3.7
376 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
377 Name == "avx2.vbroadcasti128" || // Added in 3.7
378 Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
379 Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
380 Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
381 Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
382 Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
383 Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
384 Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
385 Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
386 Name == "xop.vpcmov" || // Added in 3.8
387 Name == "xop.vpcmov.256" || // Added in 5.0
388 Name.startswith("avx512.mask.move.s") || // Added in 4.0
389 Name.startswith("avx512.cvtmask2") || // Added in 5.0
390 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
391 Name.startswith("xop.vprot") || // Added in 8.0
392 Name.startswith("avx512.prol") || // Added in 8.0
393 Name.startswith("avx512.pror") || // Added in 8.0
394 Name.startswith("avx512.mask.prorv.") || // Added in 8.0
395 Name.startswith("avx512.mask.pror.") || // Added in 8.0
396 Name.startswith("avx512.mask.prolv.") || // Added in 8.0
397 Name.startswith("avx512.mask.prol.") || // Added in 8.0
398 Name.startswith("avx512.ptestm") || //Added in 6.0
399 Name.startswith("avx512.ptestnm") || //Added in 6.0
400 Name.startswith("avx512.mask.pavg")) // Added in 6.0
406 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
408 // Only handle intrinsics that start with "x86.".
409 if (!Name.startswith("x86."))
411 // Remove "x86." prefix.
412 Name = Name.substr(4);
414 if (ShouldUpgradeX86Intrinsic(F, Name)) {
419 if (Name == "rdtscp") { // Added in 8.0
420 // If this intrinsic has 0 operands, it's the new version.
421 if (F->getFunctionType()->getNumParams() == 0)
425 NewFn = Intrinsic::getDeclaration(F->getParent(),
426 Intrinsic::x86_rdtscp);
430 // SSE4.1 ptest functions may have an old signature.
431 if (Name.startswith("sse41.ptest")) { // Added in 3.2
432 if (Name.substr(11) == "c")
433 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
434 if (Name.substr(11) == "z")
435 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
436 if (Name.substr(11) == "nzc")
437 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
439 // Several blend and other instructions with masks used the wrong number of
441 if (Name == "sse41.insertps") // Added in 3.6
442 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
444 if (Name == "sse41.dppd") // Added in 3.6
445 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
447 if (Name == "sse41.dpps") // Added in 3.6
448 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
450 if (Name == "sse41.mpsadbw") // Added in 3.6
451 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
453 if (Name == "avx.dp.ps.256") // Added in 3.6
454 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
456 if (Name == "avx2.mpsadbw") // Added in 3.6
457 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
460 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
461 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
463 NewFn = Intrinsic::getDeclaration(F->getParent(),
464 Intrinsic::x86_xop_vfrcz_ss);
467 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
469 NewFn = Intrinsic::getDeclaration(F->getParent(),
470 Intrinsic::x86_xop_vfrcz_sd);
473 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
474 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
475 auto Idx = F->getFunctionType()->getParamType(2);
476 if (Idx->isFPOrFPVectorTy()) {
478 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
479 unsigned EltSize = Idx->getScalarSizeInBits();
480 Intrinsic::ID Permil2ID;
481 if (EltSize == 64 && IdxSize == 128)
482 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
483 else if (EltSize == 32 && IdxSize == 128)
484 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
485 else if (EltSize == 64 && IdxSize == 256)
486 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
488 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
489 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
494 if (Name == "seh.recoverfp") {
495 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
502 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
503 assert(F && "Illegal to upgrade a non-existent Function.");
505 // Quickly eliminate it, if it's not a candidate.
506 StringRef Name = F->getName();
507 if (Name.size() <= 8 || !Name.startswith("llvm."))
509 Name = Name.substr(5); // Strip off "llvm."
514 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
515 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
516 F->arg_begin()->getType());
519 if (Name.startswith("arm.neon.vclz")) {
521 F->arg_begin()->getType(),
522 Type::getInt1Ty(F->getContext())
524 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
525 // the end of the name. Change name from llvm.arm.neon.vclz.* to
527 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
528 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
529 "llvm.ctlz." + Name.substr(14), F->getParent());
532 if (Name.startswith("arm.neon.vcnt")) {
533 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
534 F->arg_begin()->getType());
537 static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
538 if (vldRegex.match(Name)) {
539 auto fArgs = F->getFunctionType()->params();
540 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
541 // Can't use Intrinsic::getDeclaration here as the return types might
542 // then only be structurally equal.
543 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
544 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
545 "llvm." + Name + ".p0i8", F->getParent());
548 static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
549 if (vstRegex.match(Name)) {
550 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
551 Intrinsic::arm_neon_vst2,
552 Intrinsic::arm_neon_vst3,
553 Intrinsic::arm_neon_vst4};
555 static const Intrinsic::ID StoreLaneInts[] = {
556 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
557 Intrinsic::arm_neon_vst4lane
560 auto fArgs = F->getFunctionType()->params();
561 Type *Tys[] = {fArgs[0], fArgs[1]};
562 if (Name.find("lane") == StringRef::npos)
563 NewFn = Intrinsic::getDeclaration(F->getParent(),
564 StoreInts[fArgs.size() - 3], Tys);
566 NewFn = Intrinsic::getDeclaration(F->getParent(),
567 StoreLaneInts[fArgs.size() - 5], Tys);
570 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
571 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
574 if (Name.startswith("arm.neon.vqadds.")) {
575 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
576 F->arg_begin()->getType());
579 if (Name.startswith("arm.neon.vqaddu.")) {
580 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
581 F->arg_begin()->getType());
584 if (Name.startswith("arm.neon.vqsubs.")) {
585 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
586 F->arg_begin()->getType());
589 if (Name.startswith("arm.neon.vqsubu.")) {
590 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
591 F->arg_begin()->getType());
594 if (Name.startswith("aarch64.neon.addp")) {
595 if (F->arg_size() != 2)
596 break; // Invalid IR.
597 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
598 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
599 NewFn = Intrinsic::getDeclaration(F->getParent(),
600 Intrinsic::aarch64_neon_faddp, Ty);
608 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
610 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
611 F->arg_begin()->getType());
614 if (Name.startswith("cttz.") && F->arg_size() == 1) {
616 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
617 F->arg_begin()->getType());
623 if (Name == "dbg.value" && F->arg_size() == 4) {
625 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
631 SmallVector<StringRef, 2> Groups;
632 static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
633 if (R.match(Name, &Groups)) {
634 Intrinsic::ID ID = Intrinsic::not_intrinsic;
635 if (Groups[1] == "fadd")
636 ID = Intrinsic::experimental_vector_reduce_v2_fadd;
637 if (Groups[1] == "fmul")
638 ID = Intrinsic::experimental_vector_reduce_v2_fmul;
640 if (ID != Intrinsic::not_intrinsic) {
642 auto Args = F->getFunctionType()->params();
643 Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
644 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
652 bool IsLifetimeStart = Name.startswith("lifetime.start");
653 if (IsLifetimeStart || Name.startswith("invariant.start")) {
654 Intrinsic::ID ID = IsLifetimeStart ?
655 Intrinsic::lifetime_start : Intrinsic::invariant_start;
656 auto Args = F->getFunctionType()->params();
657 Type* ObjectPtr[1] = {Args[1]};
658 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
660 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
665 bool IsLifetimeEnd = Name.startswith("lifetime.end");
666 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
667 Intrinsic::ID ID = IsLifetimeEnd ?
668 Intrinsic::lifetime_end : Intrinsic::invariant_end;
670 auto Args = F->getFunctionType()->params();
671 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
672 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
674 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
678 if (Name.startswith("invariant.group.barrier")) {
679 // Rename invariant.group.barrier to launder.invariant.group
680 auto Args = F->getFunctionType()->params();
681 Type* ObjectPtr[1] = {Args[0]};
683 NewFn = Intrinsic::getDeclaration(F->getParent(),
684 Intrinsic::launder_invariant_group, ObjectPtr);
692 if (Name.startswith("masked.load.")) {
693 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
694 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
696 NewFn = Intrinsic::getDeclaration(F->getParent(),
697 Intrinsic::masked_load,
702 if (Name.startswith("masked.store.")) {
703 auto Args = F->getFunctionType()->params();
704 Type *Tys[] = { Args[0], Args[1] };
705 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
707 NewFn = Intrinsic::getDeclaration(F->getParent(),
708 Intrinsic::masked_store,
713 // Renaming gather/scatter intrinsics with no address space overloading
714 // to the new overload which includes an address space
715 if (Name.startswith("masked.gather.")) {
716 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
717 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
719 NewFn = Intrinsic::getDeclaration(F->getParent(),
720 Intrinsic::masked_gather, Tys);
724 if (Name.startswith("masked.scatter.")) {
725 auto Args = F->getFunctionType()->params();
726 Type *Tys[] = {Args[0], Args[1]};
727 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
729 NewFn = Intrinsic::getDeclaration(F->getParent(),
730 Intrinsic::masked_scatter, Tys);
734 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
735 // alignment parameter to embedding the alignment as an attribute of
737 if (Name.startswith("memcpy.") && F->arg_size() == 5) {
739 // Get the types of dest, src, and len
740 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
741 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
745 if (Name.startswith("memmove.") && F->arg_size() == 5) {
747 // Get the types of dest, src, and len
748 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
749 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
753 if (Name.startswith("memset.") && F->arg_size() == 5) {
755 // Get the types of dest, and len
756 const auto *FT = F->getFunctionType();
757 Type *ParamTypes[2] = {
758 FT->getParamType(0), // Dest
759 FT->getParamType(2) // len
761 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
768 if (Name.startswith("nvvm.")) {
769 Name = Name.substr(5);
771 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
772 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
773 .Cases("brev32", "brev64", Intrinsic::bitreverse)
774 .Case("clz.i", Intrinsic::ctlz)
775 .Case("popc.i", Intrinsic::ctpop)
776 .Default(Intrinsic::not_intrinsic);
777 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
778 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
779 {F->getReturnType()});
783 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
784 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
786 // TODO: We could add lohi.i2d.
787 bool Expand = StringSwitch<bool>(Name)
788 .Cases("abs.i", "abs.ll", true)
789 .Cases("clz.ll", "popc.ll", "h2f", true)
790 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
791 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
792 .StartsWith("atomic.load.add.f32.p", true)
793 .StartsWith("atomic.load.add.f64.p", true)
803 // We only need to change the name to match the mangling including the
805 if (Name.startswith("objectsize.")) {
806 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
807 if (F->arg_size() == 2 || F->arg_size() == 3 ||
808 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
810 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
818 if (Name == "prefetch") {
819 // Handle address space overloading.
820 Type *Tys[] = {F->arg_begin()->getType()};
821 if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
824 Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
831 if (Name == "stackprotectorcheck") {
838 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
841 // Remangle our intrinsic since we upgrade the mangling
842 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
843 if (Result != None) {
844 NewFn = Result.getValue();
848 // This may not belong here. This function is effectively being overloaded
849 // to both detect an intrinsic which needs upgrading, and to provide the
850 // upgraded form of the intrinsic. We should perhaps have two separate
851 // functions for this.
855 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
857 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
858 assert(F != NewFn && "Intrinsic function upgraded to the same function");
860 // Upgrade intrinsic attributes. This does not change the function.
863 if (Intrinsic::ID id = F->getIntrinsicID())
864 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
868 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
869 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
870 GV->getName() == "llvm.global_dtors")) ||
871 !GV->hasInitializer())
873 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
876 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
877 if (!STy || STy->getNumElements() != 2)
880 LLVMContext &C = GV->getContext();
882 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
884 Constant *Init = GV->getInitializer();
885 unsigned N = Init->getNumOperands();
886 std::vector<Constant *> NewCtors(N);
887 for (unsigned i = 0; i != N; ++i) {
888 auto Ctor = cast<Constant>(Init->getOperand(i));
889 NewCtors[i] = ConstantStruct::get(
890 EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
891 Constant::getNullValue(IRB.getInt8PtrTy()));
893 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
895 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
896 NewInit, GV->getName());
899 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
901 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
902 Value *Op, unsigned Shift) {
903 auto *ResultTy = cast<VectorType>(Op->getType());
904 unsigned NumElts = ResultTy->getNumElements() * 8;
906 // Bitcast from a 64-bit element type to a byte element type.
907 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
908 Op = Builder.CreateBitCast(Op, VecTy, "cast");
910 // We'll be shuffling in zeroes.
911 Value *Res = Constant::getNullValue(VecTy);
913 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
914 // we'll just return the zero vector.
917 // 256/512-bit version is split into 2/4 16-byte lanes.
918 for (unsigned l = 0; l != NumElts; l += 16)
919 for (unsigned i = 0; i != 16; ++i) {
920 unsigned Idx = NumElts + i - Shift;
922 Idx -= NumElts - 16; // end of lane, switch operand.
923 Idxs[l + i] = Idx + l;
926 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
929 // Bitcast back to a 64-bit element type.
930 return Builder.CreateBitCast(Res, ResultTy, "cast");
933 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
935 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
937 auto *ResultTy = cast<VectorType>(Op->getType());
938 unsigned NumElts = ResultTy->getNumElements() * 8;
940 // Bitcast from a 64-bit element type to a byte element type.
941 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
942 Op = Builder.CreateBitCast(Op, VecTy, "cast");
944 // We'll be shuffling in zeroes.
945 Value *Res = Constant::getNullValue(VecTy);
947 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
948 // we'll just return the zero vector.
951 // 256/512-bit version is split into 2/4 16-byte lanes.
952 for (unsigned l = 0; l != NumElts; l += 16)
953 for (unsigned i = 0; i != 16; ++i) {
954 unsigned Idx = i + Shift;
956 Idx += NumElts - 16; // end of lane, switch operand.
957 Idxs[l + i] = Idx + l;
960 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
963 // Bitcast back to a 64-bit element type.
964 return Builder.CreateBitCast(Res, ResultTy, "cast");
967 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
969 llvm::VectorType *MaskTy = FixedVectorType::get(
970 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
971 Mask = Builder.CreateBitCast(Mask, MaskTy);
973 // If we have less than 8 elements, then the starting mask was an i8 and
974 // we need to extract down to the right number of elements.
977 for (unsigned i = 0; i != NumElts; ++i)
979 Mask = Builder.CreateShuffleVector(Mask, Mask,
980 makeArrayRef(Indices, NumElts),
987 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
988 Value *Op0, Value *Op1) {
989 // If the mask is all ones just emit the first operation.
990 if (const auto *C = dyn_cast<Constant>(Mask))
991 if (C->isAllOnesValue())
994 Mask = getX86MaskVec(Builder, Mask,
995 cast<VectorType>(Op0->getType())->getNumElements());
996 return Builder.CreateSelect(Mask, Op0, Op1);
999 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1000 Value *Op0, Value *Op1) {
1001 // If the mask is all ones just emit the first operation.
1002 if (const auto *C = dyn_cast<Constant>(Mask))
1003 if (C->isAllOnesValue())
1006 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1007 Mask->getType()->getIntegerBitWidth());
1008 Mask = Builder.CreateBitCast(Mask, MaskTy);
1009 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1010 return Builder.CreateSelect(Mask, Op0, Op1);
1013 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1014 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1015 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1016 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1017 Value *Op1, Value *Shift,
1018 Value *Passthru, Value *Mask,
1020 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1022 unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
1023 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1024 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1025 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1027 // Mask the immediate for VALIGN.
1029 ShiftVal &= (NumElts - 1);
1031 // If palignr is shifting the pair of vectors more than the size of two
1032 // lanes, emit zero.
1034 return llvm::Constant::getNullValue(Op0->getType());
1036 // If palignr is shifting the pair of input vectors more than one lane,
1037 // but less than two lanes, convert to shifting in zeroes.
1038 if (ShiftVal > 16) {
1041 Op0 = llvm::Constant::getNullValue(Op0->getType());
1045 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1046 for (unsigned l = 0; l < NumElts; l += 16) {
1047 for (unsigned i = 0; i != 16; ++i) {
1048 unsigned Idx = ShiftVal + i;
1049 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1050 Idx += NumElts - 16; // End of lane, switch operand.
1051 Indices[l + i] = Idx + l;
1055 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1056 makeArrayRef(Indices, NumElts),
1059 return EmitX86Select(Builder, Mask, Align, Passthru);
1062 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
1063 bool ZeroMask, bool IndexForm) {
1064 Type *Ty = CI.getType();
1065 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1066 unsigned EltWidth = Ty->getScalarSizeInBits();
1067 bool IsFloat = Ty->isFPOrFPVectorTy();
1069 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1070 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1071 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1072 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1073 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1074 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1075 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1076 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1077 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1078 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1079 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1080 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1081 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1082 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1083 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1084 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1085 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1086 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1087 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1088 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1089 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1090 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1091 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1092 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1093 else if (VecWidth == 128 && EltWidth == 16)
1094 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1095 else if (VecWidth == 256 && EltWidth == 16)
1096 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1097 else if (VecWidth == 512 && EltWidth == 16)
1098 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1099 else if (VecWidth == 128 && EltWidth == 8)
1100 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1101 else if (VecWidth == 256 && EltWidth == 8)
1102 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1103 else if (VecWidth == 512 && EltWidth == 8)
1104 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1106 llvm_unreachable("Unexpected intrinsic");
1108 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1109 CI.getArgOperand(2) };
1111 // If this isn't index form we need to swap operand 0 and 1.
1113 std::swap(Args[0], Args[1]);
1115 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1117 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1118 : Builder.CreateBitCast(CI.getArgOperand(1),
1120 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1123 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1124 bool IsSigned, bool IsAddition) {
1125 Type *Ty = CI.getType();
1126 Value *Op0 = CI.getOperand(0);
1127 Value *Op1 = CI.getOperand(1);
1130 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1131 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1132 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1133 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1135 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1136 Value *VecSrc = CI.getOperand(2);
1137 Value *Mask = CI.getOperand(3);
1138 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1143 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1144 bool IsRotateRight) {
1145 Type *Ty = CI.getType();
1146 Value *Src = CI.getArgOperand(0);
1147 Value *Amt = CI.getArgOperand(1);
1149 // Amount may be scalar immediate, in which case create a splat vector.
1150 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1151 // we only care about the lowest log2 bits anyway.
1152 if (Amt->getType() != Ty) {
1153 unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
1154 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1155 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1158 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1159 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1160 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1162 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1163 Value *VecSrc = CI.getOperand(2);
1164 Value *Mask = CI.getOperand(3);
1165 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1170 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1172 Type *Ty = CI.getType();
1173 Value *LHS = CI.getArgOperand(0);
1174 Value *RHS = CI.getArgOperand(1);
1176 CmpInst::Predicate Pred;
1179 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1182 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1185 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1188 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1191 Pred = ICmpInst::ICMP_EQ;
1194 Pred = ICmpInst::ICMP_NE;
1197 return Constant::getNullValue(Ty); // FALSE
1199 return Constant::getAllOnesValue(Ty); // TRUE
1201 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1204 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1205 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1209 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1210 bool IsShiftRight, bool ZeroMask) {
1211 Type *Ty = CI.getType();
1212 Value *Op0 = CI.getArgOperand(0);
1213 Value *Op1 = CI.getArgOperand(1);
1214 Value *Amt = CI.getArgOperand(2);
1217 std::swap(Op0, Op1);
1219 // Amount may be scalar immediate, in which case create a splat vector.
1220 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1221 // we only care about the lowest log2 bits anyway.
1222 if (Amt->getType() != Ty) {
1223 unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
1224 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1225 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1228 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1229 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1230 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1232 unsigned NumArgs = CI.getNumArgOperands();
1233 if (NumArgs >= 4) { // For masked intrinsics.
1234 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1235 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1236 CI.getArgOperand(0);
1237 Value *Mask = CI.getOperand(NumArgs - 1);
1238 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1243 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1244 Value *Ptr, Value *Data, Value *Mask,
1246 // Cast the pointer to the right type.
1247 Ptr = Builder.CreateBitCast(Ptr,
1248 llvm::PointerType::getUnqual(Data->getType()));
1249 const Align Alignment =
1251 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1254 // If the mask is all ones just emit a regular store.
1255 if (const auto *C = dyn_cast<Constant>(Mask))
1256 if (C->isAllOnesValue())
1257 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1259 // Convert the mask from an integer type to a vector of i1.
1260 unsigned NumElts = cast<VectorType>(Data->getType())->getNumElements();
1261 Mask = getX86MaskVec(Builder, Mask, NumElts);
1262 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1265 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1266 Value *Ptr, Value *Passthru, Value *Mask,
1268 Type *ValTy = Passthru->getType();
1269 // Cast the pointer to the right type.
1270 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1271 const Align Alignment =
1273 ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1277 // If the mask is all ones just emit a regular store.
1278 if (const auto *C = dyn_cast<Constant>(Mask))
1279 if (C->isAllOnesValue())
1280 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1282 // Convert the mask from an integer type to a vector of i1.
1283 unsigned NumElts = cast<VectorType>(Passthru->getType())->getNumElements();
1284 Mask = getX86MaskVec(Builder, Mask, NumElts);
1285 return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1288 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1289 Value *Op0 = CI.getArgOperand(0);
1290 llvm::Type *Ty = Op0->getType();
1291 Value *Zero = llvm::Constant::getNullValue(Ty);
1292 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1293 Value *Neg = Builder.CreateNeg(Op0);
1294 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1296 if (CI.getNumArgOperands() == 3)
1297 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1302 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1303 ICmpInst::Predicate Pred) {
1304 Value *Op0 = CI.getArgOperand(0);
1305 Value *Op1 = CI.getArgOperand(1);
1306 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1307 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1309 if (CI.getNumArgOperands() == 4)
1310 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1315 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1316 Type *Ty = CI.getType();
1318 // Arguments have a vXi32 type so cast to vXi64.
1319 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1320 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1323 // Shift left then arithmetic shift right.
1324 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1325 LHS = Builder.CreateShl(LHS, ShiftAmt);
1326 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1327 RHS = Builder.CreateShl(RHS, ShiftAmt);
1328 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1330 // Clear the upper bits.
1331 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1332 LHS = Builder.CreateAnd(LHS, Mask);
1333 RHS = Builder.CreateAnd(RHS, Mask);
1336 Value *Res = Builder.CreateMul(LHS, RHS);
1338 if (CI.getNumArgOperands() == 4)
1339 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1344 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1345 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1347 unsigned NumElts = cast<VectorType>(Vec->getType())->getNumElements();
1349 const auto *C = dyn_cast<Constant>(Mask);
1350 if (!C || !C->isAllOnesValue())
1351 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1356 for (unsigned i = 0; i != NumElts; ++i)
1358 for (unsigned i = NumElts; i != 8; ++i)
1359 Indices[i] = NumElts + i % NumElts;
1360 Vec = Builder.CreateShuffleVector(Vec,
1361 Constant::getNullValue(Vec->getType()),
1364 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1367 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1368 unsigned CC, bool Signed) {
1369 Value *Op0 = CI.getArgOperand(0);
1370 unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
1374 Cmp = Constant::getNullValue(
1375 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1376 } else if (CC == 7) {
1377 Cmp = Constant::getAllOnesValue(
1378 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1380 ICmpInst::Predicate Pred;
1382 default: llvm_unreachable("Unknown condition code");
1383 case 0: Pred = ICmpInst::ICMP_EQ; break;
1384 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1385 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1386 case 4: Pred = ICmpInst::ICMP_NE; break;
1387 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1388 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1390 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1393 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1395 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1398 // Replace a masked intrinsic with an older unmasked intrinsic.
1399 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1400 Intrinsic::ID IID) {
1401 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1402 Value *Rep = Builder.CreateCall(Intrin,
1403 { CI.getArgOperand(0), CI.getArgOperand(1) });
1404 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1407 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1408 Value* A = CI.getArgOperand(0);
1409 Value* B = CI.getArgOperand(1);
1410 Value* Src = CI.getArgOperand(2);
1411 Value* Mask = CI.getArgOperand(3);
1413 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1414 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1415 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1416 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1417 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1418 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1422 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1423 Value* Op = CI.getArgOperand(0);
1424 Type* ReturnOp = CI.getType();
1425 unsigned NumElts = cast<VectorType>(CI.getType())->getNumElements();
1426 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1427 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1430 // Replace intrinsic with unmasked version and a select.
1431 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1432 CallInst &CI, Value *&Rep) {
1433 Name = Name.substr(12); // Remove avx512.mask.
1435 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1436 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1438 if (Name.startswith("max.p")) {
1439 if (VecWidth == 128 && EltWidth == 32)
1440 IID = Intrinsic::x86_sse_max_ps;
1441 else if (VecWidth == 128 && EltWidth == 64)
1442 IID = Intrinsic::x86_sse2_max_pd;
1443 else if (VecWidth == 256 && EltWidth == 32)
1444 IID = Intrinsic::x86_avx_max_ps_256;
1445 else if (VecWidth == 256 && EltWidth == 64)
1446 IID = Intrinsic::x86_avx_max_pd_256;
1448 llvm_unreachable("Unexpected intrinsic");
1449 } else if (Name.startswith("min.p")) {
1450 if (VecWidth == 128 && EltWidth == 32)
1451 IID = Intrinsic::x86_sse_min_ps;
1452 else if (VecWidth == 128 && EltWidth == 64)
1453 IID = Intrinsic::x86_sse2_min_pd;
1454 else if (VecWidth == 256 && EltWidth == 32)
1455 IID = Intrinsic::x86_avx_min_ps_256;
1456 else if (VecWidth == 256 && EltWidth == 64)
1457 IID = Intrinsic::x86_avx_min_pd_256;
1459 llvm_unreachable("Unexpected intrinsic");
1460 } else if (Name.startswith("pshuf.b.")) {
1461 if (VecWidth == 128)
1462 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1463 else if (VecWidth == 256)
1464 IID = Intrinsic::x86_avx2_pshuf_b;
1465 else if (VecWidth == 512)
1466 IID = Intrinsic::x86_avx512_pshuf_b_512;
1468 llvm_unreachable("Unexpected intrinsic");
1469 } else if (Name.startswith("pmul.hr.sw.")) {
1470 if (VecWidth == 128)
1471 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1472 else if (VecWidth == 256)
1473 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1474 else if (VecWidth == 512)
1475 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1477 llvm_unreachable("Unexpected intrinsic");
1478 } else if (Name.startswith("pmulh.w.")) {
1479 if (VecWidth == 128)
1480 IID = Intrinsic::x86_sse2_pmulh_w;
1481 else if (VecWidth == 256)
1482 IID = Intrinsic::x86_avx2_pmulh_w;
1483 else if (VecWidth == 512)
1484 IID = Intrinsic::x86_avx512_pmulh_w_512;
1486 llvm_unreachable("Unexpected intrinsic");
1487 } else if (Name.startswith("pmulhu.w.")) {
1488 if (VecWidth == 128)
1489 IID = Intrinsic::x86_sse2_pmulhu_w;
1490 else if (VecWidth == 256)
1491 IID = Intrinsic::x86_avx2_pmulhu_w;
1492 else if (VecWidth == 512)
1493 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1495 llvm_unreachable("Unexpected intrinsic");
1496 } else if (Name.startswith("pmaddw.d.")) {
1497 if (VecWidth == 128)
1498 IID = Intrinsic::x86_sse2_pmadd_wd;
1499 else if (VecWidth == 256)
1500 IID = Intrinsic::x86_avx2_pmadd_wd;
1501 else if (VecWidth == 512)
1502 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1504 llvm_unreachable("Unexpected intrinsic");
1505 } else if (Name.startswith("pmaddubs.w.")) {
1506 if (VecWidth == 128)
1507 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1508 else if (VecWidth == 256)
1509 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1510 else if (VecWidth == 512)
1511 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1513 llvm_unreachable("Unexpected intrinsic");
1514 } else if (Name.startswith("packsswb.")) {
1515 if (VecWidth == 128)
1516 IID = Intrinsic::x86_sse2_packsswb_128;
1517 else if (VecWidth == 256)
1518 IID = Intrinsic::x86_avx2_packsswb;
1519 else if (VecWidth == 512)
1520 IID = Intrinsic::x86_avx512_packsswb_512;
1522 llvm_unreachable("Unexpected intrinsic");
1523 } else if (Name.startswith("packssdw.")) {
1524 if (VecWidth == 128)
1525 IID = Intrinsic::x86_sse2_packssdw_128;
1526 else if (VecWidth == 256)
1527 IID = Intrinsic::x86_avx2_packssdw;
1528 else if (VecWidth == 512)
1529 IID = Intrinsic::x86_avx512_packssdw_512;
1531 llvm_unreachable("Unexpected intrinsic");
1532 } else if (Name.startswith("packuswb.")) {
1533 if (VecWidth == 128)
1534 IID = Intrinsic::x86_sse2_packuswb_128;
1535 else if (VecWidth == 256)
1536 IID = Intrinsic::x86_avx2_packuswb;
1537 else if (VecWidth == 512)
1538 IID = Intrinsic::x86_avx512_packuswb_512;
1540 llvm_unreachable("Unexpected intrinsic");
1541 } else if (Name.startswith("packusdw.")) {
1542 if (VecWidth == 128)
1543 IID = Intrinsic::x86_sse41_packusdw;
1544 else if (VecWidth == 256)
1545 IID = Intrinsic::x86_avx2_packusdw;
1546 else if (VecWidth == 512)
1547 IID = Intrinsic::x86_avx512_packusdw_512;
1549 llvm_unreachable("Unexpected intrinsic");
1550 } else if (Name.startswith("vpermilvar.")) {
1551 if (VecWidth == 128 && EltWidth == 32)
1552 IID = Intrinsic::x86_avx_vpermilvar_ps;
1553 else if (VecWidth == 128 && EltWidth == 64)
1554 IID = Intrinsic::x86_avx_vpermilvar_pd;
1555 else if (VecWidth == 256 && EltWidth == 32)
1556 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1557 else if (VecWidth == 256 && EltWidth == 64)
1558 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1559 else if (VecWidth == 512 && EltWidth == 32)
1560 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1561 else if (VecWidth == 512 && EltWidth == 64)
1562 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1564 llvm_unreachable("Unexpected intrinsic");
1565 } else if (Name == "cvtpd2dq.256") {
1566 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1567 } else if (Name == "cvtpd2ps.256") {
1568 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1569 } else if (Name == "cvttpd2dq.256") {
1570 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1571 } else if (Name == "cvttps2dq.128") {
1572 IID = Intrinsic::x86_sse2_cvttps2dq;
1573 } else if (Name == "cvttps2dq.256") {
1574 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1575 } else if (Name.startswith("permvar.")) {
1576 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1577 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1578 IID = Intrinsic::x86_avx2_permps;
1579 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1580 IID = Intrinsic::x86_avx2_permd;
1581 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1582 IID = Intrinsic::x86_avx512_permvar_df_256;
1583 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1584 IID = Intrinsic::x86_avx512_permvar_di_256;
1585 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1586 IID = Intrinsic::x86_avx512_permvar_sf_512;
1587 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1588 IID = Intrinsic::x86_avx512_permvar_si_512;
1589 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1590 IID = Intrinsic::x86_avx512_permvar_df_512;
1591 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1592 IID = Intrinsic::x86_avx512_permvar_di_512;
1593 else if (VecWidth == 128 && EltWidth == 16)
1594 IID = Intrinsic::x86_avx512_permvar_hi_128;
1595 else if (VecWidth == 256 && EltWidth == 16)
1596 IID = Intrinsic::x86_avx512_permvar_hi_256;
1597 else if (VecWidth == 512 && EltWidth == 16)
1598 IID = Intrinsic::x86_avx512_permvar_hi_512;
1599 else if (VecWidth == 128 && EltWidth == 8)
1600 IID = Intrinsic::x86_avx512_permvar_qi_128;
1601 else if (VecWidth == 256 && EltWidth == 8)
1602 IID = Intrinsic::x86_avx512_permvar_qi_256;
1603 else if (VecWidth == 512 && EltWidth == 8)
1604 IID = Intrinsic::x86_avx512_permvar_qi_512;
1606 llvm_unreachable("Unexpected intrinsic");
1607 } else if (Name.startswith("dbpsadbw.")) {
1608 if (VecWidth == 128)
1609 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1610 else if (VecWidth == 256)
1611 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1612 else if (VecWidth == 512)
1613 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1615 llvm_unreachable("Unexpected intrinsic");
1616 } else if (Name.startswith("pmultishift.qb.")) {
1617 if (VecWidth == 128)
1618 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1619 else if (VecWidth == 256)
1620 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1621 else if (VecWidth == 512)
1622 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1624 llvm_unreachable("Unexpected intrinsic");
1625 } else if (Name.startswith("conflict.")) {
1626 if (Name[9] == 'd' && VecWidth == 128)
1627 IID = Intrinsic::x86_avx512_conflict_d_128;
1628 else if (Name[9] == 'd' && VecWidth == 256)
1629 IID = Intrinsic::x86_avx512_conflict_d_256;
1630 else if (Name[9] == 'd' && VecWidth == 512)
1631 IID = Intrinsic::x86_avx512_conflict_d_512;
1632 else if (Name[9] == 'q' && VecWidth == 128)
1633 IID = Intrinsic::x86_avx512_conflict_q_128;
1634 else if (Name[9] == 'q' && VecWidth == 256)
1635 IID = Intrinsic::x86_avx512_conflict_q_256;
1636 else if (Name[9] == 'q' && VecWidth == 512)
1637 IID = Intrinsic::x86_avx512_conflict_q_512;
1639 llvm_unreachable("Unexpected intrinsic");
1640 } else if (Name.startswith("pavg.")) {
1641 if (Name[5] == 'b' && VecWidth == 128)
1642 IID = Intrinsic::x86_sse2_pavg_b;
1643 else if (Name[5] == 'b' && VecWidth == 256)
1644 IID = Intrinsic::x86_avx2_pavg_b;
1645 else if (Name[5] == 'b' && VecWidth == 512)
1646 IID = Intrinsic::x86_avx512_pavg_b_512;
1647 else if (Name[5] == 'w' && VecWidth == 128)
1648 IID = Intrinsic::x86_sse2_pavg_w;
1649 else if (Name[5] == 'w' && VecWidth == 256)
1650 IID = Intrinsic::x86_avx2_pavg_w;
1651 else if (Name[5] == 'w' && VecWidth == 512)
1652 IID = Intrinsic::x86_avx512_pavg_w_512;
1654 llvm_unreachable("Unexpected intrinsic");
1658 SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1659 CI.arg_operands().end());
1662 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1664 unsigned NumArgs = CI.getNumArgOperands();
1665 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1666 CI.getArgOperand(NumArgs - 2));
1670 /// Upgrade comment in call to inline asm that represents an objc retain release
1672 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1674 if (AsmStr->find("mov\tfp") == 0 &&
1675 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1676 (Pos = AsmStr->find("# marker")) != std::string::npos) {
1677 AsmStr->replace(Pos, 1, ";");
1682 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1683 /// provided to seamlessly integrate with existing context.
1684 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1685 Function *F = CI->getCalledFunction();
1686 LLVMContext &C = CI->getContext();
1687 IRBuilder<> Builder(C);
1688 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1690 assert(F && "Intrinsic call is not direct?");
1693 // Get the Function's name.
1694 StringRef Name = F->getName();
1696 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1697 Name = Name.substr(5);
1699 bool IsX86 = Name.startswith("x86.");
1701 Name = Name.substr(4);
1702 bool IsNVVM = Name.startswith("nvvm.");
1704 Name = Name.substr(5);
1706 if (IsX86 && Name.startswith("sse4a.movnt.")) {
1707 Module *M = F->getParent();
1708 SmallVector<Metadata *, 1> Elts;
1710 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1711 MDNode *Node = MDNode::get(C, Elts);
1713 Value *Arg0 = CI->getArgOperand(0);
1714 Value *Arg1 = CI->getArgOperand(1);
1716 // Nontemporal (unaligned) store of the 0'th element of the float/double
1718 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1719 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1720 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1722 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1724 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1725 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1727 // Remove intrinsic.
1728 CI->eraseFromParent();
1732 if (IsX86 && (Name.startswith("avx.movnt.") ||
1733 Name.startswith("avx512.storent."))) {
1734 Module *M = F->getParent();
1735 SmallVector<Metadata *, 1> Elts;
1737 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1738 MDNode *Node = MDNode::get(C, Elts);
1740 Value *Arg0 = CI->getArgOperand(0);
1741 Value *Arg1 = CI->getArgOperand(1);
1743 // Convert the type of the pointer to a pointer to the stored type.
1744 Value *BC = Builder.CreateBitCast(Arg0,
1745 PointerType::getUnqual(Arg1->getType()),
1747 StoreInst *SI = Builder.CreateAlignedStore(
1749 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1750 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1752 // Remove intrinsic.
1753 CI->eraseFromParent();
1757 if (IsX86 && Name == "sse2.storel.dq") {
1758 Value *Arg0 = CI->getArgOperand(0);
1759 Value *Arg1 = CI->getArgOperand(1);
1761 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1762 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1763 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1764 Value *BC = Builder.CreateBitCast(Arg0,
1765 PointerType::getUnqual(Elt->getType()),
1767 Builder.CreateAlignedStore(Elt, BC, Align(1));
1769 // Remove intrinsic.
1770 CI->eraseFromParent();
1774 if (IsX86 && (Name.startswith("sse.storeu.") ||
1775 Name.startswith("sse2.storeu.") ||
1776 Name.startswith("avx.storeu."))) {
1777 Value *Arg0 = CI->getArgOperand(0);
1778 Value *Arg1 = CI->getArgOperand(1);
1780 Arg0 = Builder.CreateBitCast(Arg0,
1781 PointerType::getUnqual(Arg1->getType()),
1783 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1785 // Remove intrinsic.
1786 CI->eraseFromParent();
1790 if (IsX86 && Name == "avx512.mask.store.ss") {
1791 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1792 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1795 // Remove intrinsic.
1796 CI->eraseFromParent();
1800 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1801 // "avx512.mask.storeu." or "avx512.mask.store."
1802 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1803 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1804 CI->getArgOperand(2), Aligned);
1806 // Remove intrinsic.
1807 CI->eraseFromParent();
1812 // Upgrade packed integer vector compare intrinsics to compare instructions.
1813 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1814 Name.startswith("avx2.pcmp"))) {
1815 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1816 bool CmpEq = Name[9] == 'e';
1817 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1818 CI->getArgOperand(0), CI->getArgOperand(1));
1819 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1820 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1821 Type *ExtTy = Type::getInt32Ty(C);
1822 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1823 ExtTy = Type::getInt64Ty(C);
1824 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1825 ExtTy->getPrimitiveSizeInBits();
1826 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1827 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1828 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1829 Name == "sse2.sqrt.sd")) {
1830 Value *Vec = CI->getArgOperand(0);
1831 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1832 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1833 Intrinsic::sqrt, Elt0->getType());
1834 Elt0 = Builder.CreateCall(Intr, Elt0);
1835 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1836 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1837 Name.startswith("sse2.sqrt.p") ||
1838 Name.startswith("sse.sqrt.p"))) {
1839 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1842 {CI->getArgOperand(0)});
1843 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1844 if (CI->getNumArgOperands() == 4 &&
1845 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1846 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1847 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1848 : Intrinsic::x86_avx512_sqrt_pd_512;
1850 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1851 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1854 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1857 {CI->getArgOperand(0)});
1859 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1860 CI->getArgOperand(1));
1861 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1862 Name.startswith("avx512.ptestnm"))) {
1863 Value *Op0 = CI->getArgOperand(0);
1864 Value *Op1 = CI->getArgOperand(1);
1865 Value *Mask = CI->getArgOperand(2);
1866 Rep = Builder.CreateAnd(Op0, Op1);
1867 llvm::Type *Ty = Op0->getType();
1868 Value *Zero = llvm::Constant::getNullValue(Ty);
1869 ICmpInst::Predicate Pred =
1870 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1871 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1872 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1873 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1875 cast<VectorType>(CI->getArgOperand(1)->getType())->getNumElements();
1876 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1877 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1878 CI->getArgOperand(1));
1879 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1880 unsigned NumElts = CI->getType()->getScalarSizeInBits();
1881 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1882 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1884 for (unsigned i = 0; i != NumElts; ++i)
1887 // First extract half of each vector. This gives better codegen than
1888 // doing it in a single shuffle.
1889 LHS = Builder.CreateShuffleVector(LHS, LHS,
1890 makeArrayRef(Indices, NumElts / 2));
1891 RHS = Builder.CreateShuffleVector(RHS, RHS,
1892 makeArrayRef(Indices, NumElts / 2));
1893 // Concat the vectors.
1894 // NOTE: Operands have to be swapped to match intrinsic definition.
1895 Rep = Builder.CreateShuffleVector(RHS, LHS,
1896 makeArrayRef(Indices, NumElts));
1897 Rep = Builder.CreateBitCast(Rep, CI->getType());
1898 } else if (IsX86 && Name == "avx512.kand.w") {
1899 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1900 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1901 Rep = Builder.CreateAnd(LHS, RHS);
1902 Rep = Builder.CreateBitCast(Rep, CI->getType());
1903 } else if (IsX86 && Name == "avx512.kandn.w") {
1904 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1905 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1906 LHS = Builder.CreateNot(LHS);
1907 Rep = Builder.CreateAnd(LHS, RHS);
1908 Rep = Builder.CreateBitCast(Rep, CI->getType());
1909 } else if (IsX86 && Name == "avx512.kor.w") {
1910 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1911 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1912 Rep = Builder.CreateOr(LHS, RHS);
1913 Rep = Builder.CreateBitCast(Rep, CI->getType());
1914 } else if (IsX86 && Name == "avx512.kxor.w") {
1915 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1916 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1917 Rep = Builder.CreateXor(LHS, RHS);
1918 Rep = Builder.CreateBitCast(Rep, CI->getType());
1919 } else if (IsX86 && Name == "avx512.kxnor.w") {
1920 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1921 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1922 LHS = Builder.CreateNot(LHS);
1923 Rep = Builder.CreateXor(LHS, RHS);
1924 Rep = Builder.CreateBitCast(Rep, CI->getType());
1925 } else if (IsX86 && Name == "avx512.knot.w") {
1926 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1927 Rep = Builder.CreateNot(Rep);
1928 Rep = Builder.CreateBitCast(Rep, CI->getType());
1930 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1931 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1932 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1933 Rep = Builder.CreateOr(LHS, RHS);
1934 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1936 if (Name[14] == 'c')
1937 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1939 C = ConstantInt::getNullValue(Builder.getInt16Ty());
1940 Rep = Builder.CreateICmpEQ(Rep, C);
1941 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1942 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1943 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1944 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1945 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1946 Type *I32Ty = Type::getInt32Ty(C);
1947 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1948 ConstantInt::get(I32Ty, 0));
1949 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1950 ConstantInt::get(I32Ty, 0));
1952 if (Name.contains(".add."))
1953 EltOp = Builder.CreateFAdd(Elt0, Elt1);
1954 else if (Name.contains(".sub."))
1955 EltOp = Builder.CreateFSub(Elt0, Elt1);
1956 else if (Name.contains(".mul."))
1957 EltOp = Builder.CreateFMul(Elt0, Elt1);
1959 EltOp = Builder.CreateFDiv(Elt0, Elt1);
1960 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1961 ConstantInt::get(I32Ty, 0));
1962 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1963 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1964 bool CmpEq = Name[16] == 'e';
1965 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1966 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1967 Type *OpTy = CI->getArgOperand(0)->getType();
1968 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1971 default: llvm_unreachable("Unexpected intrinsic");
1972 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1973 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1974 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1977 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1978 { CI->getOperand(0), CI->getArgOperand(1) });
1979 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1980 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1981 Type *OpTy = CI->getArgOperand(0)->getType();
1982 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1983 unsigned EltWidth = OpTy->getScalarSizeInBits();
1985 if (VecWidth == 128 && EltWidth == 32)
1986 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1987 else if (VecWidth == 256 && EltWidth == 32)
1988 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1989 else if (VecWidth == 512 && EltWidth == 32)
1990 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1991 else if (VecWidth == 128 && EltWidth == 64)
1992 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1993 else if (VecWidth == 256 && EltWidth == 64)
1994 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1995 else if (VecWidth == 512 && EltWidth == 64)
1996 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1998 llvm_unreachable("Unexpected intrinsic");
2000 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2001 { CI->getOperand(0), CI->getArgOperand(1) });
2002 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2003 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
2004 Type *OpTy = CI->getArgOperand(0)->getType();
2005 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2006 unsigned EltWidth = OpTy->getScalarSizeInBits();
2008 if (VecWidth == 128 && EltWidth == 32)
2009 IID = Intrinsic::x86_avx512_cmp_ps_128;
2010 else if (VecWidth == 256 && EltWidth == 32)
2011 IID = Intrinsic::x86_avx512_cmp_ps_256;
2012 else if (VecWidth == 512 && EltWidth == 32)
2013 IID = Intrinsic::x86_avx512_cmp_ps_512;
2014 else if (VecWidth == 128 && EltWidth == 64)
2015 IID = Intrinsic::x86_avx512_cmp_pd_128;
2016 else if (VecWidth == 256 && EltWidth == 64)
2017 IID = Intrinsic::x86_avx512_cmp_pd_256;
2018 else if (VecWidth == 512 && EltWidth == 64)
2019 IID = Intrinsic::x86_avx512_cmp_pd_512;
2021 llvm_unreachable("Unexpected intrinsic");
2023 SmallVector<Value *, 4> Args;
2024 Args.push_back(CI->getArgOperand(0));
2025 Args.push_back(CI->getArgOperand(1));
2026 Args.push_back(CI->getArgOperand(2));
2027 if (CI->getNumArgOperands() == 5)
2028 Args.push_back(CI->getArgOperand(4));
2030 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2032 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
2033 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
2035 // Integer compare intrinsics.
2036 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2037 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2038 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2039 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2040 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2041 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2042 Name.startswith("avx512.cvtw2mask.") ||
2043 Name.startswith("avx512.cvtd2mask.") ||
2044 Name.startswith("avx512.cvtq2mask."))) {
2045 Value *Op = CI->getArgOperand(0);
2046 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2047 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2048 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2049 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2050 Name == "ssse3.pabs.w.128" ||
2051 Name == "ssse3.pabs.d.128" ||
2052 Name.startswith("avx2.pabs") ||
2053 Name.startswith("avx512.mask.pabs"))) {
2054 Rep = upgradeAbs(Builder, *CI);
2055 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2056 Name == "sse2.pmaxs.w" ||
2057 Name == "sse41.pmaxsd" ||
2058 Name.startswith("avx2.pmaxs") ||
2059 Name.startswith("avx512.mask.pmaxs"))) {
2060 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2061 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2062 Name == "sse41.pmaxuw" ||
2063 Name == "sse41.pmaxud" ||
2064 Name.startswith("avx2.pmaxu") ||
2065 Name.startswith("avx512.mask.pmaxu"))) {
2066 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2067 } else if (IsX86 && (Name == "sse41.pminsb" ||
2068 Name == "sse2.pmins.w" ||
2069 Name == "sse41.pminsd" ||
2070 Name.startswith("avx2.pmins") ||
2071 Name.startswith("avx512.mask.pmins"))) {
2072 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2073 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2074 Name == "sse41.pminuw" ||
2075 Name == "sse41.pminud" ||
2076 Name.startswith("avx2.pminu") ||
2077 Name.startswith("avx512.mask.pminu"))) {
2078 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2079 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2080 Name == "avx2.pmulu.dq" ||
2081 Name == "avx512.pmulu.dq.512" ||
2082 Name.startswith("avx512.mask.pmulu.dq."))) {
2083 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2084 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2085 Name == "avx2.pmul.dq" ||
2086 Name == "avx512.pmul.dq.512" ||
2087 Name.startswith("avx512.mask.pmul.dq."))) {
2088 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2089 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2090 Name == "sse2.cvtsi2sd" ||
2091 Name == "sse.cvtsi642ss" ||
2092 Name == "sse2.cvtsi642sd")) {
2093 Rep = Builder.CreateSIToFP(
2094 CI->getArgOperand(1),
2095 cast<VectorType>(CI->getType())->getElementType());
2096 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2097 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2098 Rep = Builder.CreateUIToFP(
2099 CI->getArgOperand(1),
2100 cast<VectorType>(CI->getType())->getElementType());
2101 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2102 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2103 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2104 Rep = Builder.CreateFPExt(
2105 Rep, cast<VectorType>(CI->getType())->getElementType());
2106 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2107 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2108 Name == "sse2.cvtdq2ps" ||
2109 Name == "avx.cvtdq2.pd.256" ||
2110 Name == "avx.cvtdq2.ps.256" ||
2111 Name.startswith("avx512.mask.cvtdq2pd.") ||
2112 Name.startswith("avx512.mask.cvtudq2pd.") ||
2113 Name.startswith("avx512.mask.cvtdq2ps.") ||
2114 Name.startswith("avx512.mask.cvtudq2ps.") ||
2115 Name.startswith("avx512.mask.cvtqq2pd.") ||
2116 Name.startswith("avx512.mask.cvtuqq2pd.") ||
2117 Name == "avx512.mask.cvtqq2ps.256" ||
2118 Name == "avx512.mask.cvtqq2ps.512" ||
2119 Name == "avx512.mask.cvtuqq2ps.256" ||
2120 Name == "avx512.mask.cvtuqq2ps.512" ||
2121 Name == "sse2.cvtps2pd" ||
2122 Name == "avx.cvt.ps2.pd.256" ||
2123 Name == "avx512.mask.cvtps2pd.128" ||
2124 Name == "avx512.mask.cvtps2pd.256")) {
2125 auto *DstTy = cast<VectorType>(CI->getType());
2126 Rep = CI->getArgOperand(0);
2127 auto *SrcTy = cast<VectorType>(Rep->getType());
2129 unsigned NumDstElts = DstTy->getNumElements();
2130 if (NumDstElts < SrcTy->getNumElements()) {
2131 assert(NumDstElts == 2 && "Unexpected vector size");
2132 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2135 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2136 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2138 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2139 else if (CI->getNumArgOperands() == 4 &&
2140 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2141 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2142 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2143 : Intrinsic::x86_avx512_sitofp_round;
2144 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2146 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2148 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2149 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2152 if (CI->getNumArgOperands() >= 3)
2153 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2154 CI->getArgOperand(1));
2155 } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2156 Name.startswith("vcvtph2ps."))) {
2157 auto *DstTy = cast<VectorType>(CI->getType());
2158 Rep = CI->getArgOperand(0);
2159 auto *SrcTy = cast<VectorType>(Rep->getType());
2160 unsigned NumDstElts = DstTy->getNumElements();
2161 if (NumDstElts != SrcTy->getNumElements()) {
2162 assert(NumDstElts == 4 && "Unexpected vector size");
2163 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2165 Rep = Builder.CreateBitCast(
2166 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2167 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2168 if (CI->getNumArgOperands() >= 3)
2169 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2170 CI->getArgOperand(1));
2171 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2172 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2173 CI->getArgOperand(1), CI->getArgOperand(2),
2175 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2176 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2177 CI->getArgOperand(1),CI->getArgOperand(2),
2179 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2180 auto *ResultTy = cast<VectorType>(CI->getType());
2181 Type *PtrTy = ResultTy->getElementType();
2183 // Cast the pointer to element type.
2184 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2185 llvm::PointerType::getUnqual(PtrTy));
2187 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2188 ResultTy->getNumElements());
2190 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2191 Intrinsic::masked_expandload,
2193 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2194 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2195 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2196 Type *PtrTy = ResultTy->getElementType();
2198 // Cast the pointer to element type.
2199 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2200 llvm::PointerType::getUnqual(PtrTy));
2202 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2203 ResultTy->getNumElements());
2205 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2206 Intrinsic::masked_compressstore,
2208 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2209 } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2210 Name.startswith("avx512.mask.expand."))) {
2211 auto *ResultTy = cast<VectorType>(CI->getType());
2213 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2214 ResultTy->getNumElements());
2216 bool IsCompress = Name[12] == 'c';
2217 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2218 : Intrinsic::x86_avx512_mask_expand;
2219 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2220 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2222 } else if (IsX86 && Name.startswith("xop.vpcom")) {
2224 if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2225 Name.endswith("uq"))
2227 else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2231 llvm_unreachable("Unknown suffix");
2234 if (CI->getNumArgOperands() == 3) {
2235 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2237 Name = Name.substr(9); // strip off "xop.vpcom"
2238 if (Name.startswith("lt"))
2240 else if (Name.startswith("le"))
2242 else if (Name.startswith("gt"))
2244 else if (Name.startswith("ge"))
2246 else if (Name.startswith("eq"))
2248 else if (Name.startswith("ne"))
2250 else if (Name.startswith("false"))
2252 else if (Name.startswith("true"))
2255 llvm_unreachable("Unknown condition");
2258 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2259 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2260 Value *Sel = CI->getArgOperand(2);
2261 Value *NotSel = Builder.CreateNot(Sel);
2262 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2263 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2264 Rep = Builder.CreateOr(Sel0, Sel1);
2265 } else if (IsX86 && (Name.startswith("xop.vprot") ||
2266 Name.startswith("avx512.prol") ||
2267 Name.startswith("avx512.mask.prol"))) {
2268 Rep = upgradeX86Rotate(Builder, *CI, false);
2269 } else if (IsX86 && (Name.startswith("avx512.pror") ||
2270 Name.startswith("avx512.mask.pror"))) {
2271 Rep = upgradeX86Rotate(Builder, *CI, true);
2272 } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2273 Name.startswith("avx512.mask.vpshld") ||
2274 Name.startswith("avx512.maskz.vpshld"))) {
2275 bool ZeroMask = Name[11] == 'z';
2276 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2277 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2278 Name.startswith("avx512.mask.vpshrd") ||
2279 Name.startswith("avx512.maskz.vpshrd"))) {
2280 bool ZeroMask = Name[11] == 'z';
2281 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2282 } else if (IsX86 && Name == "sse42.crc32.64.8") {
2283 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2284 Intrinsic::x86_sse42_crc32_32_8);
2285 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2286 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2287 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2288 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2289 Name.startswith("avx512.vbroadcast.s"))) {
2290 // Replace broadcasts with a series of insertelements.
2291 auto *VecTy = cast<VectorType>(CI->getType());
2292 Type *EltTy = VecTy->getElementType();
2293 unsigned EltNum = VecTy->getNumElements();
2294 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2295 EltTy->getPointerTo());
2296 Value *Load = Builder.CreateLoad(EltTy, Cast);
2297 Type *I32Ty = Type::getInt32Ty(C);
2298 Rep = UndefValue::get(VecTy);
2299 for (unsigned I = 0; I < EltNum; ++I)
2300 Rep = Builder.CreateInsertElement(Rep, Load,
2301 ConstantInt::get(I32Ty, I));
2302 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2303 Name.startswith("sse41.pmovzx") ||
2304 Name.startswith("avx2.pmovsx") ||
2305 Name.startswith("avx2.pmovzx") ||
2306 Name.startswith("avx512.mask.pmovsx") ||
2307 Name.startswith("avx512.mask.pmovzx"))) {
2308 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2309 VectorType *DstTy = cast<VectorType>(CI->getType());
2310 unsigned NumDstElts = DstTy->getNumElements();
2312 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2313 SmallVector<int, 8> ShuffleMask(NumDstElts);
2314 for (unsigned i = 0; i != NumDstElts; ++i)
2317 Value *SV = Builder.CreateShuffleVector(
2318 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2320 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2321 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2322 : Builder.CreateZExt(SV, DstTy);
2323 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2324 if (CI->getNumArgOperands() == 3)
2325 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2326 CI->getArgOperand(1));
2327 } else if (Name == "avx512.mask.pmov.qd.256" ||
2328 Name == "avx512.mask.pmov.qd.512" ||
2329 Name == "avx512.mask.pmov.wb.256" ||
2330 Name == "avx512.mask.pmov.wb.512") {
2331 Type *Ty = CI->getArgOperand(1)->getType();
2332 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2333 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2334 CI->getArgOperand(1));
2335 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2336 Name == "avx2.vbroadcasti128")) {
2337 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2338 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2339 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2340 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2341 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2342 PointerType::getUnqual(VT));
2343 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2344 if (NumSrcElts == 2)
2345 Rep = Builder.CreateShuffleVector(
2346 Load, UndefValue::get(Load->getType()), ArrayRef<int>{0, 1, 0, 1});
2349 Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2350 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2351 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2352 Name.startswith("avx512.mask.shuf.f"))) {
2353 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2354 Type *VT = CI->getType();
2355 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2356 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2357 unsigned ControlBitsMask = NumLanes - 1;
2358 unsigned NumControlBits = NumLanes / 2;
2359 SmallVector<int, 8> ShuffleMask(0);
2361 for (unsigned l = 0; l != NumLanes; ++l) {
2362 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2363 // We actually need the other source.
2364 if (l >= NumLanes / 2)
2365 LaneMask += NumLanes;
2366 for (unsigned i = 0; i != NumElementsInLane; ++i)
2367 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2369 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2370 CI->getArgOperand(1), ShuffleMask);
2371 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2372 CI->getArgOperand(3));
2373 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2374 Name.startswith("avx512.mask.broadcasti"))) {
2375 unsigned NumSrcElts =
2376 cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
2377 unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();
2379 SmallVector<int, 8> ShuffleMask(NumDstElts);
2380 for (unsigned i = 0; i != NumDstElts; ++i)
2381 ShuffleMask[i] = i % NumSrcElts;
2383 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2384 CI->getArgOperand(0),
2386 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2387 CI->getArgOperand(1));
2388 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2389 Name.startswith("avx2.vbroadcast") ||
2390 Name.startswith("avx512.pbroadcast") ||
2391 Name.startswith("avx512.mask.broadcast.s"))) {
2392 // Replace vp?broadcasts with a vector shuffle.
2393 Value *Op = CI->getArgOperand(0);
2394 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2395 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2396 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2397 Constant::getNullValue(MaskTy));
2399 if (CI->getNumArgOperands() == 3)
2400 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2401 CI->getArgOperand(1));
2402 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2403 Name.startswith("sse2.psubs.") ||
2404 Name.startswith("avx2.padds.") ||
2405 Name.startswith("avx2.psubs.") ||
2406 Name.startswith("avx512.padds.") ||
2407 Name.startswith("avx512.psubs.") ||
2408 Name.startswith("avx512.mask.padds.") ||
2409 Name.startswith("avx512.mask.psubs."))) {
2410 bool IsAdd = Name.contains(".padds");
2411 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2412 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2413 Name.startswith("sse2.psubus.") ||
2414 Name.startswith("avx2.paddus.") ||
2415 Name.startswith("avx2.psubus.") ||
2416 Name.startswith("avx512.mask.paddus.") ||
2417 Name.startswith("avx512.mask.psubus."))) {
2418 bool IsAdd = Name.contains(".paddus");
2419 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2420 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2421 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2422 CI->getArgOperand(1),
2423 CI->getArgOperand(2),
2424 CI->getArgOperand(3),
2425 CI->getArgOperand(4),
2427 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2428 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2429 CI->getArgOperand(1),
2430 CI->getArgOperand(2),
2431 CI->getArgOperand(3),
2432 CI->getArgOperand(4),
2434 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2435 Name == "avx2.psll.dq")) {
2436 // 128/256-bit shift left specified in bits.
2437 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2438 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2439 Shift / 8); // Shift is in bits.
2440 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2441 Name == "avx2.psrl.dq")) {
2442 // 128/256-bit shift right specified in bits.
2443 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2444 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2445 Shift / 8); // Shift is in bits.
2446 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2447 Name == "avx2.psll.dq.bs" ||
2448 Name == "avx512.psll.dq.512")) {
2449 // 128/256/512-bit shift left specified in bytes.
2450 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2451 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2452 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2453 Name == "avx2.psrl.dq.bs" ||
2454 Name == "avx512.psrl.dq.512")) {
2455 // 128/256/512-bit shift right specified in bytes.
2456 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2457 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2458 } else if (IsX86 && (Name == "sse41.pblendw" ||
2459 Name.startswith("sse41.blendp") ||
2460 Name.startswith("avx.blend.p") ||
2461 Name == "avx2.pblendw" ||
2462 Name.startswith("avx2.pblendd."))) {
2463 Value *Op0 = CI->getArgOperand(0);
2464 Value *Op1 = CI->getArgOperand(1);
2465 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2466 VectorType *VecTy = cast<VectorType>(CI->getType());
2467 unsigned NumElts = VecTy->getNumElements();
2469 SmallVector<int, 16> Idxs(NumElts);
2470 for (unsigned i = 0; i != NumElts; ++i)
2471 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2473 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2474 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2475 Name == "avx2.vinserti128" ||
2476 Name.startswith("avx512.mask.insert"))) {
2477 Value *Op0 = CI->getArgOperand(0);
2478 Value *Op1 = CI->getArgOperand(1);
2479 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2480 unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
2481 unsigned SrcNumElts = cast<VectorType>(Op1->getType())->getNumElements();
2482 unsigned Scale = DstNumElts / SrcNumElts;
2484 // Mask off the high bits of the immediate value; hardware ignores those.
2487 // Extend the second operand into a vector the size of the destination.
2488 Value *UndefV = UndefValue::get(Op1->getType());
2489 SmallVector<int, 8> Idxs(DstNumElts);
2490 for (unsigned i = 0; i != SrcNumElts; ++i)
2492 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2493 Idxs[i] = SrcNumElts;
2494 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2496 // Insert the second operand into the first operand.
2498 // Note that there is no guarantee that instruction lowering will actually
2499 // produce a vinsertf128 instruction for the created shuffles. In
2500 // particular, the 0 immediate case involves no lane changes, so it can
2501 // be handled as a blend.
2503 // Example of shuffle mask for 32-bit elements:
2504 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2505 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2507 // First fill with identify mask.
2508 for (unsigned i = 0; i != DstNumElts; ++i)
2510 // Then replace the elements where we need to insert.
2511 for (unsigned i = 0; i != SrcNumElts; ++i)
2512 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2513 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2515 // If the intrinsic has a mask operand, handle that.
2516 if (CI->getNumArgOperands() == 5)
2517 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2518 CI->getArgOperand(3));
2519 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2520 Name == "avx2.vextracti128" ||
2521 Name.startswith("avx512.mask.vextract"))) {
2522 Value *Op0 = CI->getArgOperand(0);
2523 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2524 unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
2525 unsigned SrcNumElts = cast<VectorType>(Op0->getType())->getNumElements();
2526 unsigned Scale = SrcNumElts / DstNumElts;
2528 // Mask off the high bits of the immediate value; hardware ignores those.
2531 // Get indexes for the subvector of the input vector.
2532 SmallVector<int, 8> Idxs(DstNumElts);
2533 for (unsigned i = 0; i != DstNumElts; ++i) {
2534 Idxs[i] = i + (Imm * DstNumElts);
2536 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2538 // If the intrinsic has a mask operand, handle that.
2539 if (CI->getNumArgOperands() == 4)
2540 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2541 CI->getArgOperand(2));
2542 } else if (!IsX86 && Name == "stackprotectorcheck") {
2544 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2545 Name.startswith("avx512.mask.perm.di."))) {
2546 Value *Op0 = CI->getArgOperand(0);
2547 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2548 VectorType *VecTy = cast<VectorType>(CI->getType());
2549 unsigned NumElts = VecTy->getNumElements();
2551 SmallVector<int, 8> Idxs(NumElts);
2552 for (unsigned i = 0; i != NumElts; ++i)
2553 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2555 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2557 if (CI->getNumArgOperands() == 4)
2558 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2559 CI->getArgOperand(2));
2560 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2561 Name == "avx2.vperm2i128")) {
2562 // The immediate permute control byte looks like this:
2563 // [1:0] - select 128 bits from sources for low half of destination
2565 // [3] - zero low half of destination
2566 // [5:4] - select 128 bits from sources for high half of destination
2568 // [7] - zero high half of destination
2570 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2572 unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2573 unsigned HalfSize = NumElts / 2;
2574 SmallVector<int, 8> ShuffleMask(NumElts);
2576 // Determine which operand(s) are actually in use for this instruction.
2577 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2578 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2580 // If needed, replace operands based on zero mask.
2581 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2582 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2584 // Permute low half of result.
2585 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2586 for (unsigned i = 0; i < HalfSize; ++i)
2587 ShuffleMask[i] = StartIndex + i;
2589 // Permute high half of result.
2590 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2591 for (unsigned i = 0; i < HalfSize; ++i)
2592 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2594 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2596 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2597 Name == "sse2.pshuf.d" ||
2598 Name.startswith("avx512.mask.vpermil.p") ||
2599 Name.startswith("avx512.mask.pshuf.d."))) {
2600 Value *Op0 = CI->getArgOperand(0);
2601 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2602 VectorType *VecTy = cast<VectorType>(CI->getType());
2603 unsigned NumElts = VecTy->getNumElements();
2604 // Calculate the size of each index in the immediate.
2605 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2606 unsigned IdxMask = ((1 << IdxSize) - 1);
2608 SmallVector<int, 8> Idxs(NumElts);
2609 // Lookup the bits for this element, wrapping around the immediate every
2610 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2611 // to offset by the first index of each group.
2612 for (unsigned i = 0; i != NumElts; ++i)
2613 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2615 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2617 if (CI->getNumArgOperands() == 4)
2618 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2619 CI->getArgOperand(2));
2620 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2621 Name.startswith("avx512.mask.pshufl.w."))) {
2622 Value *Op0 = CI->getArgOperand(0);
2623 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2624 unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2626 SmallVector<int, 16> Idxs(NumElts);
2627 for (unsigned l = 0; l != NumElts; l += 8) {
2628 for (unsigned i = 0; i != 4; ++i)
2629 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2630 for (unsigned i = 4; i != 8; ++i)
2631 Idxs[i + l] = i + l;
2634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2636 if (CI->getNumArgOperands() == 4)
2637 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2638 CI->getArgOperand(2));
2639 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2640 Name.startswith("avx512.mask.pshufh.w."))) {
2641 Value *Op0 = CI->getArgOperand(0);
2642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2643 unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2645 SmallVector<int, 16> Idxs(NumElts);
2646 for (unsigned l = 0; l != NumElts; l += 8) {
2647 for (unsigned i = 0; i != 4; ++i)
2648 Idxs[i + l] = i + l;
2649 for (unsigned i = 0; i != 4; ++i)
2650 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2655 if (CI->getNumArgOperands() == 4)
2656 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2657 CI->getArgOperand(2));
2658 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2659 Value *Op0 = CI->getArgOperand(0);
2660 Value *Op1 = CI->getArgOperand(1);
2661 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2662 unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2664 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2665 unsigned HalfLaneElts = NumLaneElts / 2;
2667 SmallVector<int, 16> Idxs(NumElts);
2668 for (unsigned i = 0; i != NumElts; ++i) {
2669 // Base index is the starting element of the lane.
2670 Idxs[i] = i - (i % NumLaneElts);
2671 // If we are half way through the lane switch to the other source.
2672 if ((i % NumLaneElts) >= HalfLaneElts)
2674 // Now select the specific element. By adding HalfLaneElts bits from
2675 // the immediate. Wrapping around the immediate every 8-bits.
2676 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2679 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2681 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2682 CI->getArgOperand(3));
2683 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2684 Name.startswith("avx512.mask.movshdup") ||
2685 Name.startswith("avx512.mask.movsldup"))) {
2686 Value *Op0 = CI->getArgOperand(0);
2687 unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2688 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2690 unsigned Offset = 0;
2691 if (Name.startswith("avx512.mask.movshdup."))
2694 SmallVector<int, 16> Idxs(NumElts);
2695 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2696 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2697 Idxs[i + l + 0] = i + l + Offset;
2698 Idxs[i + l + 1] = i + l + Offset;
2701 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2703 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2704 CI->getArgOperand(1));
2705 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2706 Name.startswith("avx512.mask.unpckl."))) {
2707 Value *Op0 = CI->getArgOperand(0);
2708 Value *Op1 = CI->getArgOperand(1);
2709 int NumElts = cast<VectorType>(CI->getType())->getNumElements();
2710 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2712 SmallVector<int, 64> Idxs(NumElts);
2713 for (int l = 0; l != NumElts; l += NumLaneElts)
2714 for (int i = 0; i != NumLaneElts; ++i)
2715 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2717 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2719 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2720 CI->getArgOperand(2));
2721 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2722 Name.startswith("avx512.mask.unpckh."))) {
2723 Value *Op0 = CI->getArgOperand(0);
2724 Value *Op1 = CI->getArgOperand(1);
2725 int NumElts = cast<VectorType>(CI->getType())->getNumElements();
2726 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2728 SmallVector<int, 64> Idxs(NumElts);
2729 for (int l = 0; l != NumElts; l += NumLaneElts)
2730 for (int i = 0; i != NumLaneElts; ++i)
2731 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2733 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2735 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2736 CI->getArgOperand(2));
2737 } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2738 Name.startswith("avx512.mask.pand."))) {
2739 VectorType *FTy = cast<VectorType>(CI->getType());
2740 VectorType *ITy = VectorType::getInteger(FTy);
2741 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2742 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2743 Rep = Builder.CreateBitCast(Rep, FTy);
2744 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2745 CI->getArgOperand(2));
2746 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2747 Name.startswith("avx512.mask.pandn."))) {
2748 VectorType *FTy = cast<VectorType>(CI->getType());
2749 VectorType *ITy = VectorType::getInteger(FTy);
2750 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2751 Rep = Builder.CreateAnd(Rep,
2752 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2753 Rep = Builder.CreateBitCast(Rep, FTy);
2754 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2755 CI->getArgOperand(2));
2756 } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2757 Name.startswith("avx512.mask.por."))) {
2758 VectorType *FTy = cast<VectorType>(CI->getType());
2759 VectorType *ITy = VectorType::getInteger(FTy);
2760 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2761 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2762 Rep = Builder.CreateBitCast(Rep, FTy);
2763 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2764 CI->getArgOperand(2));
2765 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2766 Name.startswith("avx512.mask.pxor."))) {
2767 VectorType *FTy = cast<VectorType>(CI->getType());
2768 VectorType *ITy = VectorType::getInteger(FTy);
2769 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2770 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2771 Rep = Builder.CreateBitCast(Rep, FTy);
2772 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2773 CI->getArgOperand(2));
2774 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2775 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2776 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2777 CI->getArgOperand(2));
2778 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2779 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2780 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2781 CI->getArgOperand(2));
2782 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2783 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2784 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2785 CI->getArgOperand(2));
2786 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2787 if (Name.endswith(".512")) {
2789 if (Name[17] == 's')
2790 IID = Intrinsic::x86_avx512_add_ps_512;
2792 IID = Intrinsic::x86_avx512_add_pd_512;
2794 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2795 { CI->getArgOperand(0), CI->getArgOperand(1),
2796 CI->getArgOperand(4) });
2798 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2800 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2801 CI->getArgOperand(2));
2802 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2803 if (Name.endswith(".512")) {
2805 if (Name[17] == 's')
2806 IID = Intrinsic::x86_avx512_div_ps_512;
2808 IID = Intrinsic::x86_avx512_div_pd_512;
2810 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2811 { CI->getArgOperand(0), CI->getArgOperand(1),
2812 CI->getArgOperand(4) });
2814 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2816 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2817 CI->getArgOperand(2));
2818 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2819 if (Name.endswith(".512")) {
2821 if (Name[17] == 's')
2822 IID = Intrinsic::x86_avx512_mul_ps_512;
2824 IID = Intrinsic::x86_avx512_mul_pd_512;
2826 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2827 { CI->getArgOperand(0), CI->getArgOperand(1),
2828 CI->getArgOperand(4) });
2830 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2832 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2833 CI->getArgOperand(2));
2834 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2835 if (Name.endswith(".512")) {
2837 if (Name[17] == 's')
2838 IID = Intrinsic::x86_avx512_sub_ps_512;
2840 IID = Intrinsic::x86_avx512_sub_pd_512;
2842 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2843 { CI->getArgOperand(0), CI->getArgOperand(1),
2844 CI->getArgOperand(4) });
2846 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2848 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2849 CI->getArgOperand(2));
2850 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2851 Name.startswith("avx512.mask.min.p")) &&
2852 Name.drop_front(18) == ".512") {
2853 bool IsDouble = Name[17] == 'd';
2854 bool IsMin = Name[13] == 'i';
2855 static const Intrinsic::ID MinMaxTbl[2][2] = {
2856 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2857 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2859 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2861 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2862 { CI->getArgOperand(0), CI->getArgOperand(1),
2863 CI->getArgOperand(4) });
2864 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2865 CI->getArgOperand(2));
2866 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2867 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2870 { CI->getArgOperand(0), Builder.getInt1(false) });
2871 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2872 CI->getArgOperand(1));
2873 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2874 bool IsImmediate = Name[16] == 'i' ||
2875 (Name.size() > 18 && Name[18] == 'i');
2876 bool IsVariable = Name[16] == 'v';
2877 char Size = Name[16] == '.' ? Name[17] :
2878 Name[17] == '.' ? Name[18] :
2879 Name[18] == '.' ? Name[19] :
2883 if (IsVariable && Name[17] != '.') {
2884 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2885 IID = Intrinsic::x86_avx2_psllv_q;
2886 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2887 IID = Intrinsic::x86_avx2_psllv_q_256;
2888 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2889 IID = Intrinsic::x86_avx2_psllv_d;
2890 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2891 IID = Intrinsic::x86_avx2_psllv_d_256;
2892 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2893 IID = Intrinsic::x86_avx512_psllv_w_128;
2894 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2895 IID = Intrinsic::x86_avx512_psllv_w_256;
2896 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2897 IID = Intrinsic::x86_avx512_psllv_w_512;
2899 llvm_unreachable("Unexpected size");
2900 } else if (Name.endswith(".128")) {
2901 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2902 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2903 : Intrinsic::x86_sse2_psll_d;
2904 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2905 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2906 : Intrinsic::x86_sse2_psll_q;
2907 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2908 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2909 : Intrinsic::x86_sse2_psll_w;
2911 llvm_unreachable("Unexpected size");
2912 } else if (Name.endswith(".256")) {
2913 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2914 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2915 : Intrinsic::x86_avx2_psll_d;
2916 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2917 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2918 : Intrinsic::x86_avx2_psll_q;
2919 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2920 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2921 : Intrinsic::x86_avx2_psll_w;
2923 llvm_unreachable("Unexpected size");
2925 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2926 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2927 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2928 Intrinsic::x86_avx512_psll_d_512;
2929 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2930 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2931 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2932 Intrinsic::x86_avx512_psll_q_512;
2933 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2934 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2935 : Intrinsic::x86_avx512_psll_w_512;
2937 llvm_unreachable("Unexpected size");
2940 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2941 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2942 bool IsImmediate = Name[16] == 'i' ||
2943 (Name.size() > 18 && Name[18] == 'i');
2944 bool IsVariable = Name[16] == 'v';
2945 char Size = Name[16] == '.' ? Name[17] :
2946 Name[17] == '.' ? Name[18] :
2947 Name[18] == '.' ? Name[19] :
2951 if (IsVariable && Name[17] != '.') {
2952 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2953 IID = Intrinsic::x86_avx2_psrlv_q;
2954 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2955 IID = Intrinsic::x86_avx2_psrlv_q_256;
2956 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2957 IID = Intrinsic::x86_avx2_psrlv_d;
2958 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2959 IID = Intrinsic::x86_avx2_psrlv_d_256;
2960 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2961 IID = Intrinsic::x86_avx512_psrlv_w_128;
2962 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2963 IID = Intrinsic::x86_avx512_psrlv_w_256;
2964 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2965 IID = Intrinsic::x86_avx512_psrlv_w_512;
2967 llvm_unreachable("Unexpected size");
2968 } else if (Name.endswith(".128")) {
2969 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2970 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2971 : Intrinsic::x86_sse2_psrl_d;
2972 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2973 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2974 : Intrinsic::x86_sse2_psrl_q;
2975 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2976 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2977 : Intrinsic::x86_sse2_psrl_w;
2979 llvm_unreachable("Unexpected size");
2980 } else if (Name.endswith(".256")) {
2981 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2982 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2983 : Intrinsic::x86_avx2_psrl_d;
2984 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2985 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2986 : Intrinsic::x86_avx2_psrl_q;
2987 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2988 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2989 : Intrinsic::x86_avx2_psrl_w;
2991 llvm_unreachable("Unexpected size");
2993 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2994 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2995 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2996 Intrinsic::x86_avx512_psrl_d_512;
2997 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2999 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3000 Intrinsic::x86_avx512_psrl_q_512;
3001 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3002 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3003 : Intrinsic::x86_avx512_psrl_w_512;
3005 llvm_unreachable("Unexpected size");
3008 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3009 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3010 bool IsImmediate = Name[16] == 'i' ||
3011 (Name.size() > 18 && Name[18] == 'i');
3012 bool IsVariable = Name[16] == 'v';
3013 char Size = Name[16] == '.' ? Name[17] :
3014 Name[17] == '.' ? Name[18] :
3015 Name[18] == '.' ? Name[19] :
3019 if (IsVariable && Name[17] != '.') {
3020 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3021 IID = Intrinsic::x86_avx2_psrav_d;
3022 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3023 IID = Intrinsic::x86_avx2_psrav_d_256;
3024 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3025 IID = Intrinsic::x86_avx512_psrav_w_128;
3026 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3027 IID = Intrinsic::x86_avx512_psrav_w_256;
3028 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3029 IID = Intrinsic::x86_avx512_psrav_w_512;
3031 llvm_unreachable("Unexpected size");
3032 } else if (Name.endswith(".128")) {
3033 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3034 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3035 : Intrinsic::x86_sse2_psra_d;
3036 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3037 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3038 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3039 Intrinsic::x86_avx512_psra_q_128;
3040 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3041 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3042 : Intrinsic::x86_sse2_psra_w;
3044 llvm_unreachable("Unexpected size");
3045 } else if (Name.endswith(".256")) {
3046 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3047 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3048 : Intrinsic::x86_avx2_psra_d;
3049 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3050 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3051 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3052 Intrinsic::x86_avx512_psra_q_256;
3053 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3054 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3055 : Intrinsic::x86_avx2_psra_w;
3057 llvm_unreachable("Unexpected size");
3059 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3060 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3061 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3062 Intrinsic::x86_avx512_psra_d_512;
3063 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3064 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3065 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3066 Intrinsic::x86_avx512_psra_q_512;
3067 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3068 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3069 : Intrinsic::x86_avx512_psra_w_512;
3071 llvm_unreachable("Unexpected size");
3074 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3075 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3076 Rep = upgradeMaskedMove(Builder, *CI);
3077 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3078 Rep = UpgradeMaskToInt(Builder, *CI);
3079 } else if (IsX86 && Name.endswith(".movntdqa")) {
3080 Module *M = F->getParent();
3081 MDNode *Node = MDNode::get(
3082 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3084 Value *Ptr = CI->getArgOperand(0);
3086 // Convert the type of the pointer to a pointer to the stored type.
3087 Value *BC = Builder.CreateBitCast(
3088 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3089 LoadInst *LI = Builder.CreateAlignedLoad(
3091 Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3092 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3094 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3095 Name.startswith("fma.vfmsub.") ||
3096 Name.startswith("fma.vfnmadd.") ||
3097 Name.startswith("fma.vfnmsub."))) {
3098 bool NegMul = Name[6] == 'n';
3099 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3100 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3102 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3103 CI->getArgOperand(2) };
3106 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3107 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3108 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3111 if (NegMul && !IsScalar)
3112 Ops[0] = Builder.CreateFNeg(Ops[0]);
3113 if (NegMul && IsScalar)
3114 Ops[1] = Builder.CreateFNeg(Ops[1]);
3116 Ops[2] = Builder.CreateFNeg(Ops[2]);
3118 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3124 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3126 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3127 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3128 CI->getArgOperand(2) };
3130 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3131 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3132 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3134 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3139 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3141 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3142 Name.startswith("avx512.maskz.vfmadd.s") ||
3143 Name.startswith("avx512.mask3.vfmadd.s") ||
3144 Name.startswith("avx512.mask3.vfmsub.s") ||
3145 Name.startswith("avx512.mask3.vfnmsub.s"))) {
3146 bool IsMask3 = Name[11] == '3';
3147 bool IsMaskZ = Name[11] == 'z';
3148 // Drop the "avx512.mask." to make it easier.
3149 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3150 bool NegMul = Name[2] == 'n';
3151 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3153 Value *A = CI->getArgOperand(0);
3154 Value *B = CI->getArgOperand(1);
3155 Value *C = CI->getArgOperand(2);
3157 if (NegMul && (IsMask3 || IsMaskZ))
3158 A = Builder.CreateFNeg(A);
3159 if (NegMul && !(IsMask3 || IsMaskZ))
3160 B = Builder.CreateFNeg(B);
3162 C = Builder.CreateFNeg(C);
3164 A = Builder.CreateExtractElement(A, (uint64_t)0);
3165 B = Builder.CreateExtractElement(B, (uint64_t)0);
3166 C = Builder.CreateExtractElement(C, (uint64_t)0);
3168 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3169 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3170 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3173 if (Name.back() == 'd')
3174 IID = Intrinsic::x86_avx512_vfmadd_f64;
3176 IID = Intrinsic::x86_avx512_vfmadd_f32;
3177 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3178 Rep = Builder.CreateCall(FMA, Ops);
3180 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3183 Rep = Builder.CreateCall(FMA, { A, B, C });
3186 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3189 // For Mask3 with NegAcc, we need to create a new extractelement that
3190 // avoids the negation above.
3191 if (NegAcc && IsMask3)
3192 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3195 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3197 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3199 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3200 Name.startswith("avx512.mask.vfnmadd.p") ||
3201 Name.startswith("avx512.mask.vfnmsub.p") ||
3202 Name.startswith("avx512.mask3.vfmadd.p") ||
3203 Name.startswith("avx512.mask3.vfmsub.p") ||
3204 Name.startswith("avx512.mask3.vfnmsub.p") ||
3205 Name.startswith("avx512.maskz.vfmadd.p"))) {
3206 bool IsMask3 = Name[11] == '3';
3207 bool IsMaskZ = Name[11] == 'z';
3208 // Drop the "avx512.mask." to make it easier.
3209 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3210 bool NegMul = Name[2] == 'n';
3211 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3213 Value *A = CI->getArgOperand(0);
3214 Value *B = CI->getArgOperand(1);
3215 Value *C = CI->getArgOperand(2);
3217 if (NegMul && (IsMask3 || IsMaskZ))
3218 A = Builder.CreateFNeg(A);
3219 if (NegMul && !(IsMask3 || IsMaskZ))
3220 B = Builder.CreateFNeg(B);
3222 C = Builder.CreateFNeg(C);
3224 if (CI->getNumArgOperands() == 5 &&
3225 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3226 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3228 // Check the character before ".512" in string.
3229 if (Name[Name.size()-5] == 's')
3230 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3232 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3234 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3235 { A, B, C, CI->getArgOperand(4) });
3237 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3240 Rep = Builder.CreateCall(FMA, { A, B, C });
3243 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3244 IsMask3 ? CI->getArgOperand(2) :
3245 CI->getArgOperand(0);
3247 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3248 } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3249 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3250 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3252 if (VecWidth == 128 && EltWidth == 32)
3253 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3254 else if (VecWidth == 256 && EltWidth == 32)
3255 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3256 else if (VecWidth == 128 && EltWidth == 64)
3257 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3258 else if (VecWidth == 256 && EltWidth == 64)
3259 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3261 llvm_unreachable("Unexpected intrinsic");
3263 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3264 CI->getArgOperand(2) };
3265 Ops[2] = Builder.CreateFNeg(Ops[2]);
3266 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3268 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3269 Name.startswith("avx512.mask3.vfmaddsub.p") ||
3270 Name.startswith("avx512.maskz.vfmaddsub.p") ||
3271 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3272 bool IsMask3 = Name[11] == '3';
3273 bool IsMaskZ = Name[11] == 'z';
3274 // Drop the "avx512.mask." to make it easier.
3275 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3276 bool IsSubAdd = Name[3] == 's';
3277 if (CI->getNumArgOperands() == 5) {
3279 // Check the character before ".512" in string.
3280 if (Name[Name.size()-5] == 's')
3281 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3283 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3285 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3286 CI->getArgOperand(2), CI->getArgOperand(4) };
3288 Ops[2] = Builder.CreateFNeg(Ops[2]);
3290 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3293 int NumElts = cast<VectorType>(CI->getType())->getNumElements();
3295 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3296 CI->getArgOperand(2) };
3298 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3300 Value *Odd = Builder.CreateCall(FMA, Ops);
3301 Ops[2] = Builder.CreateFNeg(Ops[2]);
3302 Value *Even = Builder.CreateCall(FMA, Ops);
3305 std::swap(Even, Odd);
3307 SmallVector<int, 32> Idxs(NumElts);
3308 for (int i = 0; i != NumElts; ++i)
3309 Idxs[i] = i + (i % 2) * NumElts;
3311 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3314 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3315 IsMask3 ? CI->getArgOperand(2) :
3316 CI->getArgOperand(0);
3318 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3319 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3320 Name.startswith("avx512.maskz.pternlog."))) {
3321 bool ZeroMask = Name[11] == 'z';
3322 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3323 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3325 if (VecWidth == 128 && EltWidth == 32)
3326 IID = Intrinsic::x86_avx512_pternlog_d_128;
3327 else if (VecWidth == 256 && EltWidth == 32)
3328 IID = Intrinsic::x86_avx512_pternlog_d_256;
3329 else if (VecWidth == 512 && EltWidth == 32)
3330 IID = Intrinsic::x86_avx512_pternlog_d_512;
3331 else if (VecWidth == 128 && EltWidth == 64)
3332 IID = Intrinsic::x86_avx512_pternlog_q_128;
3333 else if (VecWidth == 256 && EltWidth == 64)
3334 IID = Intrinsic::x86_avx512_pternlog_q_256;
3335 else if (VecWidth == 512 && EltWidth == 64)
3336 IID = Intrinsic::x86_avx512_pternlog_q_512;
3338 llvm_unreachable("Unexpected intrinsic");
3340 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3341 CI->getArgOperand(2), CI->getArgOperand(3) };
3342 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3344 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3345 : CI->getArgOperand(0);
3346 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3347 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3348 Name.startswith("avx512.maskz.vpmadd52"))) {
3349 bool ZeroMask = Name[11] == 'z';
3350 bool High = Name[20] == 'h' || Name[21] == 'h';
3351 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3353 if (VecWidth == 128 && !High)
3354 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3355 else if (VecWidth == 256 && !High)
3356 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3357 else if (VecWidth == 512 && !High)
3358 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3359 else if (VecWidth == 128 && High)
3360 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3361 else if (VecWidth == 256 && High)
3362 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3363 else if (VecWidth == 512 && High)
3364 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3366 llvm_unreachable("Unexpected intrinsic");
3368 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3369 CI->getArgOperand(2) };
3370 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3372 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3373 : CI->getArgOperand(0);
3374 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3375 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3376 Name.startswith("avx512.mask.vpermt2var.") ||
3377 Name.startswith("avx512.maskz.vpermt2var."))) {
3378 bool ZeroMask = Name[11] == 'z';
3379 bool IndexForm = Name[17] == 'i';
3380 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3381 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3382 Name.startswith("avx512.maskz.vpdpbusd.") ||
3383 Name.startswith("avx512.mask.vpdpbusds.") ||
3384 Name.startswith("avx512.maskz.vpdpbusds."))) {
3385 bool ZeroMask = Name[11] == 'z';
3386 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3387 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3389 if (VecWidth == 128 && !IsSaturating)
3390 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3391 else if (VecWidth == 256 && !IsSaturating)
3392 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3393 else if (VecWidth == 512 && !IsSaturating)
3394 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3395 else if (VecWidth == 128 && IsSaturating)
3396 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3397 else if (VecWidth == 256 && IsSaturating)
3398 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3399 else if (VecWidth == 512 && IsSaturating)
3400 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3402 llvm_unreachable("Unexpected intrinsic");
3404 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3405 CI->getArgOperand(2) };
3406 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3408 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3409 : CI->getArgOperand(0);
3410 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3411 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3412 Name.startswith("avx512.maskz.vpdpwssd.") ||
3413 Name.startswith("avx512.mask.vpdpwssds.") ||
3414 Name.startswith("avx512.maskz.vpdpwssds."))) {
3415 bool ZeroMask = Name[11] == 'z';
3416 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3417 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3419 if (VecWidth == 128 && !IsSaturating)
3420 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3421 else if (VecWidth == 256 && !IsSaturating)
3422 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3423 else if (VecWidth == 512 && !IsSaturating)
3424 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3425 else if (VecWidth == 128 && IsSaturating)
3426 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3427 else if (VecWidth == 256 && IsSaturating)
3428 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3429 else if (VecWidth == 512 && IsSaturating)
3430 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3432 llvm_unreachable("Unexpected intrinsic");
3434 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3435 CI->getArgOperand(2) };
3436 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3438 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3439 : CI->getArgOperand(0);
3440 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3441 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3442 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3443 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3445 if (Name[0] == 'a' && Name.back() == '2')
3446 IID = Intrinsic::x86_addcarry_32;
3447 else if (Name[0] == 'a' && Name.back() == '4')
3448 IID = Intrinsic::x86_addcarry_64;
3449 else if (Name[0] == 's' && Name.back() == '2')
3450 IID = Intrinsic::x86_subborrow_32;
3451 else if (Name[0] == 's' && Name.back() == '4')
3452 IID = Intrinsic::x86_subborrow_64;
3454 llvm_unreachable("Unexpected intrinsic");
3456 // Make a call with 3 operands.
3457 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3458 CI->getArgOperand(2)};
3459 Value *NewCall = Builder.CreateCall(
3460 Intrinsic::getDeclaration(CI->getModule(), IID),
3463 // Extract the second result and store it.
3464 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3465 // Cast the pointer to the right type.
3466 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3467 llvm::PointerType::getUnqual(Data->getType()));
3468 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3469 // Replace the original call result with the first result of the new call.
3470 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3472 CI->replaceAllUsesWith(CF);
3474 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3475 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3476 // Rep will be updated by the call in the condition.
3477 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3478 Value *Arg = CI->getArgOperand(0);
3479 Value *Neg = Builder.CreateNeg(Arg, "neg");
3480 Value *Cmp = Builder.CreateICmpSGE(
3481 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3482 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3483 } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3484 Name.startswith("atomic.load.add.f64.p"))) {
3485 Value *Ptr = CI->getArgOperand(0);
3486 Value *Val = CI->getArgOperand(1);
3487 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3488 AtomicOrdering::SequentiallyConsistent);
3489 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3490 Name == "max.ui" || Name == "max.ull")) {
3491 Value *Arg0 = CI->getArgOperand(0);
3492 Value *Arg1 = CI->getArgOperand(1);
3493 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3494 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3495 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3496 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3497 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3498 Name == "min.ui" || Name == "min.ull")) {
3499 Value *Arg0 = CI->getArgOperand(0);
3500 Value *Arg1 = CI->getArgOperand(1);
3501 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3502 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3503 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3504 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3505 } else if (IsNVVM && Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3507 Value *Arg = CI->getArgOperand(0);
3508 Value *Ctlz = Builder.CreateCall(
3509 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3511 {Arg, Builder.getFalse()}, "ctlz");
3512 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3513 } else if (IsNVVM && Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3516 Value *Arg = CI->getArgOperand(0);
3517 Value *Popc = Builder.CreateCall(
3518 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3521 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3522 } else if (IsNVVM && Name == "h2f") {
3523 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3524 F->getParent(), Intrinsic::convert_from_fp16,
3525 {Builder.getFloatTy()}),
3526 CI->getArgOperand(0), "h2f");
3528 llvm_unreachable("Unknown function for CallInst upgrade.");
3532 CI->replaceAllUsesWith(Rep);
3533 CI->eraseFromParent();
3537 const auto &DefaultCase = [&NewFn, &CI]() -> void {
3538 // Handle generic mangling change, but nothing else
3540 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3541 "Unknown function for CallInst upgrade and isn't just a name change");
3542 CI->setCalledFunction(NewFn);
3544 CallInst *NewCall = nullptr;
3545 switch (NewFn->getIntrinsicID()) {
3550 case Intrinsic::experimental_vector_reduce_v2_fmul: {
3551 SmallVector<Value *, 2> Args;
3553 Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3555 Args.push_back(CI->getOperand(0));
3556 Args.push_back(CI->getOperand(1));
3557 NewCall = Builder.CreateCall(NewFn, Args);
3558 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3561 case Intrinsic::experimental_vector_reduce_v2_fadd: {
3562 SmallVector<Value *, 2> Args;
3564 Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3566 Args.push_back(CI->getOperand(0));
3567 Args.push_back(CI->getOperand(1));
3568 NewCall = Builder.CreateCall(NewFn, Args);
3569 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3572 case Intrinsic::arm_neon_vld1:
3573 case Intrinsic::arm_neon_vld2:
3574 case Intrinsic::arm_neon_vld3:
3575 case Intrinsic::arm_neon_vld4:
3576 case Intrinsic::arm_neon_vld2lane:
3577 case Intrinsic::arm_neon_vld3lane:
3578 case Intrinsic::arm_neon_vld4lane:
3579 case Intrinsic::arm_neon_vst1:
3580 case Intrinsic::arm_neon_vst2:
3581 case Intrinsic::arm_neon_vst3:
3582 case Intrinsic::arm_neon_vst4:
3583 case Intrinsic::arm_neon_vst2lane:
3584 case Intrinsic::arm_neon_vst3lane:
3585 case Intrinsic::arm_neon_vst4lane: {
3586 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3587 CI->arg_operands().end());
3588 NewCall = Builder.CreateCall(NewFn, Args);
3592 case Intrinsic::bitreverse:
3593 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3596 case Intrinsic::ctlz:
3597 case Intrinsic::cttz:
3598 assert(CI->getNumArgOperands() == 1 &&
3599 "Mismatch between function args and call args");
3601 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3604 case Intrinsic::objectsize: {
3605 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3606 ? Builder.getFalse()
3607 : CI->getArgOperand(2);
3609 CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3610 NewCall = Builder.CreateCall(
3611 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3615 case Intrinsic::ctpop:
3616 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3619 case Intrinsic::convert_from_fp16:
3620 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3623 case Intrinsic::dbg_value:
3624 // Upgrade from the old version that had an extra offset argument.
3625 assert(CI->getNumArgOperands() == 4);
3626 // Drop nonzero offsets instead of attempting to upgrade them.
3627 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3628 if (Offset->isZeroValue()) {
3629 NewCall = Builder.CreateCall(
3631 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3634 CI->eraseFromParent();
3637 case Intrinsic::x86_xop_vfrcz_ss:
3638 case Intrinsic::x86_xop_vfrcz_sd:
3639 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3642 case Intrinsic::x86_xop_vpermil2pd:
3643 case Intrinsic::x86_xop_vpermil2ps:
3644 case Intrinsic::x86_xop_vpermil2pd_256:
3645 case Intrinsic::x86_xop_vpermil2ps_256: {
3646 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3647 CI->arg_operands().end());
3648 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3649 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3650 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3651 NewCall = Builder.CreateCall(NewFn, Args);
3655 case Intrinsic::x86_sse41_ptestc:
3656 case Intrinsic::x86_sse41_ptestz:
3657 case Intrinsic::x86_sse41_ptestnzc: {
3658 // The arguments for these intrinsics used to be v4f32, and changed
3659 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3660 // So, the only thing required is a bitcast for both arguments.
3661 // First, check the arguments have the old type.
3662 Value *Arg0 = CI->getArgOperand(0);
3663 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3666 // Old intrinsic, add bitcasts
3667 Value *Arg1 = CI->getArgOperand(1);
3669 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3671 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3672 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3674 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3678 case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
3681 if (CI->getNumOperands() == 0)
3684 NewCall = Builder.CreateCall(NewFn);
3685 // Extract the second result and store it.
3686 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3687 // Cast the pointer to the right type.
3688 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3689 llvm::PointerType::getUnqual(Data->getType()));
3690 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3691 // Replace the original call result with the first result of the new call.
3692 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3694 std::string Name = std::string(CI->getName());
3695 if (!Name.empty()) {
3696 CI->setName(Name + ".old");
3697 NewCall->setName(Name);
3699 CI->replaceAllUsesWith(TSC);
3700 CI->eraseFromParent();
3704 case Intrinsic::x86_sse41_insertps:
3705 case Intrinsic::x86_sse41_dppd:
3706 case Intrinsic::x86_sse41_dpps:
3707 case Intrinsic::x86_sse41_mpsadbw:
3708 case Intrinsic::x86_avx_dp_ps_256:
3709 case Intrinsic::x86_avx2_mpsadbw: {
3710 // Need to truncate the last argument from i32 to i8 -- this argument models
3711 // an inherently 8-bit immediate operand to these x86 instructions.
3712 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3713 CI->arg_operands().end());
3715 // Replace the last argument with a trunc.
3716 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3717 NewCall = Builder.CreateCall(NewFn, Args);
3721 case Intrinsic::thread_pointer: {
3722 NewCall = Builder.CreateCall(NewFn, {});
3726 case Intrinsic::invariant_start:
3727 case Intrinsic::invariant_end:
3728 case Intrinsic::masked_load:
3729 case Intrinsic::masked_store:
3730 case Intrinsic::masked_gather:
3731 case Intrinsic::masked_scatter: {
3732 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3733 CI->arg_operands().end());
3734 NewCall = Builder.CreateCall(NewFn, Args);
3738 case Intrinsic::memcpy:
3739 case Intrinsic::memmove:
3740 case Intrinsic::memset: {
3741 // We have to make sure that the call signature is what we're expecting.
3742 // We only want to change the old signatures by removing the alignment arg:
3743 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3744 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3745 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3746 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3747 // Note: i8*'s in the above can be any pointer type
3748 if (CI->getNumArgOperands() != 5) {
3752 // Remove alignment argument (3), and add alignment attributes to the
3753 // dest/src pointers.
3754 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3755 CI->getArgOperand(2), CI->getArgOperand(4)};
3756 NewCall = Builder.CreateCall(NewFn, Args);
3757 auto *MemCI = cast<MemIntrinsic>(NewCall);
3758 // All mem intrinsics support dest alignment.
3759 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3760 MemCI->setDestAlignment(Align->getMaybeAlignValue());
3761 // Memcpy/Memmove also support source alignment.
3762 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3763 MTI->setSourceAlignment(Align->getMaybeAlignValue());
3767 assert(NewCall && "Should have either set this variable or returned through "
3768 "the default case");
3769 std::string Name = std::string(CI->getName());
3770 if (!Name.empty()) {
3771 CI->setName(Name + ".old");
3772 NewCall->setName(Name);
3774 CI->replaceAllUsesWith(NewCall);
3775 CI->eraseFromParent();
3778 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3779 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3781 // Check if this function should be upgraded and get the replacement function
3784 if (UpgradeIntrinsicFunction(F, NewFn)) {
3785 // Replace all users of the old function with the new function or new
3786 // instructions. This is not a range loop because the call is deleted.
3787 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3788 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3789 UpgradeIntrinsicCall(CI, NewFn);
3791 // Remove old function, no longer used, from the module.
3792 F->eraseFromParent();
3796 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3797 // Check if the tag uses struct-path aware TBAA format.
3798 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3801 auto &Context = MD.getContext();
3802 if (MD.getNumOperands() == 3) {
3803 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3804 MDNode *ScalarType = MDNode::get(Context, Elts);
3805 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3806 Metadata *Elts2[] = {ScalarType, ScalarType,
3807 ConstantAsMetadata::get(
3808 Constant::getNullValue(Type::getInt64Ty(Context))),
3810 return MDNode::get(Context, Elts2);
3812 // Create a MDNode <MD, MD, offset 0>
3813 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3814 Type::getInt64Ty(Context)))};
3815 return MDNode::get(Context, Elts);
3818 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3819 Instruction *&Temp) {
3820 if (Opc != Instruction::BitCast)
3824 Type *SrcTy = V->getType();
3825 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3826 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3827 LLVMContext &Context = V->getContext();
3829 // We have no information about target data layout, so we assume that
3830 // the maximum pointer size is 64bit.
3831 Type *MidTy = Type::getInt64Ty(Context);
3832 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3834 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3840 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3841 if (Opc != Instruction::BitCast)
3844 Type *SrcTy = C->getType();
3845 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3846 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3847 LLVMContext &Context = C->getContext();
3849 // We have no information about target data layout, so we assume that
3850 // the maximum pointer size is 64bit.
3851 Type *MidTy = Type::getInt64Ty(Context);
3853 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3860 /// Check the debug info version number, if it is out-dated, drop the debug
3861 /// info. Return true if module is modified.
3862 bool llvm::UpgradeDebugInfo(Module &M) {
3863 unsigned Version = getDebugMetadataVersionFromModule(M);
3864 if (Version == DEBUG_METADATA_VERSION) {
3865 bool BrokenDebugInfo = false;
3866 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3867 report_fatal_error("Broken module found, compilation aborted!");
3868 if (!BrokenDebugInfo)
3869 // Everything is ok.
3872 // Diagnose malformed debug info.
3873 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3874 M.getContext().diagnose(Diag);
3877 bool Modified = StripDebugInfo(M);
3878 if (Modified && Version != DEBUG_METADATA_VERSION) {
3879 // Diagnose a version mismatch.
3880 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3881 M.getContext().diagnose(DiagVersion);
3886 /// This checks for objc retain release marker which should be upgraded. It
3887 /// returns true if module is modified.
3888 static bool UpgradeRetainReleaseMarker(Module &M) {
3889 bool Changed = false;
3890 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3891 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3892 if (ModRetainReleaseMarker) {
3893 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3895 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3897 SmallVector<StringRef, 4> ValueComp;
3898 ID->getString().split(ValueComp, "#");
3899 if (ValueComp.size() == 2) {
3900 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3901 ID = MDString::get(M.getContext(), NewValue);
3903 M.addModuleFlag(Module::Error, MarkerKey, ID);
3904 M.eraseNamedMetadata(ModRetainReleaseMarker);
/// Upgrade calls to ObjC ARC runtime entry points (objc_retain, objc_release,
/// etc.) into calls to the corresponding llvm.objc.* intrinsics, inserting
/// bitcasts where the old and new signatures disagree. Runtime calls are only
/// converted when the module still carries the old retain/release marker.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // calls to the equivalent llvm.objc.* intrinsic.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);
    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
    // The iterator is advanced before the body runs because the current call
    // instruction is erased further down in the loop.
    for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
      CallInst *CI = dyn_cast<CallInst>(*I++);
      if (!CI || CI->getCalledFunction() != Fn)
      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;
      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
      bool InvalidCast = false;
      for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);
        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
      Args.push_back(Arg);
    // Create a call instruction that calls the new function.
    CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
    // Preserve tail-call kind and the original call's name on the new call.
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setName(CI->getName());
    // Bitcast the return value back to the type of the old call.
    Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
    if (!CI->use_empty())
      CI->replaceAllUsesWith(NewRetVal);
    CI->eraseFromParent();
    // Drop the old runtime-function declaration once nothing references it.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!UpgradeRetainReleaseMarker(M))
  // Table mapping each old ObjC runtime entry point to its llvm.objc.*
  // intrinsic equivalent; every pair below is upgraded via the lambda above.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
// Upgrade module-level flags metadata ("llvm.module.flags") to current
// conventions.  Visible upgrades in this listing: PIC/PIE Level behavior
// Error -> Max, whitespace removal from "Objective-C Image Info Section",
// splitting the packed i32 "Objective-C Garbage Collection" value into an
// i8 flag plus separate Swift version flags, and adding a default
// "Objective-C Class Properties" flag.  Presumably returns `Changed`
// (the return statement is elided from this listing — confirm in full file).
// NOTE(review): several control-flow lines (continue/early-exit/braces) are
// elided from this sampled listing; read against the full source.
4033 bool llvm::UpgradeModuleFlags(Module &M) {
4034   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
       // Track which ObjC-related flags we saw, and any Swift version info
       // extracted from a packed "Objective-C Garbage Collection" value.
4038   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4039   bool HasSwiftVersionFlag = false;
4040   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4041   uint32_t SwiftABIVersion;
4042   auto Int8Ty = Type::getInt8Ty(M.getContext());
4043   auto Int32Ty = Type::getInt32Ty(M.getContext());
       // Each well-formed module flag is a 3-operand MDNode:
       // (behavior, ID string, value).
4045   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4046     MDNode *Op = ModFlags->getOperand(I);
4047     if (Op->getNumOperands() != 3)
4049     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4052     if (ID->getString() == "Objective-C Image Info Version")
4054     if (ID->getString() == "Objective-C Class Properties")
4055       HasClassProperties = true;
4056     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4057     // fields were Error and now they are Max.
4058     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4059       if (auto *Behavior =
4060               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4061         if (Behavior->getLimitedValue() == Module::Error) {
               // Rebuild the flag node with Max behavior, same ID and value.
4062           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4063           Metadata *Ops[3] = {
4064               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4065               MDString::get(M.getContext(), ID->getString()),
4067           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4072     // Upgrade Objective-C Image Info Section. Removed the whitespace in the
4073     // section name so that llvm-lto will not complain about mismatching
4074     // module flags that is functionally the same.
4075     if (ID->getString() == "Objective-C Image Info Section") {
4076       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4077         SmallVector<StringRef, 4> ValueComp;
4078         Value->getString().split(ValueComp, " ");
4079         if (ValueComp.size() != 1) {
               // Re-join the components with the spaces stripped out.
4080           std::string NewValue;
4081           for (auto &S : ValueComp)
4082             NewValue += S.str();
4083           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4084                               MDString::get(M.getContext(), NewValue)};
4085           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4091     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4092     // If the higher bits are set, it adds new module flag for swift info.
4093     if (ID->getString() == "Objective-C Garbage Collection") {
4094       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4096       assert(Md->getValue() && "Expected non-empty metadata");
4097       auto Type = Md->getValue()->getType();
4100       unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
           // Anything above the low byte means Swift version info was packed
           // into the old i32 value: ABI in bits 8-15, minor in 16-23,
           // major in 24-31.
4101       if ((Val & 0xff) != Val) {
4102         HasSwiftVersionFlag = true;
4103         SwiftABIVersion = (Val & 0xff00) >> 8;
4104         SwiftMajorVersion = (Val & 0xff000000) >> 24;
4105         SwiftMinorVersion = (Val & 0xff0000) >> 16;
           // Replace the flag with an i8 holding just the GC byte,
           // behavior Error.
4107       Metadata *Ops[3] = {
4108         ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
4110         ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
4111       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4117   // "Objective-C Class Properties" is recently added for Objective-C. We
4118   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4119   // flag of value 0, so we can correctly downgrade this flag when trying to
4120   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4121   // this module flag.
4122   if (HasObjCFlag && !HasClassProperties) {
4123     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
       // Emit the Swift version info extracted above as separate flags.
4128   if (HasSwiftVersionFlag) {
4129     M.addModuleFlag(Module::Error, "Swift ABI Version",
4131     M.addModuleFlag(Module::Error, "Swift Major Version",
4132                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
4133     M.addModuleFlag(Module::Error, "Swift Minor Version",
4134                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
// Normalize legacy Mach-O section names on globals: strip the spaces that
// older compilers emitted after the commas in "__DATA, __objc_catlist"
// sections so that otherwise-identical sections compare equal at LTO time.
4141 void llvm::UpgradeSectionAttributes(Module &M) {
       // Rebuild the section string with each comma-separated component
       // trimmed; the leading ',' added in the loop is dropped via substr(1).
4142   auto TrimSpaces = [](StringRef Section) -> std::string {
4143     SmallVector<StringRef, 5> Components;
4144     Section.split(Components, ',');
4146     SmallString<32> Buffer;
4147     raw_svector_ostream OS(Buffer);
4149     for (auto Component : Components)
4150       OS << ',' << Component.trim();
4152     return std::string(OS.str().substr(1));
       // Only globals in the ObjC category-list section are rewritten.
4155   for (auto &GV : M.globals()) {
4156     if (!GV.hasSection())
4159     StringRef Section = GV.getSection();
4161     if (!Section.startswith("__DATA, __objc_catlist"))
4164     // __DATA, __objc_catlist, regular, no_dead_strip
4165     // __DATA,__objc_catlist,regular,no_dead_strip
4166     GV.setSection(TrimSpaces(Section));
4171 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
4172 // callsites within a function that did not also have the strictfp attribute.
4173 // Since 10.0, if strict FP semantics are needed within a function, the
4174 // function must have the strictfp attribute and all calls within the function
4175 // must also have the strictfp attribute. This latter restriction is
4176 // necessary to prevent unwanted libcall simplification when a function is
4177 // being cloned (such as for inlining).
4179 // The "dangling" strictfp attribute usage was only used to prevent constant
4180 // folding and other libcall simplification. The nobuiltin attribute on the
4181 // callsite has the same effect.
// Instruction visitor that rewrites dangling callsite-only strictfp
// attributes to nobuiltin; driven from UpgradeFunctionAttributes below.
4182 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4183   StrictFPUpgradeVisitor() {}
4185   void visitCallBase(CallBase &Call) {
         // Only callsites carrying strictfp are of interest; constrained FP
         // intrinsics legitimately keep the attribute and are skipped.
4186     if (!Call.isStrictFP())
4188     if (isa<ConstrainedFPIntrinsic>(&Call))
4190     // If we get here, the caller doesn't have the strictfp attribute
4191     // but this callsite does. Replace the strictfp attribute with nobuiltin.
4192     Call.removeAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
4193     Call.addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
// Upgrade attributes on a function definition.  Currently: if the function
// itself is not strictfp, demote any strictfp callsite attributes inside it
// to nobuiltin (see StrictFPUpgradeVisitor).  Declarations are left alone.
4198 void llvm::UpgradeFunctionAttributes(Function &F) {
4199   // If a function definition doesn't have the strictfp attribute,
4200   // convert any callsite strictfp attributes to nobuiltin.
4201   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4202     StrictFPUpgradeVisitor SFPV;
       // NOTE(review): the SFPV.visit(F) call is elided from this listing.
// Returns true if MD is a loop-metadata operand using the pre-3.7
// "llvm.vectorizer.*" tag naming, i.e. an MDTuple whose first operand is an
// MDString with that prefix.  (Early `return false;` lines for the
// null/empty cases are elided from this listing.)
4207 static bool isOldLoopArgument(Metadata *MD) {
4208   auto *T = dyn_cast_or_null<MDTuple>(MD);
4211   if (T->getNumOperands() < 1)
4213   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4216   return S->getString().startswith("llvm.vectorizer.");
// Map an old "llvm.vectorizer.*" loop-metadata tag to its modern
// "llvm.loop.vectorize.*" spelling.  The one irregular case is
// "llvm.vectorizer.unroll", which became "llvm.loop.interleave.count".
4219 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4220   StringRef OldPrefix = "llvm.vectorizer.";
4221   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4223   if (OldTag == "llvm.vectorizer.unroll")
4224     return MDString::get(C, "llvm.loop.interleave.count");
       // Generic case: swap the prefix, keep the suffix.
4226   return MDString::get(
4227       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
// Upgrade a single loop-metadata operand: if it is an MDTuple tagged with an
// old "llvm.vectorizer.*" string, rebuild it with the modernized tag and the
// remaining operands unchanged; otherwise MD is presumably returned as-is
// (the pass-through `return MD;` lines are elided from this listing).
4231 static Metadata *upgradeLoopArgument(Metadata *MD) {
4232   auto *T = dyn_cast_or_null<MDTuple>(MD);
4235   if (T->getNumOperands() < 1)
4237   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4240   if (!OldTag->getString().startswith("llvm.vectorizer."))
4243   // This has an old tag. Upgrade it.
4244   SmallVector<Metadata *, 8> Ops;
4245   Ops.reserve(T->getNumOperands());
       // New tag first, then copy the old tuple's remaining operands verbatim.
4246   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4247   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4248     Ops.push_back(T->getOperand(I));
4250   return MDTuple::get(T->getContext(), Ops);
// Upgrade an instruction's !llvm.loop attachment.  Returns the original node
// untouched when no operand uses the old "llvm.vectorizer.*" tags (the
// early-return lines are elided from this listing); otherwise rebuilds the
// tuple with every operand run through upgradeLoopArgument.
4253 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4254   auto *T = dyn_cast<MDTuple>(&N);
       // Fast path: nothing old-style to rewrite.
4258   if (none_of(T->operands(), isOldLoopArgument))
4261   SmallVector<Metadata *, 8> Ops;
4262   Ops.reserve(T->getNumOperands());
4263   for (Metadata *MD : T->operands())
4264     Ops.push_back(upgradeLoopArgument(MD));
4266   return MDTuple::get(T->getContext(), Ops);
// Upgrade a datalayout string for X86 targets: insert the p270/p271/p272
// address-space pointer sizes (used for mixed 32/64-bit pointer support)
// into layouts that predate them.  Non-X86 targets and layouts that already
// contain the address spaces are returned unchanged.
4269 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4270   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4272   // If X86, and the datalayout matches the expected format, add pointer size
4273   // address spaces to the datalayout.
4274   if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
4275     return std::string(DL);
       // Match "e-m:<mangling>[-p:32:32]" followed by the first -i64/-f64
       // group; the address spaces are spliced in between the two captures.
4277   SmallVector<StringRef, 4> Groups;
4278   Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4279   if (!R.match(DL, &Groups))
4280     return std::string(DL);
4282   SmallString<1024> Buf;
4283   std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
// Upgrade legacy string attributes in an AttrBuilder:
//   "no-frame-pointer-elim"/"no-frame-pointer-elim-non-leaf" are folded into
//   the single "frame-pointer"={"all","non-leaf","none"} attribute, and the
//   "null-pointer-is-valid"="true" string attribute becomes the enum
//   Attribute::NullPointerIsValid.
4287 void llvm::UpgradeAttributes(AttrBuilder &B) {
4288   StringRef FramePointer;
4289   if (B.contains("no-frame-pointer-elim")) {
4290     // The value can be "true" or "false".
4291     for (const auto &I : B.td_attrs())
4292       if (I.first == "no-frame-pointer-elim")
4293         FramePointer = I.second == "true" ? "all" : "none";
4294     B.removeAttribute("no-frame-pointer-elim");
4296   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4297     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4298     if (FramePointer != "all")
4299       FramePointer = "non-leaf";
4300     B.removeAttribute("no-frame-pointer-elim-non-leaf");
       // Emit the merged replacement attribute, if either legacy form was set.
4302   if (!FramePointer.empty())
4303     B.addAttribute("frame-pointer", FramePointer);
4305   if (B.contains("null-pointer-is-valid")) {
4306     // The value can be "true" or "false".
4307     bool NullPointerIsValid = false;
4308     for (const auto &I : B.td_attrs())
4309       if (I.first == "null-pointer-is-valid")
4310         NullPointerIsValid = I.second == "true";
4311     B.removeAttribute("null-pointer-is-valid");
4312     if (NullPointerIsValid)
4313       B.addAttribute(Attribute::NullPointerIsValid);