1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/IntrinsicsAArch64.h"
26 #include "llvm/IR/IntrinsicsARM.h"
27 #include "llvm/IR/IntrinsicsX86.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/Verifier.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/Regex.h"
// Renames GV in place by appending ".old" to its current name.
36 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
38 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
39 // changed their type from v4f32 to v2i64.
// F is the declaration being inspected; IID identifies the intrinsic whose
// current declaration is looked up in F's module and stored in NewFn.
40 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
42 // Check whether this is an old version of the function, which received
// <4 x float> arguments: the first parameter type is compared against a
// 4-element float vector type.
44 Type *Arg0Type = F->getFunctionType()->getParamType(0);
45 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
48 // Yes, it's old, replace it with the new version.
50 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
54 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
55 // arguments have changed their type from i32 to i8.
// F is the declaration being inspected; IID identifies the intrinsic whose
// current declaration is looked up in F's module and stored in NewFn.
56 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
58 // Check that the last argument is an i32.
// The last parameter is located via getNumParams() - 1.
59 Type *LastArgType = F->getFunctionType()->getParamType(
60 F->getFunctionType()->getNumParams() - 1);
61 if (!LastArgType->isIntegerTy(32))
64 // Move this function aside and map down.
66 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Tests Name against the list of legacy x86 intrinsic names below. Entries
// are either exact names (operator==) or name prefixes (startswith). The
// caller in this file strips the "x86." prefix from Name before calling, so
// the entries are spelled without it.
70 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
71 // Every intrinsic match below should be annotated with the LLVM version
72 // that started auto-upgrading it. At some point in the future we would
73 // like to use this information to remove upgrade code for some older
74 // intrinsics. It is currently undecided how we will determine that future
// point.
76 if (Name == "addcarryx.u32" || // Added in 8.0
77 Name == "addcarryx.u64" || // Added in 8.0
78 Name == "addcarry.u32" || // Added in 8.0
79 Name == "addcarry.u64" || // Added in 8.0
80 Name == "subborrow.u32" || // Added in 8.0
81 Name == "subborrow.u64" || // Added in 8.0
82 Name.startswith("sse2.padds.") || // Added in 8.0
83 Name.startswith("sse2.psubs.") || // Added in 8.0
84 Name.startswith("sse2.paddus.") || // Added in 8.0
85 Name.startswith("sse2.psubus.") || // Added in 8.0
86 Name.startswith("avx2.padds.") || // Added in 8.0
87 Name.startswith("avx2.psubs.") || // Added in 8.0
88 Name.startswith("avx2.paddus.") || // Added in 8.0
89 Name.startswith("avx2.psubus.") || // Added in 8.0
90 Name.startswith("avx512.padds.") || // Added in 8.0
91 Name.startswith("avx512.psubs.") || // Added in 8.0
92 Name.startswith("avx512.mask.padds.") || // Added in 8.0
93 Name.startswith("avx512.mask.psubs.") || // Added in 8.0
94 Name.startswith("avx512.mask.paddus.") || // Added in 8.0
95 Name.startswith("avx512.mask.psubus.") || // Added in 8.0
96 Name=="ssse3.pabs.b.128" || // Added in 6.0
97 Name=="ssse3.pabs.w.128" || // Added in 6.0
98 Name=="ssse3.pabs.d.128" || // Added in 6.0
99 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
100 Name.startswith("fma.vfmadd.") || // Added in 7.0
101 Name.startswith("fma.vfmsub.") || // Added in 7.0
102 Name.startswith("fma.vfmaddsub.") || // Added in 7.0
103 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
104 Name.startswith("fma.vfnmadd.") || // Added in 7.0
105 Name.startswith("fma.vfnmsub.") || // Added in 7.0
106 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
107 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
108 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
109 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
110 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
111 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
112 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
113 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
114 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
115 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
116 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
117 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
118 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
119 Name.startswith("avx512.kunpck") || // Added in 6.0
120 Name.startswith("avx2.pabs.") || // Added in 6.0
121 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
122 Name.startswith("avx512.broadcastm") || // Added in 6.0
123 Name == "sse.sqrt.ss" || // Added in 7.0
124 Name == "sse2.sqrt.sd" || // Added in 7.0
125 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
126 Name.startswith("avx.sqrt.p") || // Added in 7.0
127 Name.startswith("sse2.sqrt.p") || // Added in 7.0
128 Name.startswith("sse.sqrt.p") || // Added in 7.0
129 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
130 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
131 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
132 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
133 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
134 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
135 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
136 Name.startswith("avx.vperm2f128.") || // Added in 6.0
137 Name == "avx2.vperm2i128" || // Added in 6.0
138 Name == "sse.add.ss" || // Added in 4.0
139 Name == "sse2.add.sd" || // Added in 4.0
140 Name == "sse.sub.ss" || // Added in 4.0
141 Name == "sse2.sub.sd" || // Added in 4.0
142 Name == "sse.mul.ss" || // Added in 4.0
143 Name == "sse2.mul.sd" || // Added in 4.0
144 Name == "sse.div.ss" || // Added in 4.0
145 Name == "sse2.div.sd" || // Added in 4.0
146 Name == "sse41.pmaxsb" || // Added in 3.9
147 Name == "sse2.pmaxs.w" || // Added in 3.9
148 Name == "sse41.pmaxsd" || // Added in 3.9
149 Name == "sse2.pmaxu.b" || // Added in 3.9
150 Name == "sse41.pmaxuw" || // Added in 3.9
151 Name == "sse41.pmaxud" || // Added in 3.9
152 Name == "sse41.pminsb" || // Added in 3.9
153 Name == "sse2.pmins.w" || // Added in 3.9
154 Name == "sse41.pminsd" || // Added in 3.9
155 Name == "sse2.pminu.b" || // Added in 3.9
156 Name == "sse41.pminuw" || // Added in 3.9
157 Name == "sse41.pminud" || // Added in 3.9
158 Name == "avx512.kand.w" || // Added in 7.0
159 Name == "avx512.kandn.w" || // Added in 7.0
160 Name == "avx512.knot.w" || // Added in 7.0
161 Name == "avx512.kor.w" || // Added in 7.0
162 Name == "avx512.kxor.w" || // Added in 7.0
163 Name == "avx512.kxnor.w" || // Added in 7.0
164 Name == "avx512.kortestc.w" || // Added in 7.0
165 Name == "avx512.kortestz.w" || // Added in 7.0
166 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
167 Name.startswith("avx2.pmax") || // Added in 3.9
168 Name.startswith("avx2.pmin") || // Added in 3.9
169 Name.startswith("avx512.mask.pmax") || // Added in 4.0
170 Name.startswith("avx512.mask.pmin") || // Added in 4.0
171 Name.startswith("avx2.vbroadcast") || // Added in 3.8
172 Name.startswith("avx2.pbroadcast") || // Added in 3.8
173 Name.startswith("avx.vpermil.") || // Added in 3.1
174 Name.startswith("sse2.pshuf") || // Added in 3.9
175 Name.startswith("avx512.pbroadcast") || // Added in 3.9
176 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
177 Name.startswith("avx512.mask.movddup") || // Added in 3.9
178 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
179 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
180 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
181 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
182 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
183 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
184 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
185 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
186 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
187 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
188 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
189 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
190 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
191 Name.startswith("avx512.mask.pand.") || // Added in 3.9
192 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
193 Name.startswith("avx512.mask.por.") || // Added in 3.9
194 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
195 Name.startswith("avx512.mask.and.") || // Added in 3.9
196 Name.startswith("avx512.mask.andn.") || // Added in 3.9
197 Name.startswith("avx512.mask.or.") || // Added in 3.9
198 Name.startswith("avx512.mask.xor.") || // Added in 3.9
199 Name.startswith("avx512.mask.padd.") || // Added in 4.0
200 Name.startswith("avx512.mask.psub.") || // Added in 4.0
201 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
202 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
203 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
204 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
205 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
206 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
207 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
208 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
209 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
210 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
211 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
212 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
213 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
214 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
215 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
216 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
217 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
218 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
219 Name == "avx512.cvtusi2sd" || // Added in 7.0
220 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
221 Name == "sse2.pmulu.dq" || // Added in 7.0
222 Name == "sse41.pmuldq" || // Added in 7.0
223 Name == "avx2.pmulu.dq" || // Added in 7.0
224 Name == "avx2.pmul.dq" || // Added in 7.0
225 Name == "avx512.pmulu.dq.512" || // Added in 7.0
226 Name == "avx512.pmul.dq.512" || // Added in 7.0
227 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
228 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
229 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
230 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
231 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
232 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
233 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
234 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
235 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
236 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
237 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
238 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
239 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
240 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
241 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
242 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
243 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
244 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
245 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
246 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
247 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
248 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
249 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
250 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
251 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
252 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
253 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
254 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
255 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
256 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
257 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
258 Name.startswith("avx512.mask.pslli") || // Added in 4.0
259 Name.startswith("avx512.mask.psrai") || // Added in 4.0
260 Name.startswith("avx512.mask.psrli") || // Added in 4.0
261 Name.startswith("avx512.mask.psllv") || // Added in 4.0
262 Name.startswith("avx512.mask.psrav") || // Added in 4.0
263 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
264 Name.startswith("sse41.pmovsx") || // Added in 3.8
265 Name.startswith("sse41.pmovzx") || // Added in 3.9
266 Name.startswith("avx2.pmovsx") || // Added in 3.9
267 Name.startswith("avx2.pmovzx") || // Added in 3.9
268 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
269 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
270 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
271 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
272 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
273 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
274 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
275 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
276 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
277 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
278 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
279 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
280 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
281 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
282 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
283 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
284 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
285 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
286 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
287 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
288 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
289 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
290 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
291 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
292 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
293 Name.startswith("avx512.vpshld.") || // Added in 8.0
294 Name.startswith("avx512.vpshrd.") || // Added in 8.0
295 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
296 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
297 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
298 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
299 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
300 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
301 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
302 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
303 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
304 Name.startswith("avx512.mask.conflict.") || // Added in 9.0
305 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
306 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
307 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
308 Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
309 Name == "sse.cvtsi2ss" || // Added in 7.0
310 Name == "sse.cvtsi642ss" || // Added in 7.0
311 Name == "sse2.cvtsi2sd" || // Added in 7.0
312 Name == "sse2.cvtsi642sd" || // Added in 7.0
313 Name == "sse2.cvtss2sd" || // Added in 7.0
314 Name == "sse2.cvtdq2pd" || // Added in 3.9
315 Name == "sse2.cvtdq2ps" || // Added in 7.0
316 Name == "sse2.cvtps2pd" || // Added in 3.9
317 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
318 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
319 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
320 Name.startswith("avx.vinsertf128.") || // Added in 3.7
321 Name == "avx2.vinserti128" || // Added in 3.7
322 Name.startswith("avx512.mask.insert") || // Added in 4.0
323 Name.startswith("avx.vextractf128.") || // Added in 3.7
324 Name == "avx2.vextracti128" || // Added in 3.7
325 Name.startswith("avx512.mask.vextract") || // Added in 4.0
326 Name.startswith("sse4a.movnt.") || // Added in 3.9
327 Name.startswith("avx.movnt.") || // Added in 3.2
328 Name.startswith("avx512.storent.") || // Added in 3.9
329 Name == "sse41.movntdqa" || // Added in 5.0
330 Name == "avx2.movntdqa" || // Added in 5.0
331 Name == "avx512.movntdqa" || // Added in 5.0
332 Name == "sse2.storel.dq" || // Added in 3.9
333 Name.startswith("sse.storeu.") || // Added in 3.9
334 Name.startswith("sse2.storeu.") || // Added in 3.9
335 Name.startswith("avx.storeu.") || // Added in 3.9
336 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
337 Name.startswith("avx512.mask.store.p") || // Added in 3.9
338 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
339 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
340 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
341 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
342 Name == "avx512.mask.store.ss" || // Added in 7.0
343 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
344 Name.startswith("avx512.mask.load.") || // Added in 3.9
345 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
346 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
347 Name.startswith("avx512.mask.expand.b") || // Added in 9.0
348 Name.startswith("avx512.mask.expand.w") || // Added in 9.0
349 Name.startswith("avx512.mask.expand.d") || // Added in 9.0
350 Name.startswith("avx512.mask.expand.q") || // Added in 9.0
351 Name.startswith("avx512.mask.expand.p") || // Added in 9.0
352 Name.startswith("avx512.mask.compress.b") || // Added in 9.0
353 Name.startswith("avx512.mask.compress.w") || // Added in 9.0
354 Name.startswith("avx512.mask.compress.d") || // Added in 9.0
355 Name.startswith("avx512.mask.compress.q") || // Added in 9.0
356 Name.startswith("avx512.mask.compress.p") || // Added in 9.0
357 Name == "sse42.crc32.64.8" || // Added in 3.4
358 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
359 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
360 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
361 Name.startswith("avx512.mask.valign.") || // Added in 4.0
362 Name.startswith("sse2.psll.dq") || // Added in 3.7
363 Name.startswith("sse2.psrl.dq") || // Added in 3.7
364 Name.startswith("avx2.psll.dq") || // Added in 3.7
365 Name.startswith("avx2.psrl.dq") || // Added in 3.7
366 Name.startswith("avx512.psll.dq") || // Added in 3.9
367 Name.startswith("avx512.psrl.dq") || // Added in 3.9
368 Name == "sse41.pblendw" || // Added in 3.7
369 Name.startswith("sse41.blendp") || // Added in 3.7
370 Name.startswith("avx.blend.p") || // Added in 3.7
371 Name == "avx2.pblendw" || // Added in 3.7
372 Name.startswith("avx2.pblendd.") || // Added in 3.7
373 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
374 Name == "avx2.vbroadcasti128" || // Added in 3.7
375 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
376 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
377 Name == "xop.vpcmov" || // Added in 3.8
378 Name == "xop.vpcmov.256" || // Added in 5.0
379 Name.startswith("avx512.mask.move.s") || // Added in 4.0
380 Name.startswith("avx512.cvtmask2") || // Added in 5.0
381 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
382 Name.startswith("xop.vprot") || // Added in 8.0
383 Name.startswith("avx512.prol") || // Added in 8.0
384 Name.startswith("avx512.pror") || // Added in 8.0
385 Name.startswith("avx512.mask.prorv.") || // Added in 8.0
386 Name.startswith("avx512.mask.pror.") || // Added in 8.0
387 Name.startswith("avx512.mask.prolv.") || // Added in 8.0
388 Name.startswith("avx512.mask.prol.") || // Added in 8.0
389 Name.startswith("avx512.ptestm") || // Added in 6.0
390 Name.startswith("avx512.ptestnm") || // Added in 6.0
391 Name.startswith("avx512.mask.pavg")) // Added in 6.0
// Handles the x86-specific part of intrinsic declaration upgrading. Name is
// F's intrinsic name as passed by the caller in this file, which has already
// stripped the leading "llvm."; this function additionally strips "x86.".
// For the cases below that have a replacement declaration, that declaration
// is looked up in F's module and stored in NewFn.
397 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
399 // Only handle intrinsics that start with "x86.".
400 if (!Name.startswith("x86."))
402 // Remove "x86." prefix.
403 Name = Name.substr(4);
// Names matched by ShouldUpgradeX86Intrinsic are checked first.
405 if (ShouldUpgradeX86Intrinsic(F, Name)) {
410 if (Name == "rdtscp") { // Added in 8.0
411 // If this intrinsic has 0 operands, it's the new version.
412 if (F->getFunctionType()->getNumParams() == 0)
416 NewFn = Intrinsic::getDeclaration(F->getParent(),
417 Intrinsic::x86_rdtscp);
421 // SSE4.1 ptest functions may have an old signature.
422 if (Name.startswith("sse41.ptest")) { // Added in 3.2
// "sse41.ptest" is 11 characters long, so substr(11) is the suffix that
// selects the ptestc / ptestz / ptestnzc variant.
423 if (Name.substr(11) == "c")
424 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
425 if (Name.substr(11) == "z")
426 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
427 if (Name.substr(11) == "nzc")
428 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
430 // Several blend and other instructions with masks used the wrong number of
// bits for the mask operand; UpgradeX86IntrinsicsWith8BitMask checks for a
// trailing i32 argument on each of them.
432 if (Name == "sse41.insertps") // Added in 3.6
433 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
435 if (Name == "sse41.dppd") // Added in 3.6
436 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
438 if (Name == "sse41.dpps") // Added in 3.6
439 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
441 if (Name == "sse41.mpsadbw") // Added in 3.6
442 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
444 if (Name == "avx.dp.ps.256") // Added in 3.6
445 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
447 if (Name == "avx2.mpsadbw") // Added in 3.6
448 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
451 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
// Only the two-argument form is treated as outdated here.
452 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
454 NewFn = Intrinsic::getDeclaration(F->getParent(),
455 Intrinsic::x86_xop_vfrcz_ss);
458 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
460 NewFn = Intrinsic::getDeclaration(F->getParent(),
461 Intrinsic::x86_xop_vfrcz_sd);
464 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
465 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
// The index operand is parameter 2; its total and per-element bit widths
// select which vpermil2 variant to declare.
466 auto Idx = F->getFunctionType()->getParamType(2);
467 if (Idx->isFPOrFPVectorTy()) {
469 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
470 unsigned EltSize = Idx->getScalarSizeInBits();
471 Intrinsic::ID Permil2ID;
472 if (EltSize == 64 && IdxSize == 128)
473 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
474 else if (EltSize == 32 && IdxSize == 128)
475 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
476 else if (EltSize == 64 && IdxSize == 256)
477 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
479 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
480 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
// x86.seh.recoverfp is replaced by the declaration for
// Intrinsic::eh_recoverfp.
485 if (Name == "seh.recoverfp") {
486 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
493 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
494 assert(F && "Illegal to upgrade a non-existent Function.");
496 // Quickly eliminate it, if it's not a candidate.
497 StringRef Name = F->getName();
498 if (Name.size() <= 8 || !Name.startswith("llvm."))
500 Name = Name.substr(5); // Strip off "llvm."
505 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
506 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
507 F->arg_begin()->getType());
510 if (Name.startswith("arm.neon.vclz")) {
512 F->arg_begin()->getType(),
513 Type::getInt1Ty(F->getContext())
515 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
516 // the end of the name. Change name from llvm.arm.neon.vclz.* to
518 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
519 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
520 "llvm.ctlz." + Name.substr(14), F->getParent());
523 if (Name.startswith("arm.neon.vcnt")) {
524 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
525 F->arg_begin()->getType());
528 static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
529 if (vldRegex.match(Name)) {
530 auto fArgs = F->getFunctionType()->params();
531 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
532 // Can't use Intrinsic::getDeclaration here as the return types might
533 // then only be structurally equal.
534 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
535 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
536 "llvm." + Name + ".p0i8", F->getParent());
539 static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
540 if (vstRegex.match(Name)) {
541 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
542 Intrinsic::arm_neon_vst2,
543 Intrinsic::arm_neon_vst3,
544 Intrinsic::arm_neon_vst4};
546 static const Intrinsic::ID StoreLaneInts[] = {
547 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
548 Intrinsic::arm_neon_vst4lane
551 auto fArgs = F->getFunctionType()->params();
552 Type *Tys[] = {fArgs[0], fArgs[1]};
553 if (Name.find("lane") == StringRef::npos)
554 NewFn = Intrinsic::getDeclaration(F->getParent(),
555 StoreInts[fArgs.size() - 3], Tys);
557 NewFn = Intrinsic::getDeclaration(F->getParent(),
558 StoreLaneInts[fArgs.size() - 5], Tys);
561 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
562 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
565 if (Name.startswith("arm.neon.vqadds.")) {
566 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
567 F->arg_begin()->getType());
570 if (Name.startswith("arm.neon.vqaddu.")) {
571 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
572 F->arg_begin()->getType());
575 if (Name.startswith("arm.neon.vqsubs.")) {
576 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
577 F->arg_begin()->getType());
580 if (Name.startswith("arm.neon.vqsubu.")) {
581 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
582 F->arg_begin()->getType());
585 if (Name.startswith("aarch64.neon.addp")) {
586 if (F->arg_size() != 2)
587 break; // Invalid IR.
588 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
589 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
590 NewFn = Intrinsic::getDeclaration(F->getParent(),
591 Intrinsic::aarch64_neon_faddp, Ty);
599 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
601 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
602 F->arg_begin()->getType());
605 if (Name.startswith("cttz.") && F->arg_size() == 1) {
607 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
608 F->arg_begin()->getType());
614 if (Name == "dbg.value" && F->arg_size() == 4) {
616 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
622 SmallVector<StringRef, 2> Groups;
623 static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
624 if (R.match(Name, &Groups)) {
625 Intrinsic::ID ID = Intrinsic::not_intrinsic;
626 if (Groups[1] == "fadd")
627 ID = Intrinsic::experimental_vector_reduce_v2_fadd;
628 if (Groups[1] == "fmul")
629 ID = Intrinsic::experimental_vector_reduce_v2_fmul;
631 if (ID != Intrinsic::not_intrinsic) {
633 auto Args = F->getFunctionType()->params();
634 Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
635 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
643 bool IsLifetimeStart = Name.startswith("lifetime.start");
644 if (IsLifetimeStart || Name.startswith("invariant.start")) {
645 Intrinsic::ID ID = IsLifetimeStart ?
646 Intrinsic::lifetime_start : Intrinsic::invariant_start;
647 auto Args = F->getFunctionType()->params();
648 Type* ObjectPtr[1] = {Args[1]};
649 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
651 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
656 bool IsLifetimeEnd = Name.startswith("lifetime.end");
657 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
658 Intrinsic::ID ID = IsLifetimeEnd ?
659 Intrinsic::lifetime_end : Intrinsic::invariant_end;
661 auto Args = F->getFunctionType()->params();
662 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
663 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
665 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
669 if (Name.startswith("invariant.group.barrier")) {
670 // Rename invariant.group.barrier to launder.invariant.group
671 auto Args = F->getFunctionType()->params();
672 Type* ObjectPtr[1] = {Args[0]};
674 NewFn = Intrinsic::getDeclaration(F->getParent(),
675 Intrinsic::launder_invariant_group, ObjectPtr);
683 if (Name.startswith("masked.load.")) {
684 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
685 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
687 NewFn = Intrinsic::getDeclaration(F->getParent(),
688 Intrinsic::masked_load,
693 if (Name.startswith("masked.store.")) {
694 auto Args = F->getFunctionType()->params();
695 Type *Tys[] = { Args[0], Args[1] };
696 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
698 NewFn = Intrinsic::getDeclaration(F->getParent(),
699 Intrinsic::masked_store,
704 // Renaming gather/scatter intrinsics with no address space overloading
705 // to the new overload which includes an address space
706 if (Name.startswith("masked.gather.")) {
707 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
708 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
710 NewFn = Intrinsic::getDeclaration(F->getParent(),
711 Intrinsic::masked_gather, Tys);
715 if (Name.startswith("masked.scatter.")) {
716 auto Args = F->getFunctionType()->params();
717 Type *Tys[] = {Args[0], Args[1]};
718 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
720 NewFn = Intrinsic::getDeclaration(F->getParent(),
721 Intrinsic::masked_scatter, Tys);
725 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
726 // alignment parameter to embedding the alignment as an attribute of
728 if (Name.startswith("memcpy.") && F->arg_size() == 5) {
730 // Get the types of dest, src, and len
731 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
732 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
736 if (Name.startswith("memmove.") && F->arg_size() == 5) {
738 // Get the types of dest, src, and len
739 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
740 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
744 if (Name.startswith("memset.") && F->arg_size() == 5) {
746 // Get the types of dest, and len
747 const auto *FT = F->getFunctionType();
748 Type *ParamTypes[2] = {
749 FT->getParamType(0), // Dest
750 FT->getParamType(2) // len
752 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
759 if (Name.startswith("nvvm.")) {
760 Name = Name.substr(5);
762 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
763 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
764 .Cases("brev32", "brev64", Intrinsic::bitreverse)
765 .Case("clz.i", Intrinsic::ctlz)
766 .Case("popc.i", Intrinsic::ctpop)
767 .Default(Intrinsic::not_intrinsic);
768 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
769 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
770 {F->getReturnType()});
774 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
775 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
777 // TODO: We could add lohi.i2d.
778 bool Expand = StringSwitch<bool>(Name)
779 .Cases("abs.i", "abs.ll", true)
780 .Cases("clz.ll", "popc.ll", "h2f", true)
781 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
782 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
783 .StartsWith("atomic.load.add.f32.p", true)
784 .StartsWith("atomic.load.add.f64.p", true)
794 // We only need to change the name to match the mangling including the
796 if (Name.startswith("objectsize.")) {
797 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
798 if (F->arg_size() == 2 || F->arg_size() == 3 ||
799 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
801 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
809 if (Name == "prefetch") {
810 // Handle address space overloading.
811 Type *Tys[] = {F->arg_begin()->getType()};
812 if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
815 Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
822 if (Name == "stackprotectorcheck") {
829 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
832 // Remangle our intrinsic since we upgrade the mangling
833 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
834 if (Result != None) {
835 NewFn = Result.getValue();
839 // This may not belong here. This function is effectively being overloaded
840 // to both detect an intrinsic which needs upgrading, and to provide the
841 // upgraded form of the intrinsic. We should perhaps have two separate
842 // functions for this.
// Public entry point for upgrading an intrinsic declaration. Detection is
// delegated to UpgradeIntrinsicFunction1; its boolean result is kept in
// Upgraded and any replacement declaration is reported through NewFn.
846 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
848 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// A replacement declaration must be a different Function object than F.
849 assert(F != NewFn && "Intrinsic function upgraded to the same function");
851 // Upgrade intrinsic attributes. This does not change the function.
// Applied only when F has a non-zero intrinsic ID; the attribute list is the
// one Intrinsic::getAttributes builds for that ID in F's context.
854 if (Intrinsic::ID id = F->getIntrinsicID())
855 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Builds a replacement for an llvm.global_ctors / llvm.global_dtors variable
// whose initializer is an array of two-field structs. Every entry is rebuilt
// as a struct constant that keeps the two existing fields and appends a null
// i8* value. The result is a newly allocated GlobalVariable that reuses GV's
// linkage and name; no module is passed to its constructor here, so inserting
// it is presumably left to the caller -- TODO confirm against callers.
859 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// Condition: true when GV is not named llvm.global_ctors/llvm.global_dtors or
// has no initializer.
860 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
861 GV->getName() == "llvm.global_dtors")) ||
862 !GV->hasInitializer())
864 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
867 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
// Condition: true when the array element is not a struct with exactly two
// fields.
868 if (!STy || STy->getNumElements() != 2)
871 LLVMContext &C = GV->getContext();
// New element type: starts with the two field types of the old struct.
873 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
875 Constant *Init = GV->getInitializer();
876 unsigned N = Init->getNumOperands();
877 std::vector<Constant *> NewCtors(N);
// Rebuild each entry: old field 0, old field 1, then a null i8* constant.
878 for (unsigned i = 0; i != N; ++i) {
879 auto Ctor = cast<Constant>(Init->getOperand(i));
880 NewCtors[i] = ConstantStruct::get(
881 EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
882 Constant::getNullValue(IRB.getInt8PtrTy()));
884 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
886 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
887 NewInit, GV->getName());
890 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// into a shuffle over a vector of bytes. Op is the vector being shifted and
// Shift is the shift amount in bytes. The result has the same type as Op.
892 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
893 Value *Op, unsigned Shift) {
894 Type *ResultTy = Op->getType();
// Byte count of the vector: element count times 8 (elements are 64-bit, per
// the comment below).
895 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
897 // Bitcast from a 64-bit element type to a byte element type.
898 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
899 Op = Builder.CreateBitCast(Op, VecTy, "cast");
901 // We'll be shuffling in zeroes.
902 Value *Res = Constant::getNullValue(VecTy);
904 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
905 // we'll just return the zero vector.
908 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes within a lane. The shuffle below
// takes (zero vector, Op), so indices below NumElts select zero bytes and
// indices from NumElts upward select bytes of Op.
909 for (unsigned l = 0; l != NumElts; l += 16)
910 for (unsigned i = 0; i != 16; ++i) {
911 unsigned Idx = NumElts + i - Shift;
913 Idx -= NumElts - 16; // end of lane, switch operand.
914 Idxs[l + i] = Idx + l;
917 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
920 // Bitcast back to a 64-bit element type.
921 return Builder.CreateBitCast(Res, ResultTy, "cast");
924 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// into a shuffle over a vector of bytes. Op is the vector being shifted; the
// result has the same type as Op.
926 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
928 Type *ResultTy = Op->getType();
// Byte count of the vector: element count times 8 (elements are 64-bit, per
// the comment below).
929 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
931 // Bitcast from a 64-bit element type to a byte element type.
932 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
933 Op = Builder.CreateBitCast(Op, VecTy, "cast");
935 // We'll be shuffling in zeroes.
936 Value *Res = Constant::getNullValue(VecTy);
938 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
939 // we'll just return the zero vector.
942 // 256/512-bit version is split into 2/4 16-byte lanes.
// l walks the 16-byte lanes and i the bytes within a lane. The shuffle below
// takes (Op, zero vector), so indices below NumElts select bytes of Op and
// indices from NumElts upward select zero bytes.
943 for (unsigned l = 0; l != NumElts; l += 16)
944 for (unsigned i = 0; i != 16; ++i) {
945 unsigned Idx = i + Shift;
947 Idx += NumElts - 16; // end of lane, switch operand.
948 Idxs[l + i] = Idx + l;
951 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
954 // Bitcast back to a 64-bit element type.
955 return Builder.CreateBitCast(Res, ResultTy, "cast");
// Converts an integer mask into a vector of i1. Mask must have an integer
// type (it is cast<IntegerType> unconditionally); it is bitcast to a vector
// with one i1 element per bit of that integer type.
958 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
960 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
961 cast<IntegerType>(Mask->getType())->getBitWidth());
962 Mask = Builder.CreateBitCast(Mask, MaskTy);
964 // If we have less than 8 elements, then the starting mask was an i8 and
965 // we need to extract down to the right number of elements.
// The narrowing is a shuffle of the mask with itself that takes NumElts
// indices.
968 for (unsigned i = 0; i != NumElts; ++i)
970 Mask = Builder.CreateShuffleVector(Mask, Mask,
971 makeArrayRef(Indices, NumElts),
// Per-element blend of two vectors under an integer mask. The mask is turned
// into an i1 vector with as many elements as Op0 (via getX86MaskVec) and a
// select(Mask, Op0, Op1) is emitted, so Op0 is taken where the mask element
// is set and Op1 elsewhere.
978 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
979 Value *Op0, Value *Op1) {
980 // If the mask is all ones just emit the first operation.
981 if (const auto *C = dyn_cast<Constant>(Mask))
982 if (C->isAllOnesValue())
985 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
986 return Builder.CreateSelect(Mask, Op0, Op1);
// Scalar counterpart of EmitX86Select: the integer mask is bitcast to a
// vector of i1 (one element per mask bit) and only element 0 of that vector
// drives the select between Op0 and Op1.
989 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
990 Value *Op0, Value *Op1) {
991 // If the mask is all ones just emit the first operation.
992 if (const auto *C = dyn_cast<Constant>(Mask))
993 if (C->isAllOnesValue())
996 llvm::VectorType *MaskTy =
997 llvm::VectorType::get(Builder.getInt1Ty(),
998 Mask->getType()->getIntegerBitWidth());
999 Mask = Builder.CreateBitCast(Mask, MaskTy);
1000 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1001 return Builder.CreateSelect(Mask, Op0, Op1);
1004 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1005 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1006 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Shift must be a ConstantInt (it is cast<> unconditionally). The aligned
// value is produced by a shuffle of (Op1, Op0) and is then blended with
// Passthru under Mask via EmitX86Select.
1007 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1008 Value *Op1, Value *Shift,
1009 Value *Passthru, Value *Mask,
1011 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1013 unsigned NumElts = Op0->getType()->getVectorNumElements();
1014 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1015 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1016 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1018 // Mask the immediate for VALIGN.
// NumElts is a power of two (asserted above), so the AND is a modulo.
1020 ShiftVal &= (NumElts - 1);
1022 // If palignr is shifting the pair of vectors more than the size of two
1023 // lanes, emit zero.
1025 return llvm::Constant::getNullValue(Op0->getType());
1027 // If palignr is shifting the pair of input vectors more than one lane,
1028 // but less than two lanes, convert to shifting in zeroes.
1029 if (ShiftVal > 16) {
1032 Op0 = llvm::Constant::getNullValue(Op0->getType());
1035 uint32_t Indices[64];
1036 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// l walks 16-element lanes and i the positions within a lane.
1037 for (unsigned l = 0; l < NumElts; l += 16) {
1038 for (unsigned i = 0; i != 16; ++i) {
1039 unsigned Idx = ShiftVal + i;
1040 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1041 Idx += NumElts - 16; // End of lane, switch operand.
1042 Indices[l + i] = Idx + l;
// Only the first NumElts entries of Indices are handed to the shuffle.
1046 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1047 makeArrayRef(Indices, NumElts),
1050 return EmitX86Select(Builder, Mask, Align, Passthru);
// Upgrades a masked two-source permute call to the matching
// x86_avx512_vpermi2var_* intrinsic plus a select. The intrinsic is chosen
// from the call's result type: total vector width (128/256/512 bits), element
// width (8/16/32/64 bits) and, for 32/64-bit elements, whether the type is
// floating point. The new call takes the first three call arguments, and its
// result is blended under the mask in call argument 3. The pass-through value
// is an all-zero vector when ZeroMask is set, otherwise a bitcast of call
// argument 1.
1053 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
1054 bool ZeroMask, bool IndexForm) {
1055 Type *Ty = CI.getType();
1056 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1057 unsigned EltWidth = Ty->getScalarSizeInBits();
1058 bool IsFloat = Ty->isFPOrFPVectorTy();
1060 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1061 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1062 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1063 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1064 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1065 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1066 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1067 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1068 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1069 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1070 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1071 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1072 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1073 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1074 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1075 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1076 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1077 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1078 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1079 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1080 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1081 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1082 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1083 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
// 16-bit and 8-bit element variants do not depend on IsFloat.
1084 else if (VecWidth == 128 && EltWidth == 16)
1085 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1086 else if (VecWidth == 256 && EltWidth == 16)
1087 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1088 else if (VecWidth == 512 && EltWidth == 16)
1089 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1090 else if (VecWidth == 128 && EltWidth == 8)
1091 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1092 else if (VecWidth == 256 && EltWidth == 8)
1093 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1094 else if (VecWidth == 512 && EltWidth == 8)
1095 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1097 llvm_unreachable("Unexpected intrinsic");
1099 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1100 CI.getArgOperand(2) };
1102 // If this isn't index form we need to swap operand 0 and 1.
1104 std::swap(Args[0], Args[1]);
1106 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1108 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1109 : Builder.CreateBitCast(CI.getArgOperand(1),
1111 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
// Upgrades a saturating add/subtract call to the generic intrinsic selected
// by IsSigned/IsAddition (sadd_sat, ssub_sat, uadd_sat or usub_sat),
// overloaded on the call's result type and applied to operands 0 and 1.
// For the four-argument (masked) form, the result is blended with operand 2
// under the mask in operand 3.
1114 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1115 bool IsSigned, bool IsAddition) {
1116 Type *Ty = CI.getType();
1117 Value *Op0 = CI.getOperand(0);
1118 Value *Op1 = CI.getOperand(1);
1121 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1122 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1123 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1124 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1126 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1127 Value *VecSrc = CI.getOperand(2);
1128 Value *Mask = CI.getOperand(3);
1129 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
// Upgrades a rotate call to a funnel shift whose two data inputs are both
// Src: fshr when IsRotateRight is set, fshl otherwise. For the four-argument
// (masked) form, the result is blended with operand 2 under the mask in
// operand 3.
1134 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1135 bool IsRotateRight) {
1136 Type *Ty = CI.getType();
1137 Value *Src = CI.getArgOperand(0);
1138 Value *Amt = CI.getArgOperand(1);
1140 // Amount may be scalar immediate, in which case create a splat vector.
1141 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1142 // we only care about the lowest log2 bits anyway.
// The amount is cast as unsigned (isSigned == false) to the element type
// before being splatted.
1143 if (Amt->getType() != Ty) {
1144 unsigned NumElts = Ty->getVectorNumElements();
1145 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1146 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1149 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1150 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1151 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1153 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1154 Value *VecSrc = CI.getOperand(2);
1155 Value *Mask = CI.getOperand(3);
1156 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
// Upgrades an XOP vpcom/vpcomu call to an integer compare of call arguments
// 0 and 1 whose i1 result is sign-extended to the call's result type. The
// ordered predicates come in signed and unsigned flavours chosen by IsSigned;
// EQ and NE are sign-agnostic. Two cases produce a constant instead of a
// compare: all-zero (FALSE) and all-ones (TRUE).
1161 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1163 Type *Ty = CI.getType();
1164 Value *LHS = CI.getArgOperand(0);
1165 Value *RHS = CI.getArgOperand(1);
1167 CmpInst::Predicate Pred;
1170 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1173 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1176 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1179 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1182 Pred = ICmpInst::ICMP_EQ;
1185 Pred = ICmpInst::ICMP_NE;
1188 return Constant::getNullValue(Ty); // FALSE
1190 return Constant::getAllOnesValue(Ty); // TRUE
1192 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1195 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1196 Value *Ext = Builder.CreateSExt(Cmp, Ty);
// Upgrades a concatenate-and-shift call to a funnel shift of (Op0, Op1, Amt):
// fshr when IsShiftRight is set, fshl otherwise. For calls with four or more
// arguments the last operand is the mask. The pass-through value is call
// argument 3 when there are five arguments; otherwise it is an all-zero
// vector if ZeroMask is set, else call argument 0.
1200 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1201 bool IsShiftRight, bool ZeroMask) {
1202 Type *Ty = CI.getType();
1203 Value *Op0 = CI.getArgOperand(0);
1204 Value *Op1 = CI.getArgOperand(1);
1205 Value *Amt = CI.getArgOperand(2);
1208 std::swap(Op0, Op1);
1210 // Amount may be scalar immediate, in which case create a splat vector.
1211 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1212 // we only care about the lowest log2 bits anyway.
// The amount is cast as unsigned (isSigned == false) to the element type
// before being splatted.
1213 if (Amt->getType() != Ty) {
1214 unsigned NumElts = Ty->getVectorNumElements();
1215 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1216 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1219 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1220 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1221 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1223 unsigned NumArgs = CI.getNumArgOperands();
1224 if (NumArgs >= 4) { // For masked intrinsics.
// Pass-through uses the original call argument 0, read from CI rather than
// from the local Op0.
1225 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1226 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1227 CI.getArgOperand(0);
1228 Value *Mask = CI.getOperand(NumArgs - 1);
1229 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
// Emits a store of Data through Ptr under an integer Mask. A constant
// all-ones mask becomes a plain aligned store; any other mask is converted to
// an i1 vector with one element per element of Data and a masked store is
// emitted. Data must be a vector when Aligned is set (it is cast<VectorType>).
1234 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1235 Value *Ptr, Value *Data, Value *Mask,
1237 // Cast the pointer to the right type.
1238 Ptr = Builder.CreateBitCast(Ptr,
1239 llvm::PointerType::getUnqual(Data->getType()));
// Alignment: the vector's size in bytes when Aligned is set, otherwise 1.
1241 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1243 // If the mask is all ones just emit a regular store.
1244 if (const auto *C = dyn_cast<Constant>(Mask))
1245 if (C->isAllOnesValue())
1246 return Builder.CreateAlignedStore(Data, Ptr, Align);
1248 // Convert the mask from an integer type to a vector of i1.
1249 unsigned NumElts = Data->getType()->getVectorNumElements();
1250 Mask = getX86MaskVec(Builder, Mask, NumElts);
1251 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
// Emits a load through Ptr under an integer Mask. The loaded type is the
// type of Passthru. A constant all-ones mask becomes a plain aligned load;
// any other mask is converted to an i1 vector with one element per element
// of Passthru and a masked load is emitted with Passthru as its pass-through
// operand. Passthru must be a vector when Aligned is set (it is
// cast<VectorType>).
1254 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1255 Value *Ptr, Value *Passthru, Value *Mask,
1257 Type *ValTy = Passthru->getType();
1258 // Cast the pointer to the right type.
1259 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
// Alignment: the vector's size in bytes when Aligned is set, otherwise 1.
1261 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1263 // If the mask is all ones just emit a regular load.
1264 if (const auto *C = dyn_cast<Constant>(Mask))
1265 if (C->isAllOnesValue())
1266 return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
1268 // Convert the mask from an integer type to a vector of i1.
1269 unsigned NumElts = Passthru->getType()->getVectorNumElements();
1270 Mask = getX86MaskVec(Builder, Mask, NumElts);
1271 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
// Expands an absolute-value call into compare + negate + select: the operand
// is kept where it is signed-greater-than zero and its negation is used
// elsewhere. For the three-argument (masked) form, the result is blended with
// call argument 1 under the mask in call argument 2.
1274 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1275 Value *Op0 = CI.getArgOperand(0);
1276 llvm::Type *Ty = Op0->getType();
1277 Value *Zero = llvm::Constant::getNullValue(Ty);
1278 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1279 Value *Neg = Builder.CreateNeg(Op0);
1280 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1282 if (CI.getNumArgOperands() == 3)
1283 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
// Expands an integer min/max call into icmp + select: Pred is applied to call
// arguments 0 and 1, and argument 0 is chosen where the compare is true,
// argument 1 otherwise. For the four-argument (masked) form, the result is
// blended with call argument 2 under the mask in call argument 3.
1288 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1289 ICmpInst::Predicate Pred) {
1290 Value *Op0 = CI.getArgOperand(0);
1291 Value *Op1 = CI.getArgOperand(1);
1292 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1293 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1295 if (CI.getNumArgOperands() == 4)
1296 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
// Expands a widening 32x32->64 multiply call into generic IR. Both operands
// are bitcast to the call's result type, the low 32 bits of every element are
// extended to the full element width, and the elements are multiplied. Two
// extension sequences appear below: shl + ashr by 32 (sign extension) and an
// AND with 0xffffffff (zero extension). Presumably IsSigned picks between
// them -- TODO confirm against the branch condition. For the four-argument
// (masked) form, the product is blended with call argument 2 under the mask
// in call argument 3.
1301 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1302 Type *Ty = CI.getType();
1304 // Arguments have a vXi32 type so cast to vXi64.
1305 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1306 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1309 // Shift left then arithmetic shift right.
1310 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1311 LHS = Builder.CreateShl(LHS, ShiftAmt);
1312 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1313 RHS = Builder.CreateShl(RHS, ShiftAmt);
1314 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1316 // Clear the upper bits.
1317 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1318 LHS = Builder.CreateAnd(LHS, Mask);
1319 RHS = Builder.CreateAnd(RHS, Mask);
1322 Value *Res = Builder.CreateMul(LHS, RHS);
1324 if (CI.getNumArgOperands() == 4)
1325 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1330 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Vec is ANDed with the i1-vector form of Mask unless Mask is a constant
// all-ones value. The vector is then bitcast to an integer of
// max(NumElts, 8) bits. The widening shuffle below pairs Vec with a zero
// vector; its padding indices (NumElts and above) select from that zero
// vector.
1331 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1333 unsigned NumElts = Vec->getType()->getVectorNumElements();
1335 const auto *C = dyn_cast<Constant>(Mask);
1336 if (!C || !C->isAllOnesValue())
1337 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1341 uint32_t Indices[8];
1342 for (unsigned i = 0; i != NumElts; ++i)
1344 for (unsigned i = NumElts; i != 8; ++i)
1345 Indices[i] = NumElts + i % NumElts;
1346 Vec = Builder.CreateShuffleVector(Vec,
1347 Constant::getNullValue(Vec->getType()),
1350 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
// Upgrades a masked integer compare call to an icmp of call arguments 0 and
// 1, followed by ApplyX86MaskOn1BitsVec with the call's last argument as the
// mask. CC picks the predicate: 0 EQ, 1 LT, 2 LE, 4 NE, 5 GE, 6 GT, with
// Signed selecting the signed or unsigned flavour of the ordered predicates.
// CC == 7 yields a constant all-ones i1 vector; one other case yields a
// constant all-zero i1 vector instead of a compare.
1353 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1354 unsigned CC, bool Signed) {
1355 Value *Op0 = CI.getArgOperand(0);
1356 unsigned NumElts = Op0->getType()->getVectorNumElements();
1360 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1361 } else if (CC == 7) {
1362 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1364 ICmpInst::Predicate Pred;
1366 default: llvm_unreachable("Unknown condition code");
1367 case 0: Pred = ICmpInst::ICMP_EQ; break;
1368 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1369 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1370 case 4: Pred = ICmpInst::ICMP_NE; break;
1371 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1372 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1374 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1377 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1379 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1382 // Replace a masked intrinsic with an older unmasked intrinsic.
// The unmasked intrinsic IID is called with call arguments 0 and 1; its
// result is then blended with call argument 2 under the mask in call
// argument 3.
1383 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1384 Intrinsic::ID IID) {
1385 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1386 Value *Rep = Builder.CreateCall(Intrin,
1387 { CI.getArgOperand(0), CI.getArgOperand(1) });
1388 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
// Expands a masked scalar move. The mask (call argument 3) is ANDed with the
// 8-bit constant 1 and tested for non-zero. That condition selects between
// element 0 of B (call argument 1) and element 0 of Src (call argument 2),
// and the chosen scalar is inserted into element 0 of A (call argument 0).
1391 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1392 Value* A = CI.getArgOperand(0);
1393 Value* B = CI.getArgOperand(1);
1394 Value* Src = CI.getArgOperand(2);
1395 Value* Mask = CI.getArgOperand(3);
1397 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1398 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1399 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1400 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1401 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1402 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
// Expands a mask-to-vector call: the integer mask in call argument 0 is
// converted to an i1 vector with one element per element of the call's result
// type, then sign-extended to that result type.
1406 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1407 Value* Op = CI.getArgOperand(0);
1408 Type* ReturnOp = CI.getType();
1409 unsigned NumElts = CI.getType()->getVectorNumElements();
1410 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1411 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1414 // Replace intrinsic with unmasked version and a select.
// Name is expected to begin with the 12-character prefix "avx512.mask.",
// which is stripped first. The remainder selects the intrinsic family. The
// call's result type (total bit width, scalar bit width and, for permvar,
// whether it is floating point) selects the concrete unmasked intrinsic.
// The replacement value is written to Rep: it is the new call's result
// blended via EmitX86Select with the second-to-last call argument, using the
// last call argument as the mask.
1415 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1416 CallInst &CI, Value *&Rep) {
1417 Name = Name.substr(12); // Remove avx512.mask.
1419 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1420 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1422 if (Name.startswith("max.p")) {
1423 if (VecWidth == 128 && EltWidth == 32)
1424 IID = Intrinsic::x86_sse_max_ps;
1425 else if (VecWidth == 128 && EltWidth == 64)
1426 IID = Intrinsic::x86_sse2_max_pd;
1427 else if (VecWidth == 256 && EltWidth == 32)
1428 IID = Intrinsic::x86_avx_max_ps_256;
1429 else if (VecWidth == 256 && EltWidth == 64)
1430 IID = Intrinsic::x86_avx_max_pd_256;
1432 llvm_unreachable("Unexpected intrinsic");
1433 } else if (Name.startswith("min.p")) {
1434 if (VecWidth == 128 && EltWidth == 32)
1435 IID = Intrinsic::x86_sse_min_ps;
1436 else if (VecWidth == 128 && EltWidth == 64)
1437 IID = Intrinsic::x86_sse2_min_pd;
1438 else if (VecWidth == 256 && EltWidth == 32)
1439 IID = Intrinsic::x86_avx_min_ps_256;
1440 else if (VecWidth == 256 && EltWidth == 64)
1441 IID = Intrinsic::x86_avx_min_pd_256;
1443 llvm_unreachable("Unexpected intrinsic");
1444 } else if (Name.startswith("pshuf.b.")) {
1445 if (VecWidth == 128)
1446 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1447 else if (VecWidth == 256)
1448 IID = Intrinsic::x86_avx2_pshuf_b;
1449 else if (VecWidth == 512)
1450 IID = Intrinsic::x86_avx512_pshuf_b_512;
1452 llvm_unreachable("Unexpected intrinsic");
1453 } else if (Name.startswith("pmul.hr.sw.")) {
1454 if (VecWidth == 128)
1455 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1456 else if (VecWidth == 256)
1457 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1458 else if (VecWidth == 512)
1459 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1461 llvm_unreachable("Unexpected intrinsic");
1462 } else if (Name.startswith("pmulh.w.")) {
1463 if (VecWidth == 128)
1464 IID = Intrinsic::x86_sse2_pmulh_w;
1465 else if (VecWidth == 256)
1466 IID = Intrinsic::x86_avx2_pmulh_w;
1467 else if (VecWidth == 512)
1468 IID = Intrinsic::x86_avx512_pmulh_w_512;
1470 llvm_unreachable("Unexpected intrinsic");
1471 } else if (Name.startswith("pmulhu.w.")) {
1472 if (VecWidth == 128)
1473 IID = Intrinsic::x86_sse2_pmulhu_w;
1474 else if (VecWidth == 256)
1475 IID = Intrinsic::x86_avx2_pmulhu_w;
1476 else if (VecWidth == 512)
1477 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1479 llvm_unreachable("Unexpected intrinsic");
1480 } else if (Name.startswith("pmaddw.d.")) {
1481 if (VecWidth == 128)
1482 IID = Intrinsic::x86_sse2_pmadd_wd;
1483 else if (VecWidth == 256)
1484 IID = Intrinsic::x86_avx2_pmadd_wd;
1485 else if (VecWidth == 512)
1486 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1488 llvm_unreachable("Unexpected intrinsic");
1489 } else if (Name.startswith("pmaddubs.w.")) {
1490 if (VecWidth == 128)
1491 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1492 else if (VecWidth == 256)
1493 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1494 else if (VecWidth == 512)
1495 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1497 llvm_unreachable("Unexpected intrinsic");
1498 } else if (Name.startswith("packsswb.")) {
1499 if (VecWidth == 128)
1500 IID = Intrinsic::x86_sse2_packsswb_128;
1501 else if (VecWidth == 256)
1502 IID = Intrinsic::x86_avx2_packsswb;
1503 else if (VecWidth == 512)
1504 IID = Intrinsic::x86_avx512_packsswb_512;
1506 llvm_unreachable("Unexpected intrinsic");
1507 } else if (Name.startswith("packssdw.")) {
1508 if (VecWidth == 128)
1509 IID = Intrinsic::x86_sse2_packssdw_128;
1510 else if (VecWidth == 256)
1511 IID = Intrinsic::x86_avx2_packssdw;
1512 else if (VecWidth == 512)
1513 IID = Intrinsic::x86_avx512_packssdw_512;
1515 llvm_unreachable("Unexpected intrinsic");
1516 } else if (Name.startswith("packuswb.")) {
1517 if (VecWidth == 128)
1518 IID = Intrinsic::x86_sse2_packuswb_128;
1519 else if (VecWidth == 256)
1520 IID = Intrinsic::x86_avx2_packuswb;
1521 else if (VecWidth == 512)
1522 IID = Intrinsic::x86_avx512_packuswb_512;
1524 llvm_unreachable("Unexpected intrinsic");
1525 } else if (Name.startswith("packusdw.")) {
1526 if (VecWidth == 128)
1527 IID = Intrinsic::x86_sse41_packusdw;
1528 else if (VecWidth == 256)
1529 IID = Intrinsic::x86_avx2_packusdw;
1530 else if (VecWidth == 512)
1531 IID = Intrinsic::x86_avx512_packusdw_512;
1533 llvm_unreachable("Unexpected intrinsic");
1534 } else if (Name.startswith("vpermilvar.")) {
1535 if (VecWidth == 128 && EltWidth == 32)
1536 IID = Intrinsic::x86_avx_vpermilvar_ps;
1537 else if (VecWidth == 128 && EltWidth == 64)
1538 IID = Intrinsic::x86_avx_vpermilvar_pd;
1539 else if (VecWidth == 256 && EltWidth == 32)
1540 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1541 else if (VecWidth == 256 && EltWidth == 64)
1542 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1543 else if (VecWidth == 512 && EltWidth == 32)
1544 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1545 else if (VecWidth == 512 && EltWidth == 64)
1546 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1548 llvm_unreachable("Unexpected intrinsic");
// The conversion intrinsics below are matched by exact name, not by prefix.
1549 } else if (Name == "cvtpd2dq.256") {
1550 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1551 } else if (Name == "cvtpd2ps.256") {
1552 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1553 } else if (Name == "cvttpd2dq.256") {
1554 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1555 } else if (Name == "cvttps2dq.128") {
1556 IID = Intrinsic::x86_sse2_cvttps2dq;
1557 } else if (Name == "cvttps2dq.256") {
1558 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1559 } else if (Name.startswith("permvar.")) {
// permvar: 32/64-bit elements also depend on whether the result type is
// floating point; 16/8-bit elements do not.
1560 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1561 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1562 IID = Intrinsic::x86_avx2_permps;
1563 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1564 IID = Intrinsic::x86_avx2_permd;
1565 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1566 IID = Intrinsic::x86_avx512_permvar_df_256;
1567 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1568 IID = Intrinsic::x86_avx512_permvar_di_256;
1569 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1570 IID = Intrinsic::x86_avx512_permvar_sf_512;
1571 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1572 IID = Intrinsic::x86_avx512_permvar_si_512;
1573 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1574 IID = Intrinsic::x86_avx512_permvar_df_512;
1575 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1576 IID = Intrinsic::x86_avx512_permvar_di_512;
1577 else if (VecWidth == 128 && EltWidth == 16)
1578 IID = Intrinsic::x86_avx512_permvar_hi_128;
1579 else if (VecWidth == 256 && EltWidth == 16)
1580 IID = Intrinsic::x86_avx512_permvar_hi_256;
1581 else if (VecWidth == 512 && EltWidth == 16)
1582 IID = Intrinsic::x86_avx512_permvar_hi_512;
1583 else if (VecWidth == 128 && EltWidth == 8)
1584 IID = Intrinsic::x86_avx512_permvar_qi_128;
1585 else if (VecWidth == 256 && EltWidth == 8)
1586 IID = Intrinsic::x86_avx512_permvar_qi_256;
1587 else if (VecWidth == 512 && EltWidth == 8)
1588 IID = Intrinsic::x86_avx512_permvar_qi_512;
1590 llvm_unreachable("Unexpected intrinsic");
1591 } else if (Name.startswith("dbpsadbw.")) {
1592 if (VecWidth == 128)
1593 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1594 else if (VecWidth == 256)
1595 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1596 else if (VecWidth == 512)
1597 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1599 llvm_unreachable("Unexpected intrinsic");
1600 } else if (Name.startswith("pmultishift.qb.")) {
1601 if (VecWidth == 128)
1602 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1603 else if (VecWidth == 256)
1604 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1605 else if (VecWidth == 512)
1606 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1608 llvm_unreachable("Unexpected intrinsic");
1609 } else if (Name.startswith("conflict.")) {
// Name[9] is the character right after the 9-character "conflict." prefix:
// 'd' or 'q' picks the intrinsic variant.
1610 if (Name[9] == 'd' && VecWidth == 128)
1611 IID = Intrinsic::x86_avx512_conflict_d_128;
1612 else if (Name[9] == 'd' && VecWidth == 256)
1613 IID = Intrinsic::x86_avx512_conflict_d_256;
1614 else if (Name[9] == 'd' && VecWidth == 512)
1615 IID = Intrinsic::x86_avx512_conflict_d_512;
1616 else if (Name[9] == 'q' && VecWidth == 128)
1617 IID = Intrinsic::x86_avx512_conflict_q_128;
1618 else if (Name[9] == 'q' && VecWidth == 256)
1619 IID = Intrinsic::x86_avx512_conflict_q_256;
1620 else if (Name[9] == 'q' && VecWidth == 512)
1621 IID = Intrinsic::x86_avx512_conflict_q_512;
1623 llvm_unreachable("Unexpected intrinsic");
1624 } else if (Name.startswith("pavg.")) {
// Name[5] is the character right after the 5-character "pavg." prefix:
// 'b' or 'w' picks the intrinsic variant.
1625 if (Name[5] == 'b' && VecWidth == 128)
1626 IID = Intrinsic::x86_sse2_pavg_b;
1627 else if (Name[5] == 'b' && VecWidth == 256)
1628 IID = Intrinsic::x86_avx2_pavg_b;
1629 else if (Name[5] == 'b' && VecWidth == 512)
1630 IID = Intrinsic::x86_avx512_pavg_b_512;
1631 else if (Name[5] == 'w' && VecWidth == 128)
1632 IID = Intrinsic::x86_sse2_pavg_w;
1633 else if (Name[5] == 'w' && VecWidth == 256)
1634 IID = Intrinsic::x86_avx2_pavg_w;
1635 else if (Name[5] == 'w' && VecWidth == 512)
1636 IID = Intrinsic::x86_avx512_pavg_w_512;
1638 llvm_unreachable("Unexpected intrinsic");
// Args starts out as a copy of every argument operand of the original call.
1642 SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1643 CI.arg_operands().end());
1646 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
// Blend: the last call argument is the mask, the one before it the
// pass-through value.
1648 unsigned NumArgs = CI.getNumArgOperands();
1649 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1650 CI.getArgOperand(NumArgs - 2));
1654 /// Upgrade comment in call to inline asm that represents an objc retain release
/// marker. The string is edited in place, and only when all three conditions
/// hold: it starts with "mov\tfp", it contains
/// "objc_retainAutoreleaseReturnValue", and it contains "# marker". In that
/// case the single '#' character that begins "# marker" is replaced by ';'.
1656 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1658 if (AsmStr->find("mov\tfp") == 0 &&
1659 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1660 (Pos = AsmStr->find("# marker")) != std::string::npos) {
// Replace exactly one character at Pos (the '#').
1661 AsmStr->replace(Pos, 1, ";");
1666 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1667 /// provided to seamlessly integrate with existing context.
1668 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1669 Function *F = CI->getCalledFunction();
1670 LLVMContext &C = CI->getContext();
1671 IRBuilder<> Builder(C);
1672 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1674 assert(F && "Intrinsic call is not direct?");
1677 // Get the Function's name.
1678 StringRef Name = F->getName();
1680 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1681 Name = Name.substr(5);
1683 bool IsX86 = Name.startswith("x86.");
1685 Name = Name.substr(4);
1686 bool IsNVVM = Name.startswith("nvvm.");
1688 Name = Name.substr(5);
1690 if (IsX86 && Name.startswith("sse4a.movnt.")) {
1691 Module *M = F->getParent();
1692 SmallVector<Metadata *, 1> Elts;
1694 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1695 MDNode *Node = MDNode::get(C, Elts);
1697 Value *Arg0 = CI->getArgOperand(0);
1698 Value *Arg1 = CI->getArgOperand(1);
1700 // Nontemporal (unaligned) store of the 0'th element of the float/double vector.
1702 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1703 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1704 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1706 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1708 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1709 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1711 // Remove intrinsic.
1712 CI->eraseFromParent();
1716 if (IsX86 && (Name.startswith("avx.movnt.") ||
1717 Name.startswith("avx512.storent."))) {
1718 Module *M = F->getParent();
1719 SmallVector<Metadata *, 1> Elts;
1721 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1722 MDNode *Node = MDNode::get(C, Elts);
1724 Value *Arg0 = CI->getArgOperand(0);
1725 Value *Arg1 = CI->getArgOperand(1);
1727 // Convert the type of the pointer to a pointer to the stored type.
1728 Value *BC = Builder.CreateBitCast(Arg0,
1729 PointerType::getUnqual(Arg1->getType()),
1731 VectorType *VTy = cast<VectorType>(Arg1->getType());
1732 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1733 VTy->getBitWidth() / 8);
1734 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1736 // Remove intrinsic.
1737 CI->eraseFromParent();
1741 if (IsX86 && Name == "sse2.storel.dq") {
1742 Value *Arg0 = CI->getArgOperand(0);
1743 Value *Arg1 = CI->getArgOperand(1);
1745 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1746 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1747 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1748 Value *BC = Builder.CreateBitCast(Arg0,
1749 PointerType::getUnqual(Elt->getType()),
1751 Builder.CreateAlignedStore(Elt, BC, 1);
1753 // Remove intrinsic.
1754 CI->eraseFromParent();
1758 if (IsX86 && (Name.startswith("sse.storeu.") ||
1759 Name.startswith("sse2.storeu.") ||
1760 Name.startswith("avx.storeu."))) {
1761 Value *Arg0 = CI->getArgOperand(0);
1762 Value *Arg1 = CI->getArgOperand(1);
1764 Arg0 = Builder.CreateBitCast(Arg0,
1765 PointerType::getUnqual(Arg1->getType()),
1767 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1769 // Remove intrinsic.
1770 CI->eraseFromParent();
1774 if (IsX86 && Name == "avx512.mask.store.ss") {
1775 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1776 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1779 // Remove intrinsic.
1780 CI->eraseFromParent();
1784 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1785 // "avx512.mask.storeu." or "avx512.mask.store."
1786 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1787 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1788 CI->getArgOperand(2), Aligned);
1790 // Remove intrinsic.
1791 CI->eraseFromParent();
1796 // Upgrade packed integer vector compare intrinsics to compare instructions.
1797 if (IsX86 && (Name.startswith("sse2.pcmp") ||
1798 Name.startswith("avx2.pcmp"))) {
1799 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1800 bool CmpEq = Name[9] == 'e';
1801 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1802 CI->getArgOperand(0), CI->getArgOperand(1));
1803 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1804 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1805 Type *ExtTy = Type::getInt32Ty(C);
1806 if (CI->getOperand(0)->getType()->isIntegerTy(8))
1807 ExtTy = Type::getInt64Ty(C);
1808 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1809 ExtTy->getPrimitiveSizeInBits();
1810 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1811 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1812 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1813 Name == "sse2.sqrt.sd")) {
1814 Value *Vec = CI->getArgOperand(0);
1815 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1816 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1817 Intrinsic::sqrt, Elt0->getType());
1818 Elt0 = Builder.CreateCall(Intr, Elt0);
1819 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1820 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1821 Name.startswith("sse2.sqrt.p") ||
1822 Name.startswith("sse.sqrt.p"))) {
1823 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1826 {CI->getArgOperand(0)});
1827 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1828 if (CI->getNumArgOperands() == 4 &&
1829 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1830 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1831 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1832 : Intrinsic::x86_avx512_sqrt_pd_512;
1834 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1835 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1838 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1841 {CI->getArgOperand(0)});
1843 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1844 CI->getArgOperand(1));
1845 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1846 Name.startswith("avx512.ptestnm"))) {
1847 Value *Op0 = CI->getArgOperand(0);
1848 Value *Op1 = CI->getArgOperand(1);
1849 Value *Mask = CI->getArgOperand(2);
1850 Rep = Builder.CreateAnd(Op0, Op1);
1851 llvm::Type *Ty = Op0->getType();
1852 Value *Zero = llvm::Constant::getNullValue(Ty);
1853 ICmpInst::Predicate Pred =
1854 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1855 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1856 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1857 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1859 CI->getArgOperand(1)->getType()->getVectorNumElements();
1860 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1861 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1862 CI->getArgOperand(1));
1863 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1864 unsigned NumElts = CI->getType()->getScalarSizeInBits();
1865 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1866 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1867 uint32_t Indices[64];
1868 for (unsigned i = 0; i != NumElts; ++i)
1871 // First extract half of each vector. This gives better codegen than
1872 // doing it in a single shuffle.
1873 LHS = Builder.CreateShuffleVector(LHS, LHS,
1874 makeArrayRef(Indices, NumElts / 2));
1875 RHS = Builder.CreateShuffleVector(RHS, RHS,
1876 makeArrayRef(Indices, NumElts / 2));
1877 // Concat the vectors.
1878 // NOTE: Operands have to be swapped to match intrinsic definition.
1879 Rep = Builder.CreateShuffleVector(RHS, LHS,
1880 makeArrayRef(Indices, NumElts));
1881 Rep = Builder.CreateBitCast(Rep, CI->getType());
1882 } else if (IsX86 && Name == "avx512.kand.w") {
1883 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1884 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1885 Rep = Builder.CreateAnd(LHS, RHS);
1886 Rep = Builder.CreateBitCast(Rep, CI->getType());
1887 } else if (IsX86 && Name == "avx512.kandn.w") {
1888 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1889 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1890 LHS = Builder.CreateNot(LHS);
1891 Rep = Builder.CreateAnd(LHS, RHS);
1892 Rep = Builder.CreateBitCast(Rep, CI->getType());
1893 } else if (IsX86 && Name == "avx512.kor.w") {
1894 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1895 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1896 Rep = Builder.CreateOr(LHS, RHS);
1897 Rep = Builder.CreateBitCast(Rep, CI->getType());
1898 } else if (IsX86 && Name == "avx512.kxor.w") {
1899 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1900 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1901 Rep = Builder.CreateXor(LHS, RHS);
1902 Rep = Builder.CreateBitCast(Rep, CI->getType());
1903 } else if (IsX86 && Name == "avx512.kxnor.w") {
1904 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1905 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1906 LHS = Builder.CreateNot(LHS);
1907 Rep = Builder.CreateXor(LHS, RHS);
1908 Rep = Builder.CreateBitCast(Rep, CI->getType());
1909 } else if (IsX86 && Name == "avx512.knot.w") {
1910 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1911 Rep = Builder.CreateNot(Rep);
1912 Rep = Builder.CreateBitCast(Rep, CI->getType());
1914 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1915 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1916 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1917 Rep = Builder.CreateOr(LHS, RHS);
1918 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1920 if (Name[14] == 'c')
1921 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1923 C = ConstantInt::getNullValue(Builder.getInt16Ty());
1924 Rep = Builder.CreateICmpEQ(Rep, C);
1925 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1926 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1927 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1928 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1929 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1930 Type *I32Ty = Type::getInt32Ty(C);
1931 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1932 ConstantInt::get(I32Ty, 0));
1933 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1934 ConstantInt::get(I32Ty, 0));
1936 if (Name.contains(".add."))
1937 EltOp = Builder.CreateFAdd(Elt0, Elt1);
1938 else if (Name.contains(".sub."))
1939 EltOp = Builder.CreateFSub(Elt0, Elt1);
1940 else if (Name.contains(".mul."))
1941 EltOp = Builder.CreateFMul(Elt0, Elt1);
1943 EltOp = Builder.CreateFDiv(Elt0, Elt1);
1944 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1945 ConstantInt::get(I32Ty, 0));
1946 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1947 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1948 bool CmpEq = Name[16] == 'e';
1949 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1950 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1951 Type *OpTy = CI->getArgOperand(0)->getType();
1952 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1955 default: llvm_unreachable("Unexpected intrinsic");
1956 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1957 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1958 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1961 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1962 { CI->getOperand(0), CI->getArgOperand(1) });
1963 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1964 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1965 Type *OpTy = CI->getArgOperand(0)->getType();
1966 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1967 unsigned EltWidth = OpTy->getScalarSizeInBits();
1969 if (VecWidth == 128 && EltWidth == 32)
1970 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1971 else if (VecWidth == 256 && EltWidth == 32)
1972 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1973 else if (VecWidth == 512 && EltWidth == 32)
1974 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1975 else if (VecWidth == 128 && EltWidth == 64)
1976 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1977 else if (VecWidth == 256 && EltWidth == 64)
1978 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1979 else if (VecWidth == 512 && EltWidth == 64)
1980 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1982 llvm_unreachable("Unexpected intrinsic");
1984 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1985 { CI->getOperand(0), CI->getArgOperand(1) });
1986 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1987 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1988 Type *OpTy = CI->getArgOperand(0)->getType();
1989 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1990 unsigned EltWidth = OpTy->getScalarSizeInBits();
1992 if (VecWidth == 128 && EltWidth == 32)
1993 IID = Intrinsic::x86_avx512_cmp_ps_128;
1994 else if (VecWidth == 256 && EltWidth == 32)
1995 IID = Intrinsic::x86_avx512_cmp_ps_256;
1996 else if (VecWidth == 512 && EltWidth == 32)
1997 IID = Intrinsic::x86_avx512_cmp_ps_512;
1998 else if (VecWidth == 128 && EltWidth == 64)
1999 IID = Intrinsic::x86_avx512_cmp_pd_128;
2000 else if (VecWidth == 256 && EltWidth == 64)
2001 IID = Intrinsic::x86_avx512_cmp_pd_256;
2002 else if (VecWidth == 512 && EltWidth == 64)
2003 IID = Intrinsic::x86_avx512_cmp_pd_512;
2005 llvm_unreachable("Unexpected intrinsic");
2007 SmallVector<Value *, 4> Args;
2008 Args.push_back(CI->getArgOperand(0));
2009 Args.push_back(CI->getArgOperand(1));
2010 Args.push_back(CI->getArgOperand(2));
2011 if (CI->getNumArgOperands() == 5)
2012 Args.push_back(CI->getArgOperand(4));
2014 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2016 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
2017 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
2019 // Integer compare intrinsics.
2020 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2021 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2022 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2023 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2024 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2025 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2026 Name.startswith("avx512.cvtw2mask.") ||
2027 Name.startswith("avx512.cvtd2mask.") ||
2028 Name.startswith("avx512.cvtq2mask."))) {
2029 Value *Op = CI->getArgOperand(0);
2030 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2031 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2032 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2033 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2034 Name == "ssse3.pabs.w.128" ||
2035 Name == "ssse3.pabs.d.128" ||
2036 Name.startswith("avx2.pabs") ||
2037 Name.startswith("avx512.mask.pabs"))) {
2038 Rep = upgradeAbs(Builder, *CI);
2039 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2040 Name == "sse2.pmaxs.w" ||
2041 Name == "sse41.pmaxsd" ||
2042 Name.startswith("avx2.pmaxs") ||
2043 Name.startswith("avx512.mask.pmaxs"))) {
2044 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2045 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2046 Name == "sse41.pmaxuw" ||
2047 Name == "sse41.pmaxud" ||
2048 Name.startswith("avx2.pmaxu") ||
2049 Name.startswith("avx512.mask.pmaxu"))) {
2050 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2051 } else if (IsX86 && (Name == "sse41.pminsb" ||
2052 Name == "sse2.pmins.w" ||
2053 Name == "sse41.pminsd" ||
2054 Name.startswith("avx2.pmins") ||
2055 Name.startswith("avx512.mask.pmins"))) {
2056 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2057 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2058 Name == "sse41.pminuw" ||
2059 Name == "sse41.pminud" ||
2060 Name.startswith("avx2.pminu") ||
2061 Name.startswith("avx512.mask.pminu"))) {
2062 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2063 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2064 Name == "avx2.pmulu.dq" ||
2065 Name == "avx512.pmulu.dq.512" ||
2066 Name.startswith("avx512.mask.pmulu.dq."))) {
2067 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2068 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2069 Name == "avx2.pmul.dq" ||
2070 Name == "avx512.pmul.dq.512" ||
2071 Name.startswith("avx512.mask.pmul.dq."))) {
2072 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2073 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2074 Name == "sse2.cvtsi2sd" ||
2075 Name == "sse.cvtsi642ss" ||
2076 Name == "sse2.cvtsi642sd")) {
2077 Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
2078 CI->getType()->getVectorElementType());
2079 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2080 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2081 Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2082 CI->getType()->getVectorElementType());
2083 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2084 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2085 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2086 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2087 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2088 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2089 Name == "sse2.cvtdq2ps" ||
2090 Name == "avx.cvtdq2.pd.256" ||
2091 Name == "avx.cvtdq2.ps.256" ||
2092 Name.startswith("avx512.mask.cvtdq2pd.") ||
2093 Name.startswith("avx512.mask.cvtudq2pd.") ||
2094 Name.startswith("avx512.mask.cvtdq2ps.") ||
2095 Name.startswith("avx512.mask.cvtudq2ps.") ||
2096 Name.startswith("avx512.mask.cvtqq2pd.") ||
2097 Name.startswith("avx512.mask.cvtuqq2pd.") ||
2098 Name == "avx512.mask.cvtqq2ps.256" ||
2099 Name == "avx512.mask.cvtqq2ps.512" ||
2100 Name == "avx512.mask.cvtuqq2ps.256" ||
2101 Name == "avx512.mask.cvtuqq2ps.512" ||
2102 Name == "sse2.cvtps2pd" ||
2103 Name == "avx.cvt.ps2.pd.256" ||
2104 Name == "avx512.mask.cvtps2pd.128" ||
2105 Name == "avx512.mask.cvtps2pd.256")) {
2106 Type *DstTy = CI->getType();
2107 Rep = CI->getArgOperand(0);
2108 Type *SrcTy = Rep->getType();
2110 unsigned NumDstElts = DstTy->getVectorNumElements();
2111 if (NumDstElts < SrcTy->getVectorNumElements()) {
2112 assert(NumDstElts == 2 && "Unexpected vector size");
2113 uint32_t ShuffleMask[2] = { 0, 1 };
2114 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2117 bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2118 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2120 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2121 else if (CI->getNumArgOperands() == 4 &&
2122 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2123 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2124 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2125 : Intrinsic::x86_avx512_sitofp_round;
2126 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2128 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2130 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2131 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2134 if (CI->getNumArgOperands() >= 3)
2135 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2136 CI->getArgOperand(1));
2137 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2138 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2139 CI->getArgOperand(1), CI->getArgOperand(2),
2141 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2142 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2143 CI->getArgOperand(1),CI->getArgOperand(2),
2145 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2146 Type *ResultTy = CI->getType();
2147 Type *PtrTy = ResultTy->getVectorElementType();
2149 // Cast the pointer to element type.
2150 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2151 llvm::PointerType::getUnqual(PtrTy));
2153 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2154 ResultTy->getVectorNumElements());
2156 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2157 Intrinsic::masked_expandload,
2159 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2160 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2161 Type *ResultTy = CI->getArgOperand(1)->getType();
2162 Type *PtrTy = ResultTy->getVectorElementType();
2164 // Cast the pointer to element type.
2165 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2166 llvm::PointerType::getUnqual(PtrTy));
2168 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2169 ResultTy->getVectorNumElements());
2171 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2172 Intrinsic::masked_compressstore,
2174 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2175 } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2176 Name.startswith("avx512.mask.expand."))) {
2177 Type *ResultTy = CI->getType();
2179 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2180 ResultTy->getVectorNumElements());
2182 bool IsCompress = Name[12] == 'c';
2183 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2184 : Intrinsic::x86_avx512_mask_expand;
2185 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2186 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2188 } else if (IsX86 && Name.startswith("xop.vpcom")) {
2190 if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2191 Name.endswith("uq"))
2193 else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2197 llvm_unreachable("Unknown suffix");
2200 if (CI->getNumArgOperands() == 3) {
2201 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2203 Name = Name.substr(9); // strip off "xop.vpcom"
2204 if (Name.startswith("lt"))
2206 else if (Name.startswith("le"))
2208 else if (Name.startswith("gt"))
2210 else if (Name.startswith("ge"))
2212 else if (Name.startswith("eq"))
2214 else if (Name.startswith("ne"))
2216 else if (Name.startswith("false"))
2218 else if (Name.startswith("true"))
2221 llvm_unreachable("Unknown condition");
2224 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2225 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2226 Value *Sel = CI->getArgOperand(2);
2227 Value *NotSel = Builder.CreateNot(Sel);
2228 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2229 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2230 Rep = Builder.CreateOr(Sel0, Sel1);
2231 } else if (IsX86 && (Name.startswith("xop.vprot") ||
2232 Name.startswith("avx512.prol") ||
2233 Name.startswith("avx512.mask.prol"))) {
2234 Rep = upgradeX86Rotate(Builder, *CI, false);
2235 } else if (IsX86 && (Name.startswith("avx512.pror") ||
2236 Name.startswith("avx512.mask.pror"))) {
2237 Rep = upgradeX86Rotate(Builder, *CI, true);
2238 } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2239 Name.startswith("avx512.mask.vpshld") ||
2240 Name.startswith("avx512.maskz.vpshld"))) {
2241 bool ZeroMask = Name[11] == 'z';
2242 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2243 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2244 Name.startswith("avx512.mask.vpshrd") ||
2245 Name.startswith("avx512.maskz.vpshrd"))) {
2246 bool ZeroMask = Name[11] == 'z';
2247 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2248 } else if (IsX86 && Name == "sse42.crc32.64.8") {
2249 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2250 Intrinsic::x86_sse42_crc32_32_8);
2251 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2252 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2253 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2254 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2255 Name.startswith("avx512.vbroadcast.s"))) {
2256 // Replace broadcasts with a series of insertelements.
2257 Type *VecTy = CI->getType();
2258 Type *EltTy = VecTy->getVectorElementType();
2259 unsigned EltNum = VecTy->getVectorNumElements();
2260 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2261 EltTy->getPointerTo());
2262 Value *Load = Builder.CreateLoad(EltTy, Cast);
2263 Type *I32Ty = Type::getInt32Ty(C);
2264 Rep = UndefValue::get(VecTy);
2265 for (unsigned I = 0; I < EltNum; ++I)
2266 Rep = Builder.CreateInsertElement(Rep, Load,
2267 ConstantInt::get(I32Ty, I));
2268 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2269 Name.startswith("sse41.pmovzx") ||
2270 Name.startswith("avx2.pmovsx") ||
2271 Name.startswith("avx2.pmovzx") ||
2272 Name.startswith("avx512.mask.pmovsx") ||
2273 Name.startswith("avx512.mask.pmovzx"))) {
2274 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2275 VectorType *DstTy = cast<VectorType>(CI->getType());
2276 unsigned NumDstElts = DstTy->getNumElements();
2278 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2279 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2280 for (unsigned i = 0; i != NumDstElts; ++i)
2283 Value *SV = Builder.CreateShuffleVector(
2284 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2286 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2287 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2288 : Builder.CreateZExt(SV, DstTy);
2289 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2290 if (CI->getNumArgOperands() == 3)
2291 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2292 CI->getArgOperand(1));
2293 } else if (Name == "avx512.mask.pmov.qd.256" ||
2294 Name == "avx512.mask.pmov.qd.512" ||
2295 Name == "avx512.mask.pmov.wb.256" ||
2296 Name == "avx512.mask.pmov.wb.512") {
2297 Type *Ty = CI->getArgOperand(1)->getType();
2298 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2299 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2300 CI->getArgOperand(1));
2301 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2302 Name == "avx2.vbroadcasti128")) {
2303 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2304 Type *EltTy = CI->getType()->getVectorElementType();
2305 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2306 Type *VT = VectorType::get(EltTy, NumSrcElts);
2307 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2308 PointerType::getUnqual(VT));
2309 Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
2310 if (NumSrcElts == 2)
2311 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2314 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2315 { 0, 1, 2, 3, 0, 1, 2, 3 });
2316 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2317 Name.startswith("avx512.mask.shuf.f"))) {
2318 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2319 Type *VT = CI->getType();
2320 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2321 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2322 unsigned ControlBitsMask = NumLanes - 1;
2323 unsigned NumControlBits = NumLanes / 2;
2324 SmallVector<uint32_t, 8> ShuffleMask(0);
2326 for (unsigned l = 0; l != NumLanes; ++l) {
2327 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2328 // We actually need the other source.
2329 if (l >= NumLanes / 2)
2330 LaneMask += NumLanes;
2331 for (unsigned i = 0; i != NumElementsInLane; ++i)
2332 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2334 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2335 CI->getArgOperand(1), ShuffleMask);
2336 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2337 CI->getArgOperand(3));
2338 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2339 Name.startswith("avx512.mask.broadcasti"))) {
2340 unsigned NumSrcElts =
2341 CI->getArgOperand(0)->getType()->getVectorNumElements();
2342 unsigned NumDstElts = CI->getType()->getVectorNumElements();
2344 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2345 for (unsigned i = 0; i != NumDstElts; ++i)
2346 ShuffleMask[i] = i % NumSrcElts;
2348 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2349 CI->getArgOperand(0),
2351 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2352 CI->getArgOperand(1));
2353 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2354 Name.startswith("avx2.vbroadcast") ||
2355 Name.startswith("avx512.pbroadcast") ||
2356 Name.startswith("avx512.mask.broadcast.s"))) {
2357 // Replace vp?broadcasts with a vector shuffle.
2358 Value *Op = CI->getArgOperand(0);
2359 unsigned NumElts = CI->getType()->getVectorNumElements();
2360 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2361 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2362 Constant::getNullValue(MaskTy));
2364 if (CI->getNumArgOperands() == 3)
2365 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2366 CI->getArgOperand(1));
2367 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2368 Name.startswith("sse2.psubs.") ||
2369 Name.startswith("avx2.padds.") ||
2370 Name.startswith("avx2.psubs.") ||
2371 Name.startswith("avx512.padds.") ||
2372 Name.startswith("avx512.psubs.") ||
2373 Name.startswith("avx512.mask.padds.") ||
2374 Name.startswith("avx512.mask.psubs."))) {
2375 bool IsAdd = Name.contains(".padds");
2376 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2377 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2378 Name.startswith("sse2.psubus.") ||
2379 Name.startswith("avx2.paddus.") ||
2380 Name.startswith("avx2.psubus.") ||
2381 Name.startswith("avx512.mask.paddus.") ||
2382 Name.startswith("avx512.mask.psubus."))) {
2383 bool IsAdd = Name.contains(".paddus");
2384 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2385 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2386 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2387 CI->getArgOperand(1),
2388 CI->getArgOperand(2),
2389 CI->getArgOperand(3),
2390 CI->getArgOperand(4),
2392 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2393 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2394 CI->getArgOperand(1),
2395 CI->getArgOperand(2),
2396 CI->getArgOperand(3),
2397 CI->getArgOperand(4),
2399 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2400 Name == "avx2.psll.dq")) {
2401 // 128/256-bit shift left specified in bits.
2402 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2403 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2404 Shift / 8); // Shift is in bits.
2405 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2406 Name == "avx2.psrl.dq")) {
2407 // 128/256-bit shift right specified in bits.
2408 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2409 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2410 Shift / 8); // Shift is in bits.
2411 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2412 Name == "avx2.psll.dq.bs" ||
2413 Name == "avx512.psll.dq.512")) {
2414 // 128/256/512-bit shift left specified in bytes.
2415 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2416 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2417 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2418 Name == "avx2.psrl.dq.bs" ||
2419 Name == "avx512.psrl.dq.512")) {
2420 // 128/256/512-bit shift right specified in bytes.
2421 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2422 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2423 } else if (IsX86 && (Name == "sse41.pblendw" ||
2424 Name.startswith("sse41.blendp") ||
2425 Name.startswith("avx.blend.p") ||
2426 Name == "avx2.pblendw" ||
2427 Name.startswith("avx2.pblendd."))) {
2428 Value *Op0 = CI->getArgOperand(0);
2429 Value *Op1 = CI->getArgOperand(1);
2430 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2431 VectorType *VecTy = cast<VectorType>(CI->getType());
2432 unsigned NumElts = VecTy->getNumElements();
2434 SmallVector<uint32_t, 16> Idxs(NumElts);
2435 for (unsigned i = 0; i != NumElts; ++i)
2436 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2438 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2439 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2440 Name == "avx2.vinserti128" ||
2441 Name.startswith("avx512.mask.insert"))) {
2442 Value *Op0 = CI->getArgOperand(0);
2443 Value *Op1 = CI->getArgOperand(1);
2444 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2445 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2446 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2447 unsigned Scale = DstNumElts / SrcNumElts;
2449 // Mask off the high bits of the immediate value; hardware ignores those.
2452 // Extend the second operand into a vector the size of the destination.
2453 Value *UndefV = UndefValue::get(Op1->getType());
2454 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2455 for (unsigned i = 0; i != SrcNumElts; ++i)
2457 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2458 Idxs[i] = SrcNumElts;
2459 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2461 // Insert the second operand into the first operand.
2463 // Note that there is no guarantee that instruction lowering will actually
2464 // produce a vinsertf128 instruction for the created shuffles. In
2465 // particular, the 0 immediate case involves no lane changes, so it can
2466 // be handled as a blend.
2468 // Example of shuffle mask for 32-bit elements:
2469 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2470 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2472 // First fill with identity mask.
2473 for (unsigned i = 0; i != DstNumElts; ++i)
2475 // Then replace the elements where we need to insert.
2476 for (unsigned i = 0; i != SrcNumElts; ++i)
2477 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2478 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2480 // If the intrinsic has a mask operand, handle that.
2481 if (CI->getNumArgOperands() == 5)
2482 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2483 CI->getArgOperand(3));
2484 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2485 Name == "avx2.vextracti128" ||
2486 Name.startswith("avx512.mask.vextract"))) {
2487 Value *Op0 = CI->getArgOperand(0);
2488 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2489 unsigned DstNumElts = CI->getType()->getVectorNumElements();
2490 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2491 unsigned Scale = SrcNumElts / DstNumElts;
2493 // Mask off the high bits of the immediate value; hardware ignores those.
2496 // Get indexes for the subvector of the input vector.
2497 SmallVector<uint32_t, 8> Idxs(DstNumElts);
2498 for (unsigned i = 0; i != DstNumElts; ++i) {
2499 Idxs[i] = i + (Imm * DstNumElts);
2501 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2503 // If the intrinsic has a mask operand, handle that.
2504 if (CI->getNumArgOperands() == 4)
2505 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2506 CI->getArgOperand(2));
2507 } else if (!IsX86 && Name == "stackprotectorcheck") {
2509 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2510 Name.startswith("avx512.mask.perm.di."))) {
2511 Value *Op0 = CI->getArgOperand(0);
2512 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2513 VectorType *VecTy = cast<VectorType>(CI->getType());
2514 unsigned NumElts = VecTy->getNumElements();
2516 SmallVector<uint32_t, 8> Idxs(NumElts);
2517 for (unsigned i = 0; i != NumElts; ++i)
2518 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2520 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2522 if (CI->getNumArgOperands() == 4)
2523 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2524 CI->getArgOperand(2));
2525 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2526 Name == "avx2.vperm2i128")) {
2527 // The immediate permute control byte looks like this:
2528 // [1:0] - select 128 bits from sources for low half of destination
2530 // [3] - zero low half of destination
2531 // [5:4] - select 128 bits from sources for high half of destination
2533 // [7] - zero high half of destination
2535 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2537 unsigned NumElts = CI->getType()->getVectorNumElements();
2538 unsigned HalfSize = NumElts / 2;
2539 SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2541 // Determine which operand(s) are actually in use for this instruction.
2542 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2543 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2545 // If needed, replace operands based on zero mask.
2546 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2547 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2549 // Permute low half of result.
2550 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2551 for (unsigned i = 0; i < HalfSize; ++i)
2552 ShuffleMask[i] = StartIndex + i;
2554 // Permute high half of result.
2555 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2556 for (unsigned i = 0; i < HalfSize; ++i)
2557 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2559 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2561 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2562 Name == "sse2.pshuf.d" ||
2563 Name.startswith("avx512.mask.vpermil.p") ||
2564 Name.startswith("avx512.mask.pshuf.d."))) {
2565 Value *Op0 = CI->getArgOperand(0);
2566 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2567 VectorType *VecTy = cast<VectorType>(CI->getType());
2568 unsigned NumElts = VecTy->getNumElements();
2569 // Calculate the size of each index in the immediate.
2570 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2571 unsigned IdxMask = ((1 << IdxSize) - 1);
2573 SmallVector<uint32_t, 8> Idxs(NumElts);
2574 // Lookup the bits for this element, wrapping around the immediate every
2575 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2576 // to offset by the first index of each group.
2577 for (unsigned i = 0; i != NumElts; ++i)
2578 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2580 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2582 if (CI->getNumArgOperands() == 4)
2583 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2584 CI->getArgOperand(2));
2585 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2586 Name.startswith("avx512.mask.pshufl.w."))) {
2587 Value *Op0 = CI->getArgOperand(0);
2588 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2589 unsigned NumElts = CI->getType()->getVectorNumElements();
2591 SmallVector<uint32_t, 16> Idxs(NumElts);
2592 for (unsigned l = 0; l != NumElts; l += 8) {
2593 for (unsigned i = 0; i != 4; ++i)
2594 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2595 for (unsigned i = 4; i != 8; ++i)
2596 Idxs[i + l] = i + l;
2599 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2601 if (CI->getNumArgOperands() == 4)
2602 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2603 CI->getArgOperand(2));
2604 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2605 Name.startswith("avx512.mask.pshufh.w."))) {
2606 Value *Op0 = CI->getArgOperand(0);
2607 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2608 unsigned NumElts = CI->getType()->getVectorNumElements();
2610 SmallVector<uint32_t, 16> Idxs(NumElts);
2611 for (unsigned l = 0; l != NumElts; l += 8) {
2612 for (unsigned i = 0; i != 4; ++i)
2613 Idxs[i + l] = i + l;
2614 for (unsigned i = 0; i != 4; ++i)
2615 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2618 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2620 if (CI->getNumArgOperands() == 4)
2621 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2622 CI->getArgOperand(2));
2623 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2624 Value *Op0 = CI->getArgOperand(0);
2625 Value *Op1 = CI->getArgOperand(1);
2626 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2627 unsigned NumElts = CI->getType()->getVectorNumElements();
2629 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2630 unsigned HalfLaneElts = NumLaneElts / 2;
2632 SmallVector<uint32_t, 16> Idxs(NumElts);
2633 for (unsigned i = 0; i != NumElts; ++i) {
2634 // Base index is the starting element of the lane.
2635 Idxs[i] = i - (i % NumLaneElts);
2636 // If we are half way through the lane switch to the other source.
2637 if ((i % NumLaneElts) >= HalfLaneElts)
2639 // Now select the specific element. By adding HalfLaneElts bits from
2640 // the immediate. Wrapping around the immediate every 8-bits.
2641 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2644 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2646 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2647 CI->getArgOperand(3));
2648 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2649 Name.startswith("avx512.mask.movshdup") ||
2650 Name.startswith("avx512.mask.movsldup"))) {
2651 Value *Op0 = CI->getArgOperand(0);
2652 unsigned NumElts = CI->getType()->getVectorNumElements();
2653 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2655 unsigned Offset = 0;
2656 if (Name.startswith("avx512.mask.movshdup."))
2659 SmallVector<uint32_t, 16> Idxs(NumElts);
2660 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2661 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2662 Idxs[i + l + 0] = i + l + Offset;
2663 Idxs[i + l + 1] = i + l + Offset;
2666 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2668 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2669 CI->getArgOperand(1));
2670 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2671 Name.startswith("avx512.mask.unpckl."))) {
2672 Value *Op0 = CI->getArgOperand(0);
2673 Value *Op1 = CI->getArgOperand(1);
2674 int NumElts = CI->getType()->getVectorNumElements();
2675 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2677 SmallVector<uint32_t, 64> Idxs(NumElts);
2678 for (int l = 0; l != NumElts; l += NumLaneElts)
2679 for (int i = 0; i != NumLaneElts; ++i)
2680 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2682 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2684 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2685 CI->getArgOperand(2));
2686 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2687 Name.startswith("avx512.mask.unpckh."))) {
2688 Value *Op0 = CI->getArgOperand(0);
2689 Value *Op1 = CI->getArgOperand(1);
2690 int NumElts = CI->getType()->getVectorNumElements();
2691 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2693 SmallVector<uint32_t, 64> Idxs(NumElts);
2694 for (int l = 0; l != NumElts; l += NumLaneElts)
2695 for (int i = 0; i != NumLaneElts; ++i)
2696 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2698 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2701 CI->getArgOperand(2));
2702 } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2703 Name.startswith("avx512.mask.pand."))) {
2704 VectorType *FTy = cast<VectorType>(CI->getType());
2705 VectorType *ITy = VectorType::getInteger(FTy);
2706 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2707 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2708 Rep = Builder.CreateBitCast(Rep, FTy);
2709 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2710 CI->getArgOperand(2));
2711 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2712 Name.startswith("avx512.mask.pandn."))) {
2713 VectorType *FTy = cast<VectorType>(CI->getType());
2714 VectorType *ITy = VectorType::getInteger(FTy);
2715 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2716 Rep = Builder.CreateAnd(Rep,
2717 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2718 Rep = Builder.CreateBitCast(Rep, FTy);
2719 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2720 CI->getArgOperand(2));
2721 } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2722 Name.startswith("avx512.mask.por."))) {
2723 VectorType *FTy = cast<VectorType>(CI->getType());
2724 VectorType *ITy = VectorType::getInteger(FTy);
2725 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2726 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2727 Rep = Builder.CreateBitCast(Rep, FTy);
2728 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2729 CI->getArgOperand(2));
2730 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2731 Name.startswith("avx512.mask.pxor."))) {
2732 VectorType *FTy = cast<VectorType>(CI->getType());
2733 VectorType *ITy = VectorType::getInteger(FTy);
2734 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2735 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2736 Rep = Builder.CreateBitCast(Rep, FTy);
2737 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2738 CI->getArgOperand(2));
2739 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2740 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2741 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2742 CI->getArgOperand(2));
2743 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2744 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2745 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2746 CI->getArgOperand(2));
2747 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2748 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2749 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2750 CI->getArgOperand(2));
2751 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2752 if (Name.endswith(".512")) {
2754 if (Name[17] == 's')
2755 IID = Intrinsic::x86_avx512_add_ps_512;
2757 IID = Intrinsic::x86_avx512_add_pd_512;
2759 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2760 { CI->getArgOperand(0), CI->getArgOperand(1),
2761 CI->getArgOperand(4) });
2763 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2765 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2766 CI->getArgOperand(2));
2767 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2768 if (Name.endswith(".512")) {
2770 if (Name[17] == 's')
2771 IID = Intrinsic::x86_avx512_div_ps_512;
2773 IID = Intrinsic::x86_avx512_div_pd_512;
2775 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2776 { CI->getArgOperand(0), CI->getArgOperand(1),
2777 CI->getArgOperand(4) });
2779 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2781 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2782 CI->getArgOperand(2));
2783 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2784 if (Name.endswith(".512")) {
2786 if (Name[17] == 's')
2787 IID = Intrinsic::x86_avx512_mul_ps_512;
2789 IID = Intrinsic::x86_avx512_mul_pd_512;
2791 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2792 { CI->getArgOperand(0), CI->getArgOperand(1),
2793 CI->getArgOperand(4) });
2795 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2797 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2798 CI->getArgOperand(2));
2799 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2800 if (Name.endswith(".512")) {
2802 if (Name[17] == 's')
2803 IID = Intrinsic::x86_avx512_sub_ps_512;
2805 IID = Intrinsic::x86_avx512_sub_pd_512;
2807 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2808 { CI->getArgOperand(0), CI->getArgOperand(1),
2809 CI->getArgOperand(4) });
2811 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2813 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2814 CI->getArgOperand(2));
2815 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2816 Name.startswith("avx512.mask.min.p")) &&
2817 Name.drop_front(18) == ".512") {
2818 bool IsDouble = Name[17] == 'd';
2819 bool IsMin = Name[13] == 'i';
2820 static const Intrinsic::ID MinMaxTbl[2][2] = {
2821 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2822 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2824 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2826 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2827 { CI->getArgOperand(0), CI->getArgOperand(1),
2828 CI->getArgOperand(4) });
2829 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2830 CI->getArgOperand(2));
2831 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2832 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2835 { CI->getArgOperand(0), Builder.getInt1(false) });
2836 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2837 CI->getArgOperand(1));
2838 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2839 bool IsImmediate = Name[16] == 'i' ||
2840 (Name.size() > 18 && Name[18] == 'i');
2841 bool IsVariable = Name[16] == 'v';
2842 char Size = Name[16] == '.' ? Name[17] :
2843 Name[17] == '.' ? Name[18] :
2844 Name[18] == '.' ? Name[19] :
2848 if (IsVariable && Name[17] != '.') {
2849 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2850 IID = Intrinsic::x86_avx2_psllv_q;
2851 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2852 IID = Intrinsic::x86_avx2_psllv_q_256;
2853 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2854 IID = Intrinsic::x86_avx2_psllv_d;
2855 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2856 IID = Intrinsic::x86_avx2_psllv_d_256;
2857 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2858 IID = Intrinsic::x86_avx512_psllv_w_128;
2859 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2860 IID = Intrinsic::x86_avx512_psllv_w_256;
2861 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2862 IID = Intrinsic::x86_avx512_psllv_w_512;
2864 llvm_unreachable("Unexpected size");
2865 } else if (Name.endswith(".128")) {
2866 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2867 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2868 : Intrinsic::x86_sse2_psll_d;
2869 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2870 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2871 : Intrinsic::x86_sse2_psll_q;
2872 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2873 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2874 : Intrinsic::x86_sse2_psll_w;
2876 llvm_unreachable("Unexpected size");
2877 } else if (Name.endswith(".256")) {
2878 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2879 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2880 : Intrinsic::x86_avx2_psll_d;
2881 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2882 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2883 : Intrinsic::x86_avx2_psll_q;
2884 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2885 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2886 : Intrinsic::x86_avx2_psll_w;
2888 llvm_unreachable("Unexpected size");
2890 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2891 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2892 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2893 Intrinsic::x86_avx512_psll_d_512;
2894 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2895 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2896 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2897 Intrinsic::x86_avx512_psll_q_512;
2898 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2899 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2900 : Intrinsic::x86_avx512_psll_w_512;
2902 llvm_unreachable("Unexpected size");
2905 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2906 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2907 bool IsImmediate = Name[16] == 'i' ||
2908 (Name.size() > 18 && Name[18] == 'i');
2909 bool IsVariable = Name[16] == 'v';
2910 char Size = Name[16] == '.' ? Name[17] :
2911 Name[17] == '.' ? Name[18] :
2912 Name[18] == '.' ? Name[19] :
2916 if (IsVariable && Name[17] != '.') {
2917 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2918 IID = Intrinsic::x86_avx2_psrlv_q;
2919 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2920 IID = Intrinsic::x86_avx2_psrlv_q_256;
2921 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2922 IID = Intrinsic::x86_avx2_psrlv_d;
2923 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2924 IID = Intrinsic::x86_avx2_psrlv_d_256;
2925 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2926 IID = Intrinsic::x86_avx512_psrlv_w_128;
2927 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2928 IID = Intrinsic::x86_avx512_psrlv_w_256;
2929 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2930 IID = Intrinsic::x86_avx512_psrlv_w_512;
2932 llvm_unreachable("Unexpected size");
2933 } else if (Name.endswith(".128")) {
2934 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2935 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2936 : Intrinsic::x86_sse2_psrl_d;
2937 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2938 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2939 : Intrinsic::x86_sse2_psrl_q;
2940 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2941 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2942 : Intrinsic::x86_sse2_psrl_w;
2944 llvm_unreachable("Unexpected size");
2945 } else if (Name.endswith(".256")) {
2946 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2947 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2948 : Intrinsic::x86_avx2_psrl_d;
2949 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2950 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2951 : Intrinsic::x86_avx2_psrl_q;
2952 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2953 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2954 : Intrinsic::x86_avx2_psrl_w;
2956 llvm_unreachable("Unexpected size");
2958 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2959 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2960 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2961 Intrinsic::x86_avx512_psrl_d_512;
2962 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2963 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2964 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2965 Intrinsic::x86_avx512_psrl_q_512;
2966 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2967 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2968 : Intrinsic::x86_avx512_psrl_w_512;
2970 llvm_unreachable("Unexpected size");
2973 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2974 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2975 bool IsImmediate = Name[16] == 'i' ||
2976 (Name.size() > 18 && Name[18] == 'i');
2977 bool IsVariable = Name[16] == 'v';
2978 char Size = Name[16] == '.' ? Name[17] :
2979 Name[17] == '.' ? Name[18] :
2980 Name[18] == '.' ? Name[19] :
2984 if (IsVariable && Name[17] != '.') {
2985 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2986 IID = Intrinsic::x86_avx2_psrav_d;
2987 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2988 IID = Intrinsic::x86_avx2_psrav_d_256;
2989 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2990 IID = Intrinsic::x86_avx512_psrav_w_128;
2991 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2992 IID = Intrinsic::x86_avx512_psrav_w_256;
2993 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2994 IID = Intrinsic::x86_avx512_psrav_w_512;
2996 llvm_unreachable("Unexpected size");
2997 } else if (Name.endswith(".128")) {
2998 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2999 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3000 : Intrinsic::x86_sse2_psra_d;
3001 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3002 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3003 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3004 Intrinsic::x86_avx512_psra_q_128;
3005 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3006 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3007 : Intrinsic::x86_sse2_psra_w;
3009 llvm_unreachable("Unexpected size");
3010 } else if (Name.endswith(".256")) {
3011 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3012 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3013 : Intrinsic::x86_avx2_psra_d;
3014 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3016 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3017 Intrinsic::x86_avx512_psra_q_256;
3018 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3019 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3020 : Intrinsic::x86_avx2_psra_w;
3022 llvm_unreachable("Unexpected size");
3024 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3025 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3026 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3027 Intrinsic::x86_avx512_psra_d_512;
3028 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3029 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3030 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3031 Intrinsic::x86_avx512_psra_q_512;
3032 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3033 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3034 : Intrinsic::x86_avx512_psra_w_512;
3036 llvm_unreachable("Unexpected size");
3039 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3040 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3041 Rep = upgradeMaskedMove(Builder, *CI);
3042 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3043 Rep = UpgradeMaskToInt(Builder, *CI);
3044 } else if (IsX86 && Name.endswith(".movntdqa")) {
3045 Module *M = F->getParent();
3046 MDNode *Node = MDNode::get(
3047 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3049 Value *Ptr = CI->getArgOperand(0);
3050 VectorType *VTy = cast<VectorType>(CI->getType());
3052 // Convert the type of the pointer to a pointer to the stored type.
3054 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
3055 LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
3056 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3058 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3059 Name.startswith("fma.vfmsub.") ||
3060 Name.startswith("fma.vfnmadd.") ||
3061 Name.startswith("fma.vfnmsub."))) {
3062 bool NegMul = Name[6] == 'n';
3063 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3064 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3066 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3067 CI->getArgOperand(2) };
3070 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3071 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3072 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3075 if (NegMul && !IsScalar)
3076 Ops[0] = Builder.CreateFNeg(Ops[0]);
3077 if (NegMul && IsScalar)
3078 Ops[1] = Builder.CreateFNeg(Ops[1]);
3080 Ops[2] = Builder.CreateFNeg(Ops[2]);
3082 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3088 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3090 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3091 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3092 CI->getArgOperand(2) };
3094 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3095 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3096 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3098 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3103 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3105 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3106 Name.startswith("avx512.maskz.vfmadd.s") ||
3107 Name.startswith("avx512.mask3.vfmadd.s") ||
3108 Name.startswith("avx512.mask3.vfmsub.s") ||
3109 Name.startswith("avx512.mask3.vfnmsub.s"))) {
3110 bool IsMask3 = Name[11] == '3';
3111 bool IsMaskZ = Name[11] == 'z';
3112 // Drop the "avx512.mask." to make it easier.
3113 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3114 bool NegMul = Name[2] == 'n';
3115 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3117 Value *A = CI->getArgOperand(0);
3118 Value *B = CI->getArgOperand(1);
3119 Value *C = CI->getArgOperand(2);
3121 if (NegMul && (IsMask3 || IsMaskZ))
3122 A = Builder.CreateFNeg(A);
3123 if (NegMul && !(IsMask3 || IsMaskZ))
3124 B = Builder.CreateFNeg(B);
3126 C = Builder.CreateFNeg(C);
3128 A = Builder.CreateExtractElement(A, (uint64_t)0);
3129 B = Builder.CreateExtractElement(B, (uint64_t)0);
3130 C = Builder.CreateExtractElement(C, (uint64_t)0);
3132 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3133 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3134 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3137 if (Name.back() == 'd')
3138 IID = Intrinsic::x86_avx512_vfmadd_f64;
3140 IID = Intrinsic::x86_avx512_vfmadd_f32;
3141 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3142 Rep = Builder.CreateCall(FMA, Ops);
3144 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3147 Rep = Builder.CreateCall(FMA, { A, B, C });
3150 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3153 // For Mask3 with NegAcc, we need to create a new extractelement that
3154 // avoids the negation above.
3155 if (NegAcc && IsMask3)
3156 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3159 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3161 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3163 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3164 Name.startswith("avx512.mask.vfnmadd.p") ||
3165 Name.startswith("avx512.mask.vfnmsub.p") ||
3166 Name.startswith("avx512.mask3.vfmadd.p") ||
3167 Name.startswith("avx512.mask3.vfmsub.p") ||
3168 Name.startswith("avx512.mask3.vfnmsub.p") ||
3169 Name.startswith("avx512.maskz.vfmadd.p"))) {
3170 bool IsMask3 = Name[11] == '3';
3171 bool IsMaskZ = Name[11] == 'z';
3172 // Drop the "avx512.mask." to make it easier.
3173 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3174 bool NegMul = Name[2] == 'n';
3175 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3177 Value *A = CI->getArgOperand(0);
3178 Value *B = CI->getArgOperand(1);
3179 Value *C = CI->getArgOperand(2);
3181 if (NegMul && (IsMask3 || IsMaskZ))
3182 A = Builder.CreateFNeg(A);
3183 if (NegMul && !(IsMask3 || IsMaskZ))
3184 B = Builder.CreateFNeg(B);
3186 C = Builder.CreateFNeg(C);
3188 if (CI->getNumArgOperands() == 5 &&
3189 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3190 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3192 // Check the character before ".512" in string.
3193 if (Name[Name.size()-5] == 's')
3194 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3196 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3198 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3199 { A, B, C, CI->getArgOperand(4) });
3201 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3204 Rep = Builder.CreateCall(FMA, { A, B, C });
3207 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3208 IsMask3 ? CI->getArgOperand(2) :
3209 CI->getArgOperand(0);
3211 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3212 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3213 Name.startswith("fma.vfmsubadd.p"))) {
3214 bool IsSubAdd = Name[7] == 's';
3215 int NumElts = CI->getType()->getVectorNumElements();
3217 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3218 CI->getArgOperand(2) };
3220 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3222 Value *Odd = Builder.CreateCall(FMA, Ops);
3223 Ops[2] = Builder.CreateFNeg(Ops[2]);
3224 Value *Even = Builder.CreateCall(FMA, Ops);
3227 std::swap(Even, Odd);
3229 SmallVector<uint32_t, 32> Idxs(NumElts);
3230 for (int i = 0; i != NumElts; ++i)
3231 Idxs[i] = i + (i % 2) * NumElts;
3233 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3234 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3235 Name.startswith("avx512.mask3.vfmaddsub.p") ||
3236 Name.startswith("avx512.maskz.vfmaddsub.p") ||
3237 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3238 bool IsMask3 = Name[11] == '3';
3239 bool IsMaskZ = Name[11] == 'z';
3240 // Drop the "avx512.mask." to make it easier.
3241 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3242 bool IsSubAdd = Name[3] == 's';
3243 if (CI->getNumArgOperands() == 5 &&
3244 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3245 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3247 // Check the character before ".512" in string.
3248 if (Name[Name.size()-5] == 's')
3249 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3251 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3253 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3254 CI->getArgOperand(2), CI->getArgOperand(4) };
3256 Ops[2] = Builder.CreateFNeg(Ops[2]);
3258 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3259 {CI->getArgOperand(0), CI->getArgOperand(1),
3260 CI->getArgOperand(2), CI->getArgOperand(4)});
3262 int NumElts = CI->getType()->getVectorNumElements();
3264 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3265 CI->getArgOperand(2) };
3267 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3269 Value *Odd = Builder.CreateCall(FMA, Ops);
3270 Ops[2] = Builder.CreateFNeg(Ops[2]);
3271 Value *Even = Builder.CreateCall(FMA, Ops);
3274 std::swap(Even, Odd);
3276 SmallVector<uint32_t, 32> Idxs(NumElts);
3277 for (int i = 0; i != NumElts; ++i)
3278 Idxs[i] = i + (i % 2) * NumElts;
3280 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3283 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3284 IsMask3 ? CI->getArgOperand(2) :
3285 CI->getArgOperand(0);
3287 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3288 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3289 Name.startswith("avx512.maskz.pternlog."))) {
3290 bool ZeroMask = Name[11] == 'z';
3291 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3292 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3294 if (VecWidth == 128 && EltWidth == 32)
3295 IID = Intrinsic::x86_avx512_pternlog_d_128;
3296 else if (VecWidth == 256 && EltWidth == 32)
3297 IID = Intrinsic::x86_avx512_pternlog_d_256;
3298 else if (VecWidth == 512 && EltWidth == 32)
3299 IID = Intrinsic::x86_avx512_pternlog_d_512;
3300 else if (VecWidth == 128 && EltWidth == 64)
3301 IID = Intrinsic::x86_avx512_pternlog_q_128;
3302 else if (VecWidth == 256 && EltWidth == 64)
3303 IID = Intrinsic::x86_avx512_pternlog_q_256;
3304 else if (VecWidth == 512 && EltWidth == 64)
3305 IID = Intrinsic::x86_avx512_pternlog_q_512;
3307 llvm_unreachable("Unexpected intrinsic");
3309 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3310 CI->getArgOperand(2), CI->getArgOperand(3) };
3311 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3313 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3314 : CI->getArgOperand(0);
3315 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3316 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3317 Name.startswith("avx512.maskz.vpmadd52"))) {
3318 bool ZeroMask = Name[11] == 'z';
3319 bool High = Name[20] == 'h' || Name[21] == 'h';
3320 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3322 if (VecWidth == 128 && !High)
3323 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3324 else if (VecWidth == 256 && !High)
3325 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3326 else if (VecWidth == 512 && !High)
3327 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3328 else if (VecWidth == 128 && High)
3329 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3330 else if (VecWidth == 256 && High)
3331 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3332 else if (VecWidth == 512 && High)
3333 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3335 llvm_unreachable("Unexpected intrinsic");
3337 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3338 CI->getArgOperand(2) };
3339 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3341 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3342 : CI->getArgOperand(0);
3343 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3344 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3345 Name.startswith("avx512.mask.vpermt2var.") ||
3346 Name.startswith("avx512.maskz.vpermt2var."))) {
3347 bool ZeroMask = Name[11] == 'z';
3348 bool IndexForm = Name[17] == 'i';
3349 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3350 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3351 Name.startswith("avx512.maskz.vpdpbusd.") ||
3352 Name.startswith("avx512.mask.vpdpbusds.") ||
3353 Name.startswith("avx512.maskz.vpdpbusds."))) {
3354 bool ZeroMask = Name[11] == 'z';
3355 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3356 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3358 if (VecWidth == 128 && !IsSaturating)
3359 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3360 else if (VecWidth == 256 && !IsSaturating)
3361 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3362 else if (VecWidth == 512 && !IsSaturating)
3363 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3364 else if (VecWidth == 128 && IsSaturating)
3365 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3366 else if (VecWidth == 256 && IsSaturating)
3367 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3368 else if (VecWidth == 512 && IsSaturating)
3369 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3371 llvm_unreachable("Unexpected intrinsic");
3373 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3374 CI->getArgOperand(2) };
3375 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3377 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3378 : CI->getArgOperand(0);
3379 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3380 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3381 Name.startswith("avx512.maskz.vpdpwssd.") ||
3382 Name.startswith("avx512.mask.vpdpwssds.") ||
3383 Name.startswith("avx512.maskz.vpdpwssds."))) {
3384 bool ZeroMask = Name[11] == 'z';
3385 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3386 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3388 if (VecWidth == 128 && !IsSaturating)
3389 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3390 else if (VecWidth == 256 && !IsSaturating)
3391 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3392 else if (VecWidth == 512 && !IsSaturating)
3393 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3394 else if (VecWidth == 128 && IsSaturating)
3395 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3396 else if (VecWidth == 256 && IsSaturating)
3397 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3398 else if (VecWidth == 512 && IsSaturating)
3399 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3401 llvm_unreachable("Unexpected intrinsic");
3403 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3404 CI->getArgOperand(2) };
3405 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3407 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3408 : CI->getArgOperand(0);
3409 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3410 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3411 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3412 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3414 if (Name[0] == 'a' && Name.back() == '2')
3415 IID = Intrinsic::x86_addcarry_32;
3416 else if (Name[0] == 'a' && Name.back() == '4')
3417 IID = Intrinsic::x86_addcarry_64;
3418 else if (Name[0] == 's' && Name.back() == '2')
3419 IID = Intrinsic::x86_subborrow_32;
3420 else if (Name[0] == 's' && Name.back() == '4')
3421 IID = Intrinsic::x86_subborrow_64;
3423 llvm_unreachable("Unexpected intrinsic");
3425 // Make a call with 3 operands.
3426 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3427 CI->getArgOperand(2)};
3428 Value *NewCall = Builder.CreateCall(
3429 Intrinsic::getDeclaration(CI->getModule(), IID),
3432 // Extract the second result and store it.
3433 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3434 // Cast the pointer to the right type.
3435 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3436 llvm::PointerType::getUnqual(Data->getType()));
3437 Builder.CreateAlignedStore(Data, Ptr, 1);
3438 // Replace the original call result with the first result of the new call.
3439 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3441 CI->replaceAllUsesWith(CF);
3443 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3444 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3445 // Rep will be updated by the call in the condition.
3446 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3447 Value *Arg = CI->getArgOperand(0);
3448 Value *Neg = Builder.CreateNeg(Arg, "neg");
3449 Value *Cmp = Builder.CreateICmpSGE(
3450 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3451 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3452 } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3453 Name.startswith("atomic.load.add.f64.p"))) {
3454 Value *Ptr = CI->getArgOperand(0);
3455 Value *Val = CI->getArgOperand(1);
3456 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3457 AtomicOrdering::SequentiallyConsistent);
3458 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3459 Name == "max.ui" || Name == "max.ull")) {
3460 Value *Arg0 = CI->getArgOperand(0);
3461 Value *Arg1 = CI->getArgOperand(1);
3462 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3463 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3464 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3465 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3466 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3467 Name == "min.ui" || Name == "min.ull")) {
3468 Value *Arg0 = CI->getArgOperand(0);
3469 Value *Arg1 = CI->getArgOperand(1);
3470 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3471 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3472 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3473 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3474 } else if (IsNVVM && Name == "clz.ll") {
3475 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3476 Value *Arg = CI->getArgOperand(0);
3477 Value *Ctlz = Builder.CreateCall(
3478 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3480 {Arg, Builder.getFalse()}, "ctlz");
3481 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3482 } else if (IsNVVM && Name == "popc.ll") {
3483 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3485 Value *Arg = CI->getArgOperand(0);
3486 Value *Popc = Builder.CreateCall(
3487 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3490 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3491 } else if (IsNVVM && Name == "h2f") {
3492 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3493 F->getParent(), Intrinsic::convert_from_fp16,
3494 {Builder.getFloatTy()}),
3495 CI->getArgOperand(0), "h2f");
3497 llvm_unreachable("Unknown function for CallInst upgrade.");
3501 CI->replaceAllUsesWith(Rep);
3502 CI->eraseFromParent();
3506 const auto &DefaultCase = [&NewFn, &CI]() -> void {
3507 // Handle generic mangling change, but nothing else
3509 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3510 "Unknown function for CallInst upgrade and isn't just a name change");
3511 CI->setCalledFunction(NewFn);
3513 CallInst *NewCall = nullptr;
3514 switch (NewFn->getIntrinsicID()) {
3519 case Intrinsic::experimental_vector_reduce_v2_fmul: {
3520 SmallVector<Value *, 2> Args;
3522 Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3524 Args.push_back(CI->getOperand(0));
3525 Args.push_back(CI->getOperand(1));
3526 NewCall = Builder.CreateCall(NewFn, Args);
3527 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3530 case Intrinsic::experimental_vector_reduce_v2_fadd: {
3531 SmallVector<Value *, 2> Args;
3533 Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3535 Args.push_back(CI->getOperand(0));
3536 Args.push_back(CI->getOperand(1));
3537 NewCall = Builder.CreateCall(NewFn, Args);
3538 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3541 case Intrinsic::arm_neon_vld1:
3542 case Intrinsic::arm_neon_vld2:
3543 case Intrinsic::arm_neon_vld3:
3544 case Intrinsic::arm_neon_vld4:
3545 case Intrinsic::arm_neon_vld2lane:
3546 case Intrinsic::arm_neon_vld3lane:
3547 case Intrinsic::arm_neon_vld4lane:
3548 case Intrinsic::arm_neon_vst1:
3549 case Intrinsic::arm_neon_vst2:
3550 case Intrinsic::arm_neon_vst3:
3551 case Intrinsic::arm_neon_vst4:
3552 case Intrinsic::arm_neon_vst2lane:
3553 case Intrinsic::arm_neon_vst3lane:
3554 case Intrinsic::arm_neon_vst4lane: {
3555 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3556 CI->arg_operands().end());
3557 NewCall = Builder.CreateCall(NewFn, Args);
3561 case Intrinsic::bitreverse:
3562 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3565 case Intrinsic::ctlz:
3566 case Intrinsic::cttz:
3567 assert(CI->getNumArgOperands() == 1 &&
3568 "Mismatch between function args and call args");
3570 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3573 case Intrinsic::objectsize: {
3574 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3575 ? Builder.getFalse()
3576 : CI->getArgOperand(2);
3578 CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3579 NewCall = Builder.CreateCall(
3580 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3584 case Intrinsic::ctpop:
3585 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3588 case Intrinsic::convert_from_fp16:
3589 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3592 case Intrinsic::dbg_value:
3593 // Upgrade from the old version that had an extra offset argument.
3594 assert(CI->getNumArgOperands() == 4);
3595 // Drop nonzero offsets instead of attempting to upgrade them.
3596 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3597 if (Offset->isZeroValue()) {
3598 NewCall = Builder.CreateCall(
3600 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3603 CI->eraseFromParent();
3606 case Intrinsic::x86_xop_vfrcz_ss:
3607 case Intrinsic::x86_xop_vfrcz_sd:
3608 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3611 case Intrinsic::x86_xop_vpermil2pd:
3612 case Intrinsic::x86_xop_vpermil2ps:
3613 case Intrinsic::x86_xop_vpermil2pd_256:
3614 case Intrinsic::x86_xop_vpermil2ps_256: {
3615 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3616 CI->arg_operands().end());
3617 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3618 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3619 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3620 NewCall = Builder.CreateCall(NewFn, Args);
3624 case Intrinsic::x86_sse41_ptestc:
3625 case Intrinsic::x86_sse41_ptestz:
3626 case Intrinsic::x86_sse41_ptestnzc: {
3627 // The arguments for these intrinsics used to be v4f32, and changed
3628 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3629 // So, the only thing required is a bitcast for both arguments.
3630 // First, check the arguments have the old type.
3631 Value *Arg0 = CI->getArgOperand(0);
3632 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3635 // Old intrinsic, add bitcasts
3636 Value *Arg1 = CI->getArgOperand(1);
3638 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3640 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3641 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3643 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3647 case Intrinsic::x86_rdtscp: {
3648 // This used to take 1 arguments. If we have no arguments, it is already
3650 if (CI->getNumOperands() == 0)
3653 NewCall = Builder.CreateCall(NewFn);
3654 // Extract the second result and store it.
3655 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3656 // Cast the pointer to the right type.
3657 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3658 llvm::PointerType::getUnqual(Data->getType()));
3659 Builder.CreateAlignedStore(Data, Ptr, 1);
3660 // Replace the original call result with the first result of the new call.
3661 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3663 std::string Name = CI->getName();
3664 if (!Name.empty()) {
3665 CI->setName(Name + ".old");
3666 NewCall->setName(Name);
3668 CI->replaceAllUsesWith(TSC);
3669 CI->eraseFromParent();
3673 case Intrinsic::x86_sse41_insertps:
3674 case Intrinsic::x86_sse41_dppd:
3675 case Intrinsic::x86_sse41_dpps:
3676 case Intrinsic::x86_sse41_mpsadbw:
3677 case Intrinsic::x86_avx_dp_ps_256:
3678 case Intrinsic::x86_avx2_mpsadbw: {
3679 // Need to truncate the last argument from i32 to i8 -- this argument models
3680 // an inherently 8-bit immediate operand to these x86 instructions.
3681 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3682 CI->arg_operands().end());
3684 // Replace the last argument with a trunc.
3685 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3686 NewCall = Builder.CreateCall(NewFn, Args);
3690 case Intrinsic::thread_pointer: {
3691 NewCall = Builder.CreateCall(NewFn, {});
3695 case Intrinsic::invariant_start:
3696 case Intrinsic::invariant_end:
3697 case Intrinsic::masked_load:
3698 case Intrinsic::masked_store:
3699 case Intrinsic::masked_gather:
3700 case Intrinsic::masked_scatter: {
3701 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3702 CI->arg_operands().end());
3703 NewCall = Builder.CreateCall(NewFn, Args);
3707 case Intrinsic::memcpy:
3708 case Intrinsic::memmove:
3709 case Intrinsic::memset: {
3710 // We have to make sure that the call signature is what we're expecting.
3711 // We only want to change the old signatures by removing the alignment arg:
3712 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3713 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3714 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3715 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3716 // Note: i8*'s in the above can be any pointer type
3717 if (CI->getNumArgOperands() != 5) {
3721 // Remove alignment argument (3), and add alignment attributes to the
3722 // dest/src pointers.
3723 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3724 CI->getArgOperand(2), CI->getArgOperand(4)};
3725 NewCall = Builder.CreateCall(NewFn, Args);
3726 auto *MemCI = cast<MemIntrinsic>(NewCall);
3727 // All mem intrinsics support dest alignment.
3728 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3729 MemCI->setDestAlignment(Align->getZExtValue());
3730 // Memcpy/Memmove also support source alignment.
3731 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3732 MTI->setSourceAlignment(Align->getZExtValue());
3736 assert(NewCall && "Should have either set this variable or returned through "
3737 "the default case");
3738 std::string Name = CI->getName();
3739 if (!Name.empty()) {
3740 CI->setName(Name + ".old");
3741 NewCall->setName(Name);
3743 CI->replaceAllUsesWith(NewCall);
3744 CI->eraseFromParent();
3747 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3748 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3750 // Check if this function should be upgraded and get the replacement function
3753 if (UpgradeIntrinsicFunction(F, NewFn)) {
3754 // Replace all users of the old function with the new function or new
3755 // instructions. This is not a range loop because the call is deleted.
3756 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3757 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3758 UpgradeIntrinsicCall(CI, NewFn);
3760 // Remove old function, no longer used, from the module.
3761 F->eraseFromParent();
3765 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3766 // Check if the tag uses struct-path aware TBAA format.
3767 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3770 auto &Context = MD.getContext();
3771 if (MD.getNumOperands() == 3) {
3772 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3773 MDNode *ScalarType = MDNode::get(Context, Elts);
3774 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3775 Metadata *Elts2[] = {ScalarType, ScalarType,
3776 ConstantAsMetadata::get(
3777 Constant::getNullValue(Type::getInt64Ty(Context))),
3779 return MDNode::get(Context, Elts2);
3781 // Create a MDNode <MD, MD, offset 0>
3782 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3783 Type::getInt64Ty(Context)))};
3784 return MDNode::get(Context, Elts);
3787 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3788 Instruction *&Temp) {
3789 if (Opc != Instruction::BitCast)
3793 Type *SrcTy = V->getType();
3794 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3795 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3796 LLVMContext &Context = V->getContext();
3798 // We have no information about target data layout, so we assume that
3799 // the maximum pointer size is 64bit.
3800 Type *MidTy = Type::getInt64Ty(Context);
3801 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3803 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3809 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3810 if (Opc != Instruction::BitCast)
3813 Type *SrcTy = C->getType();
3814 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3815 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3816 LLVMContext &Context = C->getContext();
3818 // We have no information about target data layout, so we assume that
3819 // the maximum pointer size is 64bit.
3820 Type *MidTy = Type::getInt64Ty(Context);
3822 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3829 /// Check the debug info version number, if it is out-dated, drop the debug
3830 /// info. Return true if module is modified.
3831 bool llvm::UpgradeDebugInfo(Module &M) {
3832 unsigned Version = getDebugMetadataVersionFromModule(M);
3833 if (Version == DEBUG_METADATA_VERSION) {
3834 bool BrokenDebugInfo = false;
3835 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3836 report_fatal_error("Broken module found, compilation aborted!");
3837 if (!BrokenDebugInfo)
3838 // Everything is ok.
3841 // Diagnose malformed debug info.
3842 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3843 M.getContext().diagnose(Diag);
3846 bool Modified = StripDebugInfo(M);
3847 if (Modified && Version != DEBUG_METADATA_VERSION) {
3848 // Diagnose a version mismatch.
3849 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3850 M.getContext().diagnose(DiagVersion);
3855 /// This checks for objc retain release marker which should be upgraded. It
3856 /// returns true if module is modified.
3857 static bool UpgradeRetainReleaseMarker(Module &M) {
3858 bool Changed = false;
3859 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3860 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3861 if (ModRetainReleaseMarker) {
3862 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3864 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3866 SmallVector<StringRef, 4> ValueComp;
3867 ID->getString().split(ValueComp, "#");
3868 if (ValueComp.size() == 2) {
3869 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3870 ID = MDString::get(M.getContext(), NewValue);
3872 M.addModuleFlag(Module::Error, MarkerKey, ID);
3873 M.eraseNamedMetadata(ModRetainReleaseMarker);
3881 void llvm::UpgradeARCRuntime(Module &M) {
3882 // This lambda converts normal function calls to ARC runtime functions to
3884 auto UpgradeToIntrinsic = [&](const char *OldFunc,
3885 llvm::Intrinsic::ID IntrinsicFunc) {
3886 Function *Fn = M.getFunction(OldFunc);
3891 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
3893 for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
3894 CallInst *CI = dyn_cast<CallInst>(*I++);
3895 if (!CI || CI->getCalledFunction() != Fn)
3898 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
3899 FunctionType *NewFuncTy = NewFn->getFunctionType();
3900 SmallVector<Value *, 2> Args;
3902 // Don't upgrade the intrinsic if it's not valid to bitcast the return
3903 // value to the return type of the old function.
3904 if (NewFuncTy->getReturnType() != CI->getType() &&
3905 !CastInst::castIsValid(Instruction::BitCast, CI,
3906 NewFuncTy->getReturnType()))
3909 bool InvalidCast = false;
3911 for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
3912 Value *Arg = CI->getArgOperand(I);
3914 // Bitcast argument to the parameter type of the new function if it's
3915 // not a variadic argument.
3916 if (I < NewFuncTy->getNumParams()) {
3917 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
3918 // to the parameter type of the new function.
3919 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
3920 NewFuncTy->getParamType(I))) {
3924 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
3926 Args.push_back(Arg);
3932 // Create a call instruction that calls the new function.
3933 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
3934 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
3935 NewCall->setName(CI->getName());
3937 // Bitcast the return value back to the type of the old call.
3938 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
3940 if (!CI->use_empty())
3941 CI->replaceAllUsesWith(NewRetVal);
3942 CI->eraseFromParent();
3945 if (Fn->use_empty())
3946 Fn->eraseFromParent();
3949 // Unconditionally convert a call to "clang.arc.use" to a call to
3950 // "llvm.objc.clang.arc.use".
3951 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
3953 // Upgrade the retain release marker. If there is no need to upgrade
3954 // the marker, that means either the module is already new enough to contain
3955 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
3956 if (!UpgradeRetainReleaseMarker(M))
3959 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
3960 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
3961 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
3962 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
3963 {"objc_autoreleaseReturnValue",
3964 llvm::Intrinsic::objc_autoreleaseReturnValue},
3965 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
3966 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
3967 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
3968 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
3969 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
3970 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
3971 {"objc_release", llvm::Intrinsic::objc_release},
3972 {"objc_retain", llvm::Intrinsic::objc_retain},
3973 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
3974 {"objc_retainAutoreleaseReturnValue",
3975 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
3976 {"objc_retainAutoreleasedReturnValue",
3977 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
3978 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
3979 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
3980 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
3981 {"objc_unsafeClaimAutoreleasedReturnValue",
3982 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
3983 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
3984 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
3985 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
3986 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
3987 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
3988 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
3989 {"objc_arc_annotation_topdown_bbstart",
3990 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
3991 {"objc_arc_annotation_topdown_bbend",
3992 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
3993 {"objc_arc_annotation_bottomup_bbstart",
3994 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
3995 {"objc_arc_annotation_bottomup_bbend",
3996 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
3998 for (auto &I : RuntimeFuncs)
3999 UpgradeToIntrinsic(I.first, I.second);
4002 bool llvm::UpgradeModuleFlags(Module &M) {
4003 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4007 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4008 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4009 MDNode *Op = ModFlags->getOperand(I);
4010 if (Op->getNumOperands() != 3)
4012 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4015 if (ID->getString() == "Objective-C Image Info Version")
4017 if (ID->getString() == "Objective-C Class Properties")
4018 HasClassProperties = true;
4019 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4020 // field was Error and now they are Max.
4021 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4022 if (auto *Behavior =
4023 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4024 if (Behavior->getLimitedValue() == Module::Error) {
4025 Type *Int32Ty = Type::getInt32Ty(M.getContext());
4026 Metadata *Ops[3] = {
4027 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4028 MDString::get(M.getContext(), ID->getString()),
4030 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4035 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4036 // section name so that llvm-lto will not complain about mismatching
4037 // module flags that is functionally the same.
4038 if (ID->getString() == "Objective-C Image Info Section") {
4039 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4040 SmallVector<StringRef, 4> ValueComp;
4041 Value->getString().split(ValueComp, " ");
4042 if (ValueComp.size() != 1) {
4043 std::string NewValue;
4044 for (auto &S : ValueComp)
4045 NewValue += S.str();
4046 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4047 MDString::get(M.getContext(), NewValue)};
4048 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4055 // "Objective-C Class Properties" is recently added for Objective-C. We
4056 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4057 // flag of value 0, so we can correclty downgrade this flag when trying to
4058 // link an ObjC bitcode without this module flag with an ObjC bitcode with
4059 // this module flag.
4060 if (HasObjCFlag && !HasClassProperties) {
4061 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4069 void llvm::UpgradeSectionAttributes(Module &M) {
4070 auto TrimSpaces = [](StringRef Section) -> std::string {
4071 SmallVector<StringRef, 5> Components;
4072 Section.split(Components, ',');
4074 SmallString<32> Buffer;
4075 raw_svector_ostream OS(Buffer);
4077 for (auto Component : Components)
4078 OS << ',' << Component.trim();
4080 return OS.str().substr(1);
4083 for (auto &GV : M.globals()) {
4084 if (!GV.hasSection())
4087 StringRef Section = GV.getSection();
4089 if (!Section.startswith("__DATA, __objc_catlist"))
4092 // __DATA, __objc_catlist, regular, no_dead_strip
4093 // __DATA,__objc_catlist,regular,no_dead_strip
4094 GV.setSection(TrimSpaces(Section));
4098 static bool isOldLoopArgument(Metadata *MD) {
4099 auto *T = dyn_cast_or_null<MDTuple>(MD);
4102 if (T->getNumOperands() < 1)
4104 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4107 return S->getString().startswith("llvm.vectorizer.");
4110 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4111 StringRef OldPrefix = "llvm.vectorizer.";
4112 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4114 if (OldTag == "llvm.vectorizer.unroll")
4115 return MDString::get(C, "llvm.loop.interleave.count");
4117 return MDString::get(
4118 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4122 static Metadata *upgradeLoopArgument(Metadata *MD) {
4123 auto *T = dyn_cast_or_null<MDTuple>(MD);
4126 if (T->getNumOperands() < 1)
4128 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4131 if (!OldTag->getString().startswith("llvm.vectorizer."))
4134 // This has an old tag. Upgrade it.
4135 SmallVector<Metadata *, 8> Ops;
4136 Ops.reserve(T->getNumOperands());
4137 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4138 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4139 Ops.push_back(T->getOperand(I));
4141 return MDTuple::get(T->getContext(), Ops);
4144 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4145 auto *T = dyn_cast<MDTuple>(&N);
4149 if (none_of(T->operands(), isOldLoopArgument))
4152 SmallVector<Metadata *, 8> Ops;
4153 Ops.reserve(T->getNumOperands());
4154 for (Metadata *MD : T->operands())
4155 Ops.push_back(upgradeLoopArgument(MD));
4157 return MDTuple::get(T->getContext(), Ops);
4160 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4161 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4163 // If X86, and the datalayout matches the expected format, add pointer size
4164 // address spaces to the datalayout.
4165 if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
4168 SmallVector<StringRef, 4> Groups;
4169 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4170 if (!R.match(DL, &Groups))
4173 SmallString<1024> Buf;
4174 std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
4178 void llvm::UpgradeFramePointerAttributes(AttrBuilder &B) {
4179 StringRef FramePointer;
4180 if (B.contains("no-frame-pointer-elim")) {
4181 // The value can be "true" or "false".
4182 for (const auto &I : B.td_attrs())
4183 if (I.first == "no-frame-pointer-elim")
4184 FramePointer = I.second == "true" ? "all" : "none";
4185 B.removeAttribute("no-frame-pointer-elim");
4187 if (B.contains("no-frame-pointer-elim-non-leaf")) {
4188 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4189 if (FramePointer != "all")
4190 FramePointer = "non-leaf";
4191 B.removeAttribute("no-frame-pointer-elim-non-leaf");
4194 if (!FramePointer.empty())
4195 B.addAttribute("frame-pointer", FramePointer);