1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
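
// Illustrative sketch of the lowering this helper performs: a call such as
// __sync_fetch_and_add(&i, 1) on an 'int' is emitted as roughly
//   %old = atomicrmw add i32* %i.addr, i32 1 seq_cst
// and %old, converted back to the source type, is the value returned to the
// caller, i.e. the contents of the memory before the operation.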
120
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
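
// Illustrative sketch: __sync_add_and_fetch(&i, 1) is emitted as roughly
//   %old = atomicrmw add i32* %i.addr, i32 1 seq_cst
//   %new = add i32 %old, 1
// and %new is returned. The Invert flag serves the nand-style builtins, where
// the recomputed value is additionally xor'ed with -1.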
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
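
// Illustrative sketch: __sync_val_compare_and_swap(&i, old, new) is emitted
// as roughly
//   %pair = cmpxchg i32* %i.addr, i32 %old, i32 %new seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0
// whereas __sync_bool_compare_and_swap extracts the i1 success flag (field 1)
// and zero-extends it to the result type instead.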
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating value in order to extract the higher-order
304     // double, which we will be using to extract the sign from.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
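
// Illustrative sketch: with IntrinsicID = llvm::Intrinsic::sadd_with_overflow
// and two i32 operands, this emits roughly
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// returning field 0 (the sum) and placing field 1 (the overflow bit) in Carry.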
341
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
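
// Note: the !range metadata attached above tells the optimizer that the call
// only produces values in the half-open interval [low, high); for example, a
// builtin known to return a workitem index below 1024 would use [0, 1024).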
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
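
// Worked example: for {unsigned, 32 bits} and {signed, 16 bits} the result is
// signed, and the unsigned member forces MinWidth = 32 + 1 = 33, so the
// encompassing type is a signed 33-bit integer, wide enough to represent every
// value of both inputs.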
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
409 /// __builtin_object_size(p, @p To) is correct.
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - An llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474
475   // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477     // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
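
// Illustrative sketch: when no pass_object_size parameter applies, a call such
// as __builtin_object_size(p, 2) is emitted as roughly
//   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true, i1 true)
// where the first i1 requests the minimum (Type & 2) and the second makes a
// null pointer report an unknown size, for GCC compatibility.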
481
482 // Many MSVC builtins are available on both x64 and ARM; to avoid repeating code, we
483 // handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485   _BitScanForward,
486   _BitScanReverse,
487   _InterlockedAnd,
488   _InterlockedDecrement,
489   _InterlockedExchange,
490   _InterlockedExchangeAdd,
491   _InterlockedExchangeSub,
492   _InterlockedIncrement,
493   _InterlockedOr,
494   _InterlockedXor,
495   _interlockedbittestandset,
496   __fastfail,
497 };
498
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
578
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
597
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676
677     llvm::Type *Type = Int8PtrTy;
678
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
881   case Builtin::BI_rotr8:
882   case Builtin::BI_rotr16:
883   case Builtin::BI_rotr:
884   case Builtin::BI_lrotr:
885   case Builtin::BI_rotr64: {
886     Value *Val = EmitScalarExpr(E->getArg(0));
887     Value *Shift = EmitScalarExpr(E->getArg(1));
888
889     llvm::Type *ArgType = Val->getType();
890     Shift = Builder.CreateIntCast(Shift, ArgType, false);
891     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894
895     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896     Shift = Builder.CreateAnd(Shift, Mask);
897     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898
899     Value *RightShifted = Builder.CreateLShr(Val, Shift);
900     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902
903     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905     return RValue::get(Result);
906   }
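
  // Note on the rotate-right lowering above: the shift amount is masked to the
  // bit width and the zero-shift case is handled with a select, because a left
  // shift by the full width (ArgWidth - 0) would be undefined in LLVM IR. For
  // example, _rotr(0x12345678, 8) yields 0x78123456.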
907   case Builtin::BI_rotl8:
908   case Builtin::BI_rotl16:
909   case Builtin::BI_rotl:
910   case Builtin::BI_lrotl:
911   case Builtin::BI_rotl64: {
912     Value *Val = EmitScalarExpr(E->getArg(0));
913     Value *Shift = EmitScalarExpr(E->getArg(1));
914
915     llvm::Type *ArgType = Val->getType();
916     Shift = Builder.CreateIntCast(Shift, ArgType, false);
917     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920
921     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922     Shift = Builder.CreateAnd(Shift, Mask);
923     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924
925     Value *LeftShifted = Builder.CreateShl(Val, Shift);
926     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928
929     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931     return RValue::get(Result);
932   }
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should be of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
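
  // Note on the prefetch lowering above: the defaults give read access and
  // high locality, so __builtin_prefetch(p) is emitted as roughly
  //   call void @llvm.prefetch(i8* %p, i32 0 /*rw*/, i32 3 /*locality*/,
  //                            i32 1 /*data cache*/)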
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034
1035     return RValue::get(nullptr);
1036   }
1037
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
1109
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
1229
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251
1252   case Builtin::BI__builtin_char_memchr:
1253     BuiltinID = Builtin::BI__builtin_memchr;
1254     break;
1255
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
1270
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
1333   case Builtin::BI__builtin_dwarf_cfa: {
1334     // The offset in bytes from the first argument to the CFA.
1335     //
1336     // Why on earth is this in the frontend?  Is there any reason at
1337     // all that the backend can't reasonably determine this while
1338     // lowering llvm.eh.dwarf.cfa()?
1339     //
1340     // TODO: If there's a satisfactory reason, add a target hook for
1341     // this instead of hard-coding 0, which is correct for most targets.
1342     int32_t Offset = 0;
1343
1344     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345     return RValue::get(Builder.CreateCall(F,
1346                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1347   }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1422
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430
1431     // Otherwise, ask the codegen data what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
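  // Illustrative note for the __builtin_extend_pointer case above (assumption,
  // not in the original source): on a 32-bit target whose hooks report
  // extendPointerWithSExt(), a pointer %p is widened as
  //   %1 = ptrtoint i8* %p to i32
  //   %2 = sext i32 %1 to i64
  // whereas the default path uses zext in place of sext.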
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
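  // Informal layout sketch for the setjmp buffer used above, derived from the
  // stores just emitted: treating the buffer as a void* array,
  //   buf[0] = frame address (llvm.frameaddress)
  //   buf[2] = stack pointer (llvm.stacksave)
  // before the buffer is handed to llvm.eh.sjlj.setjmp; slot 1 is presumably
  // filled with the resume address by the backend lowering (assumption).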
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1528
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
1627
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
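  // Illustrative lowering (not from the original source): a call to
  // __sync_synchronize() therefore emits a single instruction,
  //   fence seq_cst
  // and produces no result value.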
1639
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
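  // Sketch of the emitted call (illustrative; the exact types are an
  // assumption about a 64-bit target): for __c11_atomic_is_lock_free(8) the
  // lowering above produces something like
  //   call zeroext i1 @__atomic_is_lock_free(i64 8, i8* null)
  // with a real object pointer passed instead of null for the __atomic_ form.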
1666
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the argument type to determine whether this is a volatile
1669     // operation. The parameter type is always volatile.
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1698
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
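  // Illustrative result for a typical constant-order call (assumption): for
  //   __atomic_test_and_set(p, __ATOMIC_SEQ_CST)
  // the constant-order path above emits
  //   %old = atomicrmw xchg i8* %p, i8 1 seq_cst
  // and returns %old != 0; a non-constant order instead goes through the
  // switch over the five ordering blocks.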
1751
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
1808
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SynchronizationScope Scope;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       Scope = llvm::SingleThread;
1817     else
1818       Scope = llvm::CrossThread;
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1838                             Scope);
1839         break;
1840       }
1841       return RValue::get(nullptr);
1842     }
1843
1844     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1845     AcquireBB = createBasicBlock("acquire", CurFn);
1846     ReleaseBB = createBasicBlock("release", CurFn);
1847     AcqRelBB = createBasicBlock("acqrel", CurFn);
1848     SeqCstBB = createBasicBlock("seqcst", CurFn);
1849     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1850
1851     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1852     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1853
1854     Builder.SetInsertPoint(AcquireBB);
1855     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1856     Builder.CreateBr(ContBB);
1857     SI->addCase(Builder.getInt32(1), AcquireBB);
1858     SI->addCase(Builder.getInt32(2), AcquireBB);
1859
1860     Builder.SetInsertPoint(ReleaseBB);
1861     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1862     Builder.CreateBr(ContBB);
1863     SI->addCase(Builder.getInt32(3), ReleaseBB);
1864
1865     Builder.SetInsertPoint(AcqRelBB);
1866     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1867     Builder.CreateBr(ContBB);
1868     SI->addCase(Builder.getInt32(4), AcqRelBB);
1869
1870     Builder.SetInsertPoint(SeqCstBB);
1871     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1872     Builder.CreateBr(ContBB);
1873     SI->addCase(Builder.getInt32(5), SeqCstBB);
1874
1875     Builder.SetInsertPoint(ContBB);
1876     return RValue::get(nullptr);
1877   }
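  // Illustrative examples (not part of the original source):
  //   __atomic_thread_fence(__ATOMIC_ACQUIRE)      ->  fence acquire
  //   __c11_atomic_signal_fence(__ATOMIC_SEQ_CST)  ->  fence singlethread seq_cst
  // while a relaxed or invalid constant order emits no fence at all.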
1878
1879     // Library functions with special handling.
1880   case Builtin::BIsqrt:
1881   case Builtin::BIsqrtf:
1882   case Builtin::BIsqrtl: {
1883     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1884     // in finite- or unsafe-math mode (the intrinsic has different semantics
1885     // for handling negative numbers compared to the library function, so
1886     // -fmath-errno=0 is not enough).
1887     if (!FD->hasAttr<ConstAttr>())
1888       break;
1889     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1890           CGM.getCodeGenOpts().NoNaNsFPMath))
1891       break;
1892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1893     llvm::Type *ArgType = Arg0->getType();
1894     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1895     return RValue::get(Builder.CreateCall(F, Arg0));
1896   }
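  // Example of the transformation above (illustrative): when compiling with
  // unsafe or no-NaNs FP math and sqrt is declared const, a call sqrt(x)
  // becomes roughly
  //   %r = call double @llvm.sqrt.f64(double %x)
  // otherwise the breaks above leave it as an ordinary library call.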
1897
1898   case Builtin::BI__builtin_pow:
1899   case Builtin::BI__builtin_powf:
1900   case Builtin::BI__builtin_powl:
1901   case Builtin::BIpow:
1902   case Builtin::BIpowf:
1903   case Builtin::BIpowl: {
1904     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1905     if (!FD->hasAttr<ConstAttr>())
1906       break;
1907     Value *Base = EmitScalarExpr(E->getArg(0));
1908     Value *Exponent = EmitScalarExpr(E->getArg(1));
1909     llvm::Type *ArgType = Base->getType();
1910     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1911     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1912   }
1913
1914   case Builtin::BIfma:
1915   case Builtin::BIfmaf:
1916   case Builtin::BIfmal:
1917   case Builtin::BI__builtin_fma:
1918   case Builtin::BI__builtin_fmaf:
1919   case Builtin::BI__builtin_fmal: {
1920     // Rewrite fma to intrinsic.
1921     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1922     llvm::Type *ArgType = FirstArg->getType();
1923     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1924     return RValue::get(
1925         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1926                                EmitScalarExpr(E->getArg(2))}));
1927   }
1928
1929   case Builtin::BI__builtin_signbit:
1930   case Builtin::BI__builtin_signbitf:
1931   case Builtin::BI__builtin_signbitl: {
1932     return RValue::get(
1933         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1934                            ConvertType(E->getType())));
1935   }
1936   case Builtin::BI__builtin_annotation: {
1937     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1938     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1939                                       AnnVal->getType());
1940
1941     // Get the annotation string, looking through casts. Sema requires this to be a
1942     // non-wide string literal, potentially cast, so the cast<> is safe.
1943     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1944     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1945     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1946   }
1947   case Builtin::BI__builtin_addcb:
1948   case Builtin::BI__builtin_addcs:
1949   case Builtin::BI__builtin_addc:
1950   case Builtin::BI__builtin_addcl:
1951   case Builtin::BI__builtin_addcll:
1952   case Builtin::BI__builtin_subcb:
1953   case Builtin::BI__builtin_subcs:
1954   case Builtin::BI__builtin_subc:
1955   case Builtin::BI__builtin_subcl:
1956   case Builtin::BI__builtin_subcll: {
1957
1958     // We translate all of these builtins from expressions of the form:
1959     //   int x = ..., y = ..., carryin = ..., carryout, result;
1960     //   result = __builtin_addc(x, y, carryin, &carryout);
1961     //
1962     // to LLVM IR of the form:
1963     //
1964     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1965     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1966     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1967     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1968     //                                                       i32 %carryin)
1969     //   %result = extractvalue {i32, i1} %tmp2, 0
1970     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1971     //   %tmp3 = or i1 %carry1, %carry2
1972     //   %tmp4 = zext i1 %tmp3 to i32
1973     //   store i32 %tmp4, i32* %carryout
1974
1975     // Scalarize our inputs.
1976     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1977     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1978     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1979     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1980
1981     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1982     llvm::Intrinsic::ID IntrinsicId;
1983     switch (BuiltinID) {
1984     default: llvm_unreachable("Unknown multiprecision builtin id.");
1985     case Builtin::BI__builtin_addcb:
1986     case Builtin::BI__builtin_addcs:
1987     case Builtin::BI__builtin_addc:
1988     case Builtin::BI__builtin_addcl:
1989     case Builtin::BI__builtin_addcll:
1990       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_subcb:
1993     case Builtin::BI__builtin_subcs:
1994     case Builtin::BI__builtin_subc:
1995     case Builtin::BI__builtin_subcl:
1996     case Builtin::BI__builtin_subcll:
1997       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1998       break;
1999     }
2000
2001     // Construct our resulting LLVM IR expression.
2002     llvm::Value *Carry1;
2003     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2004                                               X, Y, Carry1);
2005     llvm::Value *Carry2;
2006     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2007                                               Sum1, Carryin, Carry2);
2008     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2009                                                X->getType());
2010     Builder.CreateStore(CarryOut, CarryOutPtr);
2011     return RValue::get(Sum2);
2012   }
2013
2014   case Builtin::BI__builtin_add_overflow:
2015   case Builtin::BI__builtin_sub_overflow:
2016   case Builtin::BI__builtin_mul_overflow: {
2017     const clang::Expr *LeftArg = E->getArg(0);
2018     const clang::Expr *RightArg = E->getArg(1);
2019     const clang::Expr *ResultArg = E->getArg(2);
2020
2021     clang::QualType ResultQTy =
2022         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2023
2024     WidthAndSignedness LeftInfo =
2025         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2026     WidthAndSignedness RightInfo =
2027         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2028     WidthAndSignedness ResultInfo =
2029         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2030     WidthAndSignedness EncompassingInfo =
2031         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2032
2033     llvm::Type *EncompassingLLVMTy =
2034         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2035
2036     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2037
2038     llvm::Intrinsic::ID IntrinsicId;
2039     switch (BuiltinID) {
2040     default:
2041       llvm_unreachable("Unknown overflow builtin id.");
2042     case Builtin::BI__builtin_add_overflow:
2043       IntrinsicId = EncompassingInfo.Signed
2044                         ? llvm::Intrinsic::sadd_with_overflow
2045                         : llvm::Intrinsic::uadd_with_overflow;
2046       break;
2047     case Builtin::BI__builtin_sub_overflow:
2048       IntrinsicId = EncompassingInfo.Signed
2049                         ? llvm::Intrinsic::ssub_with_overflow
2050                         : llvm::Intrinsic::usub_with_overflow;
2051       break;
2052     case Builtin::BI__builtin_mul_overflow:
2053       IntrinsicId = EncompassingInfo.Signed
2054                         ? llvm::Intrinsic::smul_with_overflow
2055                         : llvm::Intrinsic::umul_with_overflow;
2056       break;
2057     }
2058
2059     llvm::Value *Left = EmitScalarExpr(LeftArg);
2060     llvm::Value *Right = EmitScalarExpr(RightArg);
2061     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2062
2063     // Extend each operand to the encompassing type.
2064     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2065     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2066
2067     // Perform the operation on the extended values.
2068     llvm::Value *Overflow, *Result;
2069     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2070
2071     if (EncompassingInfo.Width > ResultInfo.Width) {
2072       // The encompassing type is wider than the result type, so we need to
2073       // truncate it.
2074       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2075
2076       // To see if the truncation caused an overflow, we will extend
2077       // the result and then compare it to the original result.
2078       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2079           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2080       llvm::Value *TruncationOverflow =
2081           Builder.CreateICmpNE(Result, ResultTruncExt);
2082
2083       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2084       Result = ResultTrunc;
2085     }
2086
2087     // Finally, store the result using the pointer.
2088     bool isVolatile =
2089       ResultArg->getType()->getPointeeType().isVolatileQualified();
2090     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2091
2092     return RValue::get(Overflow);
2093   }
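  // Worked example for the generic overflow builtin above (illustrative,
  // mixed-type case): for
  //   int a; unsigned b; short r;
  //   bool ovf = __builtin_add_overflow(a, b, &r);
  // the encompassing type must represent int, unsigned, and short, so a wider
  // signed integer is chosen (the exact width is an assumption here), the
  // signed add-with-overflow intrinsic is used, and a second overflow check is
  // OR'ed in when the wide result is truncated back to short.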
2094
2095   case Builtin::BI__builtin_uadd_overflow:
2096   case Builtin::BI__builtin_uaddl_overflow:
2097   case Builtin::BI__builtin_uaddll_overflow:
2098   case Builtin::BI__builtin_usub_overflow:
2099   case Builtin::BI__builtin_usubl_overflow:
2100   case Builtin::BI__builtin_usubll_overflow:
2101   case Builtin::BI__builtin_umul_overflow:
2102   case Builtin::BI__builtin_umull_overflow:
2103   case Builtin::BI__builtin_umulll_overflow:
2104   case Builtin::BI__builtin_sadd_overflow:
2105   case Builtin::BI__builtin_saddl_overflow:
2106   case Builtin::BI__builtin_saddll_overflow:
2107   case Builtin::BI__builtin_ssub_overflow:
2108   case Builtin::BI__builtin_ssubl_overflow:
2109   case Builtin::BI__builtin_ssubll_overflow:
2110   case Builtin::BI__builtin_smul_overflow:
2111   case Builtin::BI__builtin_smull_overflow:
2112   case Builtin::BI__builtin_smulll_overflow: {
2113
2114     // We translate all of these builtins directly to the relevant LLVM IR node.
2115
2116     // Scalarize our inputs.
2117     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2118     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2119     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2120
2121     // Decide which of the overflow intrinsics we are lowering to:
2122     llvm::Intrinsic::ID IntrinsicId;
2123     switch (BuiltinID) {
2124     default: llvm_unreachable("Unknown overflow builtin id.");
2125     case Builtin::BI__builtin_uadd_overflow:
2126     case Builtin::BI__builtin_uaddl_overflow:
2127     case Builtin::BI__builtin_uaddll_overflow:
2128       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2129       break;
2130     case Builtin::BI__builtin_usub_overflow:
2131     case Builtin::BI__builtin_usubl_overflow:
2132     case Builtin::BI__builtin_usubll_overflow:
2133       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2134       break;
2135     case Builtin::BI__builtin_umul_overflow:
2136     case Builtin::BI__builtin_umull_overflow:
2137     case Builtin::BI__builtin_umulll_overflow:
2138       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2139       break;
2140     case Builtin::BI__builtin_sadd_overflow:
2141     case Builtin::BI__builtin_saddl_overflow:
2142     case Builtin::BI__builtin_saddll_overflow:
2143       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2144       break;
2145     case Builtin::BI__builtin_ssub_overflow:
2146     case Builtin::BI__builtin_ssubl_overflow:
2147     case Builtin::BI__builtin_ssubll_overflow:
2148       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_smul_overflow:
2151     case Builtin::BI__builtin_smull_overflow:
2152     case Builtin::BI__builtin_smulll_overflow:
2153       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2154       break;
2155     }
2156
2157
2158     llvm::Value *Carry;
2159     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2160     Builder.CreateStore(Sum, SumOutPtr);
2161
2162     return RValue::get(Carry);
2163   }
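  // Illustrative lowering (not from the original source):
  //   unsigned r; bool ovf = __builtin_uadd_overflow(a, b, &r);
  // becomes roughly
  //   %pair = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  // with the i32 element stored to r and the i1 element returned as ovf.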
2164   case Builtin::BI__builtin_addressof:
2165     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2166   case Builtin::BI__builtin_operator_new:
2167     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2168                                     E->getArg(0), false);
2169   case Builtin::BI__builtin_operator_delete:
2170     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2171                                     E->getArg(0), true);
2172   case Builtin::BI__noop:
2173     // __noop always evaluates to an integer literal zero.
2174     return RValue::get(ConstantInt::get(IntTy, 0));
2175   case Builtin::BI__builtin_call_with_static_chain: {
2176     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2177     const Expr *Chain = E->getArg(1);
2178     return EmitCall(Call->getCallee()->getType(),
2179                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2180                     EmitScalarExpr(Chain));
2181   }
2182   case Builtin::BI_InterlockedExchange8:
2183   case Builtin::BI_InterlockedExchange16:
2184   case Builtin::BI_InterlockedExchange:
2185   case Builtin::BI_InterlockedExchangePointer:
2186     return RValue::get(
2187         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2188   case Builtin::BI_InterlockedCompareExchangePointer: {
2189     llvm::Type *RTy;
2190     llvm::IntegerType *IntType =
2191       IntegerType::get(getLLVMContext(),
2192                        getContext().getTypeSize(E->getType()));
2193     llvm::Type *IntPtrType = IntType->getPointerTo();
2194
2195     llvm::Value *Destination =
2196       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2197
2198     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2199     RTy = Exchange->getType();
2200     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2201
2202     llvm::Value *Comparand =
2203       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2204
2205     auto Result =
2206         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2207                                     AtomicOrdering::SequentiallyConsistent,
2208                                     AtomicOrdering::SequentiallyConsistent);
2209     Result->setVolatile(true);
2210
2211     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2212                                                                          0),
2213                                               RTy));
2214   }
2215   case Builtin::BI_InterlockedCompareExchange8:
2216   case Builtin::BI_InterlockedCompareExchange16:
2217   case Builtin::BI_InterlockedCompareExchange:
2218   case Builtin::BI_InterlockedCompareExchange64: {
2219     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2220         EmitScalarExpr(E->getArg(0)),
2221         EmitScalarExpr(E->getArg(2)),
2222         EmitScalarExpr(E->getArg(1)),
2223         AtomicOrdering::SequentiallyConsistent,
2224         AtomicOrdering::SequentiallyConsistent);
2225     CXI->setVolatile(true);
2226     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2227   }
2228   case Builtin::BI_InterlockedIncrement16:
2229   case Builtin::BI_InterlockedIncrement:
2230     return RValue::get(
2231         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2232   case Builtin::BI_InterlockedDecrement16:
2233   case Builtin::BI_InterlockedDecrement:
2234     return RValue::get(
2235         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2236   case Builtin::BI_InterlockedAnd8:
2237   case Builtin::BI_InterlockedAnd16:
2238   case Builtin::BI_InterlockedAnd:
2239     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2240   case Builtin::BI_InterlockedExchangeAdd8:
2241   case Builtin::BI_InterlockedExchangeAdd16:
2242   case Builtin::BI_InterlockedExchangeAdd:
2243     return RValue::get(
2244         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2245   case Builtin::BI_InterlockedExchangeSub8:
2246   case Builtin::BI_InterlockedExchangeSub16:
2247   case Builtin::BI_InterlockedExchangeSub:
2248     return RValue::get(
2249         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2250   case Builtin::BI_InterlockedOr8:
2251   case Builtin::BI_InterlockedOr16:
2252   case Builtin::BI_InterlockedOr:
2253     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2254   case Builtin::BI_InterlockedXor8:
2255   case Builtin::BI_InterlockedXor16:
2256   case Builtin::BI_InterlockedXor:
2257     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2258   case Builtin::BI_interlockedbittestandset:
2259     return RValue::get(
2260         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2261
2262   case Builtin::BI__exception_code:
2263   case Builtin::BI_exception_code:
2264     return RValue::get(EmitSEHExceptionCode());
2265   case Builtin::BI__exception_info:
2266   case Builtin::BI_exception_info:
2267     return RValue::get(EmitSEHExceptionInfo());
2268   case Builtin::BI__abnormal_termination:
2269   case Builtin::BI_abnormal_termination:
2270     return RValue::get(EmitSEHAbnormalTermination());
2271   case Builtin::BI_setjmpex: {
2272     if (getTarget().getTriple().isOSMSVCRT()) {
2273       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2274       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2275           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2276           llvm::Attribute::ReturnsTwice);
2277       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2278           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2279           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2280       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2281           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2282       llvm::Value *FrameAddr =
2283           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2284                              ConstantInt::get(Int32Ty, 0));
2285       llvm::Value *Args[] = {Buf, FrameAddr};
2286       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2287       CS.setAttributes(ReturnsTwiceAttr);
2288       return RValue::get(CS.getInstruction());
2289     }
2290     break;
2291   }
2292   case Builtin::BI_setjmp: {
2293     if (getTarget().getTriple().isOSMSVCRT()) {
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2298           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2299       llvm::CallSite CS;
2300       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2301         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2302         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2303             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2304             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2305         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2306         llvm::Value *Args[] = {Buf, Count};
2307         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2308       } else {
2309         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2310         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2311             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2312             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2313         llvm::Value *FrameAddr =
2314             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2315                                ConstantInt::get(Int32Ty, 0));
2316         llvm::Value *Args[] = {Buf, FrameAddr};
2317         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2318       }
2319       CS.setAttributes(ReturnsTwiceAttr);
2320       return RValue::get(CS.getInstruction());
2321     }
2322     break;
2323   }
2324
2325   case Builtin::BI__GetExceptionInfo: {
2326     if (llvm::GlobalVariable *GV =
2327             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2328       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2329     break;
2330   }
2331
2332   case Builtin::BI__fastfail:
2333     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2334
2335   case Builtin::BI__builtin_coro_size: {
2336     auto &Context = getContext();
2337     auto SizeTy = Context.getSizeType();
2338     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2339     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2340     return RValue::get(Builder.CreateCall(F));
2341   }
2342
2343   case Builtin::BI__builtin_coro_id:
2344     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2345   case Builtin::BI__builtin_coro_promise:
2346     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2347   case Builtin::BI__builtin_coro_resume:
2348     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2349   case Builtin::BI__builtin_coro_frame:
2350     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2351   case Builtin::BI__builtin_coro_free:
2352     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2353   case Builtin::BI__builtin_coro_destroy:
2354     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2355   case Builtin::BI__builtin_coro_done:
2356     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2357   case Builtin::BI__builtin_coro_alloc:
2358     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2359   case Builtin::BI__builtin_coro_begin:
2360     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2361   case Builtin::BI__builtin_coro_end:
2362     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2363   case Builtin::BI__builtin_coro_suspend:
2364     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2365   case Builtin::BI__builtin_coro_param:
2366     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2367
2368   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2369   case Builtin::BIread_pipe:
2370   case Builtin::BIwrite_pipe: {
2371     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2372           *Arg1 = EmitScalarExpr(E->getArg(1));
2373     CGOpenCLRuntime OpenCLRT(CGM);
2374     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2375     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2376
2377     // Type of the generic packet parameter.
2378     unsigned GenericAS =
2379         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2380     llvm::Type *I8PTy = llvm::PointerType::get(
2381         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2382
2383     // Determine which overloaded version we should generate the call for.
2384     if (2U == E->getNumArgs()) {
2385       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2386                                                              : "__write_pipe_2";
2387       // Create a generic function type so the call works with any builtin or
2388       // user-defined type.
2389       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2390       llvm::FunctionType *FTy = llvm::FunctionType::get(
2391           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2393       return RValue::get(
2394           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                              {Arg0, BCast, PacketSize, PacketAlign}));
2396     } else {
2397       assert(4 == E->getNumArgs() &&
2398              "Illegal number of parameters to pipe function");
2399       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2400                                                              : "__write_pipe_4";
2401
2402       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2403                               Int32Ty, Int32Ty};
2404       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2405             *Arg3 = EmitScalarExpr(E->getArg(3));
2406       llvm::FunctionType *FTy = llvm::FunctionType::get(
2407           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2408       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2409       // We know the third argument is an integer type, but we may need to cast
2410       // it to i32.
2411       if (Arg2->getType() != Int32Ty)
2412         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2413       return RValue::get(Builder.CreateCall(
2414           CGM.CreateRuntimeFunction(FTy, Name),
2415           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2416     }
2417   }
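  // Sketch of the two-argument form above (illustrative; the pipe type
  // spelling is an assumption): for read_pipe(p, &val) the call becomes
  // roughly
  //   %r = call i32 @__read_pipe_2(%opencl.pipe_t* %p,
  //                                i8 addrspace(4)* %val.cast,
  //                                i32 <size>, i32 <align>)
  // where <size> and <align> are the packet size and alignment queried from
  // the pipe element type.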
2418   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2419   // functions
2420   case Builtin::BIreserve_read_pipe:
2421   case Builtin::BIreserve_write_pipe:
2422   case Builtin::BIwork_group_reserve_read_pipe:
2423   case Builtin::BIwork_group_reserve_write_pipe:
2424   case Builtin::BIsub_group_reserve_read_pipe:
2425   case Builtin::BIsub_group_reserve_write_pipe: {
2426     // Composing the mangled name for the function.
2427     const char *Name;
2428     if (BuiltinID == Builtin::BIreserve_read_pipe)
2429       Name = "__reserve_read_pipe";
2430     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2431       Name = "__reserve_write_pipe";
2432     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2433       Name = "__work_group_reserve_read_pipe";
2434     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2435       Name = "__work_group_reserve_write_pipe";
2436     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2437       Name = "__sub_group_reserve_read_pipe";
2438     else
2439       Name = "__sub_group_reserve_write_pipe";
2440
2441     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2442           *Arg1 = EmitScalarExpr(E->getArg(1));
2443     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2444     CGOpenCLRuntime OpenCLRT(CGM);
2445     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2446     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2447
2448     // Building the generic function prototype.
2449     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2450     llvm::FunctionType *FTy = llvm::FunctionType::get(
2451         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2452     // We know the second argument is an integer type, but we may need to cast
2453     // it to i32.
2454     if (Arg1->getType() != Int32Ty)
2455       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2456     return RValue::get(
2457         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2458                            {Arg0, Arg1, PacketSize, PacketAlign}));
2459   }
2460   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2461   // functions
2462   case Builtin::BIcommit_read_pipe:
2463   case Builtin::BIcommit_write_pipe:
2464   case Builtin::BIwork_group_commit_read_pipe:
2465   case Builtin::BIwork_group_commit_write_pipe:
2466   case Builtin::BIsub_group_commit_read_pipe:
2467   case Builtin::BIsub_group_commit_write_pipe: {
2468     const char *Name;
2469     if (BuiltinID == Builtin::BIcommit_read_pipe)
2470       Name = "__commit_read_pipe";
2471     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2472       Name = "__commit_write_pipe";
2473     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2474       Name = "__work_group_commit_read_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2476       Name = "__work_group_commit_write_pipe";
2477     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2478       Name = "__sub_group_commit_read_pipe";
2479     else
2480       Name = "__sub_group_commit_write_pipe";
2481
2482     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2483           *Arg1 = EmitScalarExpr(E->getArg(1));
2484     CGOpenCLRuntime OpenCLRT(CGM);
2485     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2486     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2487
2488     // Building the generic function prototype.
2489     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2490     llvm::FunctionType *FTy =
2491         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2492                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2493
2494     return RValue::get(
2495         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2496                            {Arg0, Arg1, PacketSize, PacketAlign}));
2497   }
2498   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2499   case Builtin::BIget_pipe_num_packets:
2500   case Builtin::BIget_pipe_max_packets: {
2501     const char *Name;
2502     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2503       Name = "__get_pipe_num_packets";
2504     else
2505       Name = "__get_pipe_max_packets";
2506
2507     // Building the generic function prototype.
2508     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2509     CGOpenCLRuntime OpenCLRT(CGM);
2510     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2511     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2512     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2513     llvm::FunctionType *FTy = llvm::FunctionType::get(
2514         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2515
2516     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2517                                           {Arg0, PacketSize, PacketAlign}));
2518   }
2519
2520   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2521   case Builtin::BIto_global:
2522   case Builtin::BIto_local:
2523   case Builtin::BIto_private: {
2524     auto Arg0 = EmitScalarExpr(E->getArg(0));
2525     auto NewArgT = llvm::PointerType::get(Int8Ty,
2526       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2527     auto NewRetT = llvm::PointerType::get(Int8Ty,
2528       CGM.getContext().getTargetAddressSpace(
2529         E->getType()->getPointeeType().getAddressSpace()));
2530     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2531     llvm::Value *NewArg;
2532     if (Arg0->getType()->getPointerAddressSpace() !=
2533         NewArgT->getPointerAddressSpace())
2534       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2535     else
2536       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2537     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2538     auto NewCall =
2539         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2540     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2541       ConvertType(E->getType())));
2542   }
2543
2544   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2545   // It contains four different overload formats specified in Table 6.13.17.1.
2546   case Builtin::BIenqueue_kernel: {
2547     StringRef Name; // Generated function call name
2548     unsigned NumArgs = E->getNumArgs();
2549
2550     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2551     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2552         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2553
2554     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2555     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2556     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2557     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2558     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2559
2560     if (NumArgs == 4) {
2561       // The most basic form of the call with parameters:
2562       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2563       Name = "__enqueue_kernel_basic";
2564       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2565       llvm::FunctionType *FTy = llvm::FunctionType::get(
2566           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2567
2568       llvm::Value *Block = Builder.CreatePointerCast(
2569           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2570
2571       AttrBuilder B;
2572       B.addAttribute(Attribute::ByVal);
2573       llvm::AttributeList ByValAttrSet =
2574           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2575
2576       auto RTCall =
2577           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2578                              {Queue, Flags, Range, Block});
2579       RTCall->setAttributes(ByValAttrSet);
2580       return RValue::get(RTCall);
2581     }
2582     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2583
2584     // Could have events and/or vaargs.
2585     if (E->getArg(3)->getType()->isBlockPointerType()) {
2586       // No events passed, but has variadic arguments.
2587       Name = "__enqueue_kernel_vaargs";
2588       llvm::Value *Block = Builder.CreatePointerCast(
2589           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2590       // Create a vector of the arguments, as well as a constant value to
2591       // express to the runtime the number of variadic arguments.
2592       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2593                                          ConstantInt::get(IntTy, NumArgs - 4)};
2594       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2595                                           GenericVoidPtrTy, IntTy};
2596
2597       // Each of the following arguments specifies the size of the corresponding
2598       // argument passed to the enqueued block.
2599       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2600         Args.push_back(
2601             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2602
2603       llvm::FunctionType *FTy = llvm::FunctionType::get(
2604           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2605       return RValue::get(
2606           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2607                              llvm::ArrayRef<llvm::Value *>(Args)));
2608     }
2609     // Any call from this point on has event arguments.
2610     if (NumArgs >= 7) {
2611       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2612       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2613           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2614
2615       llvm::Value *NumEvents =
2616           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2617       llvm::Value *EventList =
2618           E->getArg(4)->getType()->isArrayType()
2619               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2620               : EmitScalarExpr(E->getArg(4));
2621       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2622       // Convert to generic address space.
2623       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2624       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2625       llvm::Value *Block = Builder.CreatePointerCast(
2626           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2627
2628       std::vector<llvm::Type *> ArgTys = {
2629           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2630           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2631
2632       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2633                                          EventList, ClkEvent, Block};
2634
2635       if (NumArgs == 7) {
2636         // Has events but no variadics.
2637         Name = "__enqueue_kernel_basic_events";
2638         llvm::FunctionType *FTy = llvm::FunctionType::get(
2639             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2640         return RValue::get(
2641             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2642                                llvm::ArrayRef<llvm::Value *>(Args)));
2643       }
2644       // Has event info and variadics
2645       // Pass the number of variadics to the runtime function too.
2646       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2647       ArgTys.push_back(Int32Ty);
2648       Name = "__enqueue_kernel_events_vaargs";
2649
2650       // Each of the following arguments specifies the size of the corresponding
2651       // argument passed to the enqueued block.
2652       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2653         Args.push_back(
2654             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2655
2656       llvm::FunctionType *FTy = llvm::FunctionType::get(
2657           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2658       return RValue::get(
2659           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2660                              llvm::ArrayRef<llvm::Value *>(Args)));
2661     }
2662   }
2663   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2664   // parameter.
2665   case Builtin::BIget_kernel_work_group_size: {
2666     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2667         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2668     Value *Arg = EmitScalarExpr(E->getArg(0));
2669     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2670     return RValue::get(Builder.CreateCall(
2671         CGM.CreateRuntimeFunction(
2672             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2673             "__get_kernel_work_group_size_impl"),
2674         Arg));
2675   }
2676   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2677     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2678         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2679     Value *Arg = EmitScalarExpr(E->getArg(0));
2680     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2681     return RValue::get(Builder.CreateCall(
2682         CGM.CreateRuntimeFunction(
2683             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2684             "__get_kernel_preferred_work_group_multiple_impl"),
2685         Arg));
2686   }
2687   case Builtin::BIprintf:
2688     if (getTarget().getTriple().isNVPTX())
2689       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2690     break;
2691   case Builtin::BI__builtin_canonicalize:
2692   case Builtin::BI__builtin_canonicalizef:
2693   case Builtin::BI__builtin_canonicalizel:
2694     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2695
2696   case Builtin::BI__builtin_thread_pointer: {
2697     if (!getContext().getTargetInfo().isTLSSupported())
2698       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2699     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2700     break;
2701   }
2702   case Builtin::BI__builtin_os_log_format: {
2703     assert(E->getNumArgs() >= 2 &&
2704            "__builtin_os_log_format takes at least 2 arguments");
2705     analyze_os_log::OSLogBufferLayout Layout;
2706     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2707     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2708     // Ignore argument 1, the format string. It is not currently used.
2709     CharUnits Offset;
2710     Builder.CreateStore(
2711         Builder.getInt8(Layout.getSummaryByte()),
2712         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2713     Builder.CreateStore(
2714         Builder.getInt8(Layout.getNumArgsByte()),
2715         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2716
2717     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2718     for (const auto &Item : Layout.Items) {
2719       Builder.CreateStore(
2720           Builder.getInt8(Item.getDescriptorByte()),
2721           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2722       Builder.CreateStore(
2723           Builder.getInt8(Item.getSizeByte()),
2724           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2725       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2726       if (const Expr *TheExpr = Item.getExpr()) {
2727         Addr = Builder.CreateElementBitCast(
2728             Addr, ConvertTypeForMem(TheExpr->getType()));
2729         // Check if this is a retainable type.
2730         if (TheExpr->getType()->isObjCRetainableType()) {
2731           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2732                  "Only scalar can be a ObjC retainable type");
2733           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2734           RValue RV = RValue::get(SV);
2735           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2736           EmitStoreThroughLValue(RV, LV);
2737           // Check if the object is constant; if not, save it in
2738           // RetainableOperands.
2739           if (!isa<Constant>(SV))
2740             RetainableOperands.push_back(SV);
2741         } else {
2742           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2743         }
2744       } else {
2745         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2746         Builder.CreateStore(
2747             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2748       }
2749       Offset += Item.size();
2750     }
2751
2752     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2753     // cleanup will cause the use to appear after the final log call, keeping
2754     // the object valid while it's held in the log buffer.  Note that if there's
2755     // a release cleanup on the object, it will already be active; since
2756     // cleanups are emitted in reverse order, the use will occur before the
2757     // object is released.
2758     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2759         CGM.getCodeGenOpts().OptimizationLevel != 0)
2760       for (llvm::Value *object : RetainableOperands)
2761         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2762
2763     return RValue::get(BufAddr.getPointer());
2764   }
2765
2766   case Builtin::BI__builtin_os_log_format_buffer_size: {
2767     analyze_os_log::OSLogBufferLayout Layout;
2768     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2769     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2770                                         Layout.size().getQuantity()));
2771   }
2772
2773   case Builtin::BI__xray_customevent: {
2774     if (!ShouldXRayInstrumentFunction())
2775       return RValue::getIgnored();
2776     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2777       if (XRayAttr->neverXRayInstrument())
2778         return RValue::getIgnored();
2779     }
2780     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2781     auto FTy = F->getFunctionType();
2782     auto Arg0 = E->getArg(0);
2783     auto Arg0Val = EmitScalarExpr(Arg0);
2784     auto Arg0Ty = Arg0->getType();
2785     auto PTy0 = FTy->getParamType(0);
2786     if (PTy0 != Arg0Val->getType()) {
2787       if (Arg0Ty->isArrayType())
2788         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2789       else
2790         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2791     }
2792     auto Arg1 = EmitScalarExpr(E->getArg(1));
2793     auto PTy1 = FTy->getParamType(1);
2794     if (PTy1 != Arg1->getType())
2795       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2796     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2797   }
2798   }
2799
2800   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2801   // the call using the normal call path, but using the unmangled
2802   // version of the function name.
2803   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2804     return emitLibraryCall(*this, FD, E,
2805                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2806
2807   // If this is a predefined lib function (e.g. malloc), emit the call
2808   // using exactly the normal call path.
2809   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2810     return emitLibraryCall(*this, FD, E,
2811                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2812
2813   // Check that a call to a target specific builtin has the correct target
2814   // features.
2815   // This is down here to avoid checking features for non-target-specific
2816   // builtins; however, if generic builtins start to require generic target
2817   // features then we can move this up to the beginning of the function.
2818   checkTargetFeatures(E, FD);
2819
2820   // See if we have a target specific intrinsic.
2821   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2822   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2823   StringRef Prefix =
2824       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2825   if (!Prefix.empty()) {
2826     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2827     // NOTE: we don't need to perform a compatibility flag check here since the
2828     // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
2829     // MS builtins via ALL_MS_LANGUAGES, so they are filtered earlier.
2830     if (IntrinsicID == Intrinsic::not_intrinsic)
2831       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2832   }
2833
2834   if (IntrinsicID != Intrinsic::not_intrinsic) {
2835     SmallVector<Value*, 16> Args;
2836
2837     // Find out if any arguments are required to be integer constant
2838     // expressions.
2839     unsigned ICEArguments = 0;
2840     ASTContext::GetBuiltinTypeError Error;
2841     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2842     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2843
2844     Function *F = CGM.getIntrinsic(IntrinsicID);
2845     llvm::FunctionType *FTy = F->getFunctionType();
2846
2847     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2848       Value *ArgValue;
2849       // If this is a normal argument, just emit it as a scalar.
2850       if ((ICEArguments & (1 << i)) == 0) {
2851         ArgValue = EmitScalarExpr(E->getArg(i));
2852       } else {
2853         // If this is required to be a constant, constant fold it so that we
2854         // know that the generated intrinsic gets a ConstantInt.
2855         llvm::APSInt Result;
2856         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2857         assert(IsConst && "Constant arg isn't actually constant?");
2858         (void)IsConst;
2859         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2860       }
2861
2862       // If the intrinsic arg type is different from the builtin arg type,
2863       // we need to do a bit cast.
2864       llvm::Type *PTy = FTy->getParamType(i);
2865       if (PTy != ArgValue->getType()) {
2866         assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
2867                "Must be able to losslessly bit cast to param");
2868         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2869       }
2870
2871       Args.push_back(ArgValue);
2872     }
2873
2874     Value *V = Builder.CreateCall(F, Args);
2875     QualType BuiltinRetType = E->getType();
2876
2877     llvm::Type *RetTy = VoidTy;
2878     if (!BuiltinRetType->isVoidType())
2879       RetTy = ConvertType(BuiltinRetType);
2880
2881     if (RetTy != V->getType()) {
2882       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2883              "Must be able to losslessly bit cast result type");
2884       V = Builder.CreateBitCast(V, RetTy);
2885     }
2886
2887     return RValue::get(V);
2888   }
2889
2890   // See if we have a target specific builtin that needs to be lowered.
2891   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2892     return RValue::get(V);
2893
2894   ErrorUnsupported(E, "builtin function");
2895
2896   // Unknown builtin, for now just dump it out and return undef.
2897   return GetUndefRValue(E->getType());
2898 }
2899
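// Dispatch a target-specific builtin to the per-architecture emitter selected
// by the given triple's architecture; returns nullptr when no dedicated
// emitter exists for that architecture.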
2900 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2901                                         unsigned BuiltinID, const CallExpr *E,
2902                                         llvm::Triple::ArchType Arch) {
2903   switch (Arch) {
2904   case llvm::Triple::arm:
2905   case llvm::Triple::armeb:
2906   case llvm::Triple::thumb:
2907   case llvm::Triple::thumbeb:
2908     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2909   case llvm::Triple::aarch64:
2910   case llvm::Triple::aarch64_be:
2911     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2912   case llvm::Triple::x86:
2913   case llvm::Triple::x86_64:
2914     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2915   case llvm::Triple::ppc:
2916   case llvm::Triple::ppc64:
2917   case llvm::Triple::ppc64le:
2918     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2919   case llvm::Triple::r600:
2920   case llvm::Triple::amdgcn:
2921     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2922   case llvm::Triple::systemz:
2923     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2924   case llvm::Triple::nvptx:
2925   case llvm::Triple::nvptx64:
2926     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2927   case llvm::Triple::wasm32:
2928   case llvm::Triple::wasm64:
2929     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2930   default:
2931     return nullptr;
2932   }
2933 }
2934
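// Builtins recognized only for the auxiliary target are dispatched using the
// aux target's architecture; everything else uses the primary target triple.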
2935 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2936                                               const CallExpr *E) {
2937   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2938     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2939     return EmitTargetArchBuiltinExpr(
2940         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2941         getContext().getAuxTargetInfo()->getTriple().getArch());
2942   }
2943
2944   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2945                                    getTarget().getTriple().getArch());
2946 }
2947
2948 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2949                                      NeonTypeFlags TypeFlags,
2950                                      bool V1Ty=false) {
2951   int IsQuad = TypeFlags.isQuad();
2952   switch (TypeFlags.getEltType()) {
2953   case NeonTypeFlags::Int8:
2954   case NeonTypeFlags::Poly8:
2955     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2956   case NeonTypeFlags::Int16:
2957   case NeonTypeFlags::Poly16:
2958   case NeonTypeFlags::Float16:
2959     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2960   case NeonTypeFlags::Int32:
2961     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2962   case NeonTypeFlags::Int64:
2963   case NeonTypeFlags::Poly64:
2964     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2965   case NeonTypeFlags::Poly128:
2966     // FIXME: i128 and f128 don't get full support in Clang and LLVM; a lot of
2967     // the i128 and f128 API is missing, so we use v16i8 to represent poly128
2968     // and get it pattern matched.
2969     return llvm::VectorType::get(CGF->Int8Ty, 16);
2970   case NeonTypeFlags::Float32:
2971     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2972   case NeonTypeFlags::Float64:
2973     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2974   }
2975   llvm_unreachable("Unknown vector element type!");
2976 }
2977
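// Map an integer NEON type (as described by the flags) to the floating-point
// vector type with the same lane count: v2f32/v4f32 for i32 lanes and
// v1f64/v2f64 for i64 lanes.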
2978 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2979                                           NeonTypeFlags IntTypeFlags) {
2980   int IsQuad = IntTypeFlags.isQuad();
2981   switch (IntTypeFlags.getEltType()) {
2982   case NeonTypeFlags::Int32:
2983     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2984   case NeonTypeFlags::Int64:
2985     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2986   default:
2987     llvm_unreachable("Type can't be converted to floating-point!");
2988   }
2989 }
2990
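// Broadcast lane C of vector V to every element by shuffling V with a mask
// that repeats the constant lane index.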
2991 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2992   unsigned nElts = V->getType()->getVectorNumElements();
2993   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2994   return Builder.CreateShuffleVector(V, V, SV, "lane");
2995 }
2996
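// Bit-cast each operand to the intrinsic's parameter type before emitting the
// call; the operand at position 'shift' (when nonzero) is instead materialized
// as a constant shift-amount vector, negated for right shifts.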
2997 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2998                                      const char *name,
2999                                      unsigned shift, bool rightshift) {
3000   unsigned j = 0;
3001   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3002        ai != ae; ++ai, ++j)
3003     if (shift > 0 && shift == j)
3004       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3005     else
3006       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3007
3008   return Builder.CreateCall(F, Ops, name);
3009 }
3010
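// Splat the constant shift amount V across the vector type Ty, negating it
// when 'neg' is set.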
3011 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3012                                             bool neg) {
3013   int SV = cast<ConstantInt>(V)->getSExtValue();
3014   return ConstantInt::get(Ty, neg ? -SV : SV);
3015 }
3016
3017 // \brief Right-shift a vector by a constant.
3018 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3019                                           llvm::Type *Ty, bool usgn,
3020                                           const char *name) {
3021   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3022
3023   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3024   int EltSize = VTy->getScalarSizeInBits();
3025
3026   Vec = Builder.CreateBitCast(Vec, Ty);
3027
3028   // lshr/ashr are undefined when the shift amount is equal to the vector
3029   // element size.
3030   if (ShiftAmt == EltSize) {
3031     if (usgn) {
3032       // Right-shifting an unsigned value by its size yields 0.
3033       return llvm::ConstantAggregateZero::get(VTy);
3034     } else {
3035       // Right-shifting a signed value by its size is equivalent
3036       // to a shift of size-1.
3037       --ShiftAmt;
3038       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3039     }
3040   }
3041
3042   Shift = EmitNeonShiftVector(Shift, Ty, false);
3043   if (usgn)
3044     return Builder.CreateLShr(Vec, Shift, name);
3045   else
3046     return Builder.CreateAShr(Vec, Shift, name);
3047 }
3048
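// Flags describing how a NEON builtin's LLVM intrinsic signature is derived
// from the builtin's own types: whether to append the return and/or argument
// types, vectorize them, use unsigned alternates, and so on.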
3049 enum {
3050   AddRetType = (1 << 0),
3051   Add1ArgType = (1 << 1),
3052   Add2ArgTypes = (1 << 2),
3053
3054   VectorizeRetType = (1 << 3),
3055   VectorizeArgTypes = (1 << 4),
3056
3057   InventFloatType = (1 << 5),
3058   UnsignedAlts = (1 << 6),
3059
3060   Use64BitVectors = (1 << 7),
3061   Use128BitVectors = (1 << 8),
3062
3063   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3064   VectorRet = AddRetType | VectorizeRetType,
3065   VectorRetGetArgs01 =
3066       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3067   FpCmpzModifiers =
3068       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3069 };
3070
3071 namespace {
3072 struct NeonIntrinsicInfo {
3073   const char *NameHint;
3074   unsigned BuiltinID;
3075   unsigned LLVMIntrinsic;
3076   unsigned AltLLVMIntrinsic;
3077   unsigned TypeModifier;
3078
3079   bool operator<(unsigned RHSBuiltinID) const {
3080     return BuiltinID < RHSBuiltinID;
3081   }
3082   bool operator<(const NeonIntrinsicInfo &TE) const {
3083     return BuiltinID < TE.BuiltinID;
3084   }
3085 };
3086 } // end anonymous namespace
3087
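// The NEONMAPn macros build NeonIntrinsicInfo entries mapping one NEON builtin
// to zero, one, or two LLVM intrinsics (NEONMAP2 supplies an alternate
// intrinsic, e.g. the signed counterpart when UnsignedAlts is set) together
// with a TypeModifier mask.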
3088 #define NEONMAP0(NameBase) \
3089   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3090
3091 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3092   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3093       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3094
3095 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3096   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3097       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3098       TypeModifier }
3099
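// The intrinsic maps below are kept sorted by builtin ID so that lookups can
// binary-search them.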
3100 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3101   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3102   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3103   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3104   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3105   NEONMAP0(vaddhn_v),
3106   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3107   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3108   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3109   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3110   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3111   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3112   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3113   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3114   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3115   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3116   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3117   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3118   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3119   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3120   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3121   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3122   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3123   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3124   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3125   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3126   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3127   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3128   NEONMAP0(vcvt_f32_v),
3129   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3130   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3131   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3132   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3133   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3134   NEONMAP0(vcvt_s32_v),
3135   NEONMAP0(vcvt_s64_v),
3136   NEONMAP0(vcvt_u32_v),
3137   NEONMAP0(vcvt_u64_v),
3138   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3139   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3140   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3141   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3142   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3143   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3144   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3145   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3146   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3147   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3148   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3149   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3150   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3151   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3152   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3153   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3154   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3155   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3156   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3157   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3158   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3159   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3160   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3161   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3162   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3163   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3164   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3165   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3166   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3167   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3168   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3169   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3170   NEONMAP0(vcvtq_f32_v),
3171   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3172   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3173   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3174   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3175   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3176   NEONMAP0(vcvtq_s32_v),
3177   NEONMAP0(vcvtq_s64_v),
3178   NEONMAP0(vcvtq_u32_v),
3179   NEONMAP0(vcvtq_u64_v),
3180   NEONMAP0(vext_v),
3181   NEONMAP0(vextq_v),
3182   NEONMAP0(vfma_v),
3183   NEONMAP0(vfmaq_v),
3184   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3185   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3186   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3187   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3188   NEONMAP0(vld1_dup_v),
3189   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3190   NEONMAP0(vld1q_dup_v),
3191   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3192   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3193   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3194   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3195   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3196   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3197   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3198   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3199   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3200   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3201   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3202   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3203   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3204   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3205   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3206   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3207   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3208   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3209   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3210   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3211   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3212   NEONMAP0(vmovl_v),
3213   NEONMAP0(vmovn_v),
3214   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3215   NEONMAP0(vmull_v),
3216   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3217   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3218   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3219   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3220   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3221   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3222   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3223   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3224   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3225   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3226   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3227   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3228   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3229   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3230   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3231   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3232   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3233   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3234   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3235   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3236   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3237   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3238   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3239   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3240   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3241   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3242   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3243   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3244   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3245   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3246   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3247   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3248   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3249   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3250   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3251   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3252   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3253   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3254   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3255   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3256   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3257   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3258   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3259   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3260   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3261   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3262   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3263   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3264   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3265   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3266   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3267   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3268   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3269   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3270   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3271   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3272   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3273   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3274   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3275   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3276   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3277   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3278   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3279   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3280   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3281   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3282   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3283   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3284   NEONMAP0(vshl_n_v),
3285   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3286   NEONMAP0(vshll_n_v),
3287   NEONMAP0(vshlq_n_v),
3288   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3289   NEONMAP0(vshr_n_v),
3290   NEONMAP0(vshrn_n_v),
3291   NEONMAP0(vshrq_n_v),
3292   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3293   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3294   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3295   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3296   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3297   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3298   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3299   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3300   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3301   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3302   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3303   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3304   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3305   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3306   NEONMAP0(vsubhn_v),
3307   NEONMAP0(vtrn_v),
3308   NEONMAP0(vtrnq_v),
3309   NEONMAP0(vtst_v),
3310   NEONMAP0(vtstq_v),
3311   NEONMAP0(vuzp_v),
3312   NEONMAP0(vuzpq_v),
3313   NEONMAP0(vzip_v),
3314   NEONMAP0(vzipq_v)
3315 };
3316
3317 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3318   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3319   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3320   NEONMAP0(vaddhn_v),
3321   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3322   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3323   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3324   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3325   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3326   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3327   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3328   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3329   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3330   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3331   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3332   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3333   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3334   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3335   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3336   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3337   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3338   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3339   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3340   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3341   NEONMAP0(vcvt_f32_v),
3342   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3343   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3344   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3345   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3346   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3347   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3348   NEONMAP0(vcvtq_f32_v),
3349   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3350   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3351   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3352   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3353   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3354   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3355   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3356   NEONMAP0(vext_v),
3357   NEONMAP0(vextq_v),
3358   NEONMAP0(vfma_v),
3359   NEONMAP0(vfmaq_v),
3360   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3361   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3362   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3363   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3364   NEONMAP0(vmovl_v),
3365   NEONMAP0(vmovn_v),
3366   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3367   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3368   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3369   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3370   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3371   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3372   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3373   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3374   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3375   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3376   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3377   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3378   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3379   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3380   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3381   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3382   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3383   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3384   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3385   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3386   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3387   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3388   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3389   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3390   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3391   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3392   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3393   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3394   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3395   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3396   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3397   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3398   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3399   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3400   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3401   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3402   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3403   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3404   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3405   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3406   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3407   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3408   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3409   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3410   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3411   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3412   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3413   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3414   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3415   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3416   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3417   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3418   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3419   NEONMAP0(vshl_n_v),
3420   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3421   NEONMAP0(vshll_n_v),
3422   NEONMAP0(vshlq_n_v),
3423   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3424   NEONMAP0(vshr_n_v),
3425   NEONMAP0(vshrn_n_v),
3426   NEONMAP0(vshrq_n_v),
3427   NEONMAP0(vsubhn_v),
3428   NEONMAP0(vtst_v),
3429   NEONMAP0(vtstq_v),
3430 };
3431
3432 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3433   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3434   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3435   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3436   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3437   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3438   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3439   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3440   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3441   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3442   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3443   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3444   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3445   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3446   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3447   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3448   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3449   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3450   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3451   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3452   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3453   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3454   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3455   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3456   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3457   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3458   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3459   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3460   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3461   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3462   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3463   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3464   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3465   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3466   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3467   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3468   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3469   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3470   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3471   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3472   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3473   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3474   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3475   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3476   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3477   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3478   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3479   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3480   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3481   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3482   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3483   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3484   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3485   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3486   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3487   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3488   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3489   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3490   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3491   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3492   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3493   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3494   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3495   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3496   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3497   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3498   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3499   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3500   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3501   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3502   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3503   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3504   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3505   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3506   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3507   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3508   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3509   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3510   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3511   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3512   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3513   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3514   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3515   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3516   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3517   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3518   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3519   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3520   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3522   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3523   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3524   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3525   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3526   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3527   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3528   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3529   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3530   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3531   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3532   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3533   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3534   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3535   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3536   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3537   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3538   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3539   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3540   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3541   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3542   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3543   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3544   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3545   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3546   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3547   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3548   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3549   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3550   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3551   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3552   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3553   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3554   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3555   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3556   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3557   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3558   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3559   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3560   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3561   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3562   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3563   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3564   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3565   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3566   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3567   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3568   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3569   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3570   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3571   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3572   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3573   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3574   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3575   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3576   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3577   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3578   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3579   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3580   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3581   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3582   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3583   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3584   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3585   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3586   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3587   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3588   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3589   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3590   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3591   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3592   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3593   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3594   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3595   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3596   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3597   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3598   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3599   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3600   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3601   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3602   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3603   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3604   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3605   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3606   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3607   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3608   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3609   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3610   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3611   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3612   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3613   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3614   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3615   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3616   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3617   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3618   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3619   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3620   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3621   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3622   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3623   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3624   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3625 };
3626
3627 #undef NEONMAP0
3628 #undef NEONMAP1
3629 #undef NEONMAP2
3630
3631 static bool NEONSIMDIntrinsicsProvenSorted = false;
3632
3633 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3634 static bool AArch64SISDIntrinsicsProvenSorted = false;
3635
3636
3637 static const NeonIntrinsicInfo *
3638 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3639                        unsigned BuiltinID, bool &MapProvenSorted) {
3640
3641 #ifndef NDEBUG
3642   if (!MapProvenSorted) {
3643     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3644     MapProvenSorted = true;
3645   }
3646 #endif
3647
3648   const NeonIntrinsicInfo *Builtin =
3649       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3650
3651   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3652     return Builtin;
3653
3654   return nullptr;
3655 }
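// Example of how the table above is consulted (illustrative): a lookup of
// NEON::BI__builtin_neon_vqaddd_s64 in the scalar map returns the NEONMAP1
// entry for aarch64_neon_sqadd with the Add1ArgType modifier, which tells the
// caller both which LLVM intrinsic to emit and how to assemble its overload
// types.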
3656
3657 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3658                                                    unsigned Modifier,
3659                                                    llvm::Type *ArgType,
3660                                                    const CallExpr *E) {
3661   int VectorSize = 0;
3662   if (Modifier & Use64BitVectors)
3663     VectorSize = 64;
3664   else if (Modifier & Use128BitVectors)
3665     VectorSize = 128;
3666
3667   // Return type.
3668   SmallVector<llvm::Type *, 3> Tys;
3669   if (Modifier & AddRetType) {
3670     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3671     if (Modifier & VectorizeRetType)
3672       Ty = llvm::VectorType::get(
3673           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3674
3675     Tys.push_back(Ty);
3676   }
3677
3678   // Arguments.
3679   if (Modifier & VectorizeArgTypes) {
3680     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3681     ArgType = llvm::VectorType::get(ArgType, Elts);
3682   }
3683
3684   if (Modifier & (Add1ArgType | Add2ArgTypes))
3685     Tys.push_back(ArgType);
3686
3687   if (Modifier & Add2ArgTypes)
3688     Tys.push_back(ArgType);
3689
3690   if (Modifier & InventFloatType)
3691     Tys.push_back(FloatTy);
3692
3693   return CGM.getIntrinsic(IntrinsicID, Tys);
3694 }
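// Example of the overload assembly above (illustrative): vqaddh_s16 is mapped
// with Vectorize1ArgType | Use64BitVectors, so an i16 argument type is widened
// to <4 x i16> (64 bits / 16 bits per element) and the resulting intrinsic is
// roughly @llvm.aarch64.neon.sqadd.v4i16; with Add1ArgType alone (vqaddd_s64),
// the scalar i64 type is used directly.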
3695
3696 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3697                                             const NeonIntrinsicInfo &SISDInfo,
3698                                             SmallVectorImpl<Value *> &Ops,
3699                                             const CallExpr *E) {
3700   unsigned BuiltinID = SISDInfo.BuiltinID;
3701   unsigned int Int = SISDInfo.LLVMIntrinsic;
3702   unsigned Modifier = SISDInfo.TypeModifier;
3703   const char *s = SISDInfo.NameHint;
3704
3705   switch (BuiltinID) {
3706   case NEON::BI__builtin_neon_vcled_s64:
3707   case NEON::BI__builtin_neon_vcled_u64:
3708   case NEON::BI__builtin_neon_vcles_f32:
3709   case NEON::BI__builtin_neon_vcled_f64:
3710   case NEON::BI__builtin_neon_vcltd_s64:
3711   case NEON::BI__builtin_neon_vcltd_u64:
3712   case NEON::BI__builtin_neon_vclts_f32:
3713   case NEON::BI__builtin_neon_vcltd_f64:
3714   case NEON::BI__builtin_neon_vcales_f32:
3715   case NEON::BI__builtin_neon_vcaled_f64:
3716   case NEON::BI__builtin_neon_vcalts_f32:
3717   case NEON::BI__builtin_neon_vcaltd_f64:
3718     // Only one direction of comparisons actually exists; cmle is actually a
3719     // cmge with swapped operands. The table gives us the right intrinsic, but
3720     // we still need to do the swap.
3721     std::swap(Ops[0], Ops[1]);
3722     break;
3723   }
3724
3725   assert(Int && "Generic code assumes a valid intrinsic");
3726
3727   // Determine the type(s) of this overloaded AArch64 intrinsic.
3728   const Expr *Arg = E->getArg(0);
3729   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3730   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3731
3732   int j = 0;
3733   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3734   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3735        ai != ae; ++ai, ++j) {
3736     llvm::Type *ArgTy = ai->getType();
3737     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3738              ArgTy->getPrimitiveSizeInBits())
3739       continue;
3740
3741     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3742     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3743     // it before inserting.
3744     Ops[j] =
3745         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3746     Ops[j] =
3747         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3748   }
3749
3750   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3751   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3752   if (ResultType->getPrimitiveSizeInBits() <
3753       Result->getType()->getPrimitiveSizeInBits())
3754     return CGF.Builder.CreateExtractElement(Result, C0);
3755
3756   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3757 }
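// Sketch of the SISD path above for vqaddd_s64(a, b) (illustrative IR; the
// exact mangled name may differ):
//   %vqaddd = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// If the intrinsic returns a one-element vector wider than the builtin's
// scalar result, the lane is instead pulled out with extractelement.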
3758
3759 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3760     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3761     const char *NameHint, unsigned Modifier, const CallExpr *E,
3762     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3763   // Get the last argument, which specifies the vector type.
3764   llvm::APSInt NeonTypeConst;
3765   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3766   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3767     return nullptr;
3768
3769   // Determine the type of this overloaded NEON intrinsic.
3770   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3771   bool Usgn = Type.isUnsigned();
3772   bool Quad = Type.isQuad();
3773
3774   llvm::VectorType *VTy = GetNeonType(this, Type);
3775   llvm::Type *Ty = VTy;
3776   if (!Ty)
3777     return nullptr;
3778
3779   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3780     return Builder.getInt32(addr.getAlignment().getQuantity());
3781   };
3782
3783   unsigned Int = LLVMIntrinsic;
3784   if ((Modifier & UnsignedAlts) && !Usgn)
3785     Int = AltLLVMIntrinsic;
3786
3787   switch (BuiltinID) {
3788   default: break;
3789   case NEON::BI__builtin_neon_vabs_v:
3790   case NEON::BI__builtin_neon_vabsq_v:
3791     if (VTy->getElementType()->isFloatingPointTy())
3792       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3793     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3794   case NEON::BI__builtin_neon_vaddhn_v: {
3795     llvm::VectorType *SrcTy =
3796         llvm::VectorType::getExtendedElementVectorType(VTy);
3797
3798     // %sum = add <4 x i32> %lhs, %rhs
3799     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3800     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3801     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3802
3803     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3804     Constant *ShiftAmt =
3805         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3806     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3807
3808     // %res = trunc <4 x i32> %high to <4 x i16>
3809     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3810   }
3811   case NEON::BI__builtin_neon_vcale_v:
3812   case NEON::BI__builtin_neon_vcaleq_v:
3813   case NEON::BI__builtin_neon_vcalt_v:
3814   case NEON::BI__builtin_neon_vcaltq_v:
3815     std::swap(Ops[0], Ops[1]);
3816   case NEON::BI__builtin_neon_vcage_v:
3817   case NEON::BI__builtin_neon_vcageq_v:
3818   case NEON::BI__builtin_neon_vcagt_v:
3819   case NEON::BI__builtin_neon_vcagtq_v: {
3820     llvm::Type *VecFlt = llvm::VectorType::get(
3821         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3822         VTy->getNumElements());
3823     llvm::Type *Tys[] = { VTy, VecFlt };
3824     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3825     return EmitNeonCall(F, Ops, NameHint);
3826   }
3827   case NEON::BI__builtin_neon_vclz_v:
3828   case NEON::BI__builtin_neon_vclzq_v:
3829     // We generate a target-independent intrinsic, which needs a second argument
3830     // for whether or not clz of zero is undefined; on ARM it isn't.
3831     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3832     break;
3833   case NEON::BI__builtin_neon_vcvt_f32_v:
3834   case NEON::BI__builtin_neon_vcvtq_f32_v:
3835     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3836     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3837     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3838                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3839   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3840   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3841   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3842   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3843     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3844     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3845     Function *F = CGM.getIntrinsic(Int, Tys);
3846     return EmitNeonCall(F, Ops, "vcvt_n");
3847   }
3848   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3849   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3850   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3851   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3852   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3853   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3854   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3855   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3856     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3857     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3858     return EmitNeonCall(F, Ops, "vcvt_n");
3859   }
3860   case NEON::BI__builtin_neon_vcvt_s32_v:
3861   case NEON::BI__builtin_neon_vcvt_u32_v:
3862   case NEON::BI__builtin_neon_vcvt_s64_v:
3863   case NEON::BI__builtin_neon_vcvt_u64_v:
3864   case NEON::BI__builtin_neon_vcvtq_s32_v:
3865   case NEON::BI__builtin_neon_vcvtq_u32_v:
3866   case NEON::BI__builtin_neon_vcvtq_s64_v:
3867   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3868     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3869     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3870                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3871   }
3872   case NEON::BI__builtin_neon_vcvta_s32_v:
3873   case NEON::BI__builtin_neon_vcvta_s64_v:
3874   case NEON::BI__builtin_neon_vcvta_u32_v:
3875   case NEON::BI__builtin_neon_vcvta_u64_v:
3876   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3877   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3878   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3879   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3880   case NEON::BI__builtin_neon_vcvtn_s32_v:
3881   case NEON::BI__builtin_neon_vcvtn_s64_v:
3882   case NEON::BI__builtin_neon_vcvtn_u32_v:
3883   case NEON::BI__builtin_neon_vcvtn_u64_v:
3884   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3885   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3886   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3887   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3888   case NEON::BI__builtin_neon_vcvtp_s32_v:
3889   case NEON::BI__builtin_neon_vcvtp_s64_v:
3890   case NEON::BI__builtin_neon_vcvtp_u32_v:
3891   case NEON::BI__builtin_neon_vcvtp_u64_v:
3892   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3893   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3894   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3895   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3896   case NEON::BI__builtin_neon_vcvtm_s32_v:
3897   case NEON::BI__builtin_neon_vcvtm_s64_v:
3898   case NEON::BI__builtin_neon_vcvtm_u32_v:
3899   case NEON::BI__builtin_neon_vcvtm_u64_v:
3900   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3901   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3902   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3903   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3904     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3905     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3906   }
3907   case NEON::BI__builtin_neon_vext_v:
3908   case NEON::BI__builtin_neon_vextq_v: {
3909     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3910     SmallVector<uint32_t, 16> Indices;
3911     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3912       Indices.push_back(i+CV);
3913
3914     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3915     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3916     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3917   }
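  // Example for the vext handling above (illustrative): vext_s8(a, b, 1) on
  // <8 x i8> builds the index list <1, 2, 3, 4, 5, 6, 7, 8>, i.e. the top
  // seven lanes of %a followed by the first lane of %b:
  //   %vext = shufflevector <8 x i8> %a, <8 x i8> %b,
  //                         <8 x i32> <i32 1, ..., i32 8>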
3918   case NEON::BI__builtin_neon_vfma_v:
3919   case NEON::BI__builtin_neon_vfmaq_v: {
3920     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3921     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3922     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3923     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3924
3925     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3926     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3927   }
3928   case NEON::BI__builtin_neon_vld1_v:
3929   case NEON::BI__builtin_neon_vld1q_v: {
3930     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3931     Ops.push_back(getAlignmentValue32(PtrOp0));
3932     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3933   }
3934   case NEON::BI__builtin_neon_vld2_v:
3935   case NEON::BI__builtin_neon_vld2q_v:
3936   case NEON::BI__builtin_neon_vld3_v:
3937   case NEON::BI__builtin_neon_vld3q_v:
3938   case NEON::BI__builtin_neon_vld4_v:
3939   case NEON::BI__builtin_neon_vld4q_v: {
3940     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3941     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3942     Value *Align = getAlignmentValue32(PtrOp1);
3943     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3944     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3945     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3946     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3947   }
3948   case NEON::BI__builtin_neon_vld1_dup_v:
3949   case NEON::BI__builtin_neon_vld1q_dup_v: {
3950     Value *V = UndefValue::get(Ty);
3951     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3952     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3953     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3954     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3955     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3956     return EmitNeonSplat(Ops[0], CI);
3957   }
3958   case NEON::BI__builtin_neon_vld2_lane_v:
3959   case NEON::BI__builtin_neon_vld2q_lane_v:
3960   case NEON::BI__builtin_neon_vld3_lane_v:
3961   case NEON::BI__builtin_neon_vld3q_lane_v:
3962   case NEON::BI__builtin_neon_vld4_lane_v:
3963   case NEON::BI__builtin_neon_vld4q_lane_v: {
3964     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3965     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3966     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3967       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3968     Ops.push_back(getAlignmentValue32(PtrOp1));
3969     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3970     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3971     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3972     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3973   }
3974   case NEON::BI__builtin_neon_vmovl_v: {
3975     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3976     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3977     if (Usgn)
3978       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3979     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3980   }
3981   case NEON::BI__builtin_neon_vmovn_v: {
3982     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3983     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3984     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3985   }
3986   case NEON::BI__builtin_neon_vmull_v:
3987     // FIXME: the integer vmull operations could be emitted in terms of pure
3988     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3989     // hoisting the exts outside loops. Until global ISel comes along that can
3990     // see through such movement, this leads to bad CodeGen, so we need an
3991     // intrinsic for now.
3992     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3993     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3994     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3995   case NEON::BI__builtin_neon_vpadal_v:
3996   case NEON::BI__builtin_neon_vpadalq_v: {
3997     // The source operand type has twice as many elements of half the size.
3998     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3999     llvm::Type *EltTy =
4000       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4001     llvm::Type *NarrowTy =
4002       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4003     llvm::Type *Tys[2] = { Ty, NarrowTy };
4004     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4005   }
4006   case NEON::BI__builtin_neon_vpaddl_v:
4007   case NEON::BI__builtin_neon_vpaddlq_v: {
4008     // The source operand type has twice as many elements of half the size.
4009     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4010     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4011     llvm::Type *NarrowTy =
4012       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4013     llvm::Type *Tys[2] = { Ty, NarrowTy };
4014     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4015   }
4016   case NEON::BI__builtin_neon_vqdmlal_v:
4017   case NEON::BI__builtin_neon_vqdmlsl_v: {
4018     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4019     Ops[1] =
4020         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4021     Ops.resize(2);
4022     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4023   }
4024   case NEON::BI__builtin_neon_vqshl_n_v:
4025   case NEON::BI__builtin_neon_vqshlq_n_v:
4026     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4027                         1, false);
4028   case NEON::BI__builtin_neon_vqshlu_n_v:
4029   case NEON::BI__builtin_neon_vqshluq_n_v:
4030     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4031                         1, false);
4032   case NEON::BI__builtin_neon_vrecpe_v:
4033   case NEON::BI__builtin_neon_vrecpeq_v:
4034   case NEON::BI__builtin_neon_vrsqrte_v:
4035   case NEON::BI__builtin_neon_vrsqrteq_v:
4036     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4037     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4038
4039   case NEON::BI__builtin_neon_vrshr_n_v:
4040   case NEON::BI__builtin_neon_vrshrq_n_v:
4041     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4042                         1, true);
4043   case NEON::BI__builtin_neon_vshl_n_v:
4044   case NEON::BI__builtin_neon_vshlq_n_v:
4045     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4046     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4047                              "vshl_n");
4048   case NEON::BI__builtin_neon_vshll_n_v: {
4049     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4050     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4051     if (Usgn)
4052       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4053     else
4054       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4055     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4056     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4057   }
4058   case NEON::BI__builtin_neon_vshrn_n_v: {
4059     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4060     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4061     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4062     if (Usgn)
4063       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4064     else
4065       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4066     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4067   }
4068   case NEON::BI__builtin_neon_vshr_n_v:
4069   case NEON::BI__builtin_neon_vshrq_n_v:
4070     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4071   case NEON::BI__builtin_neon_vst1_v:
4072   case NEON::BI__builtin_neon_vst1q_v:
4073   case NEON::BI__builtin_neon_vst2_v:
4074   case NEON::BI__builtin_neon_vst2q_v:
4075   case NEON::BI__builtin_neon_vst3_v:
4076   case NEON::BI__builtin_neon_vst3q_v:
4077   case NEON::BI__builtin_neon_vst4_v:
4078   case NEON::BI__builtin_neon_vst4q_v:
4079   case NEON::BI__builtin_neon_vst2_lane_v:
4080   case NEON::BI__builtin_neon_vst2q_lane_v:
4081   case NEON::BI__builtin_neon_vst3_lane_v:
4082   case NEON::BI__builtin_neon_vst3q_lane_v:
4083   case NEON::BI__builtin_neon_vst4_lane_v:
4084   case NEON::BI__builtin_neon_vst4q_lane_v: {
4085     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4086     Ops.push_back(getAlignmentValue32(PtrOp0));
4087     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4088   }
4089   case NEON::BI__builtin_neon_vsubhn_v: {
4090     llvm::VectorType *SrcTy =
4091         llvm::VectorType::getExtendedElementVectorType(VTy);
4092
4093     // %diff = sub <4 x i32> %lhs, %rhs
4094     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4095     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4096     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4097
4098     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4099     Constant *ShiftAmt =
4100         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4101     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4102
4103     // %res = trunc <4 x i32> %high to <4 x i16>
4104     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4105   }
4106   case NEON::BI__builtin_neon_vtrn_v:
4107   case NEON::BI__builtin_neon_vtrnq_v: {
4108     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4109     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4110     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4111     Value *SV = nullptr;
4112
4113     for (unsigned vi = 0; vi != 2; ++vi) {
4114       SmallVector<uint32_t, 16> Indices;
4115       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4116         Indices.push_back(i+vi);
4117         Indices.push_back(i+e+vi);
4118       }
4119       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4120       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4121       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4122     }
4123     return SV;
4124   }
4125   case NEON::BI__builtin_neon_vtst_v:
4126   case NEON::BI__builtin_neon_vtstq_v: {
4127     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4128     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4129     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4130     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4131                                 ConstantAggregateZero::get(Ty));
4132     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4133   }
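  // Example for the vtst lowering above (illustrative, for <8 x i8>):
  //   %and  = and <8 x i8> %a, %b
  //   %cmp  = icmp ne <8 x i8> %and, zeroinitializer
  //   %vtst = sext <8 x i1> %cmp to <8 x i8>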
4134   case NEON::BI__builtin_neon_vuzp_v:
4135   case NEON::BI__builtin_neon_vuzpq_v: {
4136     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4137     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4138     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4139     Value *SV = nullptr;
4140
4141     for (unsigned vi = 0; vi != 2; ++vi) {
4142       SmallVector<uint32_t, 16> Indices;
4143       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4144         Indices.push_back(2*i+vi);
4145
4146       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4147       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4148       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4149     }
4150     return SV;
4151   }
4152   case NEON::BI__builtin_neon_vzip_v:
4153   case NEON::BI__builtin_neon_vzipq_v: {
4154     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4155     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4156     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4157     Value *SV = nullptr;
4158
4159     for (unsigned vi = 0; vi != 2; ++vi) {
4160       SmallVector<uint32_t, 16> Indices;
4161       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4162         Indices.push_back((i + vi*e) >> 1);
4163         Indices.push_back(((i + vi*e) >> 1)+e);
4164       }
4165       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4166       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4167       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4168     }
4169     return SV;
4170   }
4171   }
4172
4173   assert(Int && "Expected valid intrinsic number");
4174
4175   // Determine the type(s) of this overloaded NEON intrinsic.
4176   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4177
4178   Value *Result = EmitNeonCall(F, Ops, NameHint);
4179   llvm::Type *ResultType = ConvertType(E->getType());
4180   // Cast the intrinsic's one-element vector result back to the scalar type
4181   // expected by the builtin.
4182   return Builder.CreateBitCast(Result, ResultType, NameHint);
4183 }
4184
4185 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4186     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4187     const CmpInst::Predicate Ip, const Twine &Name) {
4188   llvm::Type *OTy = Op->getType();
4189
4190   // FIXME: this is utterly horrific. We should not be looking at previous
4191   // codegen context to find out what needs doing. Unfortunately TableGen
4192   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4193   // (etc).
4194   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4195     OTy = BI->getOperand(0)->getType();
4196
4197   Op = Builder.CreateBitCast(Op, OTy);
4198   if (OTy->getScalarType()->isFloatingPointTy()) {
4199     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4200   } else {
4201     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4202   }
4203   return Builder.CreateSExt(Op, Ty, Name);
4204 }
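// Example of the compare helper above (illustrative): a vceqzd_f64-style
// builtin with Fp == FCMP_OEQ lowers roughly to
//   %cmp = fcmp oeq double %a, 0.000000e+00
//   %res = sext i1 %cmp to i64
// while integer element types take the ICmp branch with the Ip predicate.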
4205
4206 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4207                                  Value *ExtOp, Value *IndexOp,
4208                                  llvm::Type *ResTy, unsigned IntID,
4209                                  const char *Name) {
4210   SmallVector<Value *, 2> TblOps;
4211   if (ExtOp)
4212     TblOps.push_back(ExtOp);
4213
4214   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
4215   SmallVector<uint32_t, 16> Indices;
4216   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4217   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4218     Indices.push_back(2*i);
4219     Indices.push_back(2*i+1);
4220   }
4221
4222   int PairPos = 0, End = Ops.size() - 1;
4223   while (PairPos < End) {
4224     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4225                                                      Ops[PairPos+1], Indices,
4226                                                      Name));
4227     PairPos += 2;
4228   }
4229
4230   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4231   // of the final 128-bit lookup table with zero.
4232   if (PairPos == End) {
4233     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4234     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4235                                                      ZeroTbl, Indices, Name));
4236   }
4237
4238   Function *TblF;
4239   TblOps.push_back(IndexOp);
4240   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4241
4242   return CGF.EmitNeonCall(TblF, TblOps, Name);
4243 }
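// Sketch of the packing above (illustrative): for a three-table lookup such as
// vtbl3, Ops[0] and Ops[1] are concatenated into one 128-bit vector with a
// shufflevector, the leftover Ops[2] is paired with a zero vector the same
// way, and the index operand is appended before calling the target's tbl
// intrinsic for ResTy.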
4244
4245 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4246   unsigned Value;
4247   switch (BuiltinID) {
4248   default:
4249     return nullptr;
4250   case ARM::BI__builtin_arm_nop:
4251     Value = 0;
4252     break;
4253   case ARM::BI__builtin_arm_yield:
4254   case ARM::BI__yield:
4255     Value = 1;
4256     break;
4257   case ARM::BI__builtin_arm_wfe:
4258   case ARM::BI__wfe:
4259     Value = 2;
4260     break;
4261   case ARM::BI__builtin_arm_wfi:
4262   case ARM::BI__wfi:
4263     Value = 3;
4264     break;
4265   case ARM::BI__builtin_arm_sev:
4266   case ARM::BI__sev:
4267     Value = 4;
4268     break;
4269   case ARM::BI__builtin_arm_sevl:
4270   case ARM::BI__sevl:
4271     Value = 5;
4272     break;
4273   }
4274
4275   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4276                             llvm::ConstantInt::get(Int32Ty, Value));
4277 }
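// Example (illustrative): __builtin_arm_wfi() maps to Value == 3 above and is
// emitted as
//   call void @llvm.arm.hint(i32 3)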
4278
4279 // Generates the IR for the read/write special register builtin.
4280 // ValueType is the type of the value that is to be written or read;
4281 // RegisterType is the type of the register being written to or read from.
4282 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4283                                          const CallExpr *E,
4284                                          llvm::Type *RegisterType,
4285                                          llvm::Type *ValueType,
4286                                          bool IsRead,
4287                                          StringRef SysReg = "") {
4288   // Read/write register intrinsics only support 32- and 64-bit operations.
4289   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4290           && "Unsupported size for register.");
4291
4292   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4293   CodeGen::CodeGenModule &CGM = CGF.CGM;
4294   LLVMContext &Context = CGM.getLLVMContext();
4295
4296   if (SysReg.empty()) {
4297     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4298     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4299   }
4300
4301   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4302   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4303   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4304
4305   llvm::Type *Types[] = { RegisterType };
4306
4307   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4308   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4309             && "Can't fit 64-bit value in 32-bit register");
4310
4311   if (IsRead) {
4312     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4313     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4314
4315     if (MixedTypes)
4316       // Read into 64 bit register and then truncate result to 32 bit.
4317       return Builder.CreateTrunc(Call, ValueType);
4318
4319     if (ValueType->isPointerTy())
4320       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4321       return Builder.CreateIntToPtr(Call, ValueType);
4322
4323     return Call;
4324   }
4325
4326   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4327   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4328   if (MixedTypes) {
4329     // Extend 32 bit write value to 64 bit to pass to write.
4330     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4331     return Builder.CreateCall(F, { Metadata, ArgValue });
4332   }
4333
4334   if (ValueType->isPointerTy()) {
4335     // Have VoidPtrTy ArgValue but need to pass an i32/i64 to the write intrinsic.
4336     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4337     return Builder.CreateCall(F, { Metadata, ArgValue });
4338   }
4339
4340   return Builder.CreateCall(F, { Metadata, ArgValue });
4341 }
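// Example of a read via the helper above (illustrative; the register name is
// just a sample):
//   uint32_t v = __builtin_arm_rsr("cpsr");
// becomes roughly
//   %v = call i32 @llvm.read_register.i32(metadata !0)   ; !0 = !{!"cpsr"}
// with the MixedTypes/pointer adjustments applied as needed.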
4342
4343 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4344 /// argument that specifies the vector type.
4345 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4346   switch (BuiltinID) {
4347   default: break;
4348   case NEON::BI__builtin_neon_vget_lane_i8:
4349   case NEON::BI__builtin_neon_vget_lane_i16:
4350   case NEON::BI__builtin_neon_vget_lane_i32:
4351   case NEON::BI__builtin_neon_vget_lane_i64:
4352   case NEON::BI__builtin_neon_vget_lane_f32:
4353   case NEON::BI__builtin_neon_vgetq_lane_i8:
4354   case NEON::BI__builtin_neon_vgetq_lane_i16:
4355   case NEON::BI__builtin_neon_vgetq_lane_i32:
4356   case NEON::BI__builtin_neon_vgetq_lane_i64:
4357   case NEON::BI__builtin_neon_vgetq_lane_f32:
4358   case NEON::BI__builtin_neon_vset_lane_i8:
4359   case NEON::BI__builtin_neon_vset_lane_i16:
4360   case NEON::BI__builtin_neon_vset_lane_i32:
4361   case NEON::BI__builtin_neon_vset_lane_i64:
4362   case NEON::BI__builtin_neon_vset_lane_f32:
4363   case NEON::BI__builtin_neon_vsetq_lane_i8:
4364   case NEON::BI__builtin_neon_vsetq_lane_i16:
4365   case NEON::BI__builtin_neon_vsetq_lane_i32:
4366   case NEON::BI__builtin_neon_vsetq_lane_i64:
4367   case NEON::BI__builtin_neon_vsetq_lane_f32:
4368   case NEON::BI__builtin_neon_vsha1h_u32:
4369   case NEON::BI__builtin_neon_vsha1cq_u32:
4370   case NEON::BI__builtin_neon_vsha1pq_u32:
4371   case NEON::BI__builtin_neon_vsha1mq_u32:
4372   case ARM::BI_MoveToCoprocessor:
4373   case ARM::BI_MoveToCoprocessor2:
4374     return false;
4375   }
4376   return true;
4377 }
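// For example, __builtin_neon_vget_lane_i32 takes only (vector, lane) and so
// returns false here, whereas a builtin such as __builtin_neon_vabs_v carries
// a trailing integer constant that encodes the NeonTypeFlags and returns true.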
4378
4379 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4380                                            const CallExpr *E) {
4381   if (auto Hint = GetValueForARMHint(BuiltinID))
4382     return Hint;
4383
4384   if (BuiltinID == ARM::BI__emit) {
4385     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4386     llvm::FunctionType *FTy =
4387         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4388
4389     APSInt Value;
4390     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4391       llvm_unreachable("Sema will ensure that the parameter is constant");
4392
4393     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4394
4395     llvm::InlineAsm *Emit =
4396         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4397                                  /*SideEffects=*/true)
4398                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4399                                  /*SideEffects=*/true);
4400
4401     return Builder.CreateCall(Emit);
4402   }
4403
4404   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4405     Value *Option = EmitScalarExpr(E->getArg(0));
4406     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4407   }
4408
4409   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4410     Value *Address = EmitScalarExpr(E->getArg(0));
4411     Value *RW      = EmitScalarExpr(E->getArg(1));
4412     Value *IsData  = EmitScalarExpr(E->getArg(2));
4413
4414     // Locality is not supported on the ARM target; default to 3 (highest).
4415     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4416
4417     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4418     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4419   }
4420
4421   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4422     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4423     return Builder.CreateCall(
4424         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4425   }
4426
4427   if (BuiltinID == ARM::BI__clear_cache) {
4428     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4429     const FunctionDecl *FD = E->getDirectCallee();
4430     Value *Ops[2];
4431     for (unsigned i = 0; i < 2; i++)
4432       Ops[i] = EmitScalarExpr(E->getArg(i));
4433     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4434     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4435     StringRef Name = FD->getName();
4436     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4437   }
4438
4439   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4440       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4441     Function *F;
4442
4443     switch (BuiltinID) {
4444     default: llvm_unreachable("unexpected builtin");
4445     case ARM::BI__builtin_arm_mcrr:
4446       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4447       break;
4448     case ARM::BI__builtin_arm_mcrr2:
4449       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4450       break;
4451     }
4452
4453     // The MCRR{2} instruction has 5 operands, but the builtin has only 4
4454     // because Rt and Rt2 are passed as a single unsigned 64-bit integer.
4455     // The LLVM intrinsic, however, takes Rt and Rt2 as two separate 32-bit
4456     // values, so the 64-bit argument is split into its low and high halves
4457     // before the call.
4459
4460     Value *Coproc = EmitScalarExpr(E->getArg(0));
4461     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4462     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4463     Value *CRm = EmitScalarExpr(E->getArg(3));
4464
4465     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4466     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4467     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4468     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4469
4470     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4471   }
4472
4473   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4474       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4475     Function *F;
4476
4477     switch (BuiltinID) {
4478     default: llvm_unreachable("unexpected builtin");
4479     case ARM::BI__builtin_arm_mrrc:
4480       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4481       break;
4482     case ARM::BI__builtin_arm_mrrc2:
4483       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4484       break;
4485     }
4486
4487     Value *Coproc = EmitScalarExpr(E->getArg(0));
4488     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4489     Value *CRm  = EmitScalarExpr(E->getArg(2));
4490     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4491
4492     // The intrinsic returns an unsigned 64-bit value represented as two
4493     // 32-bit integers, which are recombined into a single i64 below.
4494
4495     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4496     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4497     Rt = Builder.CreateZExt(Rt, Int64Ty);
4498     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4499
4500     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4501     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4502     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4503
4504     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4505   }
4506
4507   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4508       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4509         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4510        getContext().getTypeSize(E->getType()) == 64) ||
4511       BuiltinID == ARM::BI__ldrexd) {
4512     Function *F;
4513
4514     switch (BuiltinID) {
4515     default: llvm_unreachable("unexpected builtin");
4516     case ARM::BI__builtin_arm_ldaex:
4517       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4518       break;
4519     case ARM::BI__builtin_arm_ldrexd:
4520     case ARM::BI__builtin_arm_ldrex:
4521     case ARM::BI__ldrexd:
4522       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4523       break;
4524     }
4525
4526     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4527     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4528                                     "ldrexd");
4529
4530     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4531     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4532     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4533     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4534
4535     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4536     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4537     Val = Builder.CreateOr(Val, Val1);
4538     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4539   }
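  // Sketch of the 64-bit exclusive load above (illustrative):
  //   %val = call { i32, i32 } @llvm.arm.ldrexd(i8* %ptr)
  // The two 32-bit halves are then zero-extended, the high half is shifted
  // left by 32, and the halves are OR'ed together into the i64 result.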
4540
4541   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4542       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4543     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4544
4545     QualType Ty = E->getType();
4546     llvm::Type *RealResTy = ConvertType(Ty);
4547     llvm::Type *PtrTy = llvm::IntegerType::get(
4548         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4549     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4550
4551     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4552                                        ? Intrinsic::arm_ldaex
4553                                        : Intrinsic::arm_ldrex,
4554                                    PtrTy);
4555     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4556
4557     if (RealResTy->isPointerTy())
4558       return Builder.CreateIntToPtr(Val, RealResTy);
4559     else {
4560       llvm::Type *IntResTy = llvm::IntegerType::get(
4561           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4562       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4563       return Builder.CreateBitCast(Val, RealResTy);
4564     }
4565   }
4566
4567   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4568       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4569         BuiltinID == ARM::BI__builtin_arm_strex) &&
4570        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4571     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4572                                        ? Intrinsic::arm_stlexd
4573                                        : Intrinsic::arm_strexd);
4574     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4575
4576     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4577     Value *Val = EmitScalarExpr(E->getArg(0));
4578     Builder.CreateStore(Val, Tmp);
4579
4580     Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4581     Val = Builder.CreateLoad(LdPtr);
4582
4583     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4584     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4585     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4586     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4587   }
4588
4589   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4590       BuiltinID == ARM::BI__builtin_arm_stlex) {
4591     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4592     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4593
4594     QualType Ty = E->getArg(0)->getType();
4595     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4596                                                  getContext().getTypeSize(Ty));
4597     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4598
4599     if (StoreVal->getType()->isPointerTy())
4600       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4601     else {
4602       llvm::Type *IntTy = llvm::IntegerType::get(
4603           getLLVMContext(),
4604           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4605       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4606       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4607     }
4608
4609     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4610                                        ? Intrinsic::arm_stlex
4611                                        : Intrinsic::arm_strex,
4612                                    StoreAddr->getType());
4613     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4614   }
4615
4616   switch (BuiltinID) {
4617   case ARM::BI__iso_volatile_load8:
4618   case ARM::BI__iso_volatile_load16:
4619   case ARM::BI__iso_volatile_load32:
4620   case ARM::BI__iso_volatile_load64: {
4621     Value *Ptr = EmitScalarExpr(E->getArg(0));
4622     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4623     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4624     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4625                                              LoadSize.getQuantity() * 8);
4626     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4627     llvm::LoadInst *Load =
4628       Builder.CreateAlignedLoad(Ptr, LoadSize);
4629     Load->setVolatile(true);
4630     return Load;
4631   }
4632   case ARM::BI__iso_volatile_store8:
4633   case ARM::BI__iso_volatile_store16:
4634   case ARM::BI__iso_volatile_store32:
4635   case ARM::BI__iso_volatile_store64: {
4636     Value *Ptr = EmitScalarExpr(E->getArg(0));
4637     Value *Value = EmitScalarExpr(E->getArg(1));
4638     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4639     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4640     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4641                                              StoreSize.getQuantity() * 8);
4642     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4643     llvm::StoreInst *Store =
4644       Builder.CreateAlignedStore(Value, Ptr,
4645                                  StoreSize);
4646     Store->setVolatile(true);
4647     return Store;
4648   }
4649   }
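  // Example for the volatile load/store cases above (illustrative):
  //   __iso_volatile_load32(p) becomes roughly
  //     %v = load volatile i32, i32* %p, align 4
  //   and __iso_volatile_store32(p, v) becomes the matching volatile store.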
4650
4651   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4652     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4653     return Builder.CreateCall(F);
4654   }
4655
4656   // CRC32
4657   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4658   switch (BuiltinID) {
4659   case ARM::BI__builtin_arm_crc32b:
4660     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4661   case ARM::BI__builtin_arm_crc32cb:
4662     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4663   case ARM::BI__builtin_arm_crc32h:
4664     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4665   case ARM::BI__builtin_arm_crc32ch:
4666     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4667   case ARM::BI__builtin_arm_crc32w:
4668   case ARM::BI__builtin_arm_crc32d:
4669     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4670   case ARM::BI__builtin_arm_crc32cw:
4671   case ARM::BI__builtin_arm_crc32cd:
4672     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4673   }
4674
4675   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4676     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4677     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4678
4679     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4680     // intrinsics, hence we need different codegen for these cases.
4681     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4682         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4683       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4684       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4685       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4686       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4687
4688       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4689       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4690       return Builder.CreateCall(F, {Res, Arg1b});
4691     } else {
4692       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4693
4694       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4695       return Builder.CreateCall(F, {Arg0, Arg1});
4696     }
4697   }
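  // Example for the 64-bit CRC case above (illustrative):
  //   uint32_t r = __builtin_arm_crc32d(acc, value);
  // lowers to two chained 32-bit calls:
  //   %lo  = trunc i64 %value to i32
  //   %shr = lshr i64 %value, 32
  //   %hi  = trunc i64 %shr to i32
  //   %r0  = call i32 @llvm.arm.crc32w(i32 %acc, i32 %lo)
  //   %r   = call i32 @llvm.arm.crc32w(i32 %r0, i32 %hi)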
4698
4699   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4700       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4701       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4702       BuiltinID == ARM::BI__builtin_arm_wsr ||
4703       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4704       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4705
4706     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4707                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4708                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4709
4710     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4711                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4712
4713     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4714                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4715
4716     llvm::Type *ValueType;
4717     llvm::Type *RegisterType;
4718     if (IsPointerBuiltin) {
4719       ValueType = VoidPtrTy;
4720       RegisterType = Int32Ty;
4721     } else if (Is64Bit) {
4722       ValueType = RegisterType = Int64Ty;
4723     } else {
4724       ValueType = RegisterType = Int32Ty;
4725     }
4726
4727     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4728   }
4729
4730   // Find out if any arguments are required to be integer constant
4731   // expressions.
4732   unsigned ICEArguments = 0;
4733   ASTContext::GetBuiltinTypeError Error;
4734   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4735   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4736
4737   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4738     return Builder.getInt32(addr.getAlignment().getQuantity());
4739   };
4740
4741   Address PtrOp0 = Address::invalid();
4742   Address PtrOp1 = Address::invalid();
4743   SmallVector<Value*, 4> Ops;
4744   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4745   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4746   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4747     if (i == 0) {
4748       switch (BuiltinID) {
4749       case NEON::BI__builtin_neon_vld1_v:
4750       case NEON::BI__builtin_neon_vld1q_v:
4751       case NEON::BI__builtin_neon_vld1q_lane_v:
4752       case NEON::BI__builtin_neon_vld1_lane_v:
4753       case NEON::BI__builtin_neon_vld1_dup_v:
4754       case NEON::BI__builtin_neon_vld1q_dup_v:
4755       case NEON::BI__builtin_neon_vst1_v:
4756       case NEON::BI__builtin_neon_vst1q_v:
4757       case NEON::BI__builtin_neon_vst1q_lane_v:
4758       case NEON::BI__builtin_neon_vst1_lane_v:
4759       case NEON::BI__builtin_neon_vst2_v:
4760       case NEON::BI__builtin_neon_vst2q_v:
4761       case NEON::BI__builtin_neon_vst2_lane_v:
4762       case NEON::BI__builtin_neon_vst2q_lane_v:
4763       case NEON::BI__builtin_neon_vst3_v:
4764       case NEON::BI__builtin_neon_vst3q_v:
4765       case NEON::BI__builtin_neon_vst3_lane_v:
4766       case NEON::BI__builtin_neon_vst3q_lane_v:
4767       case NEON::BI__builtin_neon_vst4_v:
4768       case NEON::BI__builtin_neon_vst4q_v:
4769       case NEON::BI__builtin_neon_vst4_lane_v:
4770       case NEON::BI__builtin_neon_vst4q_lane_v:
4771         // Get the alignment for the argument in addition to the value;
4772         // we'll use it later.
4773         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4774         Ops.push_back(PtrOp0.getPointer());
4775         continue;
4776       }
4777     }
4778     if (i == 1) {
4779       switch (BuiltinID) {
4780       case NEON::BI__builtin_neon_vld2_v:
4781       case NEON::BI__builtin_neon_vld2q_v:
4782       case NEON::BI__builtin_neon_vld3_v:
4783       case NEON::BI__builtin_neon_vld3q_v:
4784       case NEON::BI__builtin_neon_vld4_v:
4785       case NEON::BI__builtin_neon_vld4q_v:
4786       case NEON::BI__builtin_neon_vld2_lane_v:
4787       case NEON::BI__builtin_neon_vld2q_lane_v:
4788       case NEON::BI__builtin_neon_vld3_lane_v:
4789       case NEON::BI__builtin_neon_vld3q_lane_v:
4790       case NEON::BI__builtin_neon_vld4_lane_v:
4791       case NEON::BI__builtin_neon_vld4q_lane_v:
4792       case NEON::BI__builtin_neon_vld2_dup_v:
4793       case NEON::BI__builtin_neon_vld3_dup_v:
4794       case NEON::BI__builtin_neon_vld4_dup_v:
4795         // Get the alignment for the argument in addition to the value;
4796         // we'll use it later.
4797         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4798         Ops.push_back(PtrOp1.getPointer());
4799         continue;
4800       }
4801     }
4802
4803     if ((ICEArguments & (1 << i)) == 0) {
4804       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4805     } else {
4806       // If this is required to be a constant, constant fold it so that we know
4807       // that the generated intrinsic gets a ConstantInt.
4808       llvm::APSInt Result;
4809       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4810       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4811       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4812     }
4813   }
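  // At this point any argument that was required to be an integer constant
  // expression (e.g. a lane index) is already a ConstantInt in Ops, which is
  // what the lane and shift handling below relies on.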
4814
4815   switch (BuiltinID) {
4816   default: break;
4817
4818   case NEON::BI__builtin_neon_vget_lane_i8:
4819   case NEON::BI__builtin_neon_vget_lane_i16:
4820   case NEON::BI__builtin_neon_vget_lane_i32:
4821   case NEON::BI__builtin_neon_vget_lane_i64:
4822   case NEON::BI__builtin_neon_vget_lane_f32:
4823   case NEON::BI__builtin_neon_vgetq_lane_i8:
4824   case NEON::BI__builtin_neon_vgetq_lane_i16:
4825   case NEON::BI__builtin_neon_vgetq_lane_i32:
4826   case NEON::BI__builtin_neon_vgetq_lane_i64:
4827   case NEON::BI__builtin_neon_vgetq_lane_f32:
4828     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4829
4830   case NEON::BI__builtin_neon_vset_lane_i8:
4831   case NEON::BI__builtin_neon_vset_lane_i16:
4832   case NEON::BI__builtin_neon_vset_lane_i32:
4833   case NEON::BI__builtin_neon_vset_lane_i64:
4834   case NEON::BI__builtin_neon_vset_lane_f32:
4835   case NEON::BI__builtin_neon_vsetq_lane_i8:
4836   case NEON::BI__builtin_neon_vsetq_lane_i16:
4837   case NEON::BI__builtin_neon_vsetq_lane_i32:
4838   case NEON::BI__builtin_neon_vsetq_lane_i64:
4839   case NEON::BI__builtin_neon_vsetq_lane_f32:
4840     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4841
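  // Crypto SHA1 builtins map directly onto the arm.neon.sha1* intrinsics.
  // A minimal usage sketch (assuming <arm_neon.h> with the crypto extension):
  //   uint32_t e2 = vsha1h_u32(e);   // fixed rotate of the hash value e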
4842   case NEON::BI__builtin_neon_vsha1h_u32:
4843     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4844                         "vsha1h");
4845   case NEON::BI__builtin_neon_vsha1cq_u32:
4846     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4847                         "vsha1c");
4848   case NEON::BI__builtin_neon_vsha1pq_u32:
4849     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4850                         "vsha1p");
4851   case NEON::BI__builtin_neon_vsha1mq_u32:
4852     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4853                         "vsha1m");
4854
4855   // The ARM _MoveToCoprocessor builtins put the input register value as
4856   // the first argument, but the LLVM intrinsic expects it as the third one.
4857   case ARM::BI_MoveToCoprocessor:
4858   case ARM::BI_MoveToCoprocessor2: {
4859     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4860                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4861     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4862                                   Ops[3], Ops[4], Ops[5]});
4863   }
4864   case ARM::BI_BitScanForward:
4865   case ARM::BI_BitScanForward64:
4866     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4867   case ARM::BI_BitScanReverse:
4868   case ARM::BI_BitScanReverse64:
4869     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4870
4871   case ARM::BI_InterlockedAnd64:
4872     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4873   case ARM::BI_InterlockedExchange64:
4874     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4875   case ARM::BI_InterlockedExchangeAdd64:
4876     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4877   case ARM::BI_InterlockedExchangeSub64:
4878     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4879   case ARM::BI_InterlockedOr64:
4880     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4881   case ARM::BI_InterlockedXor64:
4882     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4883   case ARM::BI_InterlockedDecrement64:
4884     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4885   case ARM::BI_InterlockedIncrement64:
4886     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4887   }
4888
4889   // Get the last argument, which specifies the vector type.
4890   assert(HasExtraArg);
4891   llvm::APSInt Result;
4892   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4893   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4894     return nullptr;
4895
4896   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4897       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4898     // Determine the overloaded type of this builtin.
4899     llvm::Type *Ty;
4900     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4901       Ty = FloatTy;
4902     else
4903       Ty = DoubleTy;
4904
4905     // Determine whether this is an unsigned conversion or not.
4906     bool usgn = Result.getZExtValue() == 1;
4907     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4908
4909     // Call the appropriate intrinsic.
4910     Function *F = CGM.getIntrinsic(Int, Ty);
4911     return Builder.CreateCall(F, Ops, "vcvtr");
4912   }
4913
4914   // Determine the type of this overloaded NEON intrinsic.
4915   NeonTypeFlags Type(Result.getZExtValue());
4916   bool usgn = Type.isUnsigned();
4917   bool rightShift = false;
4918
4919   llvm::VectorType *VTy = GetNeonType(this, Type);
4920   llvm::Type *Ty = VTy;
4921   if (!Ty)
4922     return nullptr;
4923
4924   // Many NEON builtins have identical semantics and uses in ARM and
4925   // AArch64. Emit these in a single function.
4926   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4927   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4928       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4929   if (Builtin)
4930     return EmitCommonNeonBuiltinExpr(
4931         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4932         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4933
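  // Anything that reaches this switch is an ARM-only NEON builtin (not shared
  // with AArch64 via ARMSIMDIntrinsicMap) and is lowered case by case.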
4934   unsigned Int;
4935   switch (BuiltinID) {
4936   default: return nullptr;
4937   case NEON::BI__builtin_neon_vld1q_lane_v:
4938     // Handle 64-bit integer elements as a special case.  Use shuffles of
4939     // one-element vectors to avoid poor code for i64 in the backend.
4940     if (VTy->getElementType()->isIntegerTy(64)) {
4941       // Extract the other lane.
4942       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4943       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4944       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4945       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4946       // Load the value as a one-element vector.
4947       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4948       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4949       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4950       Value *Align = getAlignmentValue32(PtrOp0);
4951       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4952       // Combine them.
4953       uint32_t Indices[] = {1 - Lane, Lane};
4954       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4955       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4956     }
4957     // fall through
4958   case NEON::BI__builtin_neon_vld1_lane_v: {
4959     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4960     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4961     Value *Ld = Builder.CreateLoad(PtrOp0);
4962     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4963   }
4964   case NEON::BI__builtin_neon_vld2_dup_v:
4965   case NEON::BI__builtin_neon_vld3_dup_v:
4966   case NEON::BI__builtin_neon_vld4_dup_v: {
4967     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4968     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4969       switch (BuiltinID) {
4970       case NEON::BI__builtin_neon_vld2_dup_v:
4971         Int = Intrinsic::arm_neon_vld2;
4972         break;
4973       case NEON::BI__builtin_neon_vld3_dup_v:
4974         Int = Intrinsic::arm_neon_vld3;
4975         break;
4976       case NEON::BI__builtin_neon_vld4_dup_v:
4977         Int = Intrinsic::arm_neon_vld4;
4978         break;
4979       default: llvm_unreachable("unknown vld_dup intrinsic?");
4980       }
4981       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4982       Function *F = CGM.getIntrinsic(Int, Tys);
4983       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4984       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4985       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4986       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4987       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4988     }
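    // Non-64-bit elements: emit a vldNlane load of lane 0 into otherwise-undef
    // vectors, splat lane 0 across each returned vector, and store the
    // aggregate through the result pointer in Ops[0].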
4989     switch (BuiltinID) {
4990     case NEON::BI__builtin_neon_vld2_dup_v:
4991       Int = Intrinsic::arm_neon_vld2lane;
4992       break;
4993     case NEON::BI__builtin_neon_vld3_dup_v:
4994       Int = Intrinsic::arm_neon_vld3lane;
4995       break;
4996     case NEON::BI__builtin_neon_vld4_dup_v:
4997       Int = Intrinsic::arm_neon_vld4lane;
4998       break;
4999     default: llvm_unreachable("unknown vld_dup intrinsic?");
5000     }
5001     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5002     Function *F = CGM.getIntrinsic(Int, Tys);
5003     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5004
5005     SmallVector<Value*, 6> Args;
5006     Args.push_back(Ops[1]);
5007     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5008
5009     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5010     Args.push_back(CI);
5011     Args.push_back(getAlignmentValue32(PtrOp1));
5012
5013     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5014     // splat lane 0 to all elts in each vector of the result.
5015     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5016       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5017       Value *Elt = Builder.CreateBitCast(Val, Ty);
5018       Elt = EmitNeonSplat(Elt, CI);
5019       Elt = Builder.CreateBitCast(Elt, Val->getType());
5020       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5021     }
5022     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5023     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5024     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5025   }
5026   case NEON::BI__builtin_neon_vqrshrn_n_v:
5027     Int =
5028       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5029     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5030                         1, true);
5031   case NEON::BI__builtin_neon_vqrshrun_n_v:
5032     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5033                         Ops, "vqrshrun_n", 1, true);
5034   case NEON::BI__builtin_neon_vqshrn_n_v:
5035     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5036     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5037                         1, true);
5038   case NEON::BI__builtin_neon_vqshrun_n_v:
5039     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5040                         Ops, "vqshrun_n", 1, true);
5041   case NEON::BI__builtin_neon_vrecpe_v:
5042   case NEON::BI__builtin_neon_vrecpeq_v:
5043     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5044                         Ops, "vrecpe");
5045   case NEON::BI__builtin_neon_vrshrn_n_v:
5046     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5047                         Ops, "vrshrn_n", 1, true);
5048   case NEON::BI__builtin_neon_vrsra_n_v:
5049   case NEON::BI__builtin_neon_vrsraq_n_v:
5050     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5051     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5052     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5053     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5054     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5055     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
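  // vsri/vsli share the vshiftins intrinsic; they differ only in whether
  // EmitNeonShiftVector builds a right (negated) or left shift amount.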
5056   case NEON::BI__builtin_neon_vsri_n_v:
5057   case NEON::BI__builtin_neon_vsriq_n_v:
5058     rightShift = true;
    // fall through
5059   case NEON::BI__builtin_neon_vsli_n_v:
5060   case NEON::BI__builtin_neon_vsliq_n_v:
5061     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5062     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5063                         Ops, "vsli_n");
5064   case NEON::BI__builtin_neon_vsra_n_v:
5065   case NEON::BI__builtin_neon_vsraq_n_v:
5066     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5067     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5068     return Builder.CreateAdd(Ops[0], Ops[1]);
5069   case NEON::BI__builtin_neon_vst1q_lane_v:
5070     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5071     // a one-element vector and avoid poor code for i64 in the backend.
5072     if (VTy->getElementType()->isIntegerTy(64)) {
5073       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5074       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5075       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5076       Ops[2] = getAlignmentValue32(PtrOp0);
5077       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5078       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5079                                                  Tys), Ops);
5080     }
5081     // fall through
5082   case NEON::BI__builtin_neon_vst1_lane_v: {
5083     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5084     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5085     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5086     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5087     return St;
5088   }
5089   case NEON::BI__builtin_neon_vtbl1_v:
5090     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5091                         Ops, "vtbl1");
5092   case NEON::BI__builtin_neon_vtbl2_v:
5093     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5094                         Ops, "vtbl2");
5095   case NEON::BI__builtin_neon_vtbl3_v:
5096     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5097                         Ops, "vtbl3");
5098   case NEON::BI__builtin_neon_vtbl4_v:
5099     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5100                         Ops, "vtbl4");
5101   case NEON::BI__builtin_neon_vtbx1_v:
5102     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5103                         Ops, "vtbx1");
5104   case NEON::BI__builtin_neon_vtbx2_v:
5105     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5106                         Ops, "vtbx2");
5107   case NEON::BI__builtin_neon_vtbx3_v:
5108     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5109                         Ops, "vtbx3");
5110   case NEON::BI__builtin_neon_vtbx4_v:
5111     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5112                         Ops, "vtbx4");
5113   }
5114 }
5115
5116 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5117                                       const CallExpr *E,
5118                                       SmallVectorImpl<Value *> &Ops) {
5119   unsigned int Int = 0;
5120   const char *s = nullptr;
5121
5122   switch (BuiltinID) {
5123   default:
5124     return nullptr;
5125   case NEON::BI__builtin_neon_vtbl1_v:
5126   case NEON::BI__builtin_neon_vqtbl1_v:
5127   case NEON::BI__builtin_neon_vqtbl1q_v:
5128   case NEON::BI__builtin_neon_vtbl2_v:
5129   case NEON::BI__builtin_neon_vqtbl2_v:
5130   case NEON::BI__builtin_neon_vqtbl2q_v:
5131   case NEON::BI__builtin_neon_vtbl3_v:
5132   case NEON::BI__builtin_neon_vqtbl3_v:
5133   case NEON::BI__builtin_neon_vqtbl3q_v:
5134   case NEON::BI__builtin_neon_vtbl4_v:
5135   case NEON::BI__builtin_neon_vqtbl4_v:
5136   case NEON::BI__builtin_neon_vqtbl4q_v:
5137     break;
5138   case NEON::BI__builtin_neon_vtbx1_v:
5139   case NEON::BI__builtin_neon_vqtbx1_v:
5140   case NEON::BI__builtin_neon_vqtbx1q_v:
5141   case NEON::BI__builtin_neon_vtbx2_v:
5142   case NEON::BI__builtin_neon_vqtbx2_v:
5143   case NEON::BI__builtin_neon_vqtbx2q_v:
5144   case NEON::BI__builtin_neon_vtbx3_v:
5145   case NEON::BI__builtin_neon_vqtbx3_v:
5146   case NEON::BI__builtin_neon_vqtbx3q_v:
5147   case NEON::BI__builtin_neon_vtbx4_v:
5148   case NEON::BI__builtin_neon_vqtbx4_v:
5149   case NEON::BI__builtin_neon_vqtbx4q_v:
5150     break;
5151   }
5152
5153   assert(E->getNumArgs() >= 3);
5154
5155   // Get the last argument, which specifies the vector type.
5156   llvm::APSInt Result;
5157   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5158   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5159     return nullptr;
5160
5161   // Determine the type of this overloaded NEON intrinsic.
5162   NeonTypeFlags Type(Result.getZExtValue());
5163   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5164   if (!Ty)
5165     return nullptr;
5166
5167   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5168
5169   // AArch64 scalar builtins are not overloaded; they do not have an extra
5170   // argument that specifies the vector type, so we need to handle each case.
5171   switch (BuiltinID) {
5172   case NEON::BI__builtin_neon_vtbl1_v: {
5173     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5174                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5175                               "vtbl1");
5176   }
5177   case NEON::BI__builtin_neon_vtbl2_v: {
5178     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5179                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5180                               "vtbl1");
5181   }
5182   case NEON::BI__builtin_neon_vtbl3_v: {
5183     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5184                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5185                               "vtbl2");
5186   }
5187   case NEON::BI__builtin_neon_vtbl4_v: {
5188     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5189                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5190                               "vtbl2");
5191   }
5192   case NEON::BI__builtin_neon_vtbx1_v: {
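    // AArch64 tbl1 writes 0 for out-of-range indices, whereas vtbx1 must
    // preserve the destination element, so emulate it: do the tbl1 lookup,
    // then select between the original elements and the table result based on
    // whether each index is >= 8.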
5193     Value *TblRes =
5194         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5195                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5196
5197     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5198     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5199     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5200
5201     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5202     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5203     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5204   }
5205   case NEON::BI__builtin_neon_vtbx2_v: {
5206     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5207                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5208                               "vtbx1");
5209   }
5210   case NEON::BI__builtin_neon_vtbx3_v: {
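    // Same emulation as vtbx1 above, but using a two-register tbl2 and an
    // index bound of 24 (three source vectors of 8 bytes each).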
5211     Value *TblRes =
5212         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5213                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5214
5215     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5216     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5217                                            TwentyFourV);
5218     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5219
5220     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5221     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5222     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5223   }
5224   case NEON::BI__builtin_neon_vtbx4_v: {
5225     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5226                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5227                               "vtbx2");
5228   }
5229   case NEON::BI__builtin_neon_vqtbl1_v:
5230   case NEON::BI__builtin_neon_vqtbl1q_v:
5231     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5232   case NEON::BI__builtin_neon_vqtbl2_v:
5233   case NEON::BI__builtin_neon_vqtbl2q_v:
5234     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5235   case NEON::BI__builtin_neon_vqtbl3_v:
5236   case NEON::BI__builtin_neon_vqtbl3q_v:
5237     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5238   case NEON::BI__builtin_neon_vqtbl4_v:
5239   case NEON::BI__builtin_neon_vqtbl4q_v:
5240     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5241   case NEON::BI__builtin_neon_vqtbx1_v:
5242   case NEON::BI__builtin_neon_vqtbx1q_v:
5243     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5244   case NEON::BI__builtin_neon_vqtbx2_v:
5245   case NEON::BI__builtin_neon_vqtbx2q_v:
5246     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5247   case NEON::BI__builtin_neon_vqtbx3_v:
5248   case NEON::BI__builtin_neon_vqtbx3q_v:
5249     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5250   case NEON::BI__builtin_neon_vqtbx4_v:
5251   case NEON::BI__builtin_neon_vqtbx4q_v:
5252     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5253   }
5255
5256   if (!Int)
5257     return nullptr;
5258
5259   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5260   return CGF.EmitNeonCall(F, Ops, s);
5261 }
5262
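// Wrap a scalar i16 as lane 0 of an otherwise-undef <4 x i16> vector so the
// scalar saturating builtins below can reuse the vector sqdmull intrinsic.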
5263 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5264   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5265   Op = Builder.CreateBitCast(Op, Int16Ty);
5266   Value *V = UndefValue::get(VTy);
5267   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5268   Op = Builder.CreateInsertElement(V, Op, CI);
5269   return Op;
5270 }
5271
5272 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5273                                                const CallExpr *E) {
5274   unsigned HintID = static_cast<unsigned>(-1);
5275   switch (BuiltinID) {
5276   default: break;
5277   case AArch64::BI__builtin_arm_nop:
5278     HintID = 0;
5279     break;
5280   case AArch64::BI__builtin_arm_yield:
5281     HintID = 1;
5282     break;
5283   case AArch64::BI__builtin_arm_wfe:
5284     HintID = 2;
5285     break;
5286   case AArch64::BI__builtin_arm_wfi:
5287     HintID = 3;
5288     break;
5289   case AArch64::BI__builtin_arm_sev:
5290     HintID = 4;
5291     break;
5292   case AArch64::BI__builtin_arm_sevl:
5293     HintID = 5;
5294     break;
5295   }
5296
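  // For example, __builtin_arm_wfi maps to HintID 3 above and lowers to a
  // call of llvm.aarch64.hint with that constant.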
5297   if (HintID != static_cast<unsigned>(-1)) {
5298     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5299     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5300   }
5301
5302   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5303     Value *Address         = EmitScalarExpr(E->getArg(0));
5304     Value *RW              = EmitScalarExpr(E->getArg(1));
5305     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5306     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5307     Value *IsData          = EmitScalarExpr(E->getArg(4));
5308
5309     Value *Locality = nullptr;
5310     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5311       // Temporal fetch; convert the cache level to a locality value.
5312       Locality = llvm::ConstantInt::get(Int32Ty,
5313         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5314     } else {
5315       // Streaming fetch.
5316       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5317     }
5318
5319     // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
5320     // PLDL3STRM or PLDL2STRM.
5321     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5322     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5323   }
5324
5325   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5326     assert((getContext().getTypeSize(E->getType()) == 32) &&
5327            "rbit of unusual size!");
5328     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5329     return Builder.CreateCall(
5330         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5331   }
5332   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5333     assert((getContext().getTypeSize(E->getType()) == 64) &&
5334            "rbit of unusual size!");
5335     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5336     return Builder.CreateCall(
5337         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5338   }
5339
5340   if (BuiltinID == AArch64::BI__clear_cache) {
5341     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5342     const FunctionDecl *FD = E->getDirectCallee();
5343     Value *Ops[2];
5344     for (unsigned i = 0; i < 2; i++)
5345       Ops[i] = EmitScalarExpr(E->getArg(i));
5346     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5347     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5348     StringRef Name = FD->getName();
5349     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5350   }
5351
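  // 128-bit __builtin_arm_ldrex/ldaex: ldxp/ldaxp returns an {i64, i64} pair,
  // which is reassembled below into a single i128 (one half shifted left by
  // 64 and or-ed with the other) and then bitcast to the result type.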
5352   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5353       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5354       getContext().getTypeSize(E->getType()) == 128) {
5355     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5356                                        ? Intrinsic::aarch64_ldaxp
5357                                        : Intrinsic::aarch64_ldxp);
5358
5359     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5360     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5361                                     "ldxp");
5362
5363     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5364     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5365     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5366     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5367     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5368
5369     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5370     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5371     Val = Builder.CreateOr(Val, Val1);
5372     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5373   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5374              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5375     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5376
5377     QualType Ty = E->getType();
5378     llvm::Type *RealResTy = ConvertType(Ty);
5379     llvm::Type *PtrTy = llvm::IntegerType::get(
5380         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5381     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5382
5383     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5384                                        ? Intrinsic::aarch64_ldaxr
5385                                        : Intrinsic::aarch64_ldxr,
5386                                    PtrTy);
5387     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5388
5389     if (RealResTy->isPointerTy())
5390       return Builder.CreateIntToPtr(Val, RealResTy);
5391
5392     llvm::Type *IntResTy = llvm::IntegerType::get(
5393         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5394     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5395     return Builder.CreateBitCast(Val, RealResTy);
5396   }
5397
5398   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5399        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5400       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5401     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5402                                        ? Intrinsic::aarch64_stlxp
5403                                        : Intrinsic::aarch64_stxp);
5404     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5405
5406     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5407     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5408
5409     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5410     llvm::Value *Val = Builder.CreateLoad(Tmp);
5411
5412     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5413     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5414     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5415                                          Int8PtrTy);
5416     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5417   }
5418
5419   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5420       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5421     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5422     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5423
5424     QualType Ty = E->getArg(0)->getType();
5425     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5426                                                  getContext().getTypeSize(Ty));
5427     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5428
5429     if (StoreVal->getType()->isPointerTy())
5430       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5431     else {
5432       llvm::Type *IntTy = llvm::IntegerType::get(
5433           getLLVMContext(),
5434           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5435       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5436       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5437     }
5438
5439     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5440                                        ? Intrinsic::aarch64_stlxr
5441                                        : Intrinsic::aarch64_stxr,
5442                                    StoreAddr->getType());
5443     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5444   }
5445
5446   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5447     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5448     return Builder.CreateCall(F);
5449   }
5450
5451   // CRC32
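  // Note that the 64-bit data forms (crc32d/crc32cd) map to the crc32x/crc32cx
  // intrinsics; the CRC accumulator itself is always 32 bits wide.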
5452   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5453   switch (BuiltinID) {
5454   case AArch64::BI__builtin_arm_crc32b:
5455     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5456   case AArch64::BI__builtin_arm_crc32cb:
5457     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5458   case AArch64::BI__builtin_arm_crc32h:
5459     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5460   case AArch64::BI__builtin_arm_crc32ch:
5461     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5462   case AArch64::BI__builtin_arm_crc32w:
5463     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5464   case AArch64::BI__builtin_arm_crc32cw:
5465     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5466   case AArch64::BI__builtin_arm_crc32d:
5467     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5468   case AArch64::BI__builtin_arm_crc32cd:
5469     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5470   }
5471
5472   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5473     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5474     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5475     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5476
5477     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5478     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5479
5480     return Builder.CreateCall(F, {Arg0, Arg1});
5481   }
5482
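  // Special-register read/write builtins. A minimal usage sketch (assuming an
  // ACLE-style caller and a readable register name):
  //   unsigned long long tick = __builtin_arm_rsr64("cntvct_el0");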
5483   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5484       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5485       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5486       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5487       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5488       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5489
5490     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5491                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5492                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5493
5494     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5495                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5496
5497     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5498                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5499
5500     llvm::Type *ValueType;
5501     llvm::Type *RegisterType = Int64Ty;
5502     if (IsPointerBuiltin) {
5503       ValueType = VoidPtrTy;
5504     } else if (Is64Bit) {
5505       ValueType = Int64Ty;
5506     } else {
5507       ValueType = Int32Ty;
5508     }
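    // The underlying MRS/MSR access is always 64 bits wide on AArch64, so the
    // register type stays i64 and only the value type varies; the helper is
    // expected to insert any narrowing or widening conversions.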
5509
5510     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5511   }
5512
5513   // Find out if any arguments are required to be integer constant
5514   // expressions.
5515   unsigned ICEArguments = 0;
5516   ASTContext::GetBuiltinTypeError Error;
5517   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5518   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5519
5520   llvm::SmallVector<Value*, 4> Ops;
5521   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5522     if ((ICEArguments & (1 << i)) == 0) {
5523       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5524     } else {
5525       // If this is required to be a constant, constant fold it so that we know
5526       // that the generated intrinsic gets a ConstantInt.
5527       llvm::APSInt Result;
5528       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5529       assert(IsConst && "Constant arg isn't actually constant?");
5530       (void)IsConst;
5531       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5532     }
5533   }
5534
5535   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5536   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5537       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5538
5539   if (Builtin) {
5540     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5541     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5542     assert(Result && "SISD intrinsic should have been handled");
5543     return Result;
5544   }
5545
5546   llvm::APSInt Result;
5547   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5548   NeonTypeFlags Type(0);
5549   if (Arg->isIntegerConstantExpr(Result, getContext()))
5550     // Determine the type of this overloaded NEON intrinsic.
5551     Type = NeonTypeFlags(Result.getZExtValue());
5552
5553   bool usgn = Type.isUnsigned();
5554   bool quad = Type.isQuad();
5555
5556   // Handle non-overloaded intrinsics first.
5557   switch (BuiltinID) {
5558   default: break;
5559   case NEON::BI__builtin_neon_vldrq_p128: {
5560     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5561     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5562     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5563     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5564                                      CharUnits::fromQuantity(16));
5565   }
5566   case NEON::BI__builtin_neon_vstrq_p128: {
5567     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5568     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5569     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5570   }
5571   case NEON::BI__builtin_neon_vcvts_u32_f32:
5572   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5573     usgn = true;
5574     // FALL THROUGH
5575   case NEON::BI__builtin_neon_vcvts_s32_f32:
5576   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5577     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5578     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5579     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5580     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5581     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5582     if (usgn)
5583       return Builder.CreateFPToUI(Ops[0], InTy);
5584     return Builder.CreateFPToSI(Ops[0], InTy);
5585   }
5586   case NEON::BI__builtin_neon_vcvts_f32_u32:
5587   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5588     usgn = true;
5589     // FALL THROUGH
5590   case NEON::BI__builtin_neon_vcvts_f32_s32:
5591   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5592     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5593     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5594     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5595     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5596     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5597     if (usgn)
5598       return Builder.CreateUIToFP(Ops[0], FTy);
5599     return Builder.CreateSIToFP(Ops[0], FTy);
5600   }
5601   case NEON::BI__builtin_neon_vpaddd_s64: {
5602     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5603     Value *Vec = EmitScalarExpr(E->getArg(0));
5604     // The vector is v2i64, so make sure it's bitcast to that.
5605     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5606     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5607     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5608     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5609     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5610     // Pairwise addition of a v2i64 into a scalar i64.
5611     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5612   }
5613   case NEON::BI__builtin_neon_vpaddd_f64: {
5614     llvm::Type *Ty =
5615       llvm::VectorType::get(DoubleTy, 2);
5616     Value *Vec = EmitScalarExpr(E->getArg(0));
5617     // The vector is v2f64, so make sure it's bitcast to that.
5618     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5619     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5620     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5621     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5622     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5623     // Pairwise addition of a v2f64 into a scalar f64.
5624     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5625   }
5626   case NEON::BI__builtin_neon_vpadds_f32: {
5627     llvm::Type *Ty =
5628       llvm::VectorType::get(FloatTy, 2);
5629     Value *Vec = EmitScalarExpr(E->getArg(0));
5630     // The vector is v2f32, so make sure it's bitcast to that.
5631     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5632     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5633     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5634     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5635     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5636     // Pairwise addition of a v2f32 into a scalar f32.
5637     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5638   }
5639   case NEON::BI__builtin_neon_vceqzd_s64:
5640   case NEON::BI__builtin_neon_vceqzd_f64:
5641   case NEON::BI__builtin_neon_vceqzs_f32:
5642     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5643     return EmitAArch64CompareBuiltinExpr(
5644         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5645         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5646   case NEON::BI__builtin_neon_vcgezd_s64:
5647   case NEON::BI__builtin_neon_vcgezd_f64:
5648   case NEON::BI__builtin_neon_vcgezs_f32:
5649     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5650     return EmitAArch64CompareBuiltinExpr(
5651         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5652         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5653   case NEON::BI__builtin_neon_vclezd_s64:
5654   case NEON::BI__builtin_neon_vclezd_f64:
5655   case NEON::BI__builtin_neon_vclezs_f32:
5656     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5657     return EmitAArch64CompareBuiltinExpr(
5658         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5659         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5660   case NEON::BI__builtin_neon_vcgtzd_s64:
5661   case NEON::BI__builtin_neon_vcgtzd_f64:
5662   case NEON::BI__builtin_neon_vcgtzs_f32:
5663     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5664     return EmitAArch64CompareBuiltinExpr(
5665         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5666         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5667   case NEON::BI__builtin_neon_vcltzd_s64:
5668   case NEON::BI__builtin_neon_vcltzd_f64:
5669   case NEON::BI__builtin_neon_vcltzs_f32:
5670     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5671     return EmitAArch64CompareBuiltinExpr(
5672         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5673         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5674
5675   case NEON::BI__builtin_neon_vceqzd_u64: {
5676     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5677     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5678     Ops[0] =
5679         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5680     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5681   }
5682   case NEON::BI__builtin_neon_vceqd_f64:
5683   case NEON::BI__builtin_neon_vcled_f64:
5684   case NEON::BI__builtin_neon_vcltd_f64:
5685   case NEON::BI__builtin_neon_vcged_f64:
5686   case NEON::BI__builtin_neon_vcgtd_f64: {
5687     llvm::CmpInst::Predicate P;
5688     switch (BuiltinID) {
5689     default: llvm_unreachable("missing builtin ID in switch!");
5690     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5691     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5692     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5693     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5694     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5695     }
5696     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5697     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5698     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5699     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5700     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5701   }
5702   case NEON::BI__builtin_neon_vceqs_f32:
5703   case NEON::BI__builtin_neon_vcles_f32:
5704   case NEON::BI__builtin_neon_vclts_f32:
5705   case NEON::BI__builtin_neon_vcges_f32:
5706   case NEON::BI__builtin_neon_vcgts_f32: {
5707     llvm::CmpInst::Predicate P;
5708     switch (BuiltinID) {
5709     default: llvm_unreachable("missing builtin ID in switch!");
5710     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5711     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5712     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5713     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5714     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5715     }
5716     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5717     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5718     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5719     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5720     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5721   }
5722   case NEON::BI__builtin_neon_vceqd_s64:
5723   case NEON::BI__builtin_neon_vceqd_u64:
5724   case NEON::BI__builtin_neon_vcgtd_s64:
5725   case NEON::BI__builtin_neon_vcgtd_u64:
5726   case NEON::BI__builtin_neon_vcltd_s64:
5727   case NEON::BI__builtin_neon_vcltd_u64:
5728   case NEON::BI__builtin_neon_vcged_u64:
5729   case NEON::BI__builtin_neon_vcged_s64:
5730   case NEON::BI__builtin_neon_vcled_u64:
5731   case NEON::BI__builtin_neon_vcled_s64: {
5732     llvm::CmpInst::Predicate P;
5733     switch (BuiltinID) {
5734     default: llvm_unreachable("missing builtin ID in switch!");
5735     case NEON::BI__builtin_neon_vceqd_s64:
5736     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5737     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5738     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5739     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5740     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5741     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5742     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5743     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5744     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5745     }
5746     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5747     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5748     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5749     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5750     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5751   }
5752   case NEON::BI__builtin_neon_vtstd_s64:
5753   case NEON::BI__builtin_neon_vtstd_u64: {
5754     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5755     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5756     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5757     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5758     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5759                                 llvm::Constant::getNullValue(Int64Ty));
5760     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5761   }
5762   case NEON::BI__builtin_neon_vset_lane_i8:
5763   case NEON::BI__builtin_neon_vset_lane_i16:
5764   case NEON::BI__builtin_neon_vset_lane_i32:
5765   case NEON::BI__builtin_neon_vset_lane_i64:
5766   case NEON::BI__builtin_neon_vset_lane_f32:
5767   case NEON::BI__builtin_neon_vsetq_lane_i8:
5768   case NEON::BI__builtin_neon_vsetq_lane_i16:
5769   case NEON::BI__builtin_neon_vsetq_lane_i32:
5770   case NEON::BI__builtin_neon_vsetq_lane_i64:
5771   case NEON::BI__builtin_neon_vsetq_lane_f32:
5772     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5773     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5774   case NEON::BI__builtin_neon_vset_lane_f64:
5775     // The vector type needs a cast for the v1f64 variant.
5776     Ops[1] = Builder.CreateBitCast(Ops[1],
5777                                    llvm::VectorType::get(DoubleTy, 1));
5778     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5779     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5780   case NEON::BI__builtin_neon_vsetq_lane_f64:
5781     // The vector type needs a cast for the v2f64 variant.
5782     Ops[1] = Builder.CreateBitCast(Ops[1],
5783         llvm::VectorType::get(DoubleTy, 2));
5784     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5785     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5786
5787   case NEON::BI__builtin_neon_vget_lane_i8:
5788   case NEON::BI__builtin_neon_vdupb_lane_i8:
5789     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5790     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5791                                         "vget_lane");
5792   case NEON::BI__builtin_neon_vgetq_lane_i8:
5793   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5794     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5795     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5796                                         "vgetq_lane");
5797   case NEON::BI__builtin_neon_vget_lane_i16:
5798   case NEON::BI__builtin_neon_vduph_lane_i16:
5799     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5800     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5801                                         "vget_lane");
5802   case NEON::BI__builtin_neon_vgetq_lane_i16:
5803   case NEON::BI__builtin_neon_vduph_laneq_i16:
5804     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5805     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5806                                         "vgetq_lane");
5807   case NEON::BI__builtin_neon_vget_lane_i32:
5808   case NEON::BI__builtin_neon_vdups_lane_i32:
5809     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5810     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5811                                         "vget_lane");
5812   case NEON::BI__builtin_neon_vdups_lane_f32:
5813     Ops[0] = Builder.CreateBitCast(Ops[0],
5814         llvm::VectorType::get(FloatTy, 2));
5815     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5816                                         "vdups_lane");
5817   case NEON::BI__builtin_neon_vgetq_lane_i32:
5818   case NEON::BI__builtin_neon_vdups_laneq_i32:
5819     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5820     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5821                                         "vgetq_lane");
5822   case NEON::BI__builtin_neon_vget_lane_i64:
5823   case NEON::BI__builtin_neon_vdupd_lane_i64:
5824     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5825     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5826                                         "vget_lane");
5827   case NEON::BI__builtin_neon_vdupd_lane_f64:
5828     Ops[0] = Builder.CreateBitCast(Ops[0],
5829         llvm::VectorType::get(DoubleTy, 1));
5830     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5831                                         "vdupd_lane");
5832   case NEON::BI__builtin_neon_vgetq_lane_i64:
5833   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5834     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5835     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5836                                         "vgetq_lane");
5837   case NEON::BI__builtin_neon_vget_lane_f32:
5838     Ops[0] = Builder.CreateBitCast(Ops[0],
5839         llvm::VectorType::get(FloatTy, 2));
5840     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5841                                         "vget_lane");
5842   case NEON::BI__builtin_neon_vget_lane_f64:
5843     Ops[0] = Builder.CreateBitCast(Ops[0],
5844         llvm::VectorType::get(DoubleTy, 1));
5845     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5846                                         "vget_lane");
5847   case NEON::BI__builtin_neon_vgetq_lane_f32:
5848   case NEON::BI__builtin_neon_vdups_laneq_f32:
5849     Ops[0] = Builder.CreateBitCast(Ops[0],
5850         llvm::VectorType::get(FloatTy, 4));
5851     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5852                                         "vgetq_lane");
5853   case NEON::BI__builtin_neon_vgetq_lane_f64:
5854   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5855     Ops[0] = Builder.CreateBitCast(Ops[0],
5856         llvm::VectorType::get(DoubleTy, 2));
5857     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5858                                         "vgetq_lane");
5859   case NEON::BI__builtin_neon_vaddd_s64:
5860   case NEON::BI__builtin_neon_vaddd_u64:
5861     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5862   case NEON::BI__builtin_neon_vsubd_s64:
5863   case NEON::BI__builtin_neon_vsubd_u64:
5864     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5865   case NEON::BI__builtin_neon_vqdmlalh_s16:
5866   case NEON::BI__builtin_neon_vqdmlslh_s16: {
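    // Scalar saturating multiply-accumulate on i16: widen both operands to
    // lane 0 of a <4 x i16> vector, do a vector sqdmull, extract lane 0 of
    // the i32 result, and finish with a scalar sqadd/sqsub.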
5867     SmallVector<Value *, 2> ProductOps;
5868     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5869     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5870     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5871     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5872                           ProductOps, "vqdmlXl");
5873     Constant *CI = ConstantInt::get(SizeTy, 0);
5874     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5875
5876     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5877                                         ? Intrinsic::aarch64_neon_sqadd
5878                                         : Intrinsic::aarch64_neon_sqsub;
5879     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5880   }
5881   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5882     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5883     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5884     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5885                         Ops, "vqshlu_n");
5886   }
5887   case NEON::BI__builtin_neon_vqshld_n_u64:
5888   case NEON::BI__builtin_neon_vqshld_n_s64: {
5889     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5890                                    ? Intrinsic::aarch64_neon_uqshl
5891                                    : Intrinsic::aarch64_neon_sqshl;
5892     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5893     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5894     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5895   }
5896   case NEON::BI__builtin_neon_vrshrd_n_u64:
5897   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5898     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5899                                    ? Intrinsic::aarch64_neon_urshl
5900                                    : Intrinsic::aarch64_neon_srshl;
5901     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5902     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5903     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5904     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5905   }
5906   case NEON::BI__builtin_neon_vrsrad_n_u64:
5907   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5908     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5909                                    ? Intrinsic::aarch64_neon_urshl
5910                                    : Intrinsic::aarch64_neon_srshl;
5911     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5912     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5913     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5914                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5915     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5916   }
5917   case NEON::BI__builtin_neon_vshld_n_s64:
5918   case NEON::BI__builtin_neon_vshld_n_u64: {
5919     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5920     return Builder.CreateShl(
5921         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5922   }
5923   case NEON::BI__builtin_neon_vshrd_n_s64: {
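    // An i64 shift by 64 is not a valid LLVM shift amount, so the immediate
    // is clamped to 63; for an arithmetic right shift this still produces the
    // fully sign-extended value that a shift by the full width would give.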
5924     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5925     return Builder.CreateAShr(
5926         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5927                                                    Amt->getZExtValue())),
5928         "shrd_n");
5929   }
5930   case NEON::BI__builtin_neon_vshrd_n_u64: {
5931     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5932     uint64_t ShiftAmt = Amt->getZExtValue();
5933     // Right-shifting an unsigned value by its size yields 0.
5934     if (ShiftAmt == 64)
5935       return ConstantInt::get(Int64Ty, 0);
5936     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5937                               "shrd_n");
5938   }
5939   case NEON::BI__builtin_neon_vsrad_n_s64: {
5940     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5941     Ops[1] = Builder.CreateAShr(
5942         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5943                                                    Amt->getZExtValue())),
5944         "shrd_n");
5945     return Builder.CreateAdd(Ops[0], Ops[1]);
5946   }
5947   case NEON::BI__builtin_neon_vsrad_n_u64: {
5948     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5949     uint64_t ShiftAmt = Amt->getZExtValue();
5950     // Right-shifting an unsigned value by its size yields 0.
5951     // As Op + 0 = Op, return Ops[0] directly.
5952     if (ShiftAmt == 64)
5953       return Ops[0];
5954     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5955                                 "shrd_n");
5956     return Builder.CreateAdd(Ops[0], Ops[1]);
5957   }
5958   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5959   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5960   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5961   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
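    // The i16 operands are widened to v4i16 so the vector sqdmull intrinsic
    // can be reused; lane 0 of the v4i32 product is then accumulated with
    // sqadd/sqsub.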
5962     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5963                                           "lane");
5964     SmallVector<Value *, 2> ProductOps;
5965     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5966     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5967     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5968     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5969                           ProductOps, "vqdmlXl");
5970     Constant *CI = ConstantInt::get(SizeTy, 0);
5971     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5972     Ops.pop_back();
5973
5974     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5975                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5976                           ? Intrinsic::aarch64_neon_sqadd
5977                           : Intrinsic::aarch64_neon_sqsub;
5978     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5979   }
5980   case NEON::BI__builtin_neon_vqdmlals_s32:
5981   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5982     SmallVector<Value *, 2> ProductOps;
5983     ProductOps.push_back(Ops[1]);
5984     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5985     Ops[1] =
5986         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5987                      ProductOps, "vqdmlXl");
5988
5989     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5990                                         ? Intrinsic::aarch64_neon_sqadd
5991                                         : Intrinsic::aarch64_neon_sqsub;
5992     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5993   }
5994   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5995   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5996   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5997   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5998     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5999                                           "lane");
6000     SmallVector<Value *, 2> ProductOps;
6001     ProductOps.push_back(Ops[1]);
6002     ProductOps.push_back(Ops[2]);
6003     Ops[1] =
6004         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6005                      ProductOps, "vqdmlXl");
6006     Ops.pop_back();
6007
6008     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6009                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6010                           ? Intrinsic::aarch64_neon_sqadd
6011                           : Intrinsic::aarch64_neon_sqsub;
6012     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6013   }
6014   }
6015
6016   llvm::VectorType *VTy = GetNeonType(this, Type);
6017   llvm::Type *Ty = VTy;
6018   if (!Ty)
6019     return nullptr;
6020
6021   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6022   // defer to common code if it's been added to our special map.
6023   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6024                                    AArch64SIMDIntrinsicsProvenSorted);
6025
6026   if (Builtin)
6027     return EmitCommonNeonBuiltinExpr(
6028         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6029         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6030         /*never use addresses*/ Address::invalid(), Address::invalid());
6031
6032   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6033     return V;
6034
6035   unsigned Int;
6036   switch (BuiltinID) {
6037   default: return nullptr;
6038   case NEON::BI__builtin_neon_vbsl_v:
6039   case NEON::BI__builtin_neon_vbslq_v: {
6040     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6041     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6042     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6043     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6044
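    // Bitwise select: (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]), computed on the
    // integer view of the vectors.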
6045     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6046     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6047     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6048     return Builder.CreateBitCast(Ops[0], Ty);
6049   }
6050   case NEON::BI__builtin_neon_vfma_lane_v:
6051   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6052     // The ARM builtins (and instructions) have the addend as the first
6053     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6054     Value *Addend = Ops[0];
6055     Value *Multiplicand = Ops[1];
6056     Value *LaneSource = Ops[2];
6057     Ops[0] = Multiplicand;
6058     Ops[1] = LaneSource;
6059     Ops[2] = Addend;
6060
6061     // Now adjust things to handle the lane access.
6062     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6063       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6064       VTy;
6065     llvm::Constant *cst = cast<Constant>(Ops[3]);
6066     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6067     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6068     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6069
6070     Ops.pop_back();
6071     Int = Intrinsic::fma;
6072     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6073   }
6074   case NEON::BI__builtin_neon_vfma_laneq_v: {
6075     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6076     // v1f64 fma should be mapped to Neon scalar f64 fma
6077     if (VTy && VTy->getElementType() == DoubleTy) {
6078       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6079       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6080       llvm::Type *VTy = GetNeonType(this,
6081         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6082       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6083       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6084       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6085       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6086       return Builder.CreateBitCast(Result, Ty);
6087     }
6088     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6089     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6090     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6091
6092     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6093                                             VTy->getNumElements() * 2);
6094     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6095     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6096                                                cast<ConstantInt>(Ops[3]));
6097     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6098
6099     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6100   }
6101   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6102     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6103     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6104     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6105
6106     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6107     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6108     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6109   }
6110   case NEON::BI__builtin_neon_vfmas_lane_f32:
6111   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6112   case NEON::BI__builtin_neon_vfmad_lane_f64:
6113   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6114     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6115     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6116     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6117     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6118     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6119   }
6120   case NEON::BI__builtin_neon_vmull_v:
6121     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6122     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6123     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6124     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6125   case NEON::BI__builtin_neon_vmax_v:
6126   case NEON::BI__builtin_neon_vmaxq_v:
6127     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6128     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6129     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6130     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6131   case NEON::BI__builtin_neon_vmin_v:
6132   case NEON::BI__builtin_neon_vminq_v:
6133     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6134     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6135     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6136     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6137   case NEON::BI__builtin_neon_vabd_v:
6138   case NEON::BI__builtin_neon_vabdq_v:
6139     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6140     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6141     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6142     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6143   case NEON::BI__builtin_neon_vpadal_v:
6144   case NEON::BI__builtin_neon_vpadalq_v: {
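    // Pairwise add-long and accumulate: widen and pairwise-add the source with
    // [us]addlp, then add the accumulator vector.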
6145     unsigned ArgElts = VTy->getNumElements();
6146     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6147     unsigned BitWidth = EltTy->getBitWidth();
6148     llvm::Type *ArgTy = llvm::VectorType::get(
6149         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6150     llvm::Type* Tys[2] = { VTy, ArgTy };
6151     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6152     SmallVector<llvm::Value*, 1> TmpOps;
6153     TmpOps.push_back(Ops[1]);
6154     Function *F = CGM.getIntrinsic(Int, Tys);
6155     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6156     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6157     return Builder.CreateAdd(tmp, addend);
6158   }
6159   case NEON::BI__builtin_neon_vpmin_v:
6160   case NEON::BI__builtin_neon_vpminq_v:
6161     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6162     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6163     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6164     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6165   case NEON::BI__builtin_neon_vpmax_v:
6166   case NEON::BI__builtin_neon_vpmaxq_v:
6167     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6168     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6169     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6170     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6171   case NEON::BI__builtin_neon_vminnm_v:
6172   case NEON::BI__builtin_neon_vminnmq_v:
6173     Int = Intrinsic::aarch64_neon_fminnm;
6174     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6175   case NEON::BI__builtin_neon_vmaxnm_v:
6176   case NEON::BI__builtin_neon_vmaxnmq_v:
6177     Int = Intrinsic::aarch64_neon_fmaxnm;
6178     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6179   case NEON::BI__builtin_neon_vrecpss_f32: {
6180     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6181     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6182                         Ops, "vrecps");
6183   }
6184   case NEON::BI__builtin_neon_vrecpsd_f64: {
6185     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6186     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6187                         Ops, "vrecps");
6188   }
6189   case NEON::BI__builtin_neon_vqshrun_n_v:
6190     Int = Intrinsic::aarch64_neon_sqshrun;
6191     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6192   case NEON::BI__builtin_neon_vqrshrun_n_v:
6193     Int = Intrinsic::aarch64_neon_sqrshrun;
6194     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6195   case NEON::BI__builtin_neon_vqshrn_n_v:
6196     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6197     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6198   case NEON::BI__builtin_neon_vrshrn_n_v:
6199     Int = Intrinsic::aarch64_neon_rshrn;
6200     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6201   case NEON::BI__builtin_neon_vqrshrn_n_v:
6202     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6204   case NEON::BI__builtin_neon_vrnda_v:
6205   case NEON::BI__builtin_neon_vrndaq_v: {
6206     Int = Intrinsic::round;
6207     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6208   }
6209   case NEON::BI__builtin_neon_vrndi_v:
6210   case NEON::BI__builtin_neon_vrndiq_v: {
6211     Int = Intrinsic::nearbyint;
6212     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6213   }
6214   case NEON::BI__builtin_neon_vrndm_v:
6215   case NEON::BI__builtin_neon_vrndmq_v: {
6216     Int = Intrinsic::floor;
6217     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6218   }
6219   case NEON::BI__builtin_neon_vrndn_v:
6220   case NEON::BI__builtin_neon_vrndnq_v: {
6221     Int = Intrinsic::aarch64_neon_frintn;
6222     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6223   }
6224   case NEON::BI__builtin_neon_vrndp_v:
6225   case NEON::BI__builtin_neon_vrndpq_v: {
6226     Int = Intrinsic::ceil;
6227     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6228   }
6229   case NEON::BI__builtin_neon_vrndx_v:
6230   case NEON::BI__builtin_neon_vrndxq_v: {
6231     Int = Intrinsic::rint;
6232     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6233   }
6234   case NEON::BI__builtin_neon_vrnd_v:
6235   case NEON::BI__builtin_neon_vrndq_v: {
6236     Int = Intrinsic::trunc;
6237     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6238   }
6239   case NEON::BI__builtin_neon_vceqz_v:
6240   case NEON::BI__builtin_neon_vceqzq_v:
6241     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6242                                          ICmpInst::ICMP_EQ, "vceqz");
6243   case NEON::BI__builtin_neon_vcgez_v:
6244   case NEON::BI__builtin_neon_vcgezq_v:
6245     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6246                                          ICmpInst::ICMP_SGE, "vcgez");
6247   case NEON::BI__builtin_neon_vclez_v:
6248   case NEON::BI__builtin_neon_vclezq_v:
6249     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6250                                          ICmpInst::ICMP_SLE, "vclez");
6251   case NEON::BI__builtin_neon_vcgtz_v:
6252   case NEON::BI__builtin_neon_vcgtzq_v:
6253     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6254                                          ICmpInst::ICMP_SGT, "vcgtz");
6255   case NEON::BI__builtin_neon_vcltz_v:
6256   case NEON::BI__builtin_neon_vcltzq_v:
6257     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6258                                          ICmpInst::ICMP_SLT, "vcltz");
6259   case NEON::BI__builtin_neon_vcvt_f64_v:
6260   case NEON::BI__builtin_neon_vcvtq_f64_v:
6261     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6262     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6263     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6264                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6265   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6266     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6267            "unexpected vcvt_f64_f32 builtin");
6268     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6269     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6270
6271     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6272   }
6273   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6274     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6275            "unexpected vcvt_f32_f64 builtin");
6276     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6277     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6278
6279     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6280   }
6281   case NEON::BI__builtin_neon_vcvt_s32_v:
6282   case NEON::BI__builtin_neon_vcvt_u32_v:
6283   case NEON::BI__builtin_neon_vcvt_s64_v:
6284   case NEON::BI__builtin_neon_vcvt_u64_v:
6285   case NEON::BI__builtin_neon_vcvtq_s32_v:
6286   case NEON::BI__builtin_neon_vcvtq_u32_v:
6287   case NEON::BI__builtin_neon_vcvtq_s64_v:
6288   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6289     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6290     if (usgn)
6291       return Builder.CreateFPToUI(Ops[0], Ty);
6292     return Builder.CreateFPToSI(Ops[0], Ty);
6293   }
6294   case NEON::BI__builtin_neon_vcvta_s32_v:
6295   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6296   case NEON::BI__builtin_neon_vcvta_u32_v:
6297   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6298   case NEON::BI__builtin_neon_vcvta_s64_v:
6299   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6300   case NEON::BI__builtin_neon_vcvta_u64_v:
6301   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6302     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6303     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6304     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6305   }
6306   case NEON::BI__builtin_neon_vcvtm_s32_v:
6307   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6308   case NEON::BI__builtin_neon_vcvtm_u32_v:
6309   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6310   case NEON::BI__builtin_neon_vcvtm_s64_v:
6311   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6312   case NEON::BI__builtin_neon_vcvtm_u64_v:
6313   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6314     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6315     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6316     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6317   }
6318   case NEON::BI__builtin_neon_vcvtn_s32_v:
6319   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6320   case NEON::BI__builtin_neon_vcvtn_u32_v:
6321   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6322   case NEON::BI__builtin_neon_vcvtn_s64_v:
6323   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6324   case NEON::BI__builtin_neon_vcvtn_u64_v:
6325   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6326     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6327     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6328     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6329   }
6330   case NEON::BI__builtin_neon_vcvtp_s32_v:
6331   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6332   case NEON::BI__builtin_neon_vcvtp_u32_v:
6333   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6334   case NEON::BI__builtin_neon_vcvtp_s64_v:
6335   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6336   case NEON::BI__builtin_neon_vcvtp_u64_v:
6337   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6338     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6339     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6340     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6341   }
6342   case NEON::BI__builtin_neon_vmulx_v:
6343   case NEON::BI__builtin_neon_vmulxq_v: {
6344     Int = Intrinsic::aarch64_neon_fmulx;
6345     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6346   }
6347   case NEON::BI__builtin_neon_vmul_lane_v:
6348   case NEON::BI__builtin_neon_vmul_laneq_v: {
6349     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6350     bool Quad = false;
6351     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6352       Quad = true;
6353     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6354     llvm::Type *VTy = GetNeonType(this,
6355       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6356     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6357     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6358     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6359     return Builder.CreateBitCast(Result, Ty);
6360   }
6361   case NEON::BI__builtin_neon_vnegd_s64:
6362     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6363   case NEON::BI__builtin_neon_vpmaxnm_v:
6364   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6365     Int = Intrinsic::aarch64_neon_fmaxnmp;
6366     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6367   }
6368   case NEON::BI__builtin_neon_vpminnm_v:
6369   case NEON::BI__builtin_neon_vpminnmq_v: {
6370     Int = Intrinsic::aarch64_neon_fminnmp;
6371     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6372   }
6373   case NEON::BI__builtin_neon_vsqrt_v:
6374   case NEON::BI__builtin_neon_vsqrtq_v: {
6375     Int = Intrinsic::sqrt;
6376     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6377     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6378   }
6379   case NEON::BI__builtin_neon_vrbit_v:
6380   case NEON::BI__builtin_neon_vrbitq_v: {
6381     Int = Intrinsic::aarch64_neon_rbit;
6382     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6383   }
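  // The across-vector reduction intrinsics below return an i32; the result is
  // truncated back to the element width where the builtin's return type is
  // narrower.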
6384   case NEON::BI__builtin_neon_vaddv_u8:
6385     // FIXME: These are handled by the AArch64 scalar code.
6386     usgn = true;
6387     // FALLTHROUGH
6388   case NEON::BI__builtin_neon_vaddv_s8: {
6389     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6390     Ty = Int32Ty;
6391     VTy = llvm::VectorType::get(Int8Ty, 8);
6392     llvm::Type *Tys[2] = { Ty, VTy };
6393     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6394     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6395     return Builder.CreateTrunc(Ops[0], Int8Ty);
6396   }
6397   case NEON::BI__builtin_neon_vaddv_u16:
6398     usgn = true;
6399     // FALLTHROUGH
6400   case NEON::BI__builtin_neon_vaddv_s16: {
6401     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6402     Ty = Int32Ty;
6403     VTy = llvm::VectorType::get(Int16Ty, 4);
6404     llvm::Type *Tys[2] = { Ty, VTy };
6405     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6406     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6407     return Builder.CreateTrunc(Ops[0], Int16Ty);
6408   }
6409   case NEON::BI__builtin_neon_vaddvq_u8:
6410     usgn = true;
6411     // FALLTHROUGH
6412   case NEON::BI__builtin_neon_vaddvq_s8: {
6413     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6414     Ty = Int32Ty;
6415     VTy = llvm::VectorType::get(Int8Ty, 16);
6416     llvm::Type *Tys[2] = { Ty, VTy };
6417     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6418     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6419     return Builder.CreateTrunc(Ops[0], Int8Ty);
6420   }
6421   case NEON::BI__builtin_neon_vaddvq_u16:
6422     usgn = true;
6423     // FALLTHROUGH
6424   case NEON::BI__builtin_neon_vaddvq_s16: {
6425     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6426     Ty = Int32Ty;
6427     VTy = llvm::VectorType::get(Int16Ty, 8);
6428     llvm::Type *Tys[2] = { Ty, VTy };
6429     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6430     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6431     return Builder.CreateTrunc(Ops[0], Int16Ty);
6432   }
6433   case NEON::BI__builtin_neon_vmaxv_u8: {
6434     Int = Intrinsic::aarch64_neon_umaxv;
6435     Ty = Int32Ty;
6436     VTy = llvm::VectorType::get(Int8Ty, 8);
6437     llvm::Type *Tys[2] = { Ty, VTy };
6438     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6439     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6440     return Builder.CreateTrunc(Ops[0], Int8Ty);
6441   }
6442   case NEON::BI__builtin_neon_vmaxv_u16: {
6443     Int = Intrinsic::aarch64_neon_umaxv;
6444     Ty = Int32Ty;
6445     VTy = llvm::VectorType::get(Int16Ty, 4);
6446     llvm::Type *Tys[2] = { Ty, VTy };
6447     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6448     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6449     return Builder.CreateTrunc(Ops[0], Int16Ty);
6450   }
6451   case NEON::BI__builtin_neon_vmaxvq_u8: {
6452     Int = Intrinsic::aarch64_neon_umaxv;
6453     Ty = Int32Ty;
6454     VTy = llvm::VectorType::get(Int8Ty, 16);
6455     llvm::Type *Tys[2] = { Ty, VTy };
6456     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6457     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6458     return Builder.CreateTrunc(Ops[0], Int8Ty);
6459   }
6460   case NEON::BI__builtin_neon_vmaxvq_u16: {
6461     Int = Intrinsic::aarch64_neon_umaxv;
6462     Ty = Int32Ty;
6463     VTy = llvm::VectorType::get(Int16Ty, 8);
6464     llvm::Type *Tys[2] = { Ty, VTy };
6465     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6466     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6467     return Builder.CreateTrunc(Ops[0], Int16Ty);
6468   }
6469   case NEON::BI__builtin_neon_vmaxv_s8: {
6470     Int = Intrinsic::aarch64_neon_smaxv;
6471     Ty = Int32Ty;
6472     VTy = llvm::VectorType::get(Int8Ty, 8);
6473     llvm::Type *Tys[2] = { Ty, VTy };
6474     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6475     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6476     return Builder.CreateTrunc(Ops[0], Int8Ty);
6477   }
6478   case NEON::BI__builtin_neon_vmaxv_s16: {
6479     Int = Intrinsic::aarch64_neon_smaxv;
6480     Ty = Int32Ty;
6481     VTy = llvm::VectorType::get(Int16Ty, 4);
6482     llvm::Type *Tys[2] = { Ty, VTy };
6483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6484     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6485     return Builder.CreateTrunc(Ops[0], Int16Ty);
6486   }
6487   case NEON::BI__builtin_neon_vmaxvq_s8: {
6488     Int = Intrinsic::aarch64_neon_smaxv;
6489     Ty = Int32Ty;
6490     VTy = llvm::VectorType::get(Int8Ty, 16);
6491     llvm::Type *Tys[2] = { Ty, VTy };
6492     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6493     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6494     return Builder.CreateTrunc(Ops[0], Int8Ty);
6495   }
6496   case NEON::BI__builtin_neon_vmaxvq_s16: {
6497     Int = Intrinsic::aarch64_neon_smaxv;
6498     Ty = Int32Ty;
6499     VTy = llvm::VectorType::get(Int16Ty, 8);
6500     llvm::Type *Tys[2] = { Ty, VTy };
6501     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6502     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6503     return Builder.CreateTrunc(Ops[0], Int16Ty);
6504   }
6505   case NEON::BI__builtin_neon_vminv_u8: {
6506     Int = Intrinsic::aarch64_neon_uminv;
6507     Ty = Int32Ty;
6508     VTy = llvm::VectorType::get(Int8Ty, 8);
6509     llvm::Type *Tys[2] = { Ty, VTy };
6510     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6511     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6512     return Builder.CreateTrunc(Ops[0], Int8Ty);
6513   }
6514   case NEON::BI__builtin_neon_vminv_u16: {
6515     Int = Intrinsic::aarch64_neon_uminv;
6516     Ty = Int32Ty;
6517     VTy = llvm::VectorType::get(Int16Ty, 4);
6518     llvm::Type *Tys[2] = { Ty, VTy };
6519     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6520     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6521     return Builder.CreateTrunc(Ops[0], Int16Ty);
6522   }
6523   case NEON::BI__builtin_neon_vminvq_u8: {
6524     Int = Intrinsic::aarch64_neon_uminv;
6525     Ty = Int32Ty;
6526     VTy = llvm::VectorType::get(Int8Ty, 16);
6527     llvm::Type *Tys[2] = { Ty, VTy };
6528     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6529     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6530     return Builder.CreateTrunc(Ops[0], Int8Ty);
6531   }
6532   case NEON::BI__builtin_neon_vminvq_u16: {
6533     Int = Intrinsic::aarch64_neon_uminv;
6534     Ty = Int32Ty;
6535     VTy = llvm::VectorType::get(Int16Ty, 8);
6536     llvm::Type *Tys[2] = { Ty, VTy };
6537     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6538     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6539     return Builder.CreateTrunc(Ops[0], Int16Ty);
6540   }
6541   case NEON::BI__builtin_neon_vminv_s8: {
6542     Int = Intrinsic::aarch64_neon_sminv;
6543     Ty = Int32Ty;
6544     VTy = llvm::VectorType::get(Int8Ty, 8);
6545     llvm::Type *Tys[2] = { Ty, VTy };
6546     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6547     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6548     return Builder.CreateTrunc(Ops[0], Int8Ty);
6549   }
6550   case NEON::BI__builtin_neon_vminv_s16: {
6551     Int = Intrinsic::aarch64_neon_sminv;
6552     Ty = Int32Ty;
6553     VTy = llvm::VectorType::get(Int16Ty, 4);
6554     llvm::Type *Tys[2] = { Ty, VTy };
6555     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6556     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6557     return Builder.CreateTrunc(Ops[0], Int16Ty);
6558   }
6559   case NEON::BI__builtin_neon_vminvq_s8: {
6560     Int = Intrinsic::aarch64_neon_sminv;
6561     Ty = Int32Ty;
6562     VTy = llvm::VectorType::get(Int8Ty, 16);
6563     llvm::Type *Tys[2] = { Ty, VTy };
6564     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6565     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6566     return Builder.CreateTrunc(Ops[0], Int8Ty);
6567   }
6568   case NEON::BI__builtin_neon_vminvq_s16: {
6569     Int = Intrinsic::aarch64_neon_sminv;
6570     Ty = Int32Ty;
6571     VTy = llvm::VectorType::get(Int16Ty, 8);
6572     llvm::Type *Tys[2] = { Ty, VTy };
6573     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6574     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6575     return Builder.CreateTrunc(Ops[0], Int16Ty);
6576   }
6577   case NEON::BI__builtin_neon_vmul_n_f64: {
6578     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6579     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6580     return Builder.CreateFMul(Ops[0], RHS);
6581   }
6582   case NEON::BI__builtin_neon_vaddlv_u8: {
6583     Int = Intrinsic::aarch64_neon_uaddlv;
6584     Ty = Int32Ty;
6585     VTy = llvm::VectorType::get(Int8Ty, 8);
6586     llvm::Type *Tys[2] = { Ty, VTy };
6587     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6588     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6589     return Builder.CreateTrunc(Ops[0], Int16Ty);
6590   }
6591   case NEON::BI__builtin_neon_vaddlv_u16: {
6592     Int = Intrinsic::aarch64_neon_uaddlv;
6593     Ty = Int32Ty;
6594     VTy = llvm::VectorType::get(Int16Ty, 4);
6595     llvm::Type *Tys[2] = { Ty, VTy };
6596     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6597     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6598   }
6599   case NEON::BI__builtin_neon_vaddlvq_u8: {
6600     Int = Intrinsic::aarch64_neon_uaddlv;
6601     Ty = Int32Ty;
6602     VTy = llvm::VectorType::get(Int8Ty, 16);
6603     llvm::Type *Tys[2] = { Ty, VTy };
6604     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6605     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6606     return Builder.CreateTrunc(Ops[0], Int16Ty);
6607   }
6608   case NEON::BI__builtin_neon_vaddlvq_u16: {
6609     Int = Intrinsic::aarch64_neon_uaddlv;
6610     Ty = Int32Ty;
6611     VTy = llvm::VectorType::get(Int16Ty, 8);
6612     llvm::Type *Tys[2] = { Ty, VTy };
6613     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6614     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6615   }
6616   case NEON::BI__builtin_neon_vaddlv_s8: {
6617     Int = Intrinsic::aarch64_neon_saddlv;
6618     Ty = Int32Ty;
6619     VTy = llvm::VectorType::get(Int8Ty, 8);
6620     llvm::Type *Tys[2] = { Ty, VTy };
6621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6622     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6623     return Builder.CreateTrunc(Ops[0], Int16Ty);
6624   }
6625   case NEON::BI__builtin_neon_vaddlv_s16: {
6626     Int = Intrinsic::aarch64_neon_saddlv;
6627     Ty = Int32Ty;
6628     VTy = llvm::VectorType::get(Int16Ty, 4);
6629     llvm::Type *Tys[2] = { Ty, VTy };
6630     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6631     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6632   }
6633   case NEON::BI__builtin_neon_vaddlvq_s8: {
6634     Int = Intrinsic::aarch64_neon_saddlv;
6635     Ty = Int32Ty;
6636     VTy = llvm::VectorType::get(Int8Ty, 16);
6637     llvm::Type *Tys[2] = { Ty, VTy };
6638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6639     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6640     return Builder.CreateTrunc(Ops[0], Int16Ty);
6641   }
6642   case NEON::BI__builtin_neon_vaddlvq_s16: {
6643     Int = Intrinsic::aarch64_neon_saddlv;
6644     Ty = Int32Ty;
6645     VTy = llvm::VectorType::get(Int16Ty, 8);
6646     llvm::Type *Tys[2] = { Ty, VTy };
6647     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6648     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6649   }
6650   case NEON::BI__builtin_neon_vsri_n_v:
6651   case NEON::BI__builtin_neon_vsriq_n_v: {
6652     Int = Intrinsic::aarch64_neon_vsri;
6653     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6654     return EmitNeonCall(Intrin, Ops, "vsri_n");
6655   }
6656   case NEON::BI__builtin_neon_vsli_n_v:
6657   case NEON::BI__builtin_neon_vsliq_n_v: {
6658     Int = Intrinsic::aarch64_neon_vsli;
6659     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6660     return EmitNeonCall(Intrin, Ops, "vsli_n");
6661   }
6662   case NEON::BI__builtin_neon_vsra_n_v:
6663   case NEON::BI__builtin_neon_vsraq_n_v:
6664     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6665     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6666     return Builder.CreateAdd(Ops[0], Ops[1]);
6667   case NEON::BI__builtin_neon_vrsra_n_v:
6668   case NEON::BI__builtin_neon_vrsraq_n_v: {
6669     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6670     SmallVector<llvm::Value*,2> TmpOps;
6671     TmpOps.push_back(Ops[1]);
6672     TmpOps.push_back(Ops[2]);
6673     Function* F = CGM.getIntrinsic(Int, Ty);
6674     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6675     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6676     return Builder.CreateAdd(Ops[0], tmp);
6677   }
6678     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6679     // of an Align parameter here.
6680   case NEON::BI__builtin_neon_vld1_x2_v:
6681   case NEON::BI__builtin_neon_vld1q_x2_v:
6682   case NEON::BI__builtin_neon_vld1_x3_v:
6683   case NEON::BI__builtin_neon_vld1q_x3_v:
6684   case NEON::BI__builtin_neon_vld1_x4_v:
6685   case NEON::BI__builtin_neon_vld1q_x4_v: {
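    // The ld1xN intrinsics return an aggregate of N vectors; store it through
    // the result pointer in Ops[0].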
6686     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6687     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6688     llvm::Type *Tys[2] = { VTy, PTy };
6689     unsigned Int;
6690     switch (BuiltinID) {
6691     case NEON::BI__builtin_neon_vld1_x2_v:
6692     case NEON::BI__builtin_neon_vld1q_x2_v:
6693       Int = Intrinsic::aarch64_neon_ld1x2;
6694       break;
6695     case NEON::BI__builtin_neon_vld1_x3_v:
6696     case NEON::BI__builtin_neon_vld1q_x3_v:
6697       Int = Intrinsic::aarch64_neon_ld1x3;
6698       break;
6699     case NEON::BI__builtin_neon_vld1_x4_v:
6700     case NEON::BI__builtin_neon_vld1q_x4_v:
6701       Int = Intrinsic::aarch64_neon_ld1x4;
6702       break;
6703     }
6704     Function *F = CGM.getIntrinsic(Int, Tys);
6705     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6706     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6707     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6708     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6709   }
6710   case NEON::BI__builtin_neon_vst1_x2_v:
6711   case NEON::BI__builtin_neon_vst1q_x2_v:
6712   case NEON::BI__builtin_neon_vst1_x3_v:
6713   case NEON::BI__builtin_neon_vst1q_x3_v:
6714   case NEON::BI__builtin_neon_vst1_x4_v:
6715   case NEON::BI__builtin_neon_vst1q_x4_v: {
6716     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6717     llvm::Type *Tys[2] = { VTy, PTy };
6718     unsigned Int;
6719     switch (BuiltinID) {
6720     case NEON::BI__builtin_neon_vst1_x2_v:
6721     case NEON::BI__builtin_neon_vst1q_x2_v:
6722       Int = Intrinsic::aarch64_neon_st1x2;
6723       break;
6724     case NEON::BI__builtin_neon_vst1_x3_v:
6725     case NEON::BI__builtin_neon_vst1q_x3_v:
6726       Int = Intrinsic::aarch64_neon_st1x3;
6727       break;
6728     case NEON::BI__builtin_neon_vst1_x4_v:
6729     case NEON::BI__builtin_neon_vst1q_x4_v:
6730       Int = Intrinsic::aarch64_neon_st1x4;
6731       break;
6732     }
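    // The st1xN intrinsics take the data vectors first and the address last,
    // so rotate the pointer operand from the front to the back.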
6733     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6734     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6735   }
6736   case NEON::BI__builtin_neon_vld1_v:
6737   case NEON::BI__builtin_neon_vld1q_v: {
6738     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6739     auto Alignment = CharUnits::fromQuantity(
6740         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6741     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6742   }
6743   case NEON::BI__builtin_neon_vst1_v:
6744   case NEON::BI__builtin_neon_vst1q_v:
6745     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6746     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6747     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6748   case NEON::BI__builtin_neon_vld1_lane_v:
6749   case NEON::BI__builtin_neon_vld1q_lane_v: {
6750     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6751     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6752     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6753     auto Alignment = CharUnits::fromQuantity(
6754         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6755     Ops[0] =
6756         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6757     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6758   }
6759   case NEON::BI__builtin_neon_vld1_dup_v:
6760   case NEON::BI__builtin_neon_vld1q_dup_v: {
6761     Value *V = UndefValue::get(Ty);
6762     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6763     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6764     auto Alignment = CharUnits::fromQuantity(
6765         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6766     Ops[0] =
6767         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6768     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6769     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6770     return EmitNeonSplat(Ops[0], CI);
6771   }
6772   case NEON::BI__builtin_neon_vst1_lane_v:
6773   case NEON::BI__builtin_neon_vst1q_lane_v:
6774     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6775     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6776     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6777     return Builder.CreateDefaultAlignedStore(Ops[1],
6778                                              Builder.CreateBitCast(Ops[0], Ty));
6779   case NEON::BI__builtin_neon_vld2_v:
6780   case NEON::BI__builtin_neon_vld2q_v: {
6781     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6782     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6783     llvm::Type *Tys[2] = { VTy, PTy };
6784     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6785     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6786     Ops[0] = Builder.CreateBitCast(Ops[0],
6787                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6788     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6789   }
6790   case NEON::BI__builtin_neon_vld3_v:
6791   case NEON::BI__builtin_neon_vld3q_v: {
6792     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6793     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6794     llvm::Type *Tys[2] = { VTy, PTy };
6795     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6796     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6797     Ops[0] = Builder.CreateBitCast(Ops[0],
6798                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6799     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6800   }
6801   case NEON::BI__builtin_neon_vld4_v:
6802   case NEON::BI__builtin_neon_vld4q_v: {
6803     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6804     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6805     llvm::Type *Tys[2] = { VTy, PTy };
6806     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6807     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6808     Ops[0] = Builder.CreateBitCast(Ops[0],
6809                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6810     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6811   }
6812   case NEON::BI__builtin_neon_vld2_dup_v:
6813   case NEON::BI__builtin_neon_vld2q_dup_v: {
6814     llvm::Type *PTy =
6815       llvm::PointerType::getUnqual(VTy->getElementType());
6816     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6817     llvm::Type *Tys[2] = { VTy, PTy };
6818     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6819     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6820     Ops[0] = Builder.CreateBitCast(Ops[0],
6821                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6822     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6823   }
6824   case NEON::BI__builtin_neon_vld3_dup_v:
6825   case NEON::BI__builtin_neon_vld3q_dup_v: {
6826     llvm::Type *PTy =
6827       llvm::PointerType::getUnqual(VTy->getElementType());
6828     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6829     llvm::Type *Tys[2] = { VTy, PTy };
6830     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6831     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6832     Ops[0] = Builder.CreateBitCast(Ops[0],
6833                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6834     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6835   }
6836   case NEON::BI__builtin_neon_vld4_dup_v:
6837   case NEON::BI__builtin_neon_vld4q_dup_v: {
6838     llvm::Type *PTy =
6839       llvm::PointerType::getUnqual(VTy->getElementType());
6840     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6841     llvm::Type *Tys[2] = { VTy, PTy };
6842     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6843     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6844     Ops[0] = Builder.CreateBitCast(Ops[0],
6845                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6846     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6847   }
6848   case NEON::BI__builtin_neon_vld2_lane_v:
6849   case NEON::BI__builtin_neon_vld2q_lane_v: {
6850     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6851     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
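    // ld2lane expects (vec, vec, lane, ptr); move the pointer operand (Ops[1])
    // to the end, then pass everything after the result address in Ops[0].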
6852     Ops.push_back(Ops[1]);
6853     Ops.erase(Ops.begin()+1);
6854     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6855     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6856     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6857     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6858     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6859     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6860     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6861   }
6862   case NEON::BI__builtin_neon_vld3_lane_v:
6863   case NEON::BI__builtin_neon_vld3q_lane_v: {
6864     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6865     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6866     Ops.push_back(Ops[1]);
6867     Ops.erase(Ops.begin()+1);
6868     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6869     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6870     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6871     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6872     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6873     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6874     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6875     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6876   }
6877   case NEON::BI__builtin_neon_vld4_lane_v:
6878   case NEON::BI__builtin_neon_vld4q_lane_v: {
6879     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6880     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6881     Ops.push_back(Ops[1]);
6882     Ops.erase(Ops.begin()+1);
6883     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6884     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6885     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6886     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6887     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6888     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6889     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6890     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6891     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6892   }
6893   case NEON::BI__builtin_neon_vst2_v:
6894   case NEON::BI__builtin_neon_vst2q_v: {
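    // The stN intrinsics take the address operand last, so move it from the
    // front of the operand list to the back.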
6895     Ops.push_back(Ops[0]);
6896     Ops.erase(Ops.begin());
6897     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6898     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6899                         Ops, "");
6900   }
6901   case NEON::BI__builtin_neon_vst2_lane_v:
6902   case NEON::BI__builtin_neon_vst2q_lane_v: {
6903     Ops.push_back(Ops[0]);
6904     Ops.erase(Ops.begin());
6905     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6906     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6907     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6908                         Ops, "");
6909   }
6910   case NEON::BI__builtin_neon_vst3_v:
6911   case NEON::BI__builtin_neon_vst3q_v: {
6912     Ops.push_back(Ops[0]);
6913     Ops.erase(Ops.begin());
6914     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6916                         Ops, "");
6917   }
6918   case NEON::BI__builtin_neon_vst3_lane_v:
6919   case NEON::BI__builtin_neon_vst3q_lane_v: {
6920     Ops.push_back(Ops[0]);
6921     Ops.erase(Ops.begin());
6922     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6923     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6925                         Ops, "");
6926   }
6927   case NEON::BI__builtin_neon_vst4_v:
6928   case NEON::BI__builtin_neon_vst4q_v: {
6929     Ops.push_back(Ops[0]);
6930     Ops.erase(Ops.begin());
6931     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6933                         Ops, "");
6934   }
6935   case NEON::BI__builtin_neon_vst4_lane_v:
6936   case NEON::BI__builtin_neon_vst4q_lane_v: {
6937     Ops.push_back(Ops[0]);
6938     Ops.erase(Ops.begin());
6939     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6940     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6941     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6942                         Ops, "");
6943   }
6944   case NEON::BI__builtin_neon_vtrn_v:
6945   case NEON::BI__builtin_neon_vtrnq_v: {
6946     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6947     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6948     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6949     Value *SV = nullptr;
6950
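    // vtrn writes two results through the pointer in Ops[0]: for vi = 0/1 a
    // shufflevector picks the even/odd lanes of the two inputs and interleaves
    // them.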
6951     for (unsigned vi = 0; vi != 2; ++vi) {
6952       SmallVector<uint32_t, 16> Indices;
6953       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6954         Indices.push_back(i+vi);
6955         Indices.push_back(i+e+vi);
6956       }
6957       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6958       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6959       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6960     }
6961     return SV;
6962   }
6963   case NEON::BI__builtin_neon_vuzp_v:
6964   case NEON::BI__builtin_neon_vuzpq_v: {
6965     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6966     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6967     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6968     Value *SV = nullptr;
6969
6970     for (unsigned vi = 0; vi != 2; ++vi) {
6971       SmallVector<uint32_t, 16> Indices;
6972       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6973         Indices.push_back(2*i+vi);
6974
6975       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6976       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6977       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6978     }
6979     return SV;
6980   }
6981   case NEON::BI__builtin_neon_vzip_v:
6982   case NEON::BI__builtin_neon_vzipq_v: {
6983     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6984     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6985     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6986     Value *SV = nullptr;
6987
6988     for (unsigned vi = 0; vi != 2; ++vi) {
6989       SmallVector<uint32_t, 16> Indices;
6990       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6991         Indices.push_back((i + vi*e) >> 1);
6992         Indices.push_back(((i + vi*e) >> 1)+e);
6993       }
6994       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6995       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6996       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6997     }
6998     return SV;
6999   }
7000   case NEON::BI__builtin_neon_vqtbl1q_v: {
7001     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7002                         Ops, "vtbl1");
7003   }
7004   case NEON::BI__builtin_neon_vqtbl2q_v: {
7005     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7006                         Ops, "vtbl2");
7007   }
7008   case NEON::BI__builtin_neon_vqtbl3q_v: {
7009     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7010                         Ops, "vtbl3");
7011   }
7012   case NEON::BI__builtin_neon_vqtbl4q_v: {
7013     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7014                         Ops, "vtbl4");
7015   }
7016   case NEON::BI__builtin_neon_vqtbx1q_v: {
7017     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7018                         Ops, "vtbx1");
7019   }
7020   case NEON::BI__builtin_neon_vqtbx2q_v: {
7021     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7022                         Ops, "vtbx2");
7023   }
7024   case NEON::BI__builtin_neon_vqtbx3q_v: {
7025     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7026                         Ops, "vtbx3");
7027   }
7028   case NEON::BI__builtin_neon_vqtbx4q_v: {
7029     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7030                         Ops, "vtbx4");
7031   }
7032   case NEON::BI__builtin_neon_vsqadd_v:
7033   case NEON::BI__builtin_neon_vsqaddq_v: {
7034     Int = Intrinsic::aarch64_neon_usqadd;
7035     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7036   }
7037   case NEON::BI__builtin_neon_vuqadd_v:
7038   case NEON::BI__builtin_neon_vuqaddq_v: {
7039     Int = Intrinsic::aarch64_neon_suqadd;
7040     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7041   }
7042   }
7043 }
7044
7045 llvm::Value *CodeGenFunction::
7046 BuildVector(ArrayRef<llvm::Value*> Ops) {
7047   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7048          "Not a power-of-two sized vector!");
7049   bool AllConstants = true;
7050   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7051     AllConstants &= isa<Constant>(Ops[i]);
7052
7053   // If this is a constant vector, create a ConstantVector.
7054   if (AllConstants) {
7055     SmallVector<llvm::Constant*, 16> CstOps;
7056     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7057       CstOps.push_back(cast<Constant>(Ops[i]));
7058     return llvm::ConstantVector::get(CstOps);
7059   }
7060
7061   // Otherwise, insertelement the values to build the vector.
7062   Value *Result =
7063     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7064
7065   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7066     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7067
7068   return Result;
7069 }
7070
7071 // Convert the mask from an integer type to a vector of i1.
7072 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7073                               unsigned NumElts) {
7074
7075   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7076                          cast<IntegerType>(Mask->getType())->getBitWidth());
7077   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7078
7079   // If we have fewer than 8 elements, then the starting mask was an i8 and
7080   // we need to extract down to the right number of elements.
7081   if (NumElts < 8) {
7082     uint32_t Indices[4];
7083     for (unsigned i = 0; i != NumElts; ++i)
7084       Indices[i] = i;
7085     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7086                                              makeArrayRef(Indices, NumElts),
7087                                              "extract");
7088   }
7089   return MaskVec;
7090 }
7091
7092 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7093                                  SmallVectorImpl<Value *> &Ops,
7094                                  unsigned Align) {
7095   // Cast the pointer to the right type.
7096   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7097                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7098
7099   // If the mask is all ones, just emit a regular store.
7100   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7101     if (C->isAllOnesValue())
7102       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7103
7104   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7105                                    Ops[1]->getType()->getVectorNumElements());
7106
7107   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7108 }
7109
7110 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7111                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7112   // Cast the pointer to the right type.
7113   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7114                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7115
7116   // If the mask is all ones, just emit a regular load.
7117   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7118     if (C->isAllOnesValue())
7119       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7120
7121   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7122                                    Ops[1]->getType()->getVectorNumElements());
7123
7124   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7125 }
7126
7127 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7128                                         SmallVectorImpl<Value *> &Ops,
7129                                         llvm::Type *DstTy,
7130                                         unsigned SrcSizeInBits,
7131                                         unsigned Align) {
7132   // Load the subvector.
7133   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7134
7135   // Create broadcast mask.
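  // For example (assuming a 128-bit source broadcast into a 256-bit <8 x float>
  // destination), NumSrcElts is 4 and the mask is <0,1,2,3,0,1,2,3>, i.e. the
  // loaded subvector repeated across the destination lanes.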
7136   unsigned NumDstElts = DstTy->getVectorNumElements();
7137   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7138
7139   SmallVector<uint32_t, 8> Mask;
7140   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7141     for (unsigned j = 0; j != NumSrcElts; ++j)
7142       Mask.push_back(j);
7143
7144   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7145 }
7146
7147 static Value *EmitX86Select(CodeGenFunction &CGF,
7148                             Value *Mask, Value *Op0, Value *Op1) {
7149
7150   // If the mask is all ones just return the first argument.
7151   if (const auto *C = dyn_cast<Constant>(Mask))
7152     if (C->isAllOnesValue())
7153       return Op0;
7154
7155   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7156
7157   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7158 }
7159
7160 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7161                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7162   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7163   Value *Cmp;
7164
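  // CC values 3 and 7 encode the always-false and always-true predicates, so
  // emit a constant mask directly instead of a comparison.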
7165   if (CC == 3) {
7166     Cmp = Constant::getNullValue(
7167                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7168   } else if (CC == 7) {
7169     Cmp = Constant::getAllOnesValue(
7170                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7171   } else {
7172     ICmpInst::Predicate Pred;
7173     switch (CC) {
7174     default: llvm_unreachable("Unknown condition code");
7175     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7176     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7177     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7178     case 4: Pred = ICmpInst::ICMP_NE;  break;
7179     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7180     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7181     }
7182     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7183   }
7184
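  // Unless the incoming mask is known to be all ones, AND it with the
  // comparison result so that masked-off lanes come out as zero.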
7185   const auto *C = dyn_cast<Constant>(Ops.back());
7186   if (!C || !C->isAllOnesValue())
7187     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7188
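  // The result is returned as at least an i8, so pad masks narrower than 8
  // lanes with zero elements before the final bitcast.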
7189   if (NumElts < 8) {
7190     uint32_t Indices[8];
7191     for (unsigned i = 0; i != NumElts; ++i)
7192       Indices[i] = i;
7193     for (unsigned i = NumElts; i != 8; ++i)
7194       Indices[i] = i % NumElts + NumElts;
7195     Cmp = CGF.Builder.CreateShuffleVector(
7196         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7197   }
7198   return CGF.Builder.CreateBitCast(Cmp,
7199                                    IntegerType::get(CGF.getLLVMContext(),
7200                                                     std::max(NumElts, 8U)));
7201 }
7202
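// Emit a select-based integer min/max. For the masked forms (four operands)
// the result is additionally blended with the passthru operand under the
// mask via EmitX86Select.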
7203 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7204                             ArrayRef<Value *> Ops) {
7205   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7206   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7207
7208   if (Ops.size() == 2)
7209     return Res;
7210
7211   assert(Ops.size() == 4);
7212   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7213 }
7214
7215 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7216                               llvm::Type *DstTy) {
7217   unsigned NumberOfElements = DstTy->getVectorNumElements();
7218   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7219   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7220 }
7221
7222 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7223                                            const CallExpr *E) {
7224   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7225       BuiltinID == X86::BI__builtin_ms_va_end)
7226     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7227                           BuiltinID == X86::BI__builtin_ms_va_start);
7228   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7229     // Lower this manually. We can't reliably determine whether or not any
7230     // given va_copy() is for a Win64 va_list from the calling convention
7231     // alone, because it's legal to do this from a System V ABI function.
7232     // With opaque pointer types, we won't have enough information in LLVM
7233     // IR to determine this from the argument types, either. Best to do it
7234     // now, while we have enough information.
7235     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7236     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7237
7238     llvm::Type *BPP = Int8PtrPtrTy;
7239
7240     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7241                        DestAddr.getAlignment());
7242     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7243                       SrcAddr.getAlignment());
7244
7245     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7246     return Builder.CreateStore(ArgPtr, DestAddr);
7247   }
7248
7249   SmallVector<Value*, 4> Ops;
7250
7251   // Find out if any arguments are required to be integer constant expressions.
7252   unsigned ICEArguments = 0;
7253   ASTContext::GetBuiltinTypeError Error;
7254   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7255   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7256
7257   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7258     // If this is a normal argument, just emit it as a scalar.
7259     if ((ICEArguments & (1 << i)) == 0) {
7260       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7261       continue;
7262     }
7263
7264     // If this is required to be a constant, constant fold it so that we know
7265     // that the generated intrinsic gets a ConstantInt.
7266     llvm::APSInt Result;
7267     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7268     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7269     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7270   }
7271
7272   // These exist so that the builtin that takes an immediate can be bounds
7273   // checked by clang to avoid passing bad immediates to the backend. Since
7274   // AVX has a larger immediate than SSE we would need separate builtins to
7275   // do the different bounds checking. Rather than create a clang-specific
7276   // SSE-only builtin, this implements eight separate builtins to match the
7277   // gcc implementation.
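  // For example, __builtin_ia32_cmpltss lowers to a call to
  // llvm.x86.sse.cmp.ss with the immediate 1 appended as the last operand.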
7278   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7279     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7280     llvm::Function *F = CGM.getIntrinsic(ID);
7281     return Builder.CreateCall(F, Ops);
7282   };
7283
7284   // For the vector forms of FP comparisons, translate the builtins directly to
7285   // IR.
7286   // TODO: The builtins could be removed if the SSE header files used vector
7287   // extension comparisons directly (vector ordered/unordered may need
7288   // additional support via __builtin_isnan()).
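  // For example, __builtin_ia32_cmpeqps becomes an 'fcmp oeq' on the two
  // <4 x float> operands, sign-extended to <4 x i32> and bitcast back to
  // <4 x float> to preserve the builtin's return type.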
7289   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7290     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7291     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7292     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7293     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7294     return Builder.CreateBitCast(Sext, FPVecTy);
7295   };
7296
7297   switch (BuiltinID) {
7298   default: return nullptr;
7299   case X86::BI__builtin_cpu_supports: {
7300     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7301     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7302
7303     // TODO: When/if this becomes more than x86-specific, use a TargetInfo-based
7304     // mapping.
7305     // Processor features and mapping to processor feature value.
7306     enum X86Features {
7307       CMOV = 0,
7308       MMX,
7309       POPCNT,
7310       SSE,
7311       SSE2,
7312       SSE3,
7313       SSSE3,
7314       SSE4_1,
7315       SSE4_2,
7316       AVX,
7317       AVX2,
7318       SSE4_A,
7319       FMA4,
7320       XOP,
7321       FMA,
7322       AVX512F,
7323       BMI,
7324       BMI2,
7325       AES,
7326       PCLMUL,
7327       AVX512VL,
7328       AVX512BW,
7329       AVX512DQ,
7330       AVX512CD,
7331       AVX512ER,
7332       AVX512PF,
7333       AVX512VBMI,
7334       AVX512IFMA,
7335       AVX512VPOPCNTDQ,
7336       MAX
7337     };
7338
7339     X86Features Feature =
7340         StringSwitch<X86Features>(FeatureStr)
7341             .Case("cmov", X86Features::CMOV)
7342             .Case("mmx", X86Features::MMX)
7343             .Case("popcnt", X86Features::POPCNT)
7344             .Case("sse", X86Features::SSE)
7345             .Case("sse2", X86Features::SSE2)
7346             .Case("sse3", X86Features::SSE3)
7347             .Case("ssse3", X86Features::SSSE3)
7348             .Case("sse4.1", X86Features::SSE4_1)
7349             .Case("sse4.2", X86Features::SSE4_2)
7350             .Case("avx", X86Features::AVX)
7351             .Case("avx2", X86Features::AVX2)
7352             .Case("sse4a", X86Features::SSE4_A)
7353             .Case("fma4", X86Features::FMA4)
7354             .Case("xop", X86Features::XOP)
7355             .Case("fma", X86Features::FMA)
7356             .Case("avx512f", X86Features::AVX512F)
7357             .Case("bmi", X86Features::BMI)
7358             .Case("bmi2", X86Features::BMI2)
7359             .Case("aes", X86Features::AES)
7360             .Case("pclmul", X86Features::PCLMUL)
7361             .Case("avx512vl", X86Features::AVX512VL)
7362             .Case("avx512bw", X86Features::AVX512BW)
7363             .Case("avx512dq", X86Features::AVX512DQ)
7364             .Case("avx512cd", X86Features::AVX512CD)
7365             .Case("avx512er", X86Features::AVX512ER)
7366             .Case("avx512pf", X86Features::AVX512PF)
7367             .Case("avx512vbmi", X86Features::AVX512VBMI)
7368             .Case("avx512ifma", X86Features::AVX512IFMA)
7369             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7370             .Default(X86Features::MAX);
7371     assert(Feature != X86Features::MAX && "Invalid feature!");
7372
7373     // Matching the struct layout from the compiler-rt/libgcc structure that is
7374     // filled in:
7375     // unsigned int __cpu_vendor;
7376     // unsigned int __cpu_type;
7377     // unsigned int __cpu_subtype;
7378     // unsigned int __cpu_features[1];
7379     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7380                                             llvm::ArrayType::get(Int32Ty, 1));
7381
7382     // Grab the global __cpu_model.
7383     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7384
7385     // Grab the first (0th) element of the __cpu_features field of the
7386     // __cpu_model global, indexing through the struct layout STy.
7387     Value *Idxs[] = {
7388       ConstantInt::get(Int32Ty, 0),
7389       ConstantInt::get(Int32Ty, 3),
7390       ConstantInt::get(Int32Ty, 0)
7391     };
7392     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7393     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7394                                                 CharUnits::fromQuantity(4));
7395
7396     // Check the value of the bit corresponding to the feature requested.
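    // For example, __builtin_cpu_supports("avx2") tests bit 10 (1 << AVX2) of
    // __cpu_model.__cpu_features[0].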
7397     Value *Bitset = Builder.CreateAnd(
7398         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7399     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7400   }
7401   case X86::BI_mm_prefetch: {
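    // _mm_prefetch is always lowered as a read prefetch (RW = 0) of the data
    // cache (Data = 1); the locality hint is taken from the second argument.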
7402     Value *Address = Ops[0];
7403     Value *RW = ConstantInt::get(Int32Ty, 0);
7404     Value *Locality = Ops[1];
7405     Value *Data = ConstantInt::get(Int32Ty, 1);
7406     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7407     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7408   }
7409   case X86::BI_mm_clflush: {
7410     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7411                               Ops[0]);
7412   }
7413   case X86::BI_mm_lfence: {
7414     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7415   }
7416   case X86::BI_mm_mfence: {
7417     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7418   }
7419   case X86::BI_mm_sfence: {
7420     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7421   }
7422   case X86::BI_mm_pause: {
7423     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7424   }
7425   case X86::BI__rdtsc: {
7426     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7427   }
7428   case X86::BI__builtin_ia32_undef128:
7429   case X86::BI__builtin_ia32_undef256:
7430   case X86::BI__builtin_ia32_undef512:
7431     // The x86 definition of "undef" is not the same as the LLVM definition
7432     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7433     // IR optimizer and backend.
7434     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7435     // value, we should use that here instead of a zero.
7436     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7437   case X86::BI__builtin_ia32_vec_init_v8qi:
7438   case X86::BI__builtin_ia32_vec_init_v4hi:
7439   case X86::BI__builtin_ia32_vec_init_v2si:
7440     return Builder.CreateBitCast(BuildVector(Ops),
7441                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7442   case X86::BI__builtin_ia32_vec_ext_v2si:
7443     return Builder.CreateExtractElement(Ops[0],
7444                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7445   case X86::BI_mm_setcsr:
7446   case X86::BI__builtin_ia32_ldmxcsr: {
7447     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7448     Builder.CreateStore(Ops[0], Tmp);
7449     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7450                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7451   }
7452   case X86::BI_mm_getcsr:
7453   case X86::BI__builtin_ia32_stmxcsr: {
7454     Address Tmp = CreateMemTemp(E->getType());
7455     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7456                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7457     return Builder.CreateLoad(Tmp, "stmxcsr");
7458   }
7459   case X86::BI__builtin_ia32_xsave:
7460   case X86::BI__builtin_ia32_xsave64:
7461   case X86::BI__builtin_ia32_xrstor:
7462   case X86::BI__builtin_ia32_xrstor64:
7463   case X86::BI__builtin_ia32_xsaveopt:
7464   case X86::BI__builtin_ia32_xsaveopt64:
7465   case X86::BI__builtin_ia32_xrstors:
7466   case X86::BI__builtin_ia32_xrstors64:
7467   case X86::BI__builtin_ia32_xsavec:
7468   case X86::BI__builtin_ia32_xsavec64:
7469   case X86::BI__builtin_ia32_xsaves:
7470   case X86::BI__builtin_ia32_xsaves64: {
7471     Intrinsic::ID ID;
7472 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7473     case X86::BI__builtin_ia32_##NAME: \
7474       ID = Intrinsic::x86_##NAME; \
7475       break
7476     switch (BuiltinID) {
7477     default: llvm_unreachable("Unsupported intrinsic!");
7478     INTRINSIC_X86_XSAVE_ID(xsave);
7479     INTRINSIC_X86_XSAVE_ID(xsave64);
7480     INTRINSIC_X86_XSAVE_ID(xrstor);
7481     INTRINSIC_X86_XSAVE_ID(xrstor64);
7482     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7483     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7484     INTRINSIC_X86_XSAVE_ID(xrstors);
7485     INTRINSIC_X86_XSAVE_ID(xrstors64);
7486     INTRINSIC_X86_XSAVE_ID(xsavec);
7487     INTRINSIC_X86_XSAVE_ID(xsavec64);
7488     INTRINSIC_X86_XSAVE_ID(xsaves);
7489     INTRINSIC_X86_XSAVE_ID(xsaves64);
7490     }
7491 #undef INTRINSIC_X86_XSAVE_ID
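    // The underlying intrinsics take the 64-bit save mask as two i32 halves
    // (high word first), so split Ops[1] accordingly.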
7492     Value *Mhi = Builder.CreateTrunc(
7493       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7494     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7495     Ops[1] = Mhi;
7496     Ops.push_back(Mlo);
7497     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7498   }
7499   case X86::BI__builtin_ia32_storedqudi128_mask:
7500   case X86::BI__builtin_ia32_storedqusi128_mask:
7501   case X86::BI__builtin_ia32_storedquhi128_mask:
7502   case X86::BI__builtin_ia32_storedquqi128_mask:
7503   case X86::BI__builtin_ia32_storeupd128_mask:
7504   case X86::BI__builtin_ia32_storeups128_mask:
7505   case X86::BI__builtin_ia32_storedqudi256_mask:
7506   case X86::BI__builtin_ia32_storedqusi256_mask:
7507   case X86::BI__builtin_ia32_storedquhi256_mask:
7508   case X86::BI__builtin_ia32_storedquqi256_mask:
7509   case X86::BI__builtin_ia32_storeupd256_mask:
7510   case X86::BI__builtin_ia32_storeups256_mask:
7511   case X86::BI__builtin_ia32_storedqudi512_mask:
7512   case X86::BI__builtin_ia32_storedqusi512_mask:
7513   case X86::BI__builtin_ia32_storedquhi512_mask:
7514   case X86::BI__builtin_ia32_storedquqi512_mask:
7515   case X86::BI__builtin_ia32_storeupd512_mask:
7516   case X86::BI__builtin_ia32_storeups512_mask:
7517     return EmitX86MaskedStore(*this, Ops, 1);
7518
7519   case X86::BI__builtin_ia32_storess128_mask:
7520   case X86::BI__builtin_ia32_storesd128_mask: {
7521     return EmitX86MaskedStore(*this, Ops, 16);
7522   }
7523   case X86::BI__builtin_ia32_vpopcntd_512:
7524   case X86::BI__builtin_ia32_vpopcntq_512: {
7525     llvm::Type *ResultType = ConvertType(E->getType());
7526     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7527     return Builder.CreateCall(F, Ops);
7528   }
7529   case X86::BI__builtin_ia32_cvtmask2b128:
7530   case X86::BI__builtin_ia32_cvtmask2b256:
7531   case X86::BI__builtin_ia32_cvtmask2b512:
7532   case X86::BI__builtin_ia32_cvtmask2w128:
7533   case X86::BI__builtin_ia32_cvtmask2w256:
7534   case X86::BI__builtin_ia32_cvtmask2w512:
7535   case X86::BI__builtin_ia32_cvtmask2d128:
7536   case X86::BI__builtin_ia32_cvtmask2d256:
7537   case X86::BI__builtin_ia32_cvtmask2d512:
7538   case X86::BI__builtin_ia32_cvtmask2q128:
7539   case X86::BI__builtin_ia32_cvtmask2q256:
7540   case X86::BI__builtin_ia32_cvtmask2q512:
7541     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7542
7543   case X86::BI__builtin_ia32_movdqa32store128_mask:
7544   case X86::BI__builtin_ia32_movdqa64store128_mask:
7545   case X86::BI__builtin_ia32_storeaps128_mask:
7546   case X86::BI__builtin_ia32_storeapd128_mask:
7547   case X86::BI__builtin_ia32_movdqa32store256_mask:
7548   case X86::BI__builtin_ia32_movdqa64store256_mask:
7549   case X86::BI__builtin_ia32_storeaps256_mask:
7550   case X86::BI__builtin_ia32_storeapd256_mask:
7551   case X86::BI__builtin_ia32_movdqa32store512_mask:
7552   case X86::BI__builtin_ia32_movdqa64store512_mask:
7553   case X86::BI__builtin_ia32_storeaps512_mask:
7554   case X86::BI__builtin_ia32_storeapd512_mask: {
7555     unsigned Align =
7556       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7557     return EmitX86MaskedStore(*this, Ops, Align);
7558   }
7559   case X86::BI__builtin_ia32_loadups128_mask:
7560   case X86::BI__builtin_ia32_loadups256_mask:
7561   case X86::BI__builtin_ia32_loadups512_mask:
7562   case X86::BI__builtin_ia32_loadupd128_mask:
7563   case X86::BI__builtin_ia32_loadupd256_mask:
7564   case X86::BI__builtin_ia32_loadupd512_mask:
7565   case X86::BI__builtin_ia32_loaddquqi128_mask:
7566   case X86::BI__builtin_ia32_loaddquqi256_mask:
7567   case X86::BI__builtin_ia32_loaddquqi512_mask:
7568   case X86::BI__builtin_ia32_loaddquhi128_mask:
7569   case X86::BI__builtin_ia32_loaddquhi256_mask:
7570   case X86::BI__builtin_ia32_loaddquhi512_mask:
7571   case X86::BI__builtin_ia32_loaddqusi128_mask:
7572   case X86::BI__builtin_ia32_loaddqusi256_mask:
7573   case X86::BI__builtin_ia32_loaddqusi512_mask:
7574   case X86::BI__builtin_ia32_loaddqudi128_mask:
7575   case X86::BI__builtin_ia32_loaddqudi256_mask:
7576   case X86::BI__builtin_ia32_loaddqudi512_mask:
7577     return EmitX86MaskedLoad(*this, Ops, 1);
7578
7579   case X86::BI__builtin_ia32_loadss128_mask:
7580   case X86::BI__builtin_ia32_loadsd128_mask:
7581     return EmitX86MaskedLoad(*this, Ops, 16);
7582
7583   case X86::BI__builtin_ia32_loadaps128_mask:
7584   case X86::BI__builtin_ia32_loadaps256_mask:
7585   case X86::BI__builtin_ia32_loadaps512_mask:
7586   case X86::BI__builtin_ia32_loadapd128_mask:
7587   case X86::BI__builtin_ia32_loadapd256_mask:
7588   case X86::BI__builtin_ia32_loadapd512_mask:
7589   case X86::BI__builtin_ia32_movdqa32load128_mask:
7590   case X86::BI__builtin_ia32_movdqa32load256_mask:
7591   case X86::BI__builtin_ia32_movdqa32load512_mask:
7592   case X86::BI__builtin_ia32_movdqa64load128_mask:
7593   case X86::BI__builtin_ia32_movdqa64load256_mask:
7594   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7595     unsigned Align =
7596       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7597     return EmitX86MaskedLoad(*this, Ops, Align);
7598   }
7599
7600   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7601   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7602     llvm::Type *DstTy = ConvertType(E->getType());
7603     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7604   }
7605
7606   case X86::BI__builtin_ia32_storehps:
7607   case X86::BI__builtin_ia32_storelps: {
7608     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7609     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7610
7611     // Cast the value to v2i64.
7612     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7613
7614     // Extract element 0 (storelps) or element 1 (storehps).
7615     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7616     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7617     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7618
7619     // Cast the pointer to i64* and store.
7620     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7621     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7622   }
7623   case X86::BI__builtin_ia32_palignr128:
7624   case X86::BI__builtin_ia32_palignr256:
7625   case X86::BI__builtin_ia32_palignr512_mask: {
7626     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7627
7628     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7629     assert(NumElts % 16 == 0);
7630
7631     // If palignr is shifting the pair of vectors more than the size of two
7632     // lanes, emit zero.
7633     if (ShiftVal >= 32)
7634       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7635
7636     // If palignr is shifting the pair of input vectors more than one lane,
7637     // but less than two lanes, convert to shifting in zeroes.
7638     if (ShiftVal > 16) {
7639       ShiftVal -= 16;
7640       Ops[1] = Ops[0];
7641       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7642     }
7643
7644     uint32_t Indices[64];
7645     // 256-bit and 512-bit palignr operate on 128-bit lanes, so build the mask lane by lane.
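    // For example (assuming 128-bit palignr with ShiftVal == 4), the indices
    // are 4..15 followed by 16..19, i.e. the high elements of Ops[1] followed
    // by the low elements of Ops[0] in the concatenated shuffle.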
7646     for (unsigned l = 0; l != NumElts; l += 16) {
7647       for (unsigned i = 0; i != 16; ++i) {
7648         unsigned Idx = ShiftVal + i;
7649         if (Idx >= 16)
7650           Idx += NumElts - 16; // End of lane, switch operand.
7651         Indices[l + i] = Idx + l;
7652       }
7653     }
7654
7655     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7656                                                makeArrayRef(Indices, NumElts),
7657                                                "palignr");
7658
7659     // If this isn't a masked builtin, just return the align operation.
7660     if (Ops.size() == 3)
7661       return Align;
7662
7663     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7664   }
7665
7666   case X86::BI__builtin_ia32_movnti:
7667   case X86::BI__builtin_ia32_movnti64:
7668   case X86::BI__builtin_ia32_movntsd:
7669   case X86::BI__builtin_ia32_movntss: {
7670     llvm::MDNode *Node = llvm::MDNode::get(
7671         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7672
7673     Value *Ptr = Ops[0];
7674     Value *Src = Ops[1];
7675
7676     // Extract the 0'th element of the source vector.
7677     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7678         BuiltinID == X86::BI__builtin_ia32_movntss)
7679       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7680
7681     // Convert the type of the pointer to a pointer to the stored type.
7682     Value *BC = Builder.CreateBitCast(
7683         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7684
7685     // Unaligned nontemporal store of the scalar value.
7686     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7687     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7688     SI->setAlignment(1);
7689     return SI;
7690   }
7691
7692   case X86::BI__builtin_ia32_selectb_128:
7693   case X86::BI__builtin_ia32_selectb_256:
7694   case X86::BI__builtin_ia32_selectb_512:
7695   case X86::BI__builtin_ia32_selectw_128:
7696   case X86::BI__builtin_ia32_selectw_256:
7697   case X86::BI__builtin_ia32_selectw_512:
7698   case X86::BI__builtin_ia32_selectd_128:
7699   case X86::BI__builtin_ia32_selectd_256:
7700   case X86::BI__builtin_ia32_selectd_512:
7701   case X86::BI__builtin_ia32_selectq_128:
7702   case X86::BI__builtin_ia32_selectq_256:
7703   case X86::BI__builtin_ia32_selectq_512:
7704   case X86::BI__builtin_ia32_selectps_128:
7705   case X86::BI__builtin_ia32_selectps_256:
7706   case X86::BI__builtin_ia32_selectps_512:
7707   case X86::BI__builtin_ia32_selectpd_128:
7708   case X86::BI__builtin_ia32_selectpd_256:
7709   case X86::BI__builtin_ia32_selectpd_512:
7710     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7711   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7712   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7713   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7714   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7715   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7716   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7717   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7718   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7719   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7720   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7721   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7722   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7723     return EmitX86MaskedCompare(*this, 0, false, Ops);
7724   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7725   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7726   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7727   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7728   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7729   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7730   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7731   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7732   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7733   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7734   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7735   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7736     return EmitX86MaskedCompare(*this, 6, true, Ops);
7737   case X86::BI__builtin_ia32_cmpb128_mask:
7738   case X86::BI__builtin_ia32_cmpb256_mask:
7739   case X86::BI__builtin_ia32_cmpb512_mask:
7740   case X86::BI__builtin_ia32_cmpw128_mask:
7741   case X86::BI__builtin_ia32_cmpw256_mask:
7742   case X86::BI__builtin_ia32_cmpw512_mask:
7743   case X86::BI__builtin_ia32_cmpd128_mask:
7744   case X86::BI__builtin_ia32_cmpd256_mask:
7745   case X86::BI__builtin_ia32_cmpd512_mask:
7746   case X86::BI__builtin_ia32_cmpq128_mask:
7747   case X86::BI__builtin_ia32_cmpq256_mask:
7748   case X86::BI__builtin_ia32_cmpq512_mask: {
7749     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7750     return EmitX86MaskedCompare(*this, CC, true, Ops);
7751   }
7752   case X86::BI__builtin_ia32_ucmpb128_mask:
7753   case X86::BI__builtin_ia32_ucmpb256_mask:
7754   case X86::BI__builtin_ia32_ucmpb512_mask:
7755   case X86::BI__builtin_ia32_ucmpw128_mask:
7756   case X86::BI__builtin_ia32_ucmpw256_mask:
7757   case X86::BI__builtin_ia32_ucmpw512_mask:
7758   case X86::BI__builtin_ia32_ucmpd128_mask:
7759   case X86::BI__builtin_ia32_ucmpd256_mask:
7760   case X86::BI__builtin_ia32_ucmpd512_mask:
7761   case X86::BI__builtin_ia32_ucmpq128_mask:
7762   case X86::BI__builtin_ia32_ucmpq256_mask:
7763   case X86::BI__builtin_ia32_ucmpq512_mask: {
7764     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7765     return EmitX86MaskedCompare(*this, CC, false, Ops);
7766   }
7767
7768   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7769   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7770   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7771   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7772   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7773   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7774     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7775     return EmitX86Select(*this, Ops[2],
7776                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7777                          Ops[1]);
7778   }
7779
7780   case X86::BI__builtin_ia32_pmaxsb128:
7781   case X86::BI__builtin_ia32_pmaxsw128:
7782   case X86::BI__builtin_ia32_pmaxsd128:
7783   case X86::BI__builtin_ia32_pmaxsq128_mask:
7784   case X86::BI__builtin_ia32_pmaxsb256:
7785   case X86::BI__builtin_ia32_pmaxsw256:
7786   case X86::BI__builtin_ia32_pmaxsd256:
7787   case X86::BI__builtin_ia32_pmaxsq256_mask:
7788   case X86::BI__builtin_ia32_pmaxsb512_mask:
7789   case X86::BI__builtin_ia32_pmaxsw512_mask:
7790   case X86::BI__builtin_ia32_pmaxsd512_mask:
7791   case X86::BI__builtin_ia32_pmaxsq512_mask:
7792     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7793   case X86::BI__builtin_ia32_pmaxub128:
7794   case X86::BI__builtin_ia32_pmaxuw128:
7795   case X86::BI__builtin_ia32_pmaxud128:
7796   case X86::BI__builtin_ia32_pmaxuq128_mask:
7797   case X86::BI__builtin_ia32_pmaxub256:
7798   case X86::BI__builtin_ia32_pmaxuw256:
7799   case X86::BI__builtin_ia32_pmaxud256:
7800   case X86::BI__builtin_ia32_pmaxuq256_mask:
7801   case X86::BI__builtin_ia32_pmaxub512_mask:
7802   case X86::BI__builtin_ia32_pmaxuw512_mask:
7803   case X86::BI__builtin_ia32_pmaxud512_mask:
7804   case X86::BI__builtin_ia32_pmaxuq512_mask:
7805     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7806   case X86::BI__builtin_ia32_pminsb128:
7807   case X86::BI__builtin_ia32_pminsw128:
7808   case X86::BI__builtin_ia32_pminsd128:
7809   case X86::BI__builtin_ia32_pminsq128_mask:
7810   case X86::BI__builtin_ia32_pminsb256:
7811   case X86::BI__builtin_ia32_pminsw256:
7812   case X86::BI__builtin_ia32_pminsd256:
7813   case X86::BI__builtin_ia32_pminsq256_mask:
7814   case X86::BI__builtin_ia32_pminsb512_mask:
7815   case X86::BI__builtin_ia32_pminsw512_mask:
7816   case X86::BI__builtin_ia32_pminsd512_mask:
7817   case X86::BI__builtin_ia32_pminsq512_mask:
7818     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7819   case X86::BI__builtin_ia32_pminub128:
7820   case X86::BI__builtin_ia32_pminuw128:
7821   case X86::BI__builtin_ia32_pminud128:
7822   case X86::BI__builtin_ia32_pminuq128_mask:
7823   case X86::BI__builtin_ia32_pminub256:
7824   case X86::BI__builtin_ia32_pminuw256:
7825   case X86::BI__builtin_ia32_pminud256:
7826   case X86::BI__builtin_ia32_pminuq256_mask:
7827   case X86::BI__builtin_ia32_pminub512_mask:
7828   case X86::BI__builtin_ia32_pminuw512_mask:
7829   case X86::BI__builtin_ia32_pminud512_mask:
7830   case X86::BI__builtin_ia32_pminuq512_mask:
7831     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7832
7833   // 3DNow!
7834   case X86::BI__builtin_ia32_pswapdsf:
7835   case X86::BI__builtin_ia32_pswapdsi: {
7836     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7837     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7838     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7839     return Builder.CreateCall(F, Ops, "pswapd");
7840   }
7841   case X86::BI__builtin_ia32_rdrand16_step:
7842   case X86::BI__builtin_ia32_rdrand32_step:
7843   case X86::BI__builtin_ia32_rdrand64_step:
7844   case X86::BI__builtin_ia32_rdseed16_step:
7845   case X86::BI__builtin_ia32_rdseed32_step:
7846   case X86::BI__builtin_ia32_rdseed64_step: {
7847     Intrinsic::ID ID;
7848     switch (BuiltinID) {
7849     default: llvm_unreachable("Unsupported intrinsic!");
7850     case X86::BI__builtin_ia32_rdrand16_step:
7851       ID = Intrinsic::x86_rdrand_16;
7852       break;
7853     case X86::BI__builtin_ia32_rdrand32_step:
7854       ID = Intrinsic::x86_rdrand_32;
7855       break;
7856     case X86::BI__builtin_ia32_rdrand64_step:
7857       ID = Intrinsic::x86_rdrand_64;
7858       break;
7859     case X86::BI__builtin_ia32_rdseed16_step:
7860       ID = Intrinsic::x86_rdseed_16;
7861       break;
7862     case X86::BI__builtin_ia32_rdseed32_step:
7863       ID = Intrinsic::x86_rdseed_32;
7864       break;
7865     case X86::BI__builtin_ia32_rdseed64_step:
7866       ID = Intrinsic::x86_rdseed_64;
7867       break;
7868     }
7869
7870     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7871     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7872                                       Ops[0]);
7873     return Builder.CreateExtractValue(Call, 1);
7874   }
7875
7876   // SSE packed comparison intrinsics
7877   case X86::BI__builtin_ia32_cmpeqps:
7878   case X86::BI__builtin_ia32_cmpeqpd:
7879     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7880   case X86::BI__builtin_ia32_cmpltps:
7881   case X86::BI__builtin_ia32_cmpltpd:
7882     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7883   case X86::BI__builtin_ia32_cmpleps:
7884   case X86::BI__builtin_ia32_cmplepd:
7885     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7886   case X86::BI__builtin_ia32_cmpunordps:
7887   case X86::BI__builtin_ia32_cmpunordpd:
7888     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7889   case X86::BI__builtin_ia32_cmpneqps:
7890   case X86::BI__builtin_ia32_cmpneqpd:
7891     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7892   case X86::BI__builtin_ia32_cmpnltps:
7893   case X86::BI__builtin_ia32_cmpnltpd:
7894     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7895   case X86::BI__builtin_ia32_cmpnleps:
7896   case X86::BI__builtin_ia32_cmpnlepd:
7897     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7898   case X86::BI__builtin_ia32_cmpordps:
7899   case X86::BI__builtin_ia32_cmpordpd:
7900     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7901   case X86::BI__builtin_ia32_cmpps:
7902   case X86::BI__builtin_ia32_cmpps256:
7903   case X86::BI__builtin_ia32_cmppd:
7904   case X86::BI__builtin_ia32_cmppd256: {
7905     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7906     // If this is one of the SSE immediates, we can use native IR.
7907     if (CC < 8) {
7908       FCmpInst::Predicate Pred;
7909       switch (CC) {
7910       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7911       case 1: Pred = FCmpInst::FCMP_OLT; break;
7912       case 2: Pred = FCmpInst::FCMP_OLE; break;
7913       case 3: Pred = FCmpInst::FCMP_UNO; break;
7914       case 4: Pred = FCmpInst::FCMP_UNE; break;
7915       case 5: Pred = FCmpInst::FCMP_UGE; break;
7916       case 6: Pred = FCmpInst::FCMP_UGT; break;
7917       case 7: Pred = FCmpInst::FCMP_ORD; break;
7918       }
7919       return getVectorFCmpIR(Pred);
7920     }
7921
7922     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7923     Intrinsic::ID ID;
7924     switch (BuiltinID) {
7925     default: llvm_unreachable("Unsupported intrinsic!");
7926     case X86::BI__builtin_ia32_cmpps:
7927       ID = Intrinsic::x86_sse_cmp_ps;
7928       break;
7929     case X86::BI__builtin_ia32_cmpps256:
7930       ID = Intrinsic::x86_avx_cmp_ps_256;
7931       break;
7932     case X86::BI__builtin_ia32_cmppd:
7933       ID = Intrinsic::x86_sse2_cmp_pd;
7934       break;
7935     case X86::BI__builtin_ia32_cmppd256:
7936       ID = Intrinsic::x86_avx_cmp_pd_256;
7937       break;
7938     }
7939
7940     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7941   }
7942
7943   // SSE scalar comparison intrinsics
7944   case X86::BI__builtin_ia32_cmpeqss:
7945     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7946   case X86::BI__builtin_ia32_cmpltss:
7947     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7948   case X86::BI__builtin_ia32_cmpless:
7949     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7950   case X86::BI__builtin_ia32_cmpunordss:
7951     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7952   case X86::BI__builtin_ia32_cmpneqss:
7953     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7954   case X86::BI__builtin_ia32_cmpnltss:
7955     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7956   case X86::BI__builtin_ia32_cmpnless:
7957     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7958   case X86::BI__builtin_ia32_cmpordss:
7959     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7960   case X86::BI__builtin_ia32_cmpeqsd:
7961     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7962   case X86::BI__builtin_ia32_cmpltsd:
7963     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7964   case X86::BI__builtin_ia32_cmplesd:
7965     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7966   case X86::BI__builtin_ia32_cmpunordsd:
7967     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7968   case X86::BI__builtin_ia32_cmpneqsd:
7969     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7970   case X86::BI__builtin_ia32_cmpnltsd:
7971     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7972   case X86::BI__builtin_ia32_cmpnlesd:
7973     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7974   case X86::BI__builtin_ia32_cmpordsd:
7975     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7976
7977   case X86::BI__emul:
7978   case X86::BI__emulu: {
7979     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7980     bool isSigned = (BuiltinID == X86::BI__emul);
7981     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7982     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7983     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7984   }
7985   case X86::BI__mulh:
7986   case X86::BI__umulh:
7987   case X86::BI_mul128:
7988   case X86::BI_umul128: {
7989     llvm::Type *ResType = ConvertType(E->getType());
7990     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7991
7992     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7993     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7994     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7995
7996     Value *MulResult, *HigherBits;
7997     if (IsSigned) {
7998       MulResult = Builder.CreateNSWMul(LHS, RHS);
7999       HigherBits = Builder.CreateAShr(MulResult, 64);
8000     } else {
8001       MulResult = Builder.CreateNUWMul(LHS, RHS);
8002       HigherBits = Builder.CreateLShr(MulResult, 64);
8003     }
8004     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8005
8006     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8007       return HigherBits;
8008
8009     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8010     Builder.CreateStore(HigherBits, HighBitsAddress);
8011     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8012   }
8013
8014   case X86::BI__faststorefence: {
8015     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8016                                llvm::CrossThread);
8017   }
8018   case X86::BI_ReadWriteBarrier:
8019   case X86::BI_ReadBarrier:
8020   case X86::BI_WriteBarrier: {
8021     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8022                                llvm::SingleThread);
8023   }
8024   case X86::BI_BitScanForward:
8025   case X86::BI_BitScanForward64:
8026     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8027   case X86::BI_BitScanReverse:
8028   case X86::BI_BitScanReverse64:
8029     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8030
8031   case X86::BI_InterlockedAnd64:
8032     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8033   case X86::BI_InterlockedExchange64:
8034     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8035   case X86::BI_InterlockedExchangeAdd64:
8036     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8037   case X86::BI_InterlockedExchangeSub64:
8038     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8039   case X86::BI_InterlockedOr64:
8040     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8041   case X86::BI_InterlockedXor64:
8042     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8043   case X86::BI_InterlockedDecrement64:
8044     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8045   case X86::BI_InterlockedIncrement64:
8046     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8047
8048   case X86::BI_AddressOfReturnAddress: {
8049     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8050     return Builder.CreateCall(F);
8051   }
8052   case X86::BI__stosb: {
8053     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8054     // instruction, but it will create a memset that won't be optimized away.
8055     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8056   }
8057   case X86::BI__ud2:
8058     // llvm.trap makes a ud2a instruction on x86.
8059     return EmitTrapCall(Intrinsic::trap);
8060   case X86::BI__int2c: {
8061     // This syscall signals a driver assertion failure in x86 NT kernels.
8062     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8063     llvm::InlineAsm *IA =
8064         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8065     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8066         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8067         llvm::Attribute::NoReturn);
8068     CallSite CS = Builder.CreateCall(IA);
8069     CS.setAttributes(NoReturnAttr);
8070     return CS.getInstruction();
8071   }
8072   case X86::BI__readfsbyte:
8073   case X86::BI__readfsword:
8074   case X86::BI__readfsdword:
8075   case X86::BI__readfsqword: {
8076     llvm::Type *IntTy = ConvertType(E->getType());
8077     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8078                                         llvm::PointerType::get(IntTy, 257));
8079     LoadInst *Load = Builder.CreateAlignedLoad(
8080         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8081     Load->setVolatile(true);
8082     return Load;
8083   }
8084   case X86::BI__readgsbyte:
8085   case X86::BI__readgsword:
8086   case X86::BI__readgsdword:
8087   case X86::BI__readgsqword: {
8088     llvm::Type *IntTy = ConvertType(E->getType());
8089     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8090                                         llvm::PointerType::get(IntTy, 256));
8091     LoadInst *Load = Builder.CreateAlignedLoad(
8092         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8093     Load->setVolatile(true);
8094     return Load;
8095   }
8096   }
8097 }
8098
8099
8100 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8101                                            const CallExpr *E) {
8102   SmallVector<Value*, 4> Ops;
8103
8104   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8105     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8106
8107   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8108
8109   switch (BuiltinID) {
8110   default: return nullptr;
8111
8112   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8113   // call __builtin_readcyclecounter.
8114   case PPC::BI__builtin_ppc_get_timebase:
8115     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8116
8117   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8118   case PPC::BI__builtin_altivec_lvx:
8119   case PPC::BI__builtin_altivec_lvxl:
8120   case PPC::BI__builtin_altivec_lvebx:
8121   case PPC::BI__builtin_altivec_lvehx:
8122   case PPC::BI__builtin_altivec_lvewx:
8123   case PPC::BI__builtin_altivec_lvsl:
8124   case PPC::BI__builtin_altivec_lvsr:
8125   case PPC::BI__builtin_vsx_lxvd2x:
8126   case PPC::BI__builtin_vsx_lxvw4x:
8127   case PPC::BI__builtin_vsx_lxvd2x_be:
8128   case PPC::BI__builtin_vsx_lxvw4x_be:
8129   case PPC::BI__builtin_vsx_lxvl:
8130   case PPC::BI__builtin_vsx_lxvll:
8131   {
8132     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8133         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
8134       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8135     } else {
8136       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8137       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8138       Ops.pop_back();
8139     }
8140
8141     switch (BuiltinID) {
8142     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8143     case PPC::BI__builtin_altivec_lvx:
8144       ID = Intrinsic::ppc_altivec_lvx;
8145       break;
8146     case PPC::BI__builtin_altivec_lvxl:
8147       ID = Intrinsic::ppc_altivec_lvxl;
8148       break;
8149     case PPC::BI__builtin_altivec_lvebx:
8150       ID = Intrinsic::ppc_altivec_lvebx;
8151       break;
8152     case PPC::BI__builtin_altivec_lvehx:
8153       ID = Intrinsic::ppc_altivec_lvehx;
8154       break;
8155     case PPC::BI__builtin_altivec_lvewx:
8156       ID = Intrinsic::ppc_altivec_lvewx;
8157       break;
8158     case PPC::BI__builtin_altivec_lvsl:
8159       ID = Intrinsic::ppc_altivec_lvsl;
8160       break;
8161     case PPC::BI__builtin_altivec_lvsr:
8162       ID = Intrinsic::ppc_altivec_lvsr;
8163       break;
8164     case PPC::BI__builtin_vsx_lxvd2x:
8165       ID = Intrinsic::ppc_vsx_lxvd2x;
8166       break;
8167     case PPC::BI__builtin_vsx_lxvw4x:
8168       ID = Intrinsic::ppc_vsx_lxvw4x;
8169       break;
8170     case PPC::BI__builtin_vsx_lxvd2x_be:
8171       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8172       break;
8173     case PPC::BI__builtin_vsx_lxvw4x_be:
8174       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8175       break;
8176     case PPC::BI__builtin_vsx_lxvl:
8177       ID = Intrinsic::ppc_vsx_lxvl;
8178       break;
8179     case PPC::BI__builtin_vsx_lxvll:
8180       ID = Intrinsic::ppc_vsx_lxvll;
8181       break;
8182     }
8183     llvm::Function *F = CGM.getIntrinsic(ID);
8184     return Builder.CreateCall(F, Ops, "");
8185   }
8186
8187   // vec_st, vec_xst_be
8188   case PPC::BI__builtin_altivec_stvx:
8189   case PPC::BI__builtin_altivec_stvxl:
8190   case PPC::BI__builtin_altivec_stvebx:
8191   case PPC::BI__builtin_altivec_stvehx:
8192   case PPC::BI__builtin_altivec_stvewx:
8193   case PPC::BI__builtin_vsx_stxvd2x:
8194   case PPC::BI__builtin_vsx_stxvw4x:
8195   case PPC::BI__builtin_vsx_stxvd2x_be:
8196   case PPC::BI__builtin_vsx_stxvw4x_be:
8197   case PPC::BI__builtin_vsx_stxvl:
8198   case PPC::BI__builtin_vsx_stxvll:
8199   {
8200     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8201         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8202       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8203     } else {
8204       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8205       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8206       Ops.pop_back();
8207     }
8208
8209     switch (BuiltinID) {
8210     default: llvm_unreachable("Unsupported st intrinsic!");
8211     case PPC::BI__builtin_altivec_stvx:
8212       ID = Intrinsic::ppc_altivec_stvx;
8213       break;
8214     case PPC::BI__builtin_altivec_stvxl:
8215       ID = Intrinsic::ppc_altivec_stvxl;
8216       break;
8217     case PPC::BI__builtin_altivec_stvebx:
8218       ID = Intrinsic::ppc_altivec_stvebx;
8219       break;
8220     case PPC::BI__builtin_altivec_stvehx:
8221       ID = Intrinsic::ppc_altivec_stvehx;
8222       break;
8223     case PPC::BI__builtin_altivec_stvewx:
8224       ID = Intrinsic::ppc_altivec_stvewx;
8225       break;
8226     case PPC::BI__builtin_vsx_stxvd2x:
8227       ID = Intrinsic::ppc_vsx_stxvd2x;
8228       break;
8229     case PPC::BI__builtin_vsx_stxvw4x:
8230       ID = Intrinsic::ppc_vsx_stxvw4x;
8231       break;
8232     case PPC::BI__builtin_vsx_stxvd2x_be:
8233       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8234       break;
8235     case PPC::BI__builtin_vsx_stxvw4x_be:
8236       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8237       break;
8238     case PPC::BI__builtin_vsx_stxvl:
8239       ID = Intrinsic::ppc_vsx_stxvl;
8240       break;
8241     case PPC::BI__builtin_vsx_stxvll:
8242       ID = Intrinsic::ppc_vsx_stxvll;
8243       break;
8244     }
8245     llvm::Function *F = CGM.getIntrinsic(ID);
8246     return Builder.CreateCall(F, Ops, "");
8247   }
8248   // Square root
8249   case PPC::BI__builtin_vsx_xvsqrtsp:
8250   case PPC::BI__builtin_vsx_xvsqrtdp: {
8251     llvm::Type *ResultType = ConvertType(E->getType());
8252     Value *X = EmitScalarExpr(E->getArg(0));
8253     ID = Intrinsic::sqrt;
8254     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8255     return Builder.CreateCall(F, X);
8256   }
8257   // Count leading zeros
8258   case PPC::BI__builtin_altivec_vclzb:
8259   case PPC::BI__builtin_altivec_vclzh:
8260   case PPC::BI__builtin_altivec_vclzw:
8261   case PPC::BI__builtin_altivec_vclzd: {
8262     llvm::Type *ResultType = ConvertType(E->getType());
8263     Value *X = EmitScalarExpr(E->getArg(0));
8264     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8265     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8266     return Builder.CreateCall(F, {X, Undef});
8267   }
8268   case PPC::BI__builtin_altivec_vctzb:
8269   case PPC::BI__builtin_altivec_vctzh:
8270   case PPC::BI__builtin_altivec_vctzw:
8271   case PPC::BI__builtin_altivec_vctzd: {
8272     llvm::Type *ResultType = ConvertType(E->getType());
8273     Value *X = EmitScalarExpr(E->getArg(0));
8274     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8275     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8276     return Builder.CreateCall(F, {X, Undef});
8277   }
8278   case PPC::BI__builtin_altivec_vpopcntb:
8279   case PPC::BI__builtin_altivec_vpopcnth:
8280   case PPC::BI__builtin_altivec_vpopcntw:
8281   case PPC::BI__builtin_altivec_vpopcntd: {
8282     llvm::Type *ResultType = ConvertType(E->getType());
8283     Value *X = EmitScalarExpr(E->getArg(0));
8284     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8285     return Builder.CreateCall(F, X);
8286   }
8287   // Copy sign
8288   case PPC::BI__builtin_vsx_xvcpsgnsp:
8289   case PPC::BI__builtin_vsx_xvcpsgndp: {
8290     llvm::Type *ResultType = ConvertType(E->getType());
8291     Value *X = EmitScalarExpr(E->getArg(0));
8292     Value *Y = EmitScalarExpr(E->getArg(1));
8293     ID = Intrinsic::copysign;
8294     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8295     return Builder.CreateCall(F, {X, Y});
8296   }
8297   // Rounding/truncation
8298   case PPC::BI__builtin_vsx_xvrspip:
8299   case PPC::BI__builtin_vsx_xvrdpip:
8300   case PPC::BI__builtin_vsx_xvrdpim:
8301   case PPC::BI__builtin_vsx_xvrspim:
8302   case PPC::BI__builtin_vsx_xvrdpi:
8303   case PPC::BI__builtin_vsx_xvrspi:
8304   case PPC::BI__builtin_vsx_xvrdpic:
8305   case PPC::BI__builtin_vsx_xvrspic:
8306   case PPC::BI__builtin_vsx_xvrdpiz:
8307   case PPC::BI__builtin_vsx_xvrspiz: {
8308     llvm::Type *ResultType = ConvertType(E->getType());
8309     Value *X = EmitScalarExpr(E->getArg(0));
8310     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8311         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8312       ID = Intrinsic::floor;
8313     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8314              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8315       ID = Intrinsic::round;
8316     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8317              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8318       ID = Intrinsic::nearbyint;
8319     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8320              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8321       ID = Intrinsic::ceil;
8322     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8323              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8324       ID = Intrinsic::trunc;
8325     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8326     return Builder.CreateCall(F, X);
8327   }
8328
8329   // Absolute value
8330   case PPC::BI__builtin_vsx_xvabsdp:
8331   case PPC::BI__builtin_vsx_xvabssp: {
8332     llvm::Type *ResultType = ConvertType(E->getType());
8333     Value *X = EmitScalarExpr(E->getArg(0));
8334     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8335     return Builder.CreateCall(F, X);
8336   }
8337
8338   // FMA variations
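  //   xvmadd:   fma(X, Y, Z)
  //   xvnmadd: -fma(X, Y, Z)
  //   xvmsub:   fma(X, Y, -Z)
  //   xvnmsub: -fma(X, Y, -Z)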
8339   case PPC::BI__builtin_vsx_xvmaddadp:
8340   case PPC::BI__builtin_vsx_xvmaddasp:
8341   case PPC::BI__builtin_vsx_xvnmaddadp:
8342   case PPC::BI__builtin_vsx_xvnmaddasp:
8343   case PPC::BI__builtin_vsx_xvmsubadp:
8344   case PPC::BI__builtin_vsx_xvmsubasp:
8345   case PPC::BI__builtin_vsx_xvnmsubadp:
8346   case PPC::BI__builtin_vsx_xvnmsubasp: {
8347     llvm::Type *ResultType = ConvertType(E->getType());
8348     Value *X = EmitScalarExpr(E->getArg(0));
8349     Value *Y = EmitScalarExpr(E->getArg(1));
8350     Value *Z = EmitScalarExpr(E->getArg(2));
8351     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8352     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8353     switch (BuiltinID) {
8354       case PPC::BI__builtin_vsx_xvmaddadp:
8355       case PPC::BI__builtin_vsx_xvmaddasp:
8356         return Builder.CreateCall(F, {X, Y, Z});
8357       case PPC::BI__builtin_vsx_xvnmaddadp:
8358       case PPC::BI__builtin_vsx_xvnmaddasp:
8359         return Builder.CreateFSub(Zero,
8360                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8361       case PPC::BI__builtin_vsx_xvmsubadp:
8362       case PPC::BI__builtin_vsx_xvmsubasp:
8363         return Builder.CreateCall(F,
8364                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8365       case PPC::BI__builtin_vsx_xvnmsubadp:
8366       case PPC::BI__builtin_vsx_xvnmsubasp:
8367         Value *FsubRes =
8368           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8369         return Builder.CreateFSub(Zero, FsubRes, "sub");
8370     }
8371     llvm_unreachable("Unknown FMA operation");
8372     return nullptr; // Suppress no-return warning
8373   }
8374
8375   case PPC::BI__builtin_vsx_insertword: {
8376     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8377
8378     // The third argument is a compile-time constant int. It must be clamped
8379     // to the range [0, 12].
8380     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8381     assert(ArgCI &&
8382            "Third arg to xxinsertw intrinsic must be constant integer");
8383     const int64_t MaxIndex = 12;
8384     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8385
8386     // The builtin semantics don't exactly match the xxinsertw instruction's
8387     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8388     // word from the first argument and inserts it into the second argument. The
8389     // instruction extracts the word from its second input register and inserts
8390     // it into its first input register, so swap the first and second arguments.
8391     std::swap(Ops[0], Ops[1]);
8392
8393     // Need to cast the second argument from a vector of unsigned int to a
8394     // vector of long long.
8395     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8396
8397     if (getTarget().isLittleEndian()) {
8398       // Create a shuffle mask of (1, 0)
8399       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8400                                    ConstantInt::get(Int32Ty, 0)
8401                                  };
8402       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8403
8404       // Reverse the double words in the vector we will extract from.
8405       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8406       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8407
8408       // Reverse the index.
8409       Index = MaxIndex - Index;
8410     }
8411
8412     // Intrinsic expects the first arg to be a vector of int.
8413     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8414     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8415     return Builder.CreateCall(F, Ops);
8416   }
8417
8418   case PPC::BI__builtin_vsx_extractuword: {
8419     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8420
8421     // Intrinsic expects the first argument to be a vector of doublewords.
8422     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8423
8424     // The second argument is a compile time constant int that needs to
8425     // be clamped to the range [0, 12].
8426     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8427     assert(ArgCI &&
8428            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8429     const int64_t MaxIndex = 12;
8430     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8431
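         // Illustration (assumed example values): an extraction index of 4 on a
         // little-endian target is mirrored to 12 - 4 = 8 below, and the
         // doublewords of the result are then swapped back so the extracted
         // word matches the builtin's big-endian-style numbering.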
8432     if (getTarget().isLittleEndian()) {
8433       // Reverse the index.
8434       Index = MaxIndex - Index;
8435       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8436
8437       // Emit the call, then reverse the double words of the result vector.
8438       Value *Call = Builder.CreateCall(F, Ops);
8439
8440       // Create a shuffle mask of (1, 0)
8441       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8442                                    ConstantInt::get(Int32Ty, 0)
8443                                  };
8444       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8445
8446       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8447       return ShuffleCall;
8448     } else {
8449       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8450       return Builder.CreateCall(F, Ops);
8451     }
8452   }
8453
8454   case PPC::BI__builtin_vsx_xxpermdi: {
8455     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8456     assert(ArgCI && "Third arg must be constant integer!");
8457
8458     unsigned Index = ArgCI->getZExtValue();
8459     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8460     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8461
8462     // Element zero comes from the first input vector and element one comes from
8463     // the second. The element indices within each vector are numbered in big
8464     // endian order so the shuffle mask must be adjusted for this on little
8465     // endian platforms (i.e. index is complemented and source vector reversed).
8466     unsigned ElemIdx0;
8467     unsigned ElemIdx1;
8468     if (getTarget().isLittleEndian()) {
8469       ElemIdx0 = (~Index & 1) + 2;
8470       ElemIdx1 = (~Index & 2) >> 1;
8471     } else { // BigEndian
8472       ElemIdx0 = (Index & 2) >> 1;
8473       ElemIdx1 = 2 + (Index & 1);
8474     }
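         // Worked example (illustrative): Index = 3 selects doubleword 1 of each
         // source vector. Big endian: ElemIdx0 = (3 & 2) >> 1 = 1,
         // ElemIdx1 = 2 + (3 & 1) = 3. Little endian: ElemIdx0 = (~3 & 1) + 2 = 2,
         // ElemIdx1 = (~3 & 2) >> 1 = 0.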
8475
8476     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8477                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8478     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8479
8480     Value *ShuffleCall =
8481         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8482     QualType BIRetType = E->getType();
8483     auto RetTy = ConvertType(BIRetType);
8484     return Builder.CreateBitCast(ShuffleCall, RetTy);
8485   }
8486
8487   case PPC::BI__builtin_vsx_xxsldwi: {
8488     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8489     assert(ArgCI && "Third argument must be a compile time constant");
8490     unsigned Index = ArgCI->getZExtValue() & 0x3;
8491     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8492     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8493
8494     // Create a shuffle mask
8495     unsigned ElemIdx0;
8496     unsigned ElemIdx1;
8497     unsigned ElemIdx2;
8498     unsigned ElemIdx3;
8499     if (getTarget().isLittleEndian()) {
8500       // Little endian element N comes from element 8+N-Index of the
8501       // concatenated wide vector (of course, using modulo arithmetic on
8502       // the total number of elements).
8503       ElemIdx0 = (8 - Index) % 8;
8504       ElemIdx1 = (9 - Index) % 8;
8505       ElemIdx2 = (10 - Index) % 8;
8506       ElemIdx3 = (11 - Index) % 8;
8507     } else {
8508       // Big endian ElemIdx<N> = Index + N
8509       ElemIdx0 = Index;
8510       ElemIdx1 = Index + 1;
8511       ElemIdx2 = Index + 2;
8512       ElemIdx3 = Index + 3;
8513     }
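         // Worked example (illustrative): Index = 1 gives a big-endian mask of
         // {1, 2, 3, 4} and a little-endian mask of
         // {(8-1)%8, (9-1)%8, (10-1)%8, (11-1)%8} = {7, 0, 1, 2}.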
8514
8515     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8516                                 ConstantInt::get(Int32Ty, ElemIdx1),
8517                                 ConstantInt::get(Int32Ty, ElemIdx2),
8518                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8519
8520     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8521     Value *ShuffleCall =
8522         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8523     QualType BIRetType = E->getType();
8524     auto RetTy = ConvertType(BIRetType);
8525     return Builder.CreateBitCast(ShuffleCall, RetTy);
8526   }
8527   }
8528 }
8529
8530 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8531                                               const CallExpr *E) {
8532   switch (BuiltinID) {
8533   case AMDGPU::BI__builtin_amdgcn_div_scale:
8534   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8535     // Translate from the intrinsic's struct return to the builtin's out
8536     // argument.
8537
8538     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8539
8540     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8541     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8542     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8543
8544     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8545                                            X->getType());
8546
8547     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8548
8549     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8550     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8551
8552     llvm::Type *RealFlagType
8553       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8554
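         // The i1 flag extracted from the pair is widened to the out pointer's
         // in-memory type before being stored through the out argument.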
8555     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8556     Builder.CreateStore(FlagExt, FlagOutPtr);
8557     return Result;
8558   }
8559   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8560   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8561     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8562     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8563     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8564     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8565
8566     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8567                                       Src0->getType());
8568     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8569     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8570   }
8571
8572   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8573     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8574   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8575     llvm::SmallVector<llvm::Value *, 5> Args;
8576     for (unsigned I = 0; I != 5; ++I)
8577       Args.push_back(EmitScalarExpr(E->getArg(I)));
8578     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8579                                     Args[0]->getType());
8580     return Builder.CreateCall(F, Args);
8581   }
8582   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8583   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8584   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8585     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8586   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8587   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8588     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8589   case AMDGPU::BI__builtin_amdgcn_rcp:
8590   case AMDGPU::BI__builtin_amdgcn_rcpf:
8591   case AMDGPU::BI__builtin_amdgcn_rcph:
8592     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8593   case AMDGPU::BI__builtin_amdgcn_rsq:
8594   case AMDGPU::BI__builtin_amdgcn_rsqf:
8595   case AMDGPU::BI__builtin_amdgcn_rsqh:
8596     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8597   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8598   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8599     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8600   case AMDGPU::BI__builtin_amdgcn_sinf:
8601   case AMDGPU::BI__builtin_amdgcn_sinh:
8602     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8603   case AMDGPU::BI__builtin_amdgcn_cosf:
8604   case AMDGPU::BI__builtin_amdgcn_cosh:
8605     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8606   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8607     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8608   case AMDGPU::BI__builtin_amdgcn_ldexp:
8609   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8610   case AMDGPU::BI__builtin_amdgcn_ldexph:
8611     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8612   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8613   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8614   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8615     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8616   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8617   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8618     Value *Src0 = EmitScalarExpr(E->getArg(0));
8619     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8620                                 { Builder.getInt32Ty(), Src0->getType() });
8621     return Builder.CreateCall(F, Src0);
8622   }
8623   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8624     Value *Src0 = EmitScalarExpr(E->getArg(0));
8625     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8626                                 { Builder.getInt16Ty(), Src0->getType() });
8627     return Builder.CreateCall(F, Src0);
8628   }
8629   case AMDGPU::BI__builtin_amdgcn_fract:
8630   case AMDGPU::BI__builtin_amdgcn_fractf:
8631   case AMDGPU::BI__builtin_amdgcn_fracth:
8632     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8633   case AMDGPU::BI__builtin_amdgcn_lerp:
8634     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8635   case AMDGPU::BI__builtin_amdgcn_uicmp:
8636   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8637   case AMDGPU::BI__builtin_amdgcn_sicmp:
8638   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8639     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8640   case AMDGPU::BI__builtin_amdgcn_fcmp:
8641   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8642     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8643   case AMDGPU::BI__builtin_amdgcn_class:
8644   case AMDGPU::BI__builtin_amdgcn_classf:
8645   case AMDGPU::BI__builtin_amdgcn_classh:
8646     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8647   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8648   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8649     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8650   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8651     CallInst *CI = cast<CallInst>(
8652       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
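         // Mark the exec read convergent so optimizations do not introduce new
         // control dependencies for the call (per LLVM's convergent attribute).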
8653     CI->setConvergent();
8654     return CI;
8655   }
8656
8657   // amdgcn workitem
8658   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8659     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8660   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8661     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8662   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8663     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8664
8665   // r600 intrinsics
8666   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8667   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8668     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8669   case AMDGPU::BI__builtin_r600_read_tidig_x:
8670     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8671   case AMDGPU::BI__builtin_r600_read_tidig_y:
8672     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8673   case AMDGPU::BI__builtin_r600_read_tidig_z:
8674     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8675   default:
8676     return nullptr;
8677   }
8678 }
8679
8680 /// Handle a SystemZ function in which the final argument is a pointer
8681 /// to an int that receives the post-instruction CC value.  At the LLVM level
8682 /// this is represented as a function that returns a {result, cc} pair.
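     /// For example (illustrative), __builtin_s390_vceqbs(a, b, &cc) maps to the
     /// llvm.s390.vceqbs intrinsic; element 1 of the returned pair is stored
     /// through the cc pointer and element 0 becomes the builtin's result.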
8683 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8684                                          unsigned IntrinsicID,
8685                                          const CallExpr *E) {
8686   unsigned NumArgs = E->getNumArgs() - 1;
8687   SmallVector<Value *, 8> Args(NumArgs);
8688   for (unsigned I = 0; I < NumArgs; ++I)
8689     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8690   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8691   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8692   Value *Call = CGF.Builder.CreateCall(F, Args);
8693   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8694   CGF.Builder.CreateStore(CC, CCPtr);
8695   return CGF.Builder.CreateExtractValue(Call, 0);
8696 }
8697
8698 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8699                                                const CallExpr *E) {
8700   switch (BuiltinID) {
8701   case SystemZ::BI__builtin_tbegin: {
8702     Value *TDB = EmitScalarExpr(E->getArg(0));
8703     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8704     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8705     return Builder.CreateCall(F, {TDB, Control});
8706   }
8707   case SystemZ::BI__builtin_tbegin_nofloat: {
8708     Value *TDB = EmitScalarExpr(E->getArg(0));
8709     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8710     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8711     return Builder.CreateCall(F, {TDB, Control});
8712   }
8713   case SystemZ::BI__builtin_tbeginc: {
8714     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8715     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8716     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8717     return Builder.CreateCall(F, {TDB, Control});
8718   }
8719   case SystemZ::BI__builtin_tabort: {
8720     Value *Data = EmitScalarExpr(E->getArg(0));
8721     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8722     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8723   }
8724   case SystemZ::BI__builtin_non_tx_store: {
8725     Value *Address = EmitScalarExpr(E->getArg(0));
8726     Value *Data = EmitScalarExpr(E->getArg(1));
8727     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8728     return Builder.CreateCall(F, {Data, Address});
8729   }
8730
8731   // Vector builtins.  Note that most vector builtins are mapped automatically
8732   // to target-specific LLVM intrinsics.  The ones handled specially here can
8733   // be represented via standard LLVM IR, which is preferable to enable common
8734   // LLVM optimizations.
8735
8736   case SystemZ::BI__builtin_s390_vpopctb:
8737   case SystemZ::BI__builtin_s390_vpopcth:
8738   case SystemZ::BI__builtin_s390_vpopctf:
8739   case SystemZ::BI__builtin_s390_vpopctg: {
8740     llvm::Type *ResultType = ConvertType(E->getType());
8741     Value *X = EmitScalarExpr(E->getArg(0));
8742     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8743     return Builder.CreateCall(F, X);
8744   }
8745
8746   case SystemZ::BI__builtin_s390_vclzb:
8747   case SystemZ::BI__builtin_s390_vclzh:
8748   case SystemZ::BI__builtin_s390_vclzf:
8749   case SystemZ::BI__builtin_s390_vclzg: {
8750     llvm::Type *ResultType = ConvertType(E->getType());
8751     Value *X = EmitScalarExpr(E->getArg(0));
8752     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
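         // The second operand of llvm.ctlz is the is_zero_undef flag; passing
         // false keeps the result defined (the element bit width) for zero input.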
8753     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8754     return Builder.CreateCall(F, {X, Undef});
8755   }
8756
8757   case SystemZ::BI__builtin_s390_vctzb:
8758   case SystemZ::BI__builtin_s390_vctzh:
8759   case SystemZ::BI__builtin_s390_vctzf:
8760   case SystemZ::BI__builtin_s390_vctzg: {
8761     llvm::Type *ResultType = ConvertType(E->getType());
8762     Value *X = EmitScalarExpr(E->getArg(0));
8763     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8764     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8765     return Builder.CreateCall(F, {X, Undef});
8766   }
8767
8768   case SystemZ::BI__builtin_s390_vfsqdb: {
8769     llvm::Type *ResultType = ConvertType(E->getType());
8770     Value *X = EmitScalarExpr(E->getArg(0));
8771     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8772     return Builder.CreateCall(F, X);
8773   }
8774   case SystemZ::BI__builtin_s390_vfmadb: {
8775     llvm::Type *ResultType = ConvertType(E->getType());
8776     Value *X = EmitScalarExpr(E->getArg(0));
8777     Value *Y = EmitScalarExpr(E->getArg(1));
8778     Value *Z = EmitScalarExpr(E->getArg(2));
8779     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8780     return Builder.CreateCall(F, {X, Y, Z});
8781   }
8782   case SystemZ::BI__builtin_s390_vfmsdb: {
8783     llvm::Type *ResultType = ConvertType(E->getType());
8784     Value *X = EmitScalarExpr(E->getArg(0));
8785     Value *Y = EmitScalarExpr(E->getArg(1));
8786     Value *Z = EmitScalarExpr(E->getArg(2));
8787     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8788     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8789     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8790   }
8791   case SystemZ::BI__builtin_s390_vflpdb: {
8792     llvm::Type *ResultType = ConvertType(E->getType());
8793     Value *X = EmitScalarExpr(E->getArg(0));
8794     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8795     return Builder.CreateCall(F, X);
8796   }
8797   case SystemZ::BI__builtin_s390_vflndb: {
8798     llvm::Type *ResultType = ConvertType(E->getType());
8799     Value *X = EmitScalarExpr(E->getArg(0));
8800     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8801     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8802     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8803   }
8804   case SystemZ::BI__builtin_s390_vfidb: {
8805     llvm::Type *ResultType = ConvertType(E->getType());
8806     Value *X = EmitScalarExpr(E->getArg(0));
8807     // Constant-fold the M4 and M5 mask arguments.
8808     llvm::APSInt M4, M5;
8809     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8810     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8811     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8812     (void)IsConstM4; (void)IsConstM5;
8813     // Check whether this instance of vfidb can be represented via an LLVM
8814     // standard intrinsic.  We only support some combinations of M4 and M5.
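         // Supported combinations (mirroring the switch below):
         //   (M4, M5) = (0, 0) -> llvm.rint
         //   (M4, M5) = (4, 0) -> llvm.nearbyint, (4, 1) -> llvm.round,
         //              (4, 5) -> llvm.trunc, (4, 6) -> llvm.ceil, (4, 7) -> llvm.floor
         // Any other combination falls back to the s390.vfidb target intrinsic.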
8815     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8816     switch (M4.getZExtValue()) {
8817     default: break;
8818     case 0:  // IEEE-inexact exception allowed
8819       switch (M5.getZExtValue()) {
8820       default: break;
8821       case 0: ID = Intrinsic::rint; break;
8822       }
8823       break;
8824     case 4:  // IEEE-inexact exception suppressed
8825       switch (M5.getZExtValue()) {
8826       default: break;
8827       case 0: ID = Intrinsic::nearbyint; break;
8828       case 1: ID = Intrinsic::round; break;
8829       case 5: ID = Intrinsic::trunc; break;
8830       case 6: ID = Intrinsic::ceil; break;
8831       case 7: ID = Intrinsic::floor; break;
8832       }
8833       break;
8834     }
8835     if (ID != Intrinsic::not_intrinsic) {
8836       Function *F = CGM.getIntrinsic(ID, ResultType);
8837       return Builder.CreateCall(F, X);
8838     }
8839     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8840     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8841     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8842     return Builder.CreateCall(F, {X, M4Value, M5Value});
8843   }
8844
8845   // Vector intrinsics that output the post-instruction CC value.
8846
8847 #define INTRINSIC_WITH_CC(NAME) \
8848     case SystemZ::BI__builtin_##NAME: \
8849       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8850
8851   INTRINSIC_WITH_CC(s390_vpkshs);
8852   INTRINSIC_WITH_CC(s390_vpksfs);
8853   INTRINSIC_WITH_CC(s390_vpksgs);
8854
8855   INTRINSIC_WITH_CC(s390_vpklshs);
8856   INTRINSIC_WITH_CC(s390_vpklsfs);
8857   INTRINSIC_WITH_CC(s390_vpklsgs);
8858
8859   INTRINSIC_WITH_CC(s390_vceqbs);
8860   INTRINSIC_WITH_CC(s390_vceqhs);
8861   INTRINSIC_WITH_CC(s390_vceqfs);
8862   INTRINSIC_WITH_CC(s390_vceqgs);
8863
8864   INTRINSIC_WITH_CC(s390_vchbs);
8865   INTRINSIC_WITH_CC(s390_vchhs);
8866   INTRINSIC_WITH_CC(s390_vchfs);
8867   INTRINSIC_WITH_CC(s390_vchgs);
8868
8869   INTRINSIC_WITH_CC(s390_vchlbs);
8870   INTRINSIC_WITH_CC(s390_vchlhs);
8871   INTRINSIC_WITH_CC(s390_vchlfs);
8872   INTRINSIC_WITH_CC(s390_vchlgs);
8873
8874   INTRINSIC_WITH_CC(s390_vfaebs);
8875   INTRINSIC_WITH_CC(s390_vfaehs);
8876   INTRINSIC_WITH_CC(s390_vfaefs);
8877
8878   INTRINSIC_WITH_CC(s390_vfaezbs);
8879   INTRINSIC_WITH_CC(s390_vfaezhs);
8880   INTRINSIC_WITH_CC(s390_vfaezfs);
8881
8882   INTRINSIC_WITH_CC(s390_vfeebs);
8883   INTRINSIC_WITH_CC(s390_vfeehs);
8884   INTRINSIC_WITH_CC(s390_vfeefs);
8885
8886   INTRINSIC_WITH_CC(s390_vfeezbs);
8887   INTRINSIC_WITH_CC(s390_vfeezhs);
8888   INTRINSIC_WITH_CC(s390_vfeezfs);
8889
8890   INTRINSIC_WITH_CC(s390_vfenebs);
8891   INTRINSIC_WITH_CC(s390_vfenehs);
8892   INTRINSIC_WITH_CC(s390_vfenefs);
8893
8894   INTRINSIC_WITH_CC(s390_vfenezbs);
8895   INTRINSIC_WITH_CC(s390_vfenezhs);
8896   INTRINSIC_WITH_CC(s390_vfenezfs);
8897
8898   INTRINSIC_WITH_CC(s390_vistrbs);
8899   INTRINSIC_WITH_CC(s390_vistrhs);
8900   INTRINSIC_WITH_CC(s390_vistrfs);
8901
8902   INTRINSIC_WITH_CC(s390_vstrcbs);
8903   INTRINSIC_WITH_CC(s390_vstrchs);
8904   INTRINSIC_WITH_CC(s390_vstrcfs);
8905
8906   INTRINSIC_WITH_CC(s390_vstrczbs);
8907   INTRINSIC_WITH_CC(s390_vstrczhs);
8908   INTRINSIC_WITH_CC(s390_vstrczfs);
8909
8910   INTRINSIC_WITH_CC(s390_vfcedbs);
8911   INTRINSIC_WITH_CC(s390_vfchdbs);
8912   INTRINSIC_WITH_CC(s390_vfchedbs);
8913
8914   INTRINSIC_WITH_CC(s390_vftcidb);
8915
8916 #undef INTRINSIC_WITH_CC
8917
8918   default:
8919     return nullptr;
8920   }
8921 }
8922
8923 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8924                                              const CallExpr *E) {
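       // Helper for the __nvvm_ldg_* builtins below: calls the given ldg intrinsic
       // overloaded on the loaded type and pointer type, passing the pointer and
       // its natural alignment (as an i32) as arguments.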
8925   auto MakeLdg = [&](unsigned IntrinsicID) {
8926     Value *Ptr = EmitScalarExpr(E->getArg(0));
8927     clang::CharUnits Align =
8928         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
8929     return Builder.CreateCall(
8930         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8931                                        Ptr->getType()}),
8932         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8933   };
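       // Helper for the scoped (cta/sys) atomics below: overloads the intrinsic on
       // the pointee and pointer types and passes {pointer, value} as arguments.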
8934   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8935     Value *Ptr = EmitScalarExpr(E->getArg(0));
8936     return Builder.CreateCall(
8937         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8938                                        Ptr->getType()}),
8939         {Ptr, EmitScalarExpr(E->getArg(1))});
8940   };
8941   switch (BuiltinID) {
8942   case NVPTX::BI__nvvm_atom_add_gen_i:
8943   case NVPTX::BI__nvvm_atom_add_gen_l:
8944   case NVPTX::BI__nvvm_atom_add_gen_ll:
8945     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8946
8947   case NVPTX::BI__nvvm_atom_sub_gen_i:
8948   case NVPTX::BI__nvvm_atom_sub_gen_l:
8949   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8950     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8951
8952   case NVPTX::BI__nvvm_atom_and_gen_i:
8953   case NVPTX::BI__nvvm_atom_and_gen_l:
8954   case NVPTX::BI__nvvm_atom_and_gen_ll:
8955     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8956
8957   case NVPTX::BI__nvvm_atom_or_gen_i:
8958   case NVPTX::BI__nvvm_atom_or_gen_l:
8959   case NVPTX::BI__nvvm_atom_or_gen_ll:
8960     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8961
8962   case NVPTX::BI__nvvm_atom_xor_gen_i:
8963   case NVPTX::BI__nvvm_atom_xor_gen_l:
8964   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8965     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8966
8967   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8968   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8969   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8970     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8971
8972   case NVPTX::BI__nvvm_atom_max_gen_i:
8973   case NVPTX::BI__nvvm_atom_max_gen_l:
8974   case NVPTX::BI__nvvm_atom_max_gen_ll:
8975     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8976
8977   case NVPTX::BI__nvvm_atom_max_gen_ui:
8978   case NVPTX::BI__nvvm_atom_max_gen_ul:
8979   case NVPTX::BI__nvvm_atom_max_gen_ull:
8980     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8981
8982   case NVPTX::BI__nvvm_atom_min_gen_i:
8983   case NVPTX::BI__nvvm_atom_min_gen_l:
8984   case NVPTX::BI__nvvm_atom_min_gen_ll:
8985     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8986
8987   case NVPTX::BI__nvvm_atom_min_gen_ui:
8988   case NVPTX::BI__nvvm_atom_min_gen_ul:
8989   case NVPTX::BI__nvvm_atom_min_gen_ull:
8990     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8991
8992   case NVPTX::BI__nvvm_atom_cas_gen_i:
8993   case NVPTX::BI__nvvm_atom_cas_gen_l:
8994   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8995     // __nvvm_atom_cas_gen_* should return the old value rather than the
8996     // success flag.
8997     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8998
8999   case NVPTX::BI__nvvm_atom_add_gen_f: {
9000     Value *Ptr = EmitScalarExpr(E->getArg(0));
9001     Value *Val = EmitScalarExpr(E->getArg(1));
9002     // atomicrmw only deals with integer arguments, so we need to use
9003     // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
9004     Value *FnALAF32 =
9005         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9006     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9007   }
9008
9009   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9010     Value *Ptr = EmitScalarExpr(E->getArg(0));
9011     Value *Val = EmitScalarExpr(E->getArg(1));
9012     Value *FnALI32 =
9013         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9014     return Builder.CreateCall(FnALI32, {Ptr, Val});
9015   }
9016
9017   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9018     Value *Ptr = EmitScalarExpr(E->getArg(0));
9019     Value *Val = EmitScalarExpr(E->getArg(1));
9020     Value *FnALD32 =
9021         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9022     return Builder.CreateCall(FnALD32, {Ptr, Val});
9023   }
9024
9025   case NVPTX::BI__nvvm_ldg_c:
9026   case NVPTX::BI__nvvm_ldg_c2:
9027   case NVPTX::BI__nvvm_ldg_c4:
9028   case NVPTX::BI__nvvm_ldg_s:
9029   case NVPTX::BI__nvvm_ldg_s2:
9030   case NVPTX::BI__nvvm_ldg_s4:
9031   case NVPTX::BI__nvvm_ldg_i:
9032   case NVPTX::BI__nvvm_ldg_i2:
9033   case NVPTX::BI__nvvm_ldg_i4:
9034   case NVPTX::BI__nvvm_ldg_l:
9035   case NVPTX::BI__nvvm_ldg_ll:
9036   case NVPTX::BI__nvvm_ldg_ll2:
9037   case NVPTX::BI__nvvm_ldg_uc:
9038   case NVPTX::BI__nvvm_ldg_uc2:
9039   case NVPTX::BI__nvvm_ldg_uc4:
9040   case NVPTX::BI__nvvm_ldg_us:
9041   case NVPTX::BI__nvvm_ldg_us2:
9042   case NVPTX::BI__nvvm_ldg_us4:
9043   case NVPTX::BI__nvvm_ldg_ui:
9044   case NVPTX::BI__nvvm_ldg_ui2:
9045   case NVPTX::BI__nvvm_ldg_ui4:
9046   case NVPTX::BI__nvvm_ldg_ul:
9047   case NVPTX::BI__nvvm_ldg_ull:
9048   case NVPTX::BI__nvvm_ldg_ull2:
9049     // PTX Interoperability section 2.2: "For a vector with an even number of
9050     // elements, its alignment is set to number of elements times the alignment
9051     // of its member: n*alignof(t)."
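         // For example, a 4-element int vector gets alignment 4 * alignof(int) = 16.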
9052     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9053   case NVPTX::BI__nvvm_ldg_f:
9054   case NVPTX::BI__nvvm_ldg_f2:
9055   case NVPTX::BI__nvvm_ldg_f4:
9056   case NVPTX::BI__nvvm_ldg_d:
9057   case NVPTX::BI__nvvm_ldg_d2:
9058     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9059
9060   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9061   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9062   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9063     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9064   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9065   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9066   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9067     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9068   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9069   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9070     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9071   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9072   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9073     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9074   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9075   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9076   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9077     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9078   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9079   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9080   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9081     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9082   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9083   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9084   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9085   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9086   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9087   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9088     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9089   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9090   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9091   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9092   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9093   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9094   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9095     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9096   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9097   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9098   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9099   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9100   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9101   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9102     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9103   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9104   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9105   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9106   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9107   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9108   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9109     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9110   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9111     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9112   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9113     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9114   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9115     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9116   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9117     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9118   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9119   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9120   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9121     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9122   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9123   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9124   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9125     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9126   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9127   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9128   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9129     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9130   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9131   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9132   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9133     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9134   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9135   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9136   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9137     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9138   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9139   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9140   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9141     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9142   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9143   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9144   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9145     Value *Ptr = EmitScalarExpr(E->getArg(0));
9146     return Builder.CreateCall(
9147         CGM.getIntrinsic(
9148             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9149             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9150         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9151   }
9152   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9153   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9154   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9155     Value *Ptr = EmitScalarExpr(E->getArg(0));
9156     return Builder.CreateCall(
9157         CGM.getIntrinsic(
9158             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9159             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9160         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9161   }
9162   default:
9163     return nullptr;
9164   }
9165 }
9166
9167 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9168                                                    const CallExpr *E) {
9169   switch (BuiltinID) {
9170   case WebAssembly::BI__builtin_wasm_current_memory: {
9171     llvm::Type *ResultType = ConvertType(E->getType());
9172     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9173     return Builder.CreateCall(Callee);
9174   }
9175   case WebAssembly::BI__builtin_wasm_grow_memory: {
9176     Value *X = EmitScalarExpr(E->getArg(0));
9177     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9178     return Builder.CreateCall(Callee, X);
9179   }
9180
9181   default:
9182     return nullptr;
9183   }
9184 }