1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
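// Example (illustrative): for BuiltinID == __builtin_fabsf with no explicit
// asm label, BuiltinInfo.getName() returns "__builtin_fabsf"; skipping the
// 10-character "__builtin_" prefix yields "fabsf", which is then declared
// with the builtin's own function type, so the emitted call is a plain
// library call to fabsf().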
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
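// Rough sketch of what MakeBinaryAtomicValue produces: for Kind == Add, a
// call such as __sync_fetch_and_add(&x, 1) on an 'int' lowers to roughly
//   %old = atomicrmw add i32* %x.addr, i32 1 seq_cst
// and the old value is converted back to the source type and returned.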
120
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
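// EmitBinaryAtomicPost example: __sync_add_and_fetch(&x, 1) uses Kind == Add
// and Op == Add, so the old value returned by the atomicrmw is re-combined
// with the operand to yield the *new* value. __sync_nand_and_fetch passes
// Invert = true so the recombined result is xor'ed with -1 to finish the NAND.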
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
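// MakeAtomicCmpXchgValue examples (illustrative):
//   __sync_val_compare_and_swap(&x, old, new)  -> ReturnBool == false;
//     returns element 0 of the cmpxchg pair (the previously stored value).
//   __sync_bool_compare_and_swap(&x, old, new) -> ReturnBool == true;
//     returns element 1 (the success flag) zero-extended to the result type.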
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-endian, the high bits in big-endian. Therefore, on big-endian
297     // we need to shift the high bits down to the low bits before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating value in order to extract the higher-order
304     // double, which we will be using to extract the sign from.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
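// EmitSignBit example: for a plain 'double' this reduces to a bitcast to i64
// followed by an icmp slt against zero. For ppc_fp128 (a pair of doubles),
// only the high-order double carries the value's sign, so the i128 is first
// narrowed to the 64 bits that hold that double, as described above.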
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
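// EmitOverflowIntrinsic sketch: for a 32-bit signed add, the call becomes
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// where element 0 (the wrapped sum) is returned and element 1 (the overflow
// flag) is written to Carry.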
341
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
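// EncompassingIntegerType example: for {unsigned int, int} (both width 32),
// the result must be signed, and a signed type needs one extra bit to cover
// every 32-bit unsigned value, so the encompassing width is 33.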
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474
475   // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477   // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
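// emitBuiltinObjectSize sketch: for a non-constant i8* pointer and a 64-bit
// size_t, __builtin_object_size(p, 2) lowers to roughly
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true, i1 true)
// where the first i1 requests the minimum (Type & 2) and the second makes a
// null pointer report "unknown", matching GCC.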
481
482 // Many of the MSVC builtins are available on both x64 and ARM; to avoid
483 // repeating code, we handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485   _BitScanForward,
486   _BitScanReverse,
487   _InterlockedAnd,
488   _InterlockedDecrement,
489   _InterlockedExchange,
490   _InterlockedExchangeAdd,
491   _InterlockedExchangeSub,
492   _InterlockedIncrement,
493   _InterlockedOr,
494   _InterlockedXor,
495   _interlockedbittestandset,
496   __fastfail,
497 };
498
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
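  // _BitScanForward example: _BitScanForward(&Index, 0x10) stores 4 (the
  // index of the lowest set bit) and returns 1; a zero mask skips the store
  // and returns 0, matching the MSVC rule that Index is only meaningful when
  // the return value is nonzero.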
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
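  // _interlockedbittestandset example: _interlockedbittestandset(&v, 3)
  // atomically ORs in bit 3 and returns the previous value of that bit
  // (0 or 1), which is what the shift/trunc/and sequence above extracts.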
578
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
597
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676
677     llvm::Type *Type = Int8PtrTy;
678
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
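  // ffs example: ffs(0x18) == 4, since cttz(0x18) is 3 and the result is the
  // 1-based index of the lowest set bit; ffs(0) is special-cased to 0 by the
  // select above.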
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
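  // parity example: parity(0b1011) == 1 because ctpop(0b1011) is 3 and
  // 3 & 1 == 1.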
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
881   case Builtin::BI_rotr8:
882   case Builtin::BI_rotr16:
883   case Builtin::BI_rotr:
884   case Builtin::BI_lrotr:
885   case Builtin::BI_rotr64: {
886     Value *Val = EmitScalarExpr(E->getArg(0));
887     Value *Shift = EmitScalarExpr(E->getArg(1));
888
889     llvm::Type *ArgType = Val->getType();
890     Shift = Builder.CreateIntCast(Shift, ArgType, false);
891     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894
895     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896     Shift = Builder.CreateAnd(Shift, Mask);
897     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898
899     Value *RightShifted = Builder.CreateLShr(Val, Shift);
900     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902
903     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905     return RValue::get(Result);
906   }
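  // _rotr example: _rotr(0x80000001u, 4) == 0x18000000. The shift count is
  // masked to the bit width, and the final select returns the input unchanged
  // when the masked count is zero, avoiding an undefined shift-by-width in
  // the left-shift half of the rotate.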
907   case Builtin::BI_rotl8:
908   case Builtin::BI_rotl16:
909   case Builtin::BI_rotl:
910   case Builtin::BI_lrotl:
911   case Builtin::BI_rotl64: {
912     Value *Val = EmitScalarExpr(E->getArg(0));
913     Value *Shift = EmitScalarExpr(E->getArg(1));
914
915     llvm::Type *ArgType = Val->getType();
916     Shift = Builder.CreateIntCast(Shift, ArgType, false);
917     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920
921     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922     Shift = Builder.CreateAnd(Shift, Mask);
923     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924
925     Value *LeftShifted = Builder.CreateShl(Val, Shift);
926     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928
929     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931     return RValue::get(Result);
932   }
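  // _rotl example: _rotl(0x80000001u, 4) == 0x00000018: shl by 4 gives
  // 0x00000010, lshr by 28 gives 0x00000008, and their OR is the rotated
  // value; a masked shift of zero again just returns the input.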
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should be of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034
1035     return RValue::get(nullptr);
1036   }
1037
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
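  // isinf/isfinite example: __builtin_isinf(x) becomes "fabs(x) == +inf"
  // and __builtin_isfinite(x) becomes "fabs(x) != +inf", both with ordered
  // compares, so a NaN argument yields 0 for either builtin.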
1109
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
1229
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251
1252   case Builtin::BI__builtin_char_memchr:
1253     BuiltinID = Builtin::BI__builtin_memchr;
1254     break;
1255
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
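  // __memcpy_chk folding example: __builtin___memcpy_chk(d, s, 16, 32) is
  // folded to a plain 16-byte memcpy because the constant copy size does not
  // exceed the constant destination size; otherwise the break above falls
  // back to the generic handling below (a call to the library __memcpy_chk).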
1270
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
1333   case Builtin::BI__builtin_dwarf_cfa: {
1334     // The offset in bytes from the first argument to the CFA.
1335     //
1336     // Why on earth is this in the frontend?  Is there any reason at
1337     // all that the backend can't reasonably determine this while
1338     // lowering llvm.eh.dwarf.cfa()?
1339     //
1340     // TODO: If there's a satisfactory reason, add a target hook for
1341     // this instead of hard-coding 0, which is correct for most targets.
1342     int32_t Offset = 0;
1343
1344     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345     return RValue::get(Builder.CreateCall(F,
1346                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1347   }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
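    //
    // Hedged sketch: on a 32-bit target whose hooks request sign extension,
    //   %1 = ptrtoint i8* %ptr to i32
    //   %2 = sext i32 %1 to i64
    // while a 64-bit target simply returns the ptrtoint result unchanged.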
1422
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430
1431     // Otherwise, ask the codegen data what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
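    //
    // Buffer layout sketch (as assumed by this code and llvm.eh.sjlj.setjmp):
    //   slot 0: caller's frame pointer (stored here)
    //   slot 1: filled in by the llvm.eh.sjlj.setjmp lowering
    //   slot 2: stack pointer (stored below via llvm.stacksave)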
1440
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1528
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
1627
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
1639
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
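    //
    // Illustrative call (argument widths are target-dependent):
    //   __c11_atomic_is_lock_free(8)
    // becomes roughly
    //   call i1 @__atomic_is_lock_free(i64 8, i8* null)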
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
1666
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the pre-conversion argument type to determine whether this is a
1669     // volatile operation; the declared parameter type is always volatile.
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711
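    // The memory order is not a compile-time constant: emit a switch over the
    // C11 order values, perform the xchg with the matching LLVM ordering in
    // each destination block, and merge the results with a phi.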
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
1751
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
1808
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SyncScope::ID SSID;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       SSID = llvm::SyncScope::SingleThread;
1817     else
1818       SSID = llvm::SyncScope::System;
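    // Hedged examples of the mapping (IR syntax approximate):
    //   __atomic_thread_fence(__ATOMIC_ACQUIRE)
    //     -> fence acquire
    //   __atomic_signal_fence(__ATOMIC_SEQ_CST)
    //     -> fence syncscope("singlethread") seq_cst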
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1838         break;
1839       }
1840       return RValue::get(nullptr);
1841     }
1842
1843     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1844     AcquireBB = createBasicBlock("acquire", CurFn);
1845     ReleaseBB = createBasicBlock("release", CurFn);
1846     AcqRelBB = createBasicBlock("acqrel", CurFn);
1847     SeqCstBB = createBasicBlock("seqcst", CurFn);
1848     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1849
1850     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1851     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1852
1853     Builder.SetInsertPoint(AcquireBB);
1854     Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1855     Builder.CreateBr(ContBB);
1856     SI->addCase(Builder.getInt32(1), AcquireBB);
1857     SI->addCase(Builder.getInt32(2), AcquireBB);
1858
1859     Builder.SetInsertPoint(ReleaseBB);
1860     Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1861     Builder.CreateBr(ContBB);
1862     SI->addCase(Builder.getInt32(3), ReleaseBB);
1863
1864     Builder.SetInsertPoint(AcqRelBB);
1865     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1866     Builder.CreateBr(ContBB);
1867     SI->addCase(Builder.getInt32(4), AcqRelBB);
1868
1869     Builder.SetInsertPoint(SeqCstBB);
1870     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1871     Builder.CreateBr(ContBB);
1872     SI->addCase(Builder.getInt32(5), SeqCstBB);
1873
1874     Builder.SetInsertPoint(ContBB);
1875     return RValue::get(nullptr);
1876   }
1877
1878     // Library functions with special handling.
1879   case Builtin::BIsqrt:
1880   case Builtin::BIsqrtf:
1881   case Builtin::BIsqrtl: {
1882     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1883     // in finite- or unsafe-math mode (the intrinsic has different semantics
1884     // for handling negative numbers compared to the library function, so
1885     // -fmath-errno=0 is not enough).
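    //
    // Illustrative example (flags assumed): under -ffast-math,
    //   float r = sqrtf(x);
    // is emitted as
    //   %r = call float @llvm.sqrt.f32(float %x)
    // while without such flags the plain libcall to sqrtf is kept.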
1886     if (!FD->hasAttr<ConstAttr>())
1887       break;
1888     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1889           CGM.getCodeGenOpts().NoNaNsFPMath))
1890       break;
1891     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1892     llvm::Type *ArgType = Arg0->getType();
1893     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1894     return RValue::get(Builder.CreateCall(F, Arg0));
1895   }
1896
1897   case Builtin::BI__builtin_pow:
1898   case Builtin::BI__builtin_powf:
1899   case Builtin::BI__builtin_powl:
1900   case Builtin::BIpow:
1901   case Builtin::BIpowf:
1902   case Builtin::BIpowl: {
1903     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1904     if (!FD->hasAttr<ConstAttr>())
1905       break;
1906     Value *Base = EmitScalarExpr(E->getArg(0));
1907     Value *Exponent = EmitScalarExpr(E->getArg(1));
1908     llvm::Type *ArgType = Base->getType();
1909     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1910     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1911   }
1912
1913   case Builtin::BIfma:
1914   case Builtin::BIfmaf:
1915   case Builtin::BIfmal:
1916   case Builtin::BI__builtin_fma:
1917   case Builtin::BI__builtin_fmaf:
1918   case Builtin::BI__builtin_fmal: {
1919     // Rewrite fma to intrinsic.
1920     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1921     llvm::Type *ArgType = FirstArg->getType();
1922     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1923     return RValue::get(
1924         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1925                                EmitScalarExpr(E->getArg(2))}));
1926   }
1927
1928   case Builtin::BI__builtin_signbit:
1929   case Builtin::BI__builtin_signbitf:
1930   case Builtin::BI__builtin_signbitl: {
1931     return RValue::get(
1932         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1933                            ConvertType(E->getType())));
1934   }
1935   case Builtin::BI__builtin_annotation: {
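    // Hedged example (the string/location operands are implementation details):
    //   int y = __builtin_annotation(x, "my note");
    // becomes roughly
    //   %y = call i32 @llvm.annotation.i32(i32 %x, i8* <"my note">,
    //                                      i8* <file>, i32 <line>)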
1936     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1937     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1938                                       AnnVal->getType());
1939
1940     // Get the annotation string, go through casts. Sema requires this to be a
1941     // non-wide string literal, potentially cast, so the cast<> is safe.
1942     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1943     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1944     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1945   }
1946   case Builtin::BI__builtin_addcb:
1947   case Builtin::BI__builtin_addcs:
1948   case Builtin::BI__builtin_addc:
1949   case Builtin::BI__builtin_addcl:
1950   case Builtin::BI__builtin_addcll:
1951   case Builtin::BI__builtin_subcb:
1952   case Builtin::BI__builtin_subcs:
1953   case Builtin::BI__builtin_subc:
1954   case Builtin::BI__builtin_subcl:
1955   case Builtin::BI__builtin_subcll: {
1956
1957     // We translate all of these builtins from expressions of the form:
1958     //   int x = ..., y = ..., carryin = ..., carryout, result;
1959     //   result = __builtin_addc(x, y, carryin, &carryout);
1960     //
1961     // to LLVM IR of the form:
1962     //
1963     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1964     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1965     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1966     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1967     //                                                       i32 %carryin)
1968     //   %result = extractvalue {i32, i1} %tmp2, 0
1969     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1970     //   %tmp3 = or i1 %carry1, %carry2
1971     //   %tmp4 = zext i1 %tmp3 to i32
1972     //   store i32 %tmp4, i32* %carryout
1973
1974     // Scalarize our inputs.
1975     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1976     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1977     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1978     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1979
1980     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1981     llvm::Intrinsic::ID IntrinsicId;
1982     switch (BuiltinID) {
1983     default: llvm_unreachable("Unknown multiprecision builtin id.");
1984     case Builtin::BI__builtin_addcb:
1985     case Builtin::BI__builtin_addcs:
1986     case Builtin::BI__builtin_addc:
1987     case Builtin::BI__builtin_addcl:
1988     case Builtin::BI__builtin_addcll:
1989       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1990       break;
1991     case Builtin::BI__builtin_subcb:
1992     case Builtin::BI__builtin_subcs:
1993     case Builtin::BI__builtin_subc:
1994     case Builtin::BI__builtin_subcl:
1995     case Builtin::BI__builtin_subcll:
1996       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1997       break;
1998     }
1999
2000     // Construct our resulting LLVM IR expression.
2001     llvm::Value *Carry1;
2002     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2003                                               X, Y, Carry1);
2004     llvm::Value *Carry2;
2005     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2006                                               Sum1, Carryin, Carry2);
2007     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2008                                                X->getType());
2009     Builder.CreateStore(CarryOut, CarryOutPtr);
2010     return RValue::get(Sum2);
2011   }
2012
2013   case Builtin::BI__builtin_add_overflow:
2014   case Builtin::BI__builtin_sub_overflow:
2015   case Builtin::BI__builtin_mul_overflow: {
2016     const clang::Expr *LeftArg = E->getArg(0);
2017     const clang::Expr *RightArg = E->getArg(1);
2018     const clang::Expr *ResultArg = E->getArg(2);
2019
2020     clang::QualType ResultQTy =
2021         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2022
2023     WidthAndSignedness LeftInfo =
2024         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2025     WidthAndSignedness RightInfo =
2026         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2027     WidthAndSignedness ResultInfo =
2028         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2029     WidthAndSignedness EncompassingInfo =
2030         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
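    // Worked sketch of the encompassing-type rule (types illustrative): for
    // __builtin_add_overflow with an int and an unsigned operand and a short
    // result, the smallest signed type that can represent every operand is
    // 33 bits wide, so the math is done via @llvm.sadd.with.overflow.i33 and
    // the sum is then truncated to i16, checking the truncation for overflow.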
2031
2032     llvm::Type *EncompassingLLVMTy =
2033         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2034
2035     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2036
2037     llvm::Intrinsic::ID IntrinsicId;
2038     switch (BuiltinID) {
2039     default:
2040       llvm_unreachable("Unknown overflow builtin id.");
2041     case Builtin::BI__builtin_add_overflow:
2042       IntrinsicId = EncompassingInfo.Signed
2043                         ? llvm::Intrinsic::sadd_with_overflow
2044                         : llvm::Intrinsic::uadd_with_overflow;
2045       break;
2046     case Builtin::BI__builtin_sub_overflow:
2047       IntrinsicId = EncompassingInfo.Signed
2048                         ? llvm::Intrinsic::ssub_with_overflow
2049                         : llvm::Intrinsic::usub_with_overflow;
2050       break;
2051     case Builtin::BI__builtin_mul_overflow:
2052       IntrinsicId = EncompassingInfo.Signed
2053                         ? llvm::Intrinsic::smul_with_overflow
2054                         : llvm::Intrinsic::umul_with_overflow;
2055       break;
2056     }
2057
2058     llvm::Value *Left = EmitScalarExpr(LeftArg);
2059     llvm::Value *Right = EmitScalarExpr(RightArg);
2060     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2061
2062     // Extend each operand to the encompassing type.
2063     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2064     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2065
2066     // Perform the operation on the extended values.
2067     llvm::Value *Overflow, *Result;
2068     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2069
2070     if (EncompassingInfo.Width > ResultInfo.Width) {
2071       // The encompassing type is wider than the result type, so we need to
2072       // truncate it.
2073       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2074
2075       // To see if the truncation caused an overflow, we will extend
2076       // the result and then compare it to the original result.
2077       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2078           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2079       llvm::Value *TruncationOverflow =
2080           Builder.CreateICmpNE(Result, ResultTruncExt);
2081
2082       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2083       Result = ResultTrunc;
2084     }
2085
2086     // Finally, store the result using the pointer.
2087     bool isVolatile =
2088       ResultArg->getType()->getPointeeType().isVolatileQualified();
2089     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2090
2091     return RValue::get(Overflow);
2092   }
2093
2094   case Builtin::BI__builtin_uadd_overflow:
2095   case Builtin::BI__builtin_uaddl_overflow:
2096   case Builtin::BI__builtin_uaddll_overflow:
2097   case Builtin::BI__builtin_usub_overflow:
2098   case Builtin::BI__builtin_usubl_overflow:
2099   case Builtin::BI__builtin_usubll_overflow:
2100   case Builtin::BI__builtin_umul_overflow:
2101   case Builtin::BI__builtin_umull_overflow:
2102   case Builtin::BI__builtin_umulll_overflow:
2103   case Builtin::BI__builtin_sadd_overflow:
2104   case Builtin::BI__builtin_saddl_overflow:
2105   case Builtin::BI__builtin_saddll_overflow:
2106   case Builtin::BI__builtin_ssub_overflow:
2107   case Builtin::BI__builtin_ssubl_overflow:
2108   case Builtin::BI__builtin_ssubll_overflow:
2109   case Builtin::BI__builtin_smul_overflow:
2110   case Builtin::BI__builtin_smull_overflow:
2111   case Builtin::BI__builtin_smulll_overflow: {
2112
2113     // We translate each of these builtins to the matching overflow intrinsic.
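    // Hedged example: __builtin_sadd_overflow(x, y, &r) with 32-bit ints becomes
    //   %pair  = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
    //   %sum   = extractvalue {i32, i1} %pair, 0   ; stored through &r
    //   %carry = extractvalue {i32, i1} %pair, 1   ; returned to the caller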
2114
2115     // Scalarize our inputs.
2116     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2117     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2118     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2119
2120     // Decide which of the overflow intrinsics we are lowering to:
2121     llvm::Intrinsic::ID IntrinsicId;
2122     switch (BuiltinID) {
2123     default: llvm_unreachable("Unknown overflow builtin id.");
2124     case Builtin::BI__builtin_uadd_overflow:
2125     case Builtin::BI__builtin_uaddl_overflow:
2126     case Builtin::BI__builtin_uaddll_overflow:
2127       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2128       break;
2129     case Builtin::BI__builtin_usub_overflow:
2130     case Builtin::BI__builtin_usubl_overflow:
2131     case Builtin::BI__builtin_usubll_overflow:
2132       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2133       break;
2134     case Builtin::BI__builtin_umul_overflow:
2135     case Builtin::BI__builtin_umull_overflow:
2136     case Builtin::BI__builtin_umulll_overflow:
2137       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2138       break;
2139     case Builtin::BI__builtin_sadd_overflow:
2140     case Builtin::BI__builtin_saddl_overflow:
2141     case Builtin::BI__builtin_saddll_overflow:
2142       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2143       break;
2144     case Builtin::BI__builtin_ssub_overflow:
2145     case Builtin::BI__builtin_ssubl_overflow:
2146     case Builtin::BI__builtin_ssubll_overflow:
2147       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2148       break;
2149     case Builtin::BI__builtin_smul_overflow:
2150     case Builtin::BI__builtin_smull_overflow:
2151     case Builtin::BI__builtin_smulll_overflow:
2152       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2153       break;
2154     }
2155
2157     llvm::Value *Carry;
2158     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2159     Builder.CreateStore(Sum, SumOutPtr);
2160
2161     return RValue::get(Carry);
2162   }
2163   case Builtin::BI__builtin_addressof:
2164     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2165   case Builtin::BI__builtin_operator_new:
2166     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2167                                     E->getArg(0), false);
2168   case Builtin::BI__builtin_operator_delete:
2169     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2170                                     E->getArg(0), true);
2171   case Builtin::BI__noop:
2172     // __noop always evaluates to an integer literal zero.
2173     return RValue::get(ConstantInt::get(IntTy, 0));
2174   case Builtin::BI__builtin_call_with_static_chain: {
2175     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2176     const Expr *Chain = E->getArg(1);
2177     return EmitCall(Call->getCallee()->getType(),
2178                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2179                     EmitScalarExpr(Chain));
2180   }
2181   case Builtin::BI_InterlockedExchange8:
2182   case Builtin::BI_InterlockedExchange16:
2183   case Builtin::BI_InterlockedExchange:
2184   case Builtin::BI_InterlockedExchangePointer:
2185     return RValue::get(
2186         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2187   case Builtin::BI_InterlockedCompareExchangePointer: {
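    // The exchange is done on pointer-sized integers: bitcast the destination
    // to an integer pointer, ptrtoint the exchange and comparand values, run a
    // volatile, sequentially consistent cmpxchg, and inttoptr the old value
    // back to the original pointer type.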
2188     llvm::Type *RTy;
2189     llvm::IntegerType *IntType =
2190       IntegerType::get(getLLVMContext(),
2191                        getContext().getTypeSize(E->getType()));
2192     llvm::Type *IntPtrType = IntType->getPointerTo();
2193
2194     llvm::Value *Destination =
2195       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2196
2197     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2198     RTy = Exchange->getType();
2199     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2200
2201     llvm::Value *Comparand =
2202       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2203
2204     auto Result =
2205         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2206                                     AtomicOrdering::SequentiallyConsistent,
2207                                     AtomicOrdering::SequentiallyConsistent);
2208     Result->setVolatile(true);
2209
2210     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2211                                                                          0),
2212                                               RTy));
2213   }
2214   case Builtin::BI_InterlockedCompareExchange8:
2215   case Builtin::BI_InterlockedCompareExchange16:
2216   case Builtin::BI_InterlockedCompareExchange:
2217   case Builtin::BI_InterlockedCompareExchange64: {
2218     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2219         EmitScalarExpr(E->getArg(0)),
2220         EmitScalarExpr(E->getArg(2)),
2221         EmitScalarExpr(E->getArg(1)),
2222         AtomicOrdering::SequentiallyConsistent,
2223         AtomicOrdering::SequentiallyConsistent);
2224     CXI->setVolatile(true);
2225     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2226   }
2227   case Builtin::BI_InterlockedIncrement16:
2228   case Builtin::BI_InterlockedIncrement:
2229     return RValue::get(
2230         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2231   case Builtin::BI_InterlockedDecrement16:
2232   case Builtin::BI_InterlockedDecrement:
2233     return RValue::get(
2234         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2235   case Builtin::BI_InterlockedAnd8:
2236   case Builtin::BI_InterlockedAnd16:
2237   case Builtin::BI_InterlockedAnd:
2238     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2239   case Builtin::BI_InterlockedExchangeAdd8:
2240   case Builtin::BI_InterlockedExchangeAdd16:
2241   case Builtin::BI_InterlockedExchangeAdd:
2242     return RValue::get(
2243         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2244   case Builtin::BI_InterlockedExchangeSub8:
2245   case Builtin::BI_InterlockedExchangeSub16:
2246   case Builtin::BI_InterlockedExchangeSub:
2247     return RValue::get(
2248         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2249   case Builtin::BI_InterlockedOr8:
2250   case Builtin::BI_InterlockedOr16:
2251   case Builtin::BI_InterlockedOr:
2252     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2253   case Builtin::BI_InterlockedXor8:
2254   case Builtin::BI_InterlockedXor16:
2255   case Builtin::BI_InterlockedXor:
2256     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2257   case Builtin::BI_interlockedbittestandset:
2258     return RValue::get(
2259         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2260
2261   case Builtin::BI__exception_code:
2262   case Builtin::BI_exception_code:
2263     return RValue::get(EmitSEHExceptionCode());
2264   case Builtin::BI__exception_info:
2265   case Builtin::BI_exception_info:
2266     return RValue::get(EmitSEHExceptionInfo());
2267   case Builtin::BI__abnormal_termination:
2268   case Builtin::BI_abnormal_termination:
2269     return RValue::get(EmitSEHAbnormalTermination());
2270   case Builtin::BI_setjmpex: {
2271     if (getTarget().getTriple().isOSMSVCRT()) {
2272       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2273       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2274           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2275           llvm::Attribute::ReturnsTwice);
2276       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2277           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2278           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2279       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2280           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2281       llvm::Value *FrameAddr =
2282           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2283                              ConstantInt::get(Int32Ty, 0));
2284       llvm::Value *Args[] = {Buf, FrameAddr};
2285       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2286       CS.setAttributes(ReturnsTwiceAttr);
2287       return RValue::get(CS.getInstruction());
2288     }
2289     break;
2290   }
2291   case Builtin::BI_setjmp: {
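    // On MSVC runtimes this lowers to a returns_twice runtime call: x86 calls
    // _setjmp3(buf, 0), other architectures call _setjmp(buf, frameaddress).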
2292     if (getTarget().getTriple().isOSMSVCRT()) {
2293       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2294           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2295           llvm::Attribute::ReturnsTwice);
2296       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2297           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2298       llvm::CallSite CS;
2299       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2300         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2301         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2302             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2303             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2304         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2305         llvm::Value *Args[] = {Buf, Count};
2306         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2307       } else {
2308         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2309         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2310             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2311             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2312         llvm::Value *FrameAddr =
2313             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2314                                ConstantInt::get(Int32Ty, 0));
2315         llvm::Value *Args[] = {Buf, FrameAddr};
2316         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2317       }
2318       CS.setAttributes(ReturnsTwiceAttr);
2319       return RValue::get(CS.getInstruction());
2320     }
2321     break;
2322   }
2323
2324   case Builtin::BI__GetExceptionInfo: {
2325     if (llvm::GlobalVariable *GV =
2326             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2327       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2328     break;
2329   }
2330
2331   case Builtin::BI__fastfail:
2332     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2333
2334   case Builtin::BI__builtin_coro_size: {
2335     auto &Context = getContext();
2336     auto SizeTy = Context.getSizeType();
2337     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2338     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2339     return RValue::get(Builder.CreateCall(F));
2340   }
2341
2342   case Builtin::BI__builtin_coro_id:
2343     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2344   case Builtin::BI__builtin_coro_promise:
2345     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2346   case Builtin::BI__builtin_coro_resume:
2347     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2348   case Builtin::BI__builtin_coro_frame:
2349     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2350   case Builtin::BI__builtin_coro_free:
2351     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2352   case Builtin::BI__builtin_coro_destroy:
2353     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2354   case Builtin::BI__builtin_coro_done:
2355     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2356   case Builtin::BI__builtin_coro_alloc:
2357     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2358   case Builtin::BI__builtin_coro_begin:
2359     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2360   case Builtin::BI__builtin_coro_end:
2361     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2362   case Builtin::BI__builtin_coro_suspend:
2363     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2364   case Builtin::BI__builtin_coro_param:
2365     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2366
2367   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2368   case Builtin::BIread_pipe:
2369   case Builtin::BIwrite_pipe: {
2370     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2371           *Arg1 = EmitScalarExpr(E->getArg(1));
2372     CGOpenCLRuntime OpenCLRT(CGM);
2373     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2374     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2375
2376     // Type of the generic packet parameter.
2377     unsigned GenericAS =
2378         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2379     llvm::Type *I8PTy = llvm::PointerType::get(
2380         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2381
2382     // Determine which overloaded version we should generate the call for.
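    // Hedged sketch (runtime names taken from the calls below):
    //   read_pipe(p, &v)           -> __read_pipe_2(p, ptr, size, align)
    //   read_pipe(p, rid, idx, &v) -> __read_pipe_4(p, rid, idx, ptr, size, align)
    // where ptr is &v cast to a generic i8*.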
2383     if (2U == E->getNumArgs()) {
2384       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2385                                                              : "__write_pipe_2";
2386       // Create a generic function type so the call works with any builtin or
2387       // user-defined type.
2388       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2389       llvm::FunctionType *FTy = llvm::FunctionType::get(
2390           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2391       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2392       return RValue::get(
2393           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2394                              {Arg0, BCast, PacketSize, PacketAlign}));
2395     } else {
2396       assert(4 == E->getNumArgs() &&
2397              "Illegal number of parameters to pipe function");
2398       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2399                                                              : "__write_pipe_4";
2400
2401       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2402                               Int32Ty, Int32Ty};
2403       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2404             *Arg3 = EmitScalarExpr(E->getArg(3));
2405       llvm::FunctionType *FTy = llvm::FunctionType::get(
2406           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2407       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2408       // We know the third argument is an integer type, but we may need to cast
2409       // it to i32.
2410       if (Arg2->getType() != Int32Ty)
2411         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2412       return RValue::get(Builder.CreateCall(
2413           CGM.CreateRuntimeFunction(FTy, Name),
2414           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2415     }
2416   }
2417   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2418   // functions
2419   case Builtin::BIreserve_read_pipe:
2420   case Builtin::BIreserve_write_pipe:
2421   case Builtin::BIwork_group_reserve_read_pipe:
2422   case Builtin::BIwork_group_reserve_write_pipe:
2423   case Builtin::BIsub_group_reserve_read_pipe:
2424   case Builtin::BIsub_group_reserve_write_pipe: {
2425     // Pick the runtime library function name for this builtin.
2426     const char *Name;
2427     if (BuiltinID == Builtin::BIreserve_read_pipe)
2428       Name = "__reserve_read_pipe";
2429     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2430       Name = "__reserve_write_pipe";
2431     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2432       Name = "__work_group_reserve_read_pipe";
2433     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2434       Name = "__work_group_reserve_write_pipe";
2435     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2436       Name = "__sub_group_reserve_read_pipe";
2437     else
2438       Name = "__sub_group_reserve_write_pipe";
2439
2440     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2441           *Arg1 = EmitScalarExpr(E->getArg(1));
2442     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2443     CGOpenCLRuntime OpenCLRT(CGM);
2444     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2445     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2446
2447     // Building the generic function prototype.
2448     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2449     llvm::FunctionType *FTy = llvm::FunctionType::get(
2450         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2451     // We know the second argument is an integer type, but we may need to cast
2452     // it to i32.
2453     if (Arg1->getType() != Int32Ty)
2454       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2455     return RValue::get(
2456         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2457                            {Arg0, Arg1, PacketSize, PacketAlign}));
2458   }
2459   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2460   // functions
2461   case Builtin::BIcommit_read_pipe:
2462   case Builtin::BIcommit_write_pipe:
2463   case Builtin::BIwork_group_commit_read_pipe:
2464   case Builtin::BIwork_group_commit_write_pipe:
2465   case Builtin::BIsub_group_commit_read_pipe:
2466   case Builtin::BIsub_group_commit_write_pipe: {
2467     const char *Name;
2468     if (BuiltinID == Builtin::BIcommit_read_pipe)
2469       Name = "__commit_read_pipe";
2470     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2471       Name = "__commit_write_pipe";
2472     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2473       Name = "__work_group_commit_read_pipe";
2474     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2475       Name = "__work_group_commit_write_pipe";
2476     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2477       Name = "__sub_group_commit_read_pipe";
2478     else
2479       Name = "__sub_group_commit_write_pipe";
2480
2481     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2482           *Arg1 = EmitScalarExpr(E->getArg(1));
2483     CGOpenCLRuntime OpenCLRT(CGM);
2484     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2485     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2486
2487     // Building the generic function prototype.
2488     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2489     llvm::FunctionType *FTy =
2490         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2491                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2492
2493     return RValue::get(
2494         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2495                            {Arg0, Arg1, PacketSize, PacketAlign}));
2496   }
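  // Correspondingly, the commit builtins above lower to void runtime calls of
  // the form
  //   __commit_read_pipe(p, rid, packet_size, packet_align);
  // matching the {pipe, reserve_id, i32, i32} prototype built from ArgTys.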
2497   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2498   case Builtin::BIget_pipe_num_packets:
2499   case Builtin::BIget_pipe_max_packets: {
2500     const char *Name;
2501     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2502       Name = "__get_pipe_num_packets";
2503     else
2504       Name = "__get_pipe_max_packets";
2505
2506     // Building the generic function prototype.
2507     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2508     CGOpenCLRuntime OpenCLRT(CGM);
2509     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2510     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2511     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2512     llvm::FunctionType *FTy = llvm::FunctionType::get(
2513         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2514
2515     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2516                                           {Arg0, PacketSize, PacketAlign}));
2517   }
2518
2519   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2520   case Builtin::BIto_global:
2521   case Builtin::BIto_local:
2522   case Builtin::BIto_private: {
2523     auto Arg0 = EmitScalarExpr(E->getArg(0));
2524     auto NewArgT = llvm::PointerType::get(Int8Ty,
2525       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2526     auto NewRetT = llvm::PointerType::get(Int8Ty,
2527       CGM.getContext().getTargetAddressSpace(
2528         E->getType()->getPointeeType().getAddressSpace()));
2529     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2530     llvm::Value *NewArg;
2531     if (Arg0->getType()->getPointerAddressSpace() !=
2532         NewArgT->getPointerAddressSpace())
2533       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2534     else
2535       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2536     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2537     auto NewCall =
2538         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2539     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2540       ConvertType(E->getType())));
2541   }
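  // In effect (a sketch based on the casts above), to_global(p) lowers to
  //   __to_global((generic i8*)p)
  // where the argument is addrspacecast to the generic address space only if
  // it is not already there, and the i8* result is cast back to the call's
  // declared return type.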
2542
2543   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2544   // It contains four different overload formats specified in Table 6.13.17.1.
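  // The four forms map onto the four runtime entry points emitted below
  // (argument shapes paraphrased from the code, not a formal ABI statement):
  //   __enqueue_kernel_basic          (queue, flags, ndrange, block)
  //   __enqueue_kernel_vaargs         (queue, flags, ndrange, block, num_sizes, size...)
  //   __enqueue_kernel_basic_events   (queue, flags, ndrange, num_events, wait_list, ret_event, block)
  //   __enqueue_kernel_events_vaargs  (as above, plus num_sizes and size...)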
2545   case Builtin::BIenqueue_kernel: {
2546     StringRef Name; // Generated function call name
2547     unsigned NumArgs = E->getNumArgs();
2548
2549     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2550     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2551         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2552
2553     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2554     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2555     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2556     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2557     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2558
2559     if (NumArgs == 4) {
2560       // The most basic form of the call with parameters:
2561       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2562       Name = "__enqueue_kernel_basic";
2563       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2564       llvm::FunctionType *FTy = llvm::FunctionType::get(
2565           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2566
2567       llvm::Value *Block = Builder.CreatePointerCast(
2568           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2569
2570       AttrBuilder B;
2571       B.addAttribute(Attribute::ByVal);
2572       llvm::AttributeList ByValAttrSet =
2573           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2574
2575       auto RTCall =
2576           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2577                              {Queue, Flags, Range, Block});
2578       RTCall->setAttributes(ByValAttrSet);
2579       return RValue::get(RTCall);
2580     }
2581     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2582
2583     // Could have events and/or vaargs.
2584     if (E->getArg(3)->getType()->isBlockPointerType()) {
2585       // No events passed, but has variadic arguments.
2586       Name = "__enqueue_kernel_vaargs";
2587       llvm::Value *Block = Builder.CreatePointerCast(
2588           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2589       // Create a vector of the arguments, as well as a constant value to
2590       // express to the runtime the number of variadic arguments.
2591       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2592                                          ConstantInt::get(IntTy, NumArgs - 4)};
2593       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2594                                           GenericVoidPtrTy, IntTy};
2595
2596       // Each of the following arguments specifies the size of the corresponding
2597       // argument passed to the enqueued block.
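      // For example (a sketch): enqueue_kernel(q, f, r, ^(local int *a,
      // local float4 *b){...}, sz_a, sz_b) reaches this point with NumArgs == 6,
      // so two trailing size arguments are zero-extended or truncated to size_t
      // and appended after the constant 2 pushed above.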
2598       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2599         Args.push_back(
2600             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2601
2602       llvm::FunctionType *FTy = llvm::FunctionType::get(
2603           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2604       return RValue::get(
2605           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2606                              llvm::ArrayRef<llvm::Value *>(Args)));
2607     }
2608     // Any call reaching this point has event arguments.
2609     if (NumArgs >= 7) {
2610       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2611       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2612           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2613
2614       llvm::Value *NumEvents =
2615           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2616       llvm::Value *EventList =
2617           E->getArg(4)->getType()->isArrayType()
2618               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2619               : EmitScalarExpr(E->getArg(4));
2620       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2621       // Convert to generic address space.
2622       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2623       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2624       llvm::Value *Block = Builder.CreatePointerCast(
2625           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2626
2627       std::vector<llvm::Type *> ArgTys = {
2628           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2629           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2630
2631       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2632                                          EventList, ClkEvent, Block};
2633
2634       if (NumArgs == 7) {
2635         // Has events but no variadics.
2636         Name = "__enqueue_kernel_basic_events";
2637         llvm::FunctionType *FTy = llvm::FunctionType::get(
2638             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2639         return RValue::get(
2640             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2641                                llvm::ArrayRef<llvm::Value *>(Args)));
2642       }
2643       // Has event info and variadics
2644       // Pass the number of variadics to the runtime function too.
2645       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2646       ArgTys.push_back(Int32Ty);
2647       Name = "__enqueue_kernel_events_vaargs";
2648
2649       // Each of the following arguments specifies the size of the corresponding
2650       // argument passed to the enqueued block.
2651       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2652         Args.push_back(
2653             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2654
2655       llvm::FunctionType *FTy = llvm::FunctionType::get(
2656           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2657       return RValue::get(
2658           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2659                              llvm::ArrayRef<llvm::Value *>(Args)));
2660     }
2661     LLVM_FALLTHROUGH;
2662   }
2663   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2664   // parameter.
2665   case Builtin::BIget_kernel_work_group_size: {
2666     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2667         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2668     Value *Arg = EmitScalarExpr(E->getArg(0));
2669     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2670     return RValue::get(Builder.CreateCall(
2671         CGM.CreateRuntimeFunction(
2672             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2673             "__get_kernel_work_group_size_impl"),
2674         Arg));
2675   }
2676   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2677     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2678         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2679     Value *Arg = EmitScalarExpr(E->getArg(0));
2680     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2681     return RValue::get(Builder.CreateCall(
2682         CGM.CreateRuntimeFunction(
2683             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2684             "__get_kernel_preferred_work_group_multiple_impl"),
2685         Arg));
2686   }
2687   case Builtin::BIprintf:
2688     if (getTarget().getTriple().isNVPTX())
2689       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2690     break;
2691   case Builtin::BI__builtin_canonicalize:
2692   case Builtin::BI__builtin_canonicalizef:
2693   case Builtin::BI__builtin_canonicalizel:
2694     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2695
2696   case Builtin::BI__builtin_thread_pointer: {
2697     if (!getContext().getTargetInfo().isTLSSupported())
2698       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2699     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2700     break;
2701   }
2702   case Builtin::BI__builtin_os_log_format: {
2703     assert(E->getNumArgs() >= 2 &&
2704            "__builtin_os_log_format takes at least 2 arguments");
2705     analyze_os_log::OSLogBufferLayout Layout;
2706     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2707     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2708     // Ignore argument 1, the format string. It is not currently used.
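    // Rough shape of the buffer filled in below (derived from the stores that
    // follow): one summary byte, one argument-count byte, then for each item a
    // descriptor byte, a size byte, and the item's data:
    //   [summary][numArgs]([descriptor][size][data...])*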
2709     CharUnits Offset;
2710     Builder.CreateStore(
2711         Builder.getInt8(Layout.getSummaryByte()),
2712         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2713     Builder.CreateStore(
2714         Builder.getInt8(Layout.getNumArgsByte()),
2715         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2716
2717     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2718     for (const auto &Item : Layout.Items) {
2719       Builder.CreateStore(
2720           Builder.getInt8(Item.getDescriptorByte()),
2721           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2722       Builder.CreateStore(
2723           Builder.getInt8(Item.getSizeByte()),
2724           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2725       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2726       if (const Expr *TheExpr = Item.getExpr()) {
2727         Addr = Builder.CreateElementBitCast(
2728             Addr, ConvertTypeForMem(TheExpr->getType()));
2729         // Check if this is a retainable type.
2730         if (TheExpr->getType()->isObjCRetainableType()) {
2731           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2732                  "Only scalars can be ObjC retainable types");
2733           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2734           RValue RV = RValue::get(SV);
2735           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2736           EmitStoreThroughLValue(RV, LV);
2737           // Check if the object is constant, if not, save it in
2738           // RetainableOperands.
2739           if (!isa<Constant>(SV))
2740             RetainableOperands.push_back(SV);
2741         } else {
2742           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2743         }
2744       } else {
2745         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2746         Builder.CreateStore(
2747             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2748       }
2749       Offset += Item.size();
2750     }
2751
2752     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2753     // cleanup will cause the use to appear after the final log call, keeping
2754     // the object valid while it's held in the log buffer.  Note that if there's
2755     // a release cleanup on the object, it will already be active; since
2756     // cleanups are emitted in reverse order, the use will occur before the
2757     // object is released.
2758     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2759         CGM.getCodeGenOpts().OptimizationLevel != 0)
2760       for (llvm::Value *object : RetainableOperands)
2761         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2762
2763     return RValue::get(BufAddr.getPointer());
2764   }
2765
2766   case Builtin::BI__builtin_os_log_format_buffer_size: {
2767     analyze_os_log::OSLogBufferLayout Layout;
2768     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2769     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2770                                         Layout.size().getQuantity()));
2771   }
2772
2773   case Builtin::BI__xray_customevent: {
2774     if (!ShouldXRayInstrumentFunction())
2775       return RValue::getIgnored();
2776     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2777       if (XRayAttr->neverXRayInstrument())
2778         return RValue::getIgnored();
2779     }
2780     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2781     auto FTy = F->getFunctionType();
2782     auto Arg0 = E->getArg(0);
2783     auto Arg0Val = EmitScalarExpr(Arg0);
2784     auto Arg0Ty = Arg0->getType();
2785     auto PTy0 = FTy->getParamType(0);
2786     if (PTy0 != Arg0Val->getType()) {
2787       if (Arg0Ty->isArrayType())
2788         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2789       else
2790         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2791     }
2792     auto Arg1 = EmitScalarExpr(E->getArg(1));
2793     auto PTy1 = FTy->getParamType(1);
2794     if (PTy1 != Arg1->getType())
2795       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2796     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2797   }
2798
2799   case Builtin::BI__builtin_ms_va_start:
2800   case Builtin::BI__builtin_ms_va_end:
2801     return RValue::get(
2802         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
2803                        BuiltinID == Builtin::BI__builtin_ms_va_start));
2804
2805   case Builtin::BI__builtin_ms_va_copy: {
2806     // Lower this manually. We can't reliably determine whether or not any
2807     // given va_copy() is for a Win64 va_list from the calling convention
2808     // alone, because it's legal to do this from a System V ABI function.
2809     // With opaque pointer types, we won't have enough information in LLVM
2810     // IR to determine this from the argument types, either. Best to do it
2811     // now, while we have enough information.
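    // On Win64 the va_list is simply a char* cursor, so the copy below is just
    // a pointer-sized load from the source list and a store to the destination.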
2812     Address DestAddr = EmitMSVAListRef(E->getArg(0));
2813     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
2814
2815     llvm::Type *BPP = Int8PtrPtrTy;
2816
2817     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
2818                        DestAddr.getAlignment());
2819     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
2820                       SrcAddr.getAlignment());
2821
2822     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
2823     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
2824   }
2825   }
2826
2827   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2828   // the call using the normal call path, but using the unmangled
2829   // version of the function name.
2830   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2831     return emitLibraryCall(*this, FD, E,
2832                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2833
2834   // If this is a predefined lib function (e.g. malloc), emit the call
2835   // using exactly the normal call path.
2836   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2837     return emitLibraryCall(*this, FD, E,
2838                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2839
2840   // Check that a call to a target specific builtin has the correct target
2841   // features.
2842   // This check is done down here so that non-target-specific builtins are
2843   // not affected; however, if generic builtins ever start to require generic
2844   // target features, we can move this up to the beginning of the function.
2845   checkTargetFeatures(E, FD);
2846
2847   // See if we have a target specific intrinsic.
2848   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2849   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2850   StringRef Prefix =
2851       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2852   if (!Prefix.empty()) {
2853     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2854     // NOTE: we don't need to perform a compatibility flag check here since the
2855     // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
2856     // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
2857     if (IntrinsicID == Intrinsic::not_intrinsic)
2858       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2859   }
2860
2861   if (IntrinsicID != Intrinsic::not_intrinsic) {
2862     SmallVector<Value*, 16> Args;
2863
2864     // Find out if any arguments are required to be integer constant
2865     // expressions.
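    // ICEArguments is a bitmask: bit i is set when argument i of the builtin
    // must be an integer constant expression, in which case it is constant
    // folded below so the intrinsic receives a ConstantInt.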
2866     unsigned ICEArguments = 0;
2867     ASTContext::GetBuiltinTypeError Error;
2868     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2869     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2870
2871     Function *F = CGM.getIntrinsic(IntrinsicID);
2872     llvm::FunctionType *FTy = F->getFunctionType();
2873
2874     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2875       Value *ArgValue;
2876       // If this is a normal argument, just emit it as a scalar.
2877       if ((ICEArguments & (1 << i)) == 0) {
2878         ArgValue = EmitScalarExpr(E->getArg(i));
2879       } else {
2880         // If this is required to be a constant, constant fold it so that we
2881         // know that the generated intrinsic gets a ConstantInt.
2882         llvm::APSInt Result;
2883         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2884         assert(IsConst && "Constant arg isn't actually constant?");
2885         (void)IsConst;
2886         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2887       }
2888
2889       // If the intrinsic arg type is different from the builtin arg type
2890       // we need to do a bit cast.
2891       llvm::Type *PTy = FTy->getParamType(i);
2892       if (PTy != ArgValue->getType()) {
2893         assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
2894                "Must be able to losslessly bit cast to param");
2895         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2896       }
2897
2898       Args.push_back(ArgValue);
2899     }
2900
2901     Value *V = Builder.CreateCall(F, Args);
2902     QualType BuiltinRetType = E->getType();
2903
2904     llvm::Type *RetTy = VoidTy;
2905     if (!BuiltinRetType->isVoidType())
2906       RetTy = ConvertType(BuiltinRetType);
2907
2908     if (RetTy != V->getType()) {
2909       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2910              "Must be able to losslessly bit cast result type");
2911       V = Builder.CreateBitCast(V, RetTy);
2912     }
2913
2914     return RValue::get(V);
2915   }
2916
2917   // See if we have a target specific builtin that needs to be lowered.
2918   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2919     return RValue::get(V);
2920
2921   ErrorUnsupported(E, "builtin function");
2922
2923   // Unknown builtin, for now just dump it out and return undef.
2924   return GetUndefRValue(E->getType());
2925 }
2926
2927 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2928                                         unsigned BuiltinID, const CallExpr *E,
2929                                         llvm::Triple::ArchType Arch) {
2930   switch (Arch) {
2931   case llvm::Triple::arm:
2932   case llvm::Triple::armeb:
2933   case llvm::Triple::thumb:
2934   case llvm::Triple::thumbeb:
2935     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2936   case llvm::Triple::aarch64:
2937   case llvm::Triple::aarch64_be:
2938     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2939   case llvm::Triple::x86:
2940   case llvm::Triple::x86_64:
2941     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2942   case llvm::Triple::ppc:
2943   case llvm::Triple::ppc64:
2944   case llvm::Triple::ppc64le:
2945     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2946   case llvm::Triple::r600:
2947   case llvm::Triple::amdgcn:
2948     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2949   case llvm::Triple::systemz:
2950     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2951   case llvm::Triple::nvptx:
2952   case llvm::Triple::nvptx64:
2953     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2954   case llvm::Triple::wasm32:
2955   case llvm::Triple::wasm64:
2956     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2957   default:
2958     return nullptr;
2959   }
2960 }
2961
2962 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2963                                               const CallExpr *E) {
2964   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2965     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2966     return EmitTargetArchBuiltinExpr(
2967         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2968         getContext().getAuxTargetInfo()->getTriple().getArch());
2969   }
2970
2971   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2972                                    getTarget().getTriple().getArch());
2973 }
2974
2975 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2976                                      NeonTypeFlags TypeFlags,
2977                                      bool V1Ty=false) {
2978   int IsQuad = TypeFlags.isQuad();
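  // A "quad" NEON register is 128 bits rather than 64, so IsQuad doubles the
  // lane count below (e.g. 8 x i8 becomes 16 x i8), unless a single-element
  // vector was explicitly requested via V1Ty.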
2979   switch (TypeFlags.getEltType()) {
2980   case NeonTypeFlags::Int8:
2981   case NeonTypeFlags::Poly8:
2982     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2983   case NeonTypeFlags::Int16:
2984   case NeonTypeFlags::Poly16:
2985   case NeonTypeFlags::Float16:
2986     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2987   case NeonTypeFlags::Int32:
2988     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2989   case NeonTypeFlags::Int64:
2990   case NeonTypeFlags::Poly64:
2991     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2992   case NeonTypeFlags::Poly128:
2993     // FIXME: i128 and f128 don't get full support in Clang and LLVM.
2994     // A lot of the i128 and f128 API is missing,
2995     // so we use v16i8 to represent poly128 and let it get pattern matched.
2996     return llvm::VectorType::get(CGF->Int8Ty, 16);
2997   case NeonTypeFlags::Float32:
2998     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2999   case NeonTypeFlags::Float64:
3000     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3001   }
3002   llvm_unreachable("Unknown vector element type!");
3003 }
3004
3005 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3006                                           NeonTypeFlags IntTypeFlags) {
3007   int IsQuad = IntTypeFlags.isQuad();
3008   switch (IntTypeFlags.getEltType()) {
3009   case NeonTypeFlags::Int32:
3010     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3011   case NeonTypeFlags::Int64:
3012     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3013   default:
3014     llvm_unreachable("Type can't be converted to floating-point!");
3015   }
3016 }
3017
3018 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3019   unsigned nElts = V->getType()->getVectorNumElements();
3020   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3021   return Builder.CreateShuffleVector(V, V, SV, "lane");
3022 }
3023
3024 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3025                                      const char *name,
3026                                      unsigned shift, bool rightshift) {
3027   unsigned j = 0;
3028   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3029        ai != ae; ++ai, ++j)
3030     if (shift > 0 && shift == j)
3031       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3032     else
3033       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3034
3035   return Builder.CreateCall(F, Ops, name);
3036 }
3037
3038 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3039                                             bool neg) {
3040   int SV = cast<ConstantInt>(V)->getSExtValue();
3041   return ConstantInt::get(Ty, neg ? -SV : SV);
3042 }
3043
3044 // \brief Right-shift a vector by a constant.
3045 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3046                                           llvm::Type *Ty, bool usgn,
3047                                           const char *name) {
3048   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3049
3050   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3051   int EltSize = VTy->getScalarSizeInBits();
3052
3053   Vec = Builder.CreateBitCast(Vec, Ty);
3054
3055   // lshr/ashr are undefined when the shift amount is equal to the vector
3056   // element size.
3057   if (ShiftAmt == EltSize) {
3058     if (usgn) {
3059       // Right-shifting an unsigned value by its size yields 0.
3060       return llvm::ConstantAggregateZero::get(VTy);
3061     } else {
3062       // Right-shifting a signed value by its size is equivalent
3063       // to a shift of size-1.
3064       --ShiftAmt;
3065       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3066     }
3067   }
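  // Worked example of the fixup above: an ashr of <4 x i32> lanes by 32 would
  // be undefined in LLVM IR, so it is emitted as an ashr by 31 instead, which
  // still yields 0 or -1 per lane depending on the sign bit, the same result a
  // signed right shift by the full element size is expected to produce here.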
3068
3069   Shift = EmitNeonShiftVector(Shift, Ty, false);
3070   if (usgn)
3071     return Builder.CreateLShr(Vec, Shift, name);
3072   else
3073     return Builder.CreateAShr(Vec, Shift, name);
3074 }
3075
3076 enum {
3077   AddRetType = (1 << 0),
3078   Add1ArgType = (1 << 1),
3079   Add2ArgTypes = (1 << 2),
3080
3081   VectorizeRetType = (1 << 3),
3082   VectorizeArgTypes = (1 << 4),
3083
3084   InventFloatType = (1 << 5),
3085   UnsignedAlts = (1 << 6),
3086
3087   Use64BitVectors = (1 << 7),
3088   Use128BitVectors = (1 << 8),
3089
3090   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3091   VectorRet = AddRetType | VectorizeRetType,
3092   VectorRetGetArgs01 =
3093       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3094   FpCmpzModifiers =
3095       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3096 };
3097
3098 namespace {
3099 struct NeonIntrinsicInfo {
3100   const char *NameHint;
3101   unsigned BuiltinID;
3102   unsigned LLVMIntrinsic;
3103   unsigned AltLLVMIntrinsic;
3104   unsigned TypeModifier;
3105
3106   bool operator<(unsigned RHSBuiltinID) const {
3107     return BuiltinID < RHSBuiltinID;
3108   }
3109   bool operator<(const NeonIntrinsicInfo &TE) const {
3110     return BuiltinID < TE.BuiltinID;
3111   }
3112 };
3113 } // end anonymous namespace
3114
3115 #define NEONMAP0(NameBase) \
3116   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3117
3118 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3119   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3120       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3121
3122 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3123   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3124       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3125       TypeModifier }
3126
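// As an example of how an entry expands: NEONMAP2(vabd_v, arm_neon_vabdu,
// arm_neon_vabds, Add1ArgType | UnsignedAlts) records both the unsigned and
// the signed intrinsic for __builtin_neon_vabd_v; the UnsignedAlts modifier
// lets the lowering choose between LLVMIntrinsic and AltLLVMIntrinsic based
// on the operand signedness. The operator< overloads above suggest these
// tables are searched by BuiltinID, so entries are presumably kept sorted by
// builtin ID.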
3127 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3128   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3129   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3130   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3131   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3132   NEONMAP0(vaddhn_v),
3133   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3134   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3135   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3136   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3137   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3138   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3139   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3140   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3141   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3142   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3143   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3144   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3145   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3146   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3147   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3148   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3149   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3150   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3151   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3152   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3153   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3154   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3155   NEONMAP0(vcvt_f32_v),
3156   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3157   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3158   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3159   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3160   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3161   NEONMAP0(vcvt_s32_v),
3162   NEONMAP0(vcvt_s64_v),
3163   NEONMAP0(vcvt_u32_v),
3164   NEONMAP0(vcvt_u64_v),
3165   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3166   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3167   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3168   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3169   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3170   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3171   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3172   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3173   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3174   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3175   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3176   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3177   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3178   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3179   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3180   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3181   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3182   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3183   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3184   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3185   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3186   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3187   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3188   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3189   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3190   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3191   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3192   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3193   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3194   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3195   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3196   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3197   NEONMAP0(vcvtq_f32_v),
3198   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3199   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3200   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3201   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3202   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3203   NEONMAP0(vcvtq_s32_v),
3204   NEONMAP0(vcvtq_s64_v),
3205   NEONMAP0(vcvtq_u32_v),
3206   NEONMAP0(vcvtq_u64_v),
3207   NEONMAP0(vext_v),
3208   NEONMAP0(vextq_v),
3209   NEONMAP0(vfma_v),
3210   NEONMAP0(vfmaq_v),
3211   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3212   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3213   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3214   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3215   NEONMAP0(vld1_dup_v),
3216   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3217   NEONMAP0(vld1q_dup_v),
3218   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3219   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3220   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3221   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3222   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3223   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3224   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3225   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3226   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3227   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3228   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3229   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3230   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3231   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3232   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3233   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3234   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3235   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3236   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3237   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3238   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3239   NEONMAP0(vmovl_v),
3240   NEONMAP0(vmovn_v),
3241   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3242   NEONMAP0(vmull_v),
3243   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3244   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3245   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3246   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3247   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3248   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3249   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3250   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3251   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3252   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3253   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3254   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3255   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3256   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3257   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3258   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3259   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3260   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3261   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3262   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3263   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3264   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3265   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3266   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3267   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3268   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3269   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3270   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3271   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3272   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3273   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3274   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3275   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3276   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3277   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3278   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3279   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3280   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3281   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3282   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3283   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3284   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3285   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3286   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3287   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3288   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3289   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3290   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3291   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3292   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3293   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3294   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3295   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3296   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3297   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3298   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3299   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3300   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3301   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3302   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3303   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3304   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3305   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3306   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3307   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3308   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3309   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3310   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3311   NEONMAP0(vshl_n_v),
3312   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3313   NEONMAP0(vshll_n_v),
3314   NEONMAP0(vshlq_n_v),
3315   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3316   NEONMAP0(vshr_n_v),
3317   NEONMAP0(vshrn_n_v),
3318   NEONMAP0(vshrq_n_v),
3319   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3320   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3321   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3322   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3323   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3324   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3325   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3326   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3327   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3328   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3329   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3330   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3331   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3332   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3333   NEONMAP0(vsubhn_v),
3334   NEONMAP0(vtrn_v),
3335   NEONMAP0(vtrnq_v),
3336   NEONMAP0(vtst_v),
3337   NEONMAP0(vtstq_v),
3338   NEONMAP0(vuzp_v),
3339   NEONMAP0(vuzpq_v),
3340   NEONMAP0(vzip_v),
3341   NEONMAP0(vzipq_v)
3342 };
3343
3344 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3345   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3346   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3347   NEONMAP0(vaddhn_v),
3348   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3349   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3350   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3351   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3352   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3353   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3354   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3355   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3356   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3357   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3358   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3359   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3360   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3361   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3362   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3363   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3364   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3365   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3366   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3367   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3368   NEONMAP0(vcvt_f32_v),
3369   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3370   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3371   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3372   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3373   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3374   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3375   NEONMAP0(vcvtq_f32_v),
3376   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3377   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3378   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3379   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3380   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3381   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3382   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3383   NEONMAP0(vext_v),
3384   NEONMAP0(vextq_v),
3385   NEONMAP0(vfma_v),
3386   NEONMAP0(vfmaq_v),
3387   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3388   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3389   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3390   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3391   NEONMAP0(vmovl_v),
3392   NEONMAP0(vmovn_v),
3393   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3394   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3395   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3396   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3397   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3398   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3399   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3400   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3401   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3402   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3403   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3404   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3405   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3406   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3407   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3408   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3409   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3410   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3411   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3412   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3413   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3414   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3415   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3416   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3417   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3418   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3419   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3420   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3421   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3422   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3423   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3424   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3425   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3426   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3427   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3428   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3429   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3430   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3431   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3432   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3433   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3434   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3435   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3436   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3437   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3438   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3439   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3440   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3441   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3442   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3443   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3444   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3445   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3446   NEONMAP0(vshl_n_v),
3447   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3448   NEONMAP0(vshll_n_v),
3449   NEONMAP0(vshlq_n_v),
3450   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3451   NEONMAP0(vshr_n_v),
3452   NEONMAP0(vshrn_n_v),
3453   NEONMAP0(vshrq_n_v),
3454   NEONMAP0(vsubhn_v),
3455   NEONMAP0(vtst_v),
3456   NEONMAP0(vtstq_v),
3457 };
3458
3459 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3460   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3461   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3462   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3463   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3464   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3465   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3466   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3467   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3468   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3469   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3470   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3471   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3472   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3473   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3474   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3475   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3476   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3477   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3478   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3479   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3480   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3481   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3482   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3483   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3484   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3485   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3486   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3487   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3488   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3489   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3490   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3491   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3492   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3493   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3494   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3495   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3496   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3497   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3498   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3499   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3500   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3501   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3502   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3503   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3504   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3505   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3506   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3507   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3508   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3509   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3510   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3511   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3512   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3513   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3514   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3515   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3516   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3517   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3518   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3519   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3520   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3521   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3522   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3523   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3524   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3525   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3526   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3527   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3528   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3529   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3530   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3531   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3532   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3533   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3534   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3535   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3536   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3537   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3538   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3539   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3540   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3541   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3542   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3543   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3544   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3545   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3546   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3547   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3548   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3549   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3550   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3551   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3552   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3553   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3554   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3555   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3556   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3557   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3558   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3559   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3560   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3561   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3562   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3563   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3564   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3565   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3566   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3567   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3568   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3569   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3570   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3571   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3572   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3573   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3574   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3575   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3576   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3577   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3578   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3579   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3580   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3581   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3582   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3583   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3584   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3585   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3586   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3587   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3588   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3589   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3590   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3591   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3592   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3593   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3594   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3595   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3596   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3597   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3598   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3599   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3600   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3601   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3602   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3603   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3604   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3605   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3606   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3607   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3608   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3609   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3610   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3611   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3612   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3613   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3614   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3615   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3616   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3617   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3618   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3619   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3620   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3621   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3622   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3623   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3624   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3625   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3626   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3627   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3628   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3629   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3630   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3631   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3632   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3633   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3634   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3635   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3636   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3637   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3638   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3639   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3640   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3641   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3642   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3643   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3644   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3645   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3646   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3647   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3648   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3649   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3650   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3651   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3652 };
3653
3654 #undef NEONMAP0
3655 #undef NEONMAP1
3656 #undef NEONMAP2
3657
3658 static bool NEONSIMDIntrinsicsProvenSorted = false;
3659
3660 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3661 static bool AArch64SISDIntrinsicsProvenSorted = false;
3662
3663
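     /// Look up the entry for \p BuiltinID in a sorted intrinsic table via
     /// binary search. In debug builds the table is checked for sortedness the
     /// first time it is queried; a null pointer is returned if the builtin
     /// has no entry in the map.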
3664 static const NeonIntrinsicInfo *
3665 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3666                        unsigned BuiltinID, bool &MapProvenSorted) {
3667
3668 #ifndef NDEBUG
3669   if (!MapProvenSorted) {
3670     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3671     MapProvenSorted = true;
3672   }
3673 #endif
3674
3675   const NeonIntrinsicInfo *Builtin =
3676       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3677
3678   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3679     return Builtin;
3680
3681   return nullptr;
3682 }
3683
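     /// Resolve an overloaded NEON LLVM intrinsic declaration. The Modifier
     /// flags describe which overloaded types to supply: the call's return
     /// type (optionally vectorized), one or two copies of the argument type
     /// (optionally vectorized to a 64- or 128-bit vector), and an invented
     /// float type.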
3684 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3685                                                    unsigned Modifier,
3686                                                    llvm::Type *ArgType,
3687                                                    const CallExpr *E) {
3688   int VectorSize = 0;
3689   if (Modifier & Use64BitVectors)
3690     VectorSize = 64;
3691   else if (Modifier & Use128BitVectors)
3692     VectorSize = 128;
3693
3694   // Return type.
3695   SmallVector<llvm::Type *, 3> Tys;
3696   if (Modifier & AddRetType) {
3697     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3698     if (Modifier & VectorizeRetType)
3699       Ty = llvm::VectorType::get(
3700           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3701
3702     Tys.push_back(Ty);
3703   }
3704
3705   // Arguments.
3706   if (Modifier & VectorizeArgTypes) {
3707     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3708     ArgType = llvm::VectorType::get(ArgType, Elts);
3709   }
3710
3711   if (Modifier & (Add1ArgType | Add2ArgTypes))
3712     Tys.push_back(ArgType);
3713
3714   if (Modifier & Add2ArgTypes)
3715     Tys.push_back(ArgType);
3716
3717   if (Modifier & InventFloatType)
3718     Tys.push_back(FloatTy);
3719
3720   return CGM.getIntrinsic(IntrinsicID, Tys);
3721 }
3722
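     /// Emit a scalar (SISD) NEON builtin through the table-driven path.
     /// Scalar operands that are narrower than the intrinsic's parameters are
     /// inserted into one-element vectors first, and a one-element vector
     /// result is extracted back to a scalar when the builtin's return type is
     /// narrower than the intrinsic's.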
3723 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3724                                             const NeonIntrinsicInfo &SISDInfo,
3725                                             SmallVectorImpl<Value *> &Ops,
3726                                             const CallExpr *E) {
3727   unsigned BuiltinID = SISDInfo.BuiltinID;
3728   unsigned int Int = SISDInfo.LLVMIntrinsic;
3729   unsigned Modifier = SISDInfo.TypeModifier;
3730   const char *s = SISDInfo.NameHint;
3731
3732   switch (BuiltinID) {
3733   case NEON::BI__builtin_neon_vcled_s64:
3734   case NEON::BI__builtin_neon_vcled_u64:
3735   case NEON::BI__builtin_neon_vcles_f32:
3736   case NEON::BI__builtin_neon_vcled_f64:
3737   case NEON::BI__builtin_neon_vcltd_s64:
3738   case NEON::BI__builtin_neon_vcltd_u64:
3739   case NEON::BI__builtin_neon_vclts_f32:
3740   case NEON::BI__builtin_neon_vcltd_f64:
3741   case NEON::BI__builtin_neon_vcales_f32:
3742   case NEON::BI__builtin_neon_vcaled_f64:
3743   case NEON::BI__builtin_neon_vcalts_f32:
3744   case NEON::BI__builtin_neon_vcaltd_f64:
3745     // Only one direction of comparisons actually exists; cmle is actually a
3746     // cmge with swapped operands. The table gives us the right intrinsic, but
3747     // we still need to do the swap.
3748     std::swap(Ops[0], Ops[1]);
3749     break;
3750   }
3751
3752   assert(Int && "Generic code assumes a valid intrinsic");
3753
3754   // Determine the type(s) of this overloaded AArch64 intrinsic.
3755   const Expr *Arg = E->getArg(0);
3756   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3757   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3758
3759   int j = 0;
3760   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3761   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3762        ai != ae; ++ai, ++j) {
3763     llvm::Type *ArgTy = ai->getType();
3764     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3765              ArgTy->getPrimitiveSizeInBits())
3766       continue;
3767
3768     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3769     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3770     // it before inserting.
3771     Ops[j] =
3772         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3773     Ops[j] =
3774         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3775   }
3776
3777   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3778   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3779   if (ResultType->getPrimitiveSizeInBits() <
3780       Result->getType()->getPrimitiveSizeInBits())
3781     return CGF.Builder.CreateExtractElement(Result, C0);
3782
3783   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3784 }
3785
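     /// Common codegen for overloaded NEON builtins whose last argument is an
     /// integer constant encoding the vector type (NeonTypeFlags). Builtins
     /// that need special handling are lowered in the switch below; everything
     /// else falls through to a generic table-driven intrinsic call.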
3786 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3787     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3788     const char *NameHint, unsigned Modifier, const CallExpr *E,
3789     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3790   // Get the last argument, which specifies the vector type.
3791   llvm::APSInt NeonTypeConst;
3792   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3793   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3794     return nullptr;
3795
3796   // Determine the type of this overloaded NEON intrinsic.
3797   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3798   bool Usgn = Type.isUnsigned();
3799   bool Quad = Type.isQuad();
3800
3801   llvm::VectorType *VTy = GetNeonType(this, Type);
3802   llvm::Type *Ty = VTy;
3803   if (!Ty)
3804     return nullptr;
3805
3806   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3807     return Builder.getInt32(addr.getAlignment().getQuantity());
3808   };
3809
3810   unsigned Int = LLVMIntrinsic;
3811   if ((Modifier & UnsignedAlts) && !Usgn)
3812     Int = AltLLVMIntrinsic;
3813
3814   switch (BuiltinID) {
3815   default: break;
3816   case NEON::BI__builtin_neon_vabs_v:
3817   case NEON::BI__builtin_neon_vabsq_v:
3818     if (VTy->getElementType()->isFloatingPointTy())
3819       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3820     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3821   case NEON::BI__builtin_neon_vaddhn_v: {
3822     llvm::VectorType *SrcTy =
3823         llvm::VectorType::getExtendedElementVectorType(VTy);
3824
3825     // %sum = add <4 x i32> %lhs, %rhs
3826     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3827     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3828     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3829
3830     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3831     Constant *ShiftAmt =
3832         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3833     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3834
3835     // %res = trunc <4 x i32> %high to <4 x i16>
3836     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3837   }
3838   case NEON::BI__builtin_neon_vcale_v:
3839   case NEON::BI__builtin_neon_vcaleq_v:
3840   case NEON::BI__builtin_neon_vcalt_v:
3841   case NEON::BI__builtin_neon_vcaltq_v:
3842     std::swap(Ops[0], Ops[1]);
3843     LLVM_FALLTHROUGH;
3844   case NEON::BI__builtin_neon_vcage_v:
3845   case NEON::BI__builtin_neon_vcageq_v:
3846   case NEON::BI__builtin_neon_vcagt_v:
3847   case NEON::BI__builtin_neon_vcagtq_v: {
3848     llvm::Type *VecFlt = llvm::VectorType::get(
3849         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3850         VTy->getNumElements());
3851     llvm::Type *Tys[] = { VTy, VecFlt };
3852     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3853     return EmitNeonCall(F, Ops, NameHint);
3854   }
3855   case NEON::BI__builtin_neon_vclz_v:
3856   case NEON::BI__builtin_neon_vclzq_v:
3857     // We generate a target-independent intrinsic, which needs a second argument
3858     // for whether or not clz of zero is undefined; on ARM it isn't.
3859     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3860     break;
3861   case NEON::BI__builtin_neon_vcvt_f32_v:
3862   case NEON::BI__builtin_neon_vcvtq_f32_v:
3863     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3864     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3865     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3866                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3867   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3868   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3869   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3870   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3871     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3872     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3873     Function *F = CGM.getIntrinsic(Int, Tys);
3874     return EmitNeonCall(F, Ops, "vcvt_n");
3875   }
3876   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3877   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3878   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3879   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3880   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3881   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3882   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3883   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3884     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3885     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3886     return EmitNeonCall(F, Ops, "vcvt_n");
3887   }
3888   case NEON::BI__builtin_neon_vcvt_s32_v:
3889   case NEON::BI__builtin_neon_vcvt_u32_v:
3890   case NEON::BI__builtin_neon_vcvt_s64_v:
3891   case NEON::BI__builtin_neon_vcvt_u64_v:
3892   case NEON::BI__builtin_neon_vcvtq_s32_v:
3893   case NEON::BI__builtin_neon_vcvtq_u32_v:
3894   case NEON::BI__builtin_neon_vcvtq_s64_v:
3895   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3896     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3897     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3898                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3899   }
3900   case NEON::BI__builtin_neon_vcvta_s32_v:
3901   case NEON::BI__builtin_neon_vcvta_s64_v:
3902   case NEON::BI__builtin_neon_vcvta_u32_v:
3903   case NEON::BI__builtin_neon_vcvta_u64_v:
3904   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3905   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3906   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3907   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3908   case NEON::BI__builtin_neon_vcvtn_s32_v:
3909   case NEON::BI__builtin_neon_vcvtn_s64_v:
3910   case NEON::BI__builtin_neon_vcvtn_u32_v:
3911   case NEON::BI__builtin_neon_vcvtn_u64_v:
3912   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3913   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3914   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3915   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3916   case NEON::BI__builtin_neon_vcvtp_s32_v:
3917   case NEON::BI__builtin_neon_vcvtp_s64_v:
3918   case NEON::BI__builtin_neon_vcvtp_u32_v:
3919   case NEON::BI__builtin_neon_vcvtp_u64_v:
3920   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3921   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3922   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3923   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3924   case NEON::BI__builtin_neon_vcvtm_s32_v:
3925   case NEON::BI__builtin_neon_vcvtm_s64_v:
3926   case NEON::BI__builtin_neon_vcvtm_u32_v:
3927   case NEON::BI__builtin_neon_vcvtm_u64_v:
3928   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3929   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3930   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3931   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3932     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3933     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3934   }
3935   case NEON::BI__builtin_neon_vext_v:
3936   case NEON::BI__builtin_neon_vextq_v: {
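         // vext extracts NumElements consecutive lanes from the concatenation
         // of the two operands, starting at the immediate lane index.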
3937     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3938     SmallVector<uint32_t, 16> Indices;
3939     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3940       Indices.push_back(i+CV);
3941
3942     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3943     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3944     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3945   }
3946   case NEON::BI__builtin_neon_vfma_v:
3947   case NEON::BI__builtin_neon_vfmaq_v: {
3948     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3949     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3950     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3951     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3952
3953     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3954     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3955   }
3956   case NEON::BI__builtin_neon_vld1_v:
3957   case NEON::BI__builtin_neon_vld1q_v: {
3958     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3959     Ops.push_back(getAlignmentValue32(PtrOp0));
3960     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3961   }
3962   case NEON::BI__builtin_neon_vld2_v:
3963   case NEON::BI__builtin_neon_vld2q_v:
3964   case NEON::BI__builtin_neon_vld3_v:
3965   case NEON::BI__builtin_neon_vld3q_v:
3966   case NEON::BI__builtin_neon_vld4_v:
3967   case NEON::BI__builtin_neon_vld4q_v: {
3968     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3969     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3970     Value *Align = getAlignmentValue32(PtrOp1);
3971     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3972     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3973     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3974     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3975   }
3976   case NEON::BI__builtin_neon_vld1_dup_v:
3977   case NEON::BI__builtin_neon_vld1q_dup_v: {
3978     Value *V = UndefValue::get(Ty);
3979     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3980     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3981     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3982     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3983     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3984     return EmitNeonSplat(Ops[0], CI);
3985   }
3986   case NEON::BI__builtin_neon_vld2_lane_v:
3987   case NEON::BI__builtin_neon_vld2q_lane_v:
3988   case NEON::BI__builtin_neon_vld3_lane_v:
3989   case NEON::BI__builtin_neon_vld3q_lane_v:
3990   case NEON::BI__builtin_neon_vld4_lane_v:
3991   case NEON::BI__builtin_neon_vld4q_lane_v: {
3992     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3993     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3994     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3995       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3996     Ops.push_back(getAlignmentValue32(PtrOp1));
3997     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3998     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3999     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4000     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4001   }
4002   case NEON::BI__builtin_neon_vmovl_v: {
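         // vmovl widens each lane: bitcast the operand to the narrow element
         // type, then zero- or sign-extend up to the full-width result.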
4003     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4004     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4005     if (Usgn)
4006       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4007     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4008   }
4009   case NEON::BI__builtin_neon_vmovn_v: {
4010     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4011     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4012     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4013   }
4014   case NEON::BI__builtin_neon_vmull_v:
4015     // FIXME: the integer vmull operations could be emitted in terms of pure
4016     // LLVM IR (2 exts followed by a mul). Unfortunately, LLVM has a habit of
4017     // hoisting the exts outside loops. Until global ISel comes along that can
4018     // see through such movement, this leads to bad CodeGen. So we need an
4019     // intrinsic for now.
4020     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4021     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4022     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4023   case NEON::BI__builtin_neon_vpadal_v:
4024   case NEON::BI__builtin_neon_vpadalq_v: {
4025     // The source operand type has twice as many elements of half the size.
4026     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4027     llvm::Type *EltTy =
4028       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4029     llvm::Type *NarrowTy =
4030       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4031     llvm::Type *Tys[2] = { Ty, NarrowTy };
4032     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4033   }
4034   case NEON::BI__builtin_neon_vpaddl_v:
4035   case NEON::BI__builtin_neon_vpaddlq_v: {
4036     // The source operand type has twice as many elements of half the size.
4037     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4038     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4039     llvm::Type *NarrowTy =
4040       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4041     llvm::Type *Tys[2] = { Ty, NarrowTy };
4042     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4043   }
4044   case NEON::BI__builtin_neon_vqdmlal_v:
4045   case NEON::BI__builtin_neon_vqdmlsl_v: {
4046     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4047     Ops[1] =
4048         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4049     Ops.resize(2);
4050     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4051   }
4052   case NEON::BI__builtin_neon_vqshl_n_v:
4053   case NEON::BI__builtin_neon_vqshlq_n_v:
4054     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4055                         1, false);
4056   case NEON::BI__builtin_neon_vqshlu_n_v:
4057   case NEON::BI__builtin_neon_vqshluq_n_v:
4058     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4059                         1, false);
4060   case NEON::BI__builtin_neon_vrecpe_v:
4061   case NEON::BI__builtin_neon_vrecpeq_v:
4062   case NEON::BI__builtin_neon_vrsqrte_v:
4063   case NEON::BI__builtin_neon_vrsqrteq_v:
4064     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4065     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4066
4067   case NEON::BI__builtin_neon_vrshr_n_v:
4068   case NEON::BI__builtin_neon_vrshrq_n_v:
4069     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4070                         1, true);
4071   case NEON::BI__builtin_neon_vshl_n_v:
4072   case NEON::BI__builtin_neon_vshlq_n_v:
4073     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4074     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4075                              "vshl_n");
4076   case NEON::BI__builtin_neon_vshll_n_v: {
4077     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4078     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4079     if (Usgn)
4080       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4081     else
4082       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4083     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4084     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4085   }
4086   case NEON::BI__builtin_neon_vshrn_n_v: {
4087     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4088     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4089     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4090     if (Usgn)
4091       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4092     else
4093       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4094     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4095   }
4096   case NEON::BI__builtin_neon_vshr_n_v:
4097   case NEON::BI__builtin_neon_vshrq_n_v:
4098     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4099   case NEON::BI__builtin_neon_vst1_v:
4100   case NEON::BI__builtin_neon_vst1q_v:
4101   case NEON::BI__builtin_neon_vst2_v:
4102   case NEON::BI__builtin_neon_vst2q_v:
4103   case NEON::BI__builtin_neon_vst3_v:
4104   case NEON::BI__builtin_neon_vst3q_v:
4105   case NEON::BI__builtin_neon_vst4_v:
4106   case NEON::BI__builtin_neon_vst4q_v:
4107   case NEON::BI__builtin_neon_vst2_lane_v:
4108   case NEON::BI__builtin_neon_vst2q_lane_v:
4109   case NEON::BI__builtin_neon_vst3_lane_v:
4110   case NEON::BI__builtin_neon_vst3q_lane_v:
4111   case NEON::BI__builtin_neon_vst4_lane_v:
4112   case NEON::BI__builtin_neon_vst4q_lane_v: {
4113     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4114     Ops.push_back(getAlignmentValue32(PtrOp0));
4115     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4116   }
4117   case NEON::BI__builtin_neon_vsubhn_v: {
4118     llvm::VectorType *SrcTy =
4119         llvm::VectorType::getExtendedElementVectorType(VTy);
4120
4121     // %diff = sub <4 x i32> %lhs, %rhs
4122     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4123     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4124     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4125
4126     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4127     Constant *ShiftAmt =
4128         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4129     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4130
4131     // %res = trunc <4 x i32> %high to <4 x i16>
4132     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4133   }
4134   case NEON::BI__builtin_neon_vtrn_v:
4135   case NEON::BI__builtin_neon_vtrnq_v: {
4136     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4137     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4138     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4139     Value *SV = nullptr;
4140
4141     for (unsigned vi = 0; vi != 2; ++vi) {
4142       SmallVector<uint32_t, 16> Indices;
4143       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4144         Indices.push_back(i+vi);
4145         Indices.push_back(i+e+vi);
4146       }
4147       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4148       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4149       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4150     }
4151     return SV;
4152   }
4153   case NEON::BI__builtin_neon_vtst_v:
4154   case NEON::BI__builtin_neon_vtstq_v: {
4155     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4156     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4157     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4158     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4159                                 ConstantAggregateZero::get(Ty));
4160     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4161   }
4162   case NEON::BI__builtin_neon_vuzp_v:
4163   case NEON::BI__builtin_neon_vuzpq_v: {
4164     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4165     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4166     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4167     Value *SV = nullptr;
4168
4169     for (unsigned vi = 0; vi != 2; ++vi) {
4170       SmallVector<uint32_t, 16> Indices;
4171       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4172         Indices.push_back(2*i+vi);
4173
4174       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4175       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4176       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4177     }
4178     return SV;
4179   }
4180   case NEON::BI__builtin_neon_vzip_v:
4181   case NEON::BI__builtin_neon_vzipq_v: {
4182     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4183     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4184     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4185     Value *SV = nullptr;
4186
4187     for (unsigned vi = 0; vi != 2; ++vi) {
4188       SmallVector<uint32_t, 16> Indices;
4189       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4190         Indices.push_back((i + vi*e) >> 1);
4191         Indices.push_back(((i + vi*e) >> 1)+e);
4192       }
4193       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4194       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4195       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4196     }
4197     return SV;
4198   }
4199   }
4200
4201   assert(Int && "Expected valid intrinsic number");
4202
4203   // Determine the type(s) of this overloaded AArch64 intrinsic.
4204   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4205
4206   Value *Result = EmitNeonCall(F, Ops, NameHint);
4207   llvm::Type *ResultType = ConvertType(E->getType());
4208   // Cast an AArch64 intrinsic's one-element vector result back to the
4209   // scalar type expected by the builtin.
4210   return Builder.CreateBitCast(Result, ResultType, NameHint);
4211 }
4212
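     /// Emit a NEON compare-against-zero. The floating-point or integer
     /// predicate is chosen from the operand's (possibly bitcast-peeled)
     /// element type, and the i1 result is sign-extended to the requested
     /// result type.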
4213 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4214     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4215     const CmpInst::Predicate Ip, const Twine &Name) {
4216   llvm::Type *OTy = Op->getType();
4217
4218   // FIXME: this is utterly horrific. We should not be looking at previous
4219   // codegen context to find out what needs doing. Unfortunately TableGen
4220   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4221   // (etc).
4222   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4223     OTy = BI->getOperand(0)->getType();
4224
4225   Op = Builder.CreateBitCast(Op, OTy);
4226   if (OTy->getScalarType()->isFloatingPointTy()) {
4227     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4228   } else {
4229     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4230   }
4231   return Builder.CreateSExt(Op, Ty, Name);
4232 }
4233
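     /// Pack a list of 64-bit table registers into the 128-bit lookup tables
     /// expected by the table-lookup (tbl/tbx) intrinsics, zero-filling the
     /// upper half of the final table when the count is odd, then emit the
     /// intrinsic call with the index operand (and the optional extension
     /// operand).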
4234 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4235                                  Value *ExtOp, Value *IndexOp,
4236                                  llvm::Type *ResTy, unsigned IntID,
4237                                  const char *Name) {
4238   SmallVector<Value *, 2> TblOps;
4239   if (ExtOp)
4240     TblOps.push_back(ExtOp);
4241
4242   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4243   SmallVector<uint32_t, 16> Indices;
4244   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4245   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4246     Indices.push_back(2*i);
4247     Indices.push_back(2*i+1);
4248   }
4249
4250   int PairPos = 0, End = Ops.size() - 1;
4251   while (PairPos < End) {
4252     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4253                                                      Ops[PairPos+1], Indices,
4254                                                      Name));
4255     PairPos += 2;
4256   }
4257
4258   // If there's an odd number of 64-bit lookup table vectors, fill the high
4259   // 64 bits of the final 128-bit lookup table with zero.
4260   if (PairPos == End) {
4261     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4262     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4263                                                      ZeroTbl, Indices, Name));
4264   }
4265
4266   Function *TblF;
4267   TblOps.push_back(IndexOp);
4268   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4269
4270   return CGF.EmitNeonCall(TblF, TblOps, Name);
4271 }
4272
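     /// Map an ARM hint builtin (nop, yield, wfe, wfi, sev, sevl) onto a call
     /// to the llvm.arm.hint intrinsic with the corresponding immediate, or
     /// return null if the builtin is not a hint.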
4273 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4274   unsigned Value;
4275   switch (BuiltinID) {
4276   default:
4277     return nullptr;
4278   case ARM::BI__builtin_arm_nop:
4279     Value = 0;
4280     break;
4281   case ARM::BI__builtin_arm_yield:
4282   case ARM::BI__yield:
4283     Value = 1;
4284     break;
4285   case ARM::BI__builtin_arm_wfe:
4286   case ARM::BI__wfe:
4287     Value = 2;
4288     break;
4289   case ARM::BI__builtin_arm_wfi:
4290   case ARM::BI__wfi:
4291     Value = 3;
4292     break;
4293   case ARM::BI__builtin_arm_sev:
4294   case ARM::BI__sev:
4295     Value = 4;
4296     break;
4297   case ARM::BI__builtin_arm_sevl:
4298   case ARM::BI__sevl:
4299     Value = 5;
4300     break;
4301   }
4302
4303   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4304                             llvm::ConstantInt::get(Int32Ty, Value));
4305 }
4306
4307 // Generates the IR for the read/write special register builtin.
4308 // ValueType is the type of the value that is to be written or read;
4309 // RegisterType is the type of the register being written to or read from.
4310 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4311                                          const CallExpr *E,
4312                                          llvm::Type *RegisterType,
4313                                          llvm::Type *ValueType,
4314                                          bool IsRead,
4315                                          StringRef SysReg = "") {
4316   // The read/write register intrinsics only support 32- and 64-bit operations.
4317   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4318           && "Unsupported size for register.");
4319
4320   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4321   CodeGen::CodeGenModule &CGM = CGF.CGM;
4322   LLVMContext &Context = CGM.getLLVMContext();
4323
4324   if (SysReg.empty()) {
4325     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4326     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4327   }
4328
4329   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4330   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4331   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4332
4333   llvm::Type *Types[] = { RegisterType };
4334
4335   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4336   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4337             && "Can't fit 64-bit value in 32-bit register");
4338
4339   if (IsRead) {
4340     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4341     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4342
4343     if (MixedTypes)
4344       // Read into 64 bit register and then truncate result to 32 bit.
4345       return Builder.CreateTrunc(Call, ValueType);
4346
4347     if (ValueType->isPointerTy())
4348       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4349       return Builder.CreateIntToPtr(Call, ValueType);
4350
4351     return Call;
4352   }
4353
4354   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4355   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4356   if (MixedTypes) {
4357     // Extend 32 bit write value to 64 bit to pass to write.
4358     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4359     return Builder.CreateCall(F, { Metadata, ArgValue });
4360   }
4361
4362   if (ValueType->isPointerTy()) {
4363     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4364     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4365     return Builder.CreateCall(F, { Metadata, ArgValue });
4366   }
4367
4368   return Builder.CreateCall(F, { Metadata, ArgValue });
4369 }
4370
4371 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4372 /// argument that specifies the vector type.
4373 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4374   switch (BuiltinID) {
4375   default: break;
4376   case NEON::BI__builtin_neon_vget_lane_i8:
4377   case NEON::BI__builtin_neon_vget_lane_i16:
4378   case NEON::BI__builtin_neon_vget_lane_i32:
4379   case NEON::BI__builtin_neon_vget_lane_i64:
4380   case NEON::BI__builtin_neon_vget_lane_f32:
4381   case NEON::BI__builtin_neon_vgetq_lane_i8:
4382   case NEON::BI__builtin_neon_vgetq_lane_i16:
4383   case NEON::BI__builtin_neon_vgetq_lane_i32:
4384   case NEON::BI__builtin_neon_vgetq_lane_i64:
4385   case NEON::BI__builtin_neon_vgetq_lane_f32:
4386   case NEON::BI__builtin_neon_vset_lane_i8:
4387   case NEON::BI__builtin_neon_vset_lane_i16:
4388   case NEON::BI__builtin_neon_vset_lane_i32:
4389   case NEON::BI__builtin_neon_vset_lane_i64:
4390   case NEON::BI__builtin_neon_vset_lane_f32:
4391   case NEON::BI__builtin_neon_vsetq_lane_i8:
4392   case NEON::BI__builtin_neon_vsetq_lane_i16:
4393   case NEON::BI__builtin_neon_vsetq_lane_i32:
4394   case NEON::BI__builtin_neon_vsetq_lane_i64:
4395   case NEON::BI__builtin_neon_vsetq_lane_f32:
4396   case NEON::BI__builtin_neon_vsha1h_u32:
4397   case NEON::BI__builtin_neon_vsha1cq_u32:
4398   case NEON::BI__builtin_neon_vsha1pq_u32:
4399   case NEON::BI__builtin_neon_vsha1mq_u32:
4400   case ARM::BI_MoveToCoprocessor:
4401   case ARM::BI_MoveToCoprocessor2:
4402     return false;
4403   }
4404   return true;
4405 }
4406
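     /// Emit IR for an ARM target builtin: hints, __emit, dbg, prefetch, rbit,
     /// cache clearing, coprocessor moves, exclusive load/store, CRC32,
     /// special-register access, and the shared NEON path.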
4407 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4408                                            const CallExpr *E) {
4409   if (auto Hint = GetValueForARMHint(BuiltinID))
4410     return Hint;
4411
4412   if (BuiltinID == ARM::BI__emit) {
4413     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4414     llvm::FunctionType *FTy =
4415         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4416
4417     APSInt Value;
4418     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4419       llvm_unreachable("Sema will ensure that the parameter is constant");
4420
4421     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4422
4423     llvm::InlineAsm *Emit =
4424         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4425                                  /*SideEffects=*/true)
4426                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4427                                  /*SideEffects=*/true);
4428
4429     return Builder.CreateCall(Emit);
4430   }
4431
4432   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4433     Value *Option = EmitScalarExpr(E->getArg(0));
4434     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4435   }
4436
4437   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4438     Value *Address = EmitScalarExpr(E->getArg(0));
4439     Value *RW      = EmitScalarExpr(E->getArg(1));
4440     Value *IsData  = EmitScalarExpr(E->getArg(2));
4441
4442     // Locality is not supported on the ARM target.
4443     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4444
4445     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4446     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4447   }
4448
4449   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4450     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4451     return Builder.CreateCall(
4452         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4453   }
4454
4455   if (BuiltinID == ARM::BI__clear_cache) {
4456     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4457     const FunctionDecl *FD = E->getDirectCallee();
4458     Value *Ops[2];
4459     for (unsigned i = 0; i < 2; i++)
4460       Ops[i] = EmitScalarExpr(E->getArg(i));
4461     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4462     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4463     StringRef Name = FD->getName();
4464     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4465   }
4466
4467   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4468       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4469     Function *F;
4470
4471     switch (BuiltinID) {
4472     default: llvm_unreachable("unexpected builtin");
4473     case ARM::BI__builtin_arm_mcrr:
4474       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4475       break;
4476     case ARM::BI__builtin_arm_mcrr2:
4477       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4478       break;
4479     }
4480
4481     // The MCRR{2} instruction has 5 operands, but
4482     // the intrinsic has only 4 because Rt and Rt2
4483     // are represented as a single unsigned 64-bit
4484     // integer in the intrinsic definition, while
4485     // internally they are represented as two
4486     // 32-bit integers.
4487
4488     Value *Coproc = EmitScalarExpr(E->getArg(0));
4489     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4490     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4491     Value *CRm = EmitScalarExpr(E->getArg(3));
4492
4493     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4494     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4495     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4496     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4497
4498     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4499   }
4500
4501   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4502       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4503     Function *F;
4504
4505     switch (BuiltinID) {
4506     default: llvm_unreachable("unexpected builtin");
4507     case ARM::BI__builtin_arm_mrrc:
4508       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4509       break;
4510     case ARM::BI__builtin_arm_mrrc2:
4511       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4512       break;
4513     }
4514
4515     Value *Coproc = EmitScalarExpr(E->getArg(0));
4516     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4517     Value *CRm  = EmitScalarExpr(E->getArg(2));
4518     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4519
4520     // The result is an unsigned 64-bit integer, represented
4521     // as two 32-bit integers.
4522
4523     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4524     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4525     Rt = Builder.CreateZExt(Rt, Int64Ty);
4526     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4527
4528     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4529     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4530     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4531
4532     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4533   }
4534
4535   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4536       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4537         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4538        getContext().getTypeSize(E->getType()) == 64) ||
4539       BuiltinID == ARM::BI__ldrexd) {
4540     Function *F;
4541
4542     switch (BuiltinID) {
4543     default: llvm_unreachable("unexpected builtin");
4544     case ARM::BI__builtin_arm_ldaex:
4545       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4546       break;
4547     case ARM::BI__builtin_arm_ldrexd:
4548     case ARM::BI__builtin_arm_ldrex:
4549     case ARM::BI__ldrexd:
4550       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4551       break;
4552     }
4553
4554     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4555     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4556                                     "ldrexd");
4557
4558     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4559     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4560     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4561     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4562
4563     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4564     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4565     Val = Builder.CreateOr(Val, Val1);
4566     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4567   }
4568
4569   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4570       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4571     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4572
4573     QualType Ty = E->getType();
4574     llvm::Type *RealResTy = ConvertType(Ty);
4575     llvm::Type *PtrTy = llvm::IntegerType::get(
4576         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4577     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4578
4579     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4580                                        ? Intrinsic::arm_ldaex
4581                                        : Intrinsic::arm_ldrex,
4582                                    PtrTy);
4583     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4584
4585     if (RealResTy->isPointerTy())
4586       return Builder.CreateIntToPtr(Val, RealResTy);
4587     else {
4588       llvm::Type *IntResTy = llvm::IntegerType::get(
4589           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4590       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4591       return Builder.CreateBitCast(Val, RealResTy);
4592     }
4593   }
4594
4595   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4596       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4597         BuiltinID == ARM::BI__builtin_arm_strex) &&
4598        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4599     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4600                                        ? Intrinsic::arm_stlexd
4601                                        : Intrinsic::arm_strexd);
4602     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4603
4604     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4605     Value *Val = EmitScalarExpr(E->getArg(0));
4606     Builder.CreateStore(Val, Tmp);
4607
4608     Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4609     Val = Builder.CreateLoad(LdPtr);
4610
4611     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4612     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4613     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4614     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4615   }
4616
4617   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4618       BuiltinID == ARM::BI__builtin_arm_stlex) {
4619     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4620     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4621
4622     QualType Ty = E->getArg(0)->getType();
4623     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4624                                                  getContext().getTypeSize(Ty));
4625     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4626
4627     if (StoreVal->getType()->isPointerTy())
4628       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4629     else {
4630       llvm::Type *IntTy = llvm::IntegerType::get(
4631           getLLVMContext(),
4632           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4633       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4634       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4635     }
4636
4637     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4638                                        ? Intrinsic::arm_stlex
4639                                        : Intrinsic::arm_strex,
4640                                    StoreAddr->getType());
4641     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4642   }
4643
4644   switch (BuiltinID) {
4645   case ARM::BI__iso_volatile_load8:
4646   case ARM::BI__iso_volatile_load16:
4647   case ARM::BI__iso_volatile_load32:
4648   case ARM::BI__iso_volatile_load64: {
4649     Value *Ptr = EmitScalarExpr(E->getArg(0));
4650     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4651     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4652     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4653                                              LoadSize.getQuantity() * 8);
4654     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4655     llvm::LoadInst *Load =
4656       Builder.CreateAlignedLoad(Ptr, LoadSize);
4657     Load->setVolatile(true);
4658     return Load;
4659   }
4660   case ARM::BI__iso_volatile_store8:
4661   case ARM::BI__iso_volatile_store16:
4662   case ARM::BI__iso_volatile_store32:
4663   case ARM::BI__iso_volatile_store64: {
4664     Value *Ptr = EmitScalarExpr(E->getArg(0));
4665     Value *Value = EmitScalarExpr(E->getArg(1));
4666     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4667     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4668     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4669                                              StoreSize.getQuantity() * 8);
4670     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4671     llvm::StoreInst *Store =
4672       Builder.CreateAlignedStore(Value, Ptr,
4673                                  StoreSize);
4674     Store->setVolatile(true);
4675     return Store;
4676   }
4677   }
4678
4679   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4680     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4681     return Builder.CreateCall(F);
4682   }
4683
4684   // CRC32
4685   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4686   switch (BuiltinID) {
4687   case ARM::BI__builtin_arm_crc32b:
4688     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4689   case ARM::BI__builtin_arm_crc32cb:
4690     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4691   case ARM::BI__builtin_arm_crc32h:
4692     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4693   case ARM::BI__builtin_arm_crc32ch:
4694     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4695   case ARM::BI__builtin_arm_crc32w:
4696   case ARM::BI__builtin_arm_crc32d:
4697     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4698   case ARM::BI__builtin_arm_crc32cw:
4699   case ARM::BI__builtin_arm_crc32cd:
4700     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4701   }
4702
4703   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4704     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4705     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4706
4707     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4708     // intrinsics, hence we need different codegen for these cases.
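         // For example, crc32d(crc, x) is lowered roughly as
         // crc32w(crc32w(crc, trunc(x)), trunc(x >> 32)).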
4709     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4710         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4711       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4712       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4713       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4714       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4715
4716       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4717       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4718       return Builder.CreateCall(F, {Res, Arg1b});
4719     } else {
4720       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4721
4722       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4723       return Builder.CreateCall(F, {Arg0, Arg1});
4724     }
4725   }
4726
4727   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4728       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4729       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4730       BuiltinID == ARM::BI__builtin_arm_wsr ||
4731       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4732       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4733
4734     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4735                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4736                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4737
4738     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4739                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4740
4741     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4742                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4743
4744     llvm::Type *ValueType;
4745     llvm::Type *RegisterType;
4746     if (IsPointerBuiltin) {
4747       ValueType = VoidPtrTy;
4748       RegisterType = Int32Ty;
4749     } else if (Is64Bit) {
4750       ValueType = RegisterType = Int64Ty;
4751     } else {
4752       ValueType = RegisterType = Int32Ty;
4753     }
4754
4755     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4756   }
4757
4758   // Find out if any arguments are required to be integer constant
4759   // expressions.
4760   unsigned ICEArguments = 0;
4761   ASTContext::GetBuiltinTypeError Error;
4762   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4763   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4764
4765   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4766     return Builder.getInt32(addr.getAlignment().getQuantity());
4767   };
4768
4769   Address PtrOp0 = Address::invalid();
4770   Address PtrOp1 = Address::invalid();
4771   SmallVector<Value*, 4> Ops;
4772   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4773   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4774   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4775     if (i == 0) {
4776       switch (BuiltinID) {
4777       case NEON::BI__builtin_neon_vld1_v:
4778       case NEON::BI__builtin_neon_vld1q_v:
4779       case NEON::BI__builtin_neon_vld1q_lane_v:
4780       case NEON::BI__builtin_neon_vld1_lane_v:
4781       case NEON::BI__builtin_neon_vld1_dup_v:
4782       case NEON::BI__builtin_neon_vld1q_dup_v:
4783       case NEON::BI__builtin_neon_vst1_v:
4784       case NEON::BI__builtin_neon_vst1q_v:
4785       case NEON::BI__builtin_neon_vst1q_lane_v:
4786       case NEON::BI__builtin_neon_vst1_lane_v:
4787       case NEON::BI__builtin_neon_vst2_v:
4788       case NEON::BI__builtin_neon_vst2q_v:
4789       case NEON::BI__builtin_neon_vst2_lane_v:
4790       case NEON::BI__builtin_neon_vst2q_lane_v:
4791       case NEON::BI__builtin_neon_vst3_v:
4792       case NEON::BI__builtin_neon_vst3q_v:
4793       case NEON::BI__builtin_neon_vst3_lane_v:
4794       case NEON::BI__builtin_neon_vst3q_lane_v:
4795       case NEON::BI__builtin_neon_vst4_v:
4796       case NEON::BI__builtin_neon_vst4q_v:
4797       case NEON::BI__builtin_neon_vst4_lane_v:
4798       case NEON::BI__builtin_neon_vst4q_lane_v:
4799         // Get the alignment for the argument in addition to the value;
4800         // we'll use it later.
4801         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4802         Ops.push_back(PtrOp0.getPointer());
4803         continue;
4804       }
4805     }
4806     if (i == 1) {
4807       switch (BuiltinID) {
4808       case NEON::BI__builtin_neon_vld2_v:
4809       case NEON::BI__builtin_neon_vld2q_v:
4810       case NEON::BI__builtin_neon_vld3_v:
4811       case NEON::BI__builtin_neon_vld3q_v:
4812       case NEON::BI__builtin_neon_vld4_v:
4813       case NEON::BI__builtin_neon_vld4q_v:
4814       case NEON::BI__builtin_neon_vld2_lane_v:
4815       case NEON::BI__builtin_neon_vld2q_lane_v:
4816       case NEON::BI__builtin_neon_vld3_lane_v:
4817       case NEON::BI__builtin_neon_vld3q_lane_v:
4818       case NEON::BI__builtin_neon_vld4_lane_v:
4819       case NEON::BI__builtin_neon_vld4q_lane_v:
4820       case NEON::BI__builtin_neon_vld2_dup_v:
4821       case NEON::BI__builtin_neon_vld3_dup_v:
4822       case NEON::BI__builtin_neon_vld4_dup_v:
4823         // Get the alignment for the argument in addition to the value;
4824         // we'll use it later.
4825         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4826         Ops.push_back(PtrOp1.getPointer());
4827         continue;
4828       }
4829     }
4830
4831     if ((ICEArguments & (1 << i)) == 0) {
4832       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4833     } else {
4834       // If this is required to be a constant, constant fold it so that we know
4835       // that the generated intrinsic gets a ConstantInt.
4836       llvm::APSInt Result;
4837       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4838       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4839       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4840     }
4841   }
4842
4843   switch (BuiltinID) {
4844   default: break;
4845
4846   case NEON::BI__builtin_neon_vget_lane_i8:
4847   case NEON::BI__builtin_neon_vget_lane_i16:
4848   case NEON::BI__builtin_neon_vget_lane_i32:
4849   case NEON::BI__builtin_neon_vget_lane_i64:
4850   case NEON::BI__builtin_neon_vget_lane_f32:
4851   case NEON::BI__builtin_neon_vgetq_lane_i8:
4852   case NEON::BI__builtin_neon_vgetq_lane_i16:
4853   case NEON::BI__builtin_neon_vgetq_lane_i32:
4854   case NEON::BI__builtin_neon_vgetq_lane_i64:
4855   case NEON::BI__builtin_neon_vgetq_lane_f32:
4856     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4857
4858   case NEON::BI__builtin_neon_vset_lane_i8:
4859   case NEON::BI__builtin_neon_vset_lane_i16:
4860   case NEON::BI__builtin_neon_vset_lane_i32:
4861   case NEON::BI__builtin_neon_vset_lane_i64:
4862   case NEON::BI__builtin_neon_vset_lane_f32:
4863   case NEON::BI__builtin_neon_vsetq_lane_i8:
4864   case NEON::BI__builtin_neon_vsetq_lane_i16:
4865   case NEON::BI__builtin_neon_vsetq_lane_i32:
4866   case NEON::BI__builtin_neon_vsetq_lane_i64:
4867   case NEON::BI__builtin_neon_vsetq_lane_f32:
4868     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4869
4870   case NEON::BI__builtin_neon_vsha1h_u32:
4871     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4872                         "vsha1h");
4873   case NEON::BI__builtin_neon_vsha1cq_u32:
4874     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4875                         "vsha1c");
4876   case NEON::BI__builtin_neon_vsha1pq_u32:
4877     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4878                         "vsha1p");
4879   case NEON::BI__builtin_neon_vsha1mq_u32:
4880     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4881                         "vsha1m");
4882
4883   // The ARM _MoveToCoprocessor builtins put the input register value as
4884   // the first argument, but the LLVM intrinsic expects it as the third one.
4885   case ARM::BI_MoveToCoprocessor:
4886   case ARM::BI_MoveToCoprocessor2: {
4887     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4888                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4889     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4890                                   Ops[3], Ops[4], Ops[5]});
4891   }
4892   case ARM::BI_BitScanForward:
4893   case ARM::BI_BitScanForward64:
4894     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4895   case ARM::BI_BitScanReverse:
4896   case ARM::BI_BitScanReverse64:
4897     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4898
4899   case ARM::BI_InterlockedAnd64:
4900     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4901   case ARM::BI_InterlockedExchange64:
4902     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4903   case ARM::BI_InterlockedExchangeAdd64:
4904     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4905   case ARM::BI_InterlockedExchangeSub64:
4906     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4907   case ARM::BI_InterlockedOr64:
4908     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4909   case ARM::BI_InterlockedXor64:
4910     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4911   case ARM::BI_InterlockedDecrement64:
4912     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4913   case ARM::BI_InterlockedIncrement64:
4914     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4915   }
4916
4917   // Get the last argument, which specifies the vector type.
4918   assert(HasExtraArg);
4919   llvm::APSInt Result;
4920   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4921   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4922     return nullptr;
4923
4924   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4925       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4926     // Determine the overloaded type of this builtin.
4927     llvm::Type *Ty;
4928     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4929       Ty = FloatTy;
4930     else
4931       Ty = DoubleTy;
4932
4933     // Determine whether this is an unsigned conversion or not.
4934     bool usgn = Result.getZExtValue() == 1;
4935     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4936
4937     // Call the appropriate intrinsic.
4938     Function *F = CGM.getIntrinsic(Int, Ty);
4939     return Builder.CreateCall(F, Ops, "vcvtr");
4940   }
4941
4942   // Determine the type of this overloaded NEON intrinsic.
4943   NeonTypeFlags Type(Result.getZExtValue());
4944   bool usgn = Type.isUnsigned();
4945   bool rightShift = false;
4946
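  // A hedged note: the trailing constant packs the element type together with
  // the "quad" and "unsigned" bits into a NeonTypeFlags value; GetNeonType
  // turns that into the concrete LLVM vector type used below.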
4947   llvm::VectorType *VTy = GetNeonType(this, Type);
4948   llvm::Type *Ty = VTy;
4949   if (!Ty)
4950     return nullptr;
4951
4952   // Many NEON builtins have identical semantics and uses in ARM and
4953   // AArch64. Emit these in a single function.
4954   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4955   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4956       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4957   if (Builtin)
4958     return EmitCommonNeonBuiltinExpr(
4959         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4960         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4961
4962   unsigned Int;
4963   switch (BuiltinID) {
4964   default: return nullptr;
4965   case NEON::BI__builtin_neon_vld1q_lane_v:
4966     // Handle 64-bit integer elements as a special case.  Use shuffles of
4967     // one-element vectors to avoid poor code for i64 in the backend.
4968     if (VTy->getElementType()->isIntegerTy(64)) {
4969       // Extract the other lane.
4970       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4971       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4972       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4973       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4974       // Load the value as a one-element vector.
4975       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4976       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4977       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4978       Value *Align = getAlignmentValue32(PtrOp0);
4979       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4980       // Combine them.
4981       uint32_t Indices[] = {1 - Lane, Lane};
4982       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4983       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4984     }
4985     // fall through
4986   case NEON::BI__builtin_neon_vld1_lane_v: {
4987     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4988     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4989     Value *Ld = Builder.CreateLoad(PtrOp0);
4990     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4991   }
4992   case NEON::BI__builtin_neon_vld2_dup_v:
4993   case NEON::BI__builtin_neon_vld3_dup_v:
4994   case NEON::BI__builtin_neon_vld4_dup_v: {
4995     // Handle 64-bit elements as a special case; no "dup" is needed.
4996     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4997       switch (BuiltinID) {
4998       case NEON::BI__builtin_neon_vld2_dup_v:
4999         Int = Intrinsic::arm_neon_vld2;
5000         break;
5001       case NEON::BI__builtin_neon_vld3_dup_v:
5002         Int = Intrinsic::arm_neon_vld3;
5003         break;
5004       case NEON::BI__builtin_neon_vld4_dup_v:
5005         Int = Intrinsic::arm_neon_vld4;
5006         break;
5007       default: llvm_unreachable("unknown vld_dup intrinsic?");
5008       }
5009       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5010       Function *F = CGM.getIntrinsic(Int, Tys);
5011       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5012       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5013       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5014       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5015       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5016     }
5017     switch (BuiltinID) {
5018     case NEON::BI__builtin_neon_vld2_dup_v:
5019       Int = Intrinsic::arm_neon_vld2lane;
5020       break;
5021     case NEON::BI__builtin_neon_vld3_dup_v:
5022       Int = Intrinsic::arm_neon_vld3lane;
5023       break;
5024     case NEON::BI__builtin_neon_vld4_dup_v:
5025       Int = Intrinsic::arm_neon_vld4lane;
5026       break;
5027     default: llvm_unreachable("unknown vld_dup intrinsic?");
5028     }
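    // Strategy sketch: load a single lane (lane 0) with the corresponding
    // vldNlane intrinsic, using undef vectors for the existing contents, then
    // splat that lane across every element of each vector in the result.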
5029     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5030     Function *F = CGM.getIntrinsic(Int, Tys);
5031     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5032
5033     SmallVector<Value*, 6> Args;
5034     Args.push_back(Ops[1]);
5035     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5036
5037     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5038     Args.push_back(CI);
5039     Args.push_back(getAlignmentValue32(PtrOp1));
5040
5041     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5042     // Splat lane 0 to all elements in each vector of the result.
5043     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5044       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5045       Value *Elt = Builder.CreateBitCast(Val, Ty);
5046       Elt = EmitNeonSplat(Elt, CI);
5047       Elt = Builder.CreateBitCast(Elt, Val->getType());
5048       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5049     }
5050     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5051     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5052     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5053   }
5054   case NEON::BI__builtin_neon_vqrshrn_n_v:
5055     Int =
5056       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5057     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5058                         1, true);
5059   case NEON::BI__builtin_neon_vqrshrun_n_v:
5060     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5061                         Ops, "vqrshrun_n", 1, true);
5062   case NEON::BI__builtin_neon_vqshrn_n_v:
5063     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5064     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5065                         1, true);
5066   case NEON::BI__builtin_neon_vqshrun_n_v:
5067     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5068                         Ops, "vqshrun_n", 1, true);
5069   case NEON::BI__builtin_neon_vrecpe_v:
5070   case NEON::BI__builtin_neon_vrecpeq_v:
5071     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5072                         Ops, "vrecpe");
5073   case NEON::BI__builtin_neon_vrshrn_n_v:
5074     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5075                         Ops, "vrshrn_n", 1, true);
5076   case NEON::BI__builtin_neon_vrsra_n_v:
5077   case NEON::BI__builtin_neon_vrsraq_n_v:
5078     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5079     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5080     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5081     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5082     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5083     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5084   case NEON::BI__builtin_neon_vsri_n_v:
5085   case NEON::BI__builtin_neon_vsriq_n_v:
5086     rightShift = true;
5087     LLVM_FALLTHROUGH;
5088   case NEON::BI__builtin_neon_vsli_n_v:
5089   case NEON::BI__builtin_neon_vsliq_n_v:
5090     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5091     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5092                         Ops, "vsli_n");
5093   case NEON::BI__builtin_neon_vsra_n_v:
5094   case NEON::BI__builtin_neon_vsraq_n_v:
5095     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5096     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5097     return Builder.CreateAdd(Ops[0], Ops[1]);
5098   case NEON::BI__builtin_neon_vst1q_lane_v:
5099     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5100     // a one-element vector and avoid poor code for i64 in the backend.
5101     if (VTy->getElementType()->isIntegerTy(64)) {
5102       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5103       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5104       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5105       Ops[2] = getAlignmentValue32(PtrOp0);
5106       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5107       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5108                                                  Tys), Ops);
5109     }
5110     // fall through
5111   case NEON::BI__builtin_neon_vst1_lane_v: {
5112     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5113     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5114     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5115     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5116     return St;
5117   }
5118   case NEON::BI__builtin_neon_vtbl1_v:
5119     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5120                         Ops, "vtbl1");
5121   case NEON::BI__builtin_neon_vtbl2_v:
5122     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5123                         Ops, "vtbl2");
5124   case NEON::BI__builtin_neon_vtbl3_v:
5125     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5126                         Ops, "vtbl3");
5127   case NEON::BI__builtin_neon_vtbl4_v:
5128     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5129                         Ops, "vtbl4");
5130   case NEON::BI__builtin_neon_vtbx1_v:
5131     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5132                         Ops, "vtbx1");
5133   case NEON::BI__builtin_neon_vtbx2_v:
5134     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5135                         Ops, "vtbx2");
5136   case NEON::BI__builtin_neon_vtbx3_v:
5137     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5138                         Ops, "vtbx3");
5139   case NEON::BI__builtin_neon_vtbx4_v:
5140     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5141                         Ops, "vtbx4");
5142   }
5143 }
5144
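/// Lower the AArch64 vtbl/vtbx builtin family.  The 64-bit "d" forms are
/// mapped onto the 128-bit aarch64.neon.tbl/tbx intrinsics, with the table
/// operands packed via packTBLDVectorList (a hedged summary of the cases
/// handled below).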
5145 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5146                                       const CallExpr *E,
5147                                       SmallVectorImpl<Value *> &Ops) {
5148   unsigned int Int = 0;
5149   const char *s = nullptr;
5150
5151   switch (BuiltinID) {
5152   default:
5153     return nullptr;
5154   case NEON::BI__builtin_neon_vtbl1_v:
5155   case NEON::BI__builtin_neon_vqtbl1_v:
5156   case NEON::BI__builtin_neon_vqtbl1q_v:
5157   case NEON::BI__builtin_neon_vtbl2_v:
5158   case NEON::BI__builtin_neon_vqtbl2_v:
5159   case NEON::BI__builtin_neon_vqtbl2q_v:
5160   case NEON::BI__builtin_neon_vtbl3_v:
5161   case NEON::BI__builtin_neon_vqtbl3_v:
5162   case NEON::BI__builtin_neon_vqtbl3q_v:
5163   case NEON::BI__builtin_neon_vtbl4_v:
5164   case NEON::BI__builtin_neon_vqtbl4_v:
5165   case NEON::BI__builtin_neon_vqtbl4q_v:
5166     break;
5167   case NEON::BI__builtin_neon_vtbx1_v:
5168   case NEON::BI__builtin_neon_vqtbx1_v:
5169   case NEON::BI__builtin_neon_vqtbx1q_v:
5170   case NEON::BI__builtin_neon_vtbx2_v:
5171   case NEON::BI__builtin_neon_vqtbx2_v:
5172   case NEON::BI__builtin_neon_vqtbx2q_v:
5173   case NEON::BI__builtin_neon_vtbx3_v:
5174   case NEON::BI__builtin_neon_vqtbx3_v:
5175   case NEON::BI__builtin_neon_vqtbx3q_v:
5176   case NEON::BI__builtin_neon_vtbx4_v:
5177   case NEON::BI__builtin_neon_vqtbx4_v:
5178   case NEON::BI__builtin_neon_vqtbx4q_v:
5179     break;
5180   }
5181
5182   assert(E->getNumArgs() >= 3);
5183
5184   // Get the last argument, which specifies the vector type.
5185   llvm::APSInt Result;
5186   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5187   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5188     return nullptr;
5189
5190   // Determine the type of this overloaded NEON intrinsic.
5191   NeonTypeFlags Type(Result.getZExtValue());
5192   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5193   if (!Ty)
5194     return nullptr;
5195
5196   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5197
5198   // AArch64 scalar builtins are not overloaded; they do not have an extra
5199   // argument that specifies the vector type, so handle each case separately.
5200   switch (BuiltinID) {
5201   case NEON::BI__builtin_neon_vtbl1_v: {
5202     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5203                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5204                               "vtbl1");
5205   }
5206   case NEON::BI__builtin_neon_vtbl2_v: {
5207     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5208                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5209                               "vtbl1");
5210   }
5211   case NEON::BI__builtin_neon_vtbl3_v: {
5212     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5213                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5214                               "vtbl2");
5215   }
5216   case NEON::BI__builtin_neon_vtbl4_v: {
5217     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5218                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5219                               "vtbl2");
5220   }
5221   case NEON::BI__builtin_neon_vtbx1_v: {
5222     Value *TblRes =
5223         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5224                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5225
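    // Per the masking below: lanes whose index is >= 8 fall outside the single
    // 8-byte table, so they are taken from the original operand Ops[0], while
    // in-range lanes come from the tbl1 result.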
5226     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5227     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5228     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5229
5230     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5231     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5232     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5233   }
5234   case NEON::BI__builtin_neon_vtbx2_v: {
5235     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5236                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5237                               "vtbx1");
5238   }
5239   case NEON::BI__builtin_neon_vtbx3_v: {
5240     Value *TblRes =
5241         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5242                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5243
5244     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5245     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5246                                            TwentyFourV);
5247     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5248
5249     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5250     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5251     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5252   }
5253   case NEON::BI__builtin_neon_vtbx4_v: {
5254     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5255                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5256                               "vtbx2");
5257   }
5258   case NEON::BI__builtin_neon_vqtbl1_v:
5259   case NEON::BI__builtin_neon_vqtbl1q_v:
5260     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5261   case NEON::BI__builtin_neon_vqtbl2_v:
5262   case NEON::BI__builtin_neon_vqtbl2q_v:
5263     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5264   case NEON::BI__builtin_neon_vqtbl3_v:
5265   case NEON::BI__builtin_neon_vqtbl3q_v:
5266     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5267   case NEON::BI__builtin_neon_vqtbl4_v:
5268   case NEON::BI__builtin_neon_vqtbl4q_v:
5269     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5270   case NEON::BI__builtin_neon_vqtbx1_v:
5271   case NEON::BI__builtin_neon_vqtbx1q_v:
5272     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5273   case NEON::BI__builtin_neon_vqtbx2_v:
5274   case NEON::BI__builtin_neon_vqtbx2q_v:
5275     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5276   case NEON::BI__builtin_neon_vqtbx3_v:
5277   case NEON::BI__builtin_neon_vqtbx3q_v:
5278     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5279   case NEON::BI__builtin_neon_vqtbx4_v:
5280   case NEON::BI__builtin_neon_vqtbx4q_v:
5281     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5282   }
5284
5285   if (!Int)
5286     return nullptr;
5287
5288   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5289   return CGF.EmitNeonCall(F, Ops, s);
5290 }
5291
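/// Wrap a scalar i16 value in lane 0 of an otherwise-undef <4 x i16> vector,
/// so that vector-only NEON intrinsics can be applied to scalar operands.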
5292 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5293   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5294   Op = Builder.CreateBitCast(Op, Int16Ty);
5295   Value *V = UndefValue::get(VTy);
5296   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5297   Op = Builder.CreateInsertElement(V, Op, CI);
5298   return Op;
5299 }
5300
5301 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5302                                                const CallExpr *E) {
5303   unsigned HintID = static_cast<unsigned>(-1);
5304   switch (BuiltinID) {
5305   default: break;
5306   case AArch64::BI__builtin_arm_nop:
5307     HintID = 0;
5308     break;
5309   case AArch64::BI__builtin_arm_yield:
5310     HintID = 1;
5311     break;
5312   case AArch64::BI__builtin_arm_wfe:
5313     HintID = 2;
5314     break;
5315   case AArch64::BI__builtin_arm_wfi:
5316     HintID = 3;
5317     break;
5318   case AArch64::BI__builtin_arm_sev:
5319     HintID = 4;
5320     break;
5321   case AArch64::BI__builtin_arm_sevl:
5322     HintID = 5;
5323     break;
5324   }
5325
5326   if (HintID != static_cast<unsigned>(-1)) {
5327     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5328     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5329   }
5330
5331   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5332     Value *Address         = EmitScalarExpr(E->getArg(0));
5333     Value *RW              = EmitScalarExpr(E->getArg(1));
5334     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5335     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5336     Value *IsData          = EmitScalarExpr(E->getArg(4));
5337
5338     Value *Locality = nullptr;
5339     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5340       // Temporal fetch; convert the cache level to locality.
5341       Locality = llvm::ConstantInt::get(Int32Ty,
5342         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
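      // E.g. a CacheLevel constant of 0 yields llvm.prefetch locality 3, and
      // a constant of 2 yields locality 1 (a direct reading of the formula
      // above).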
5343     } else {
5344       // Streaming fetch.
5345       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5346     }
5347
5348     // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
5349     // PLDL3STRM or PLDL2STRM.
5350     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5351     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5352   }
5353
5354   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5355     assert((getContext().getTypeSize(E->getType()) == 32) &&
5356            "rbit of unusual size!");
5357     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5358     return Builder.CreateCall(
5359         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5360   }
5361   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5362     assert((getContext().getTypeSize(E->getType()) == 64) &&
5363            "rbit of unusual size!");
5364     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5365     return Builder.CreateCall(
5366         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5367   }
5368
5369   if (BuiltinID == AArch64::BI__clear_cache) {
5370     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5371     const FunctionDecl *FD = E->getDirectCallee();
5372     Value *Ops[2];
5373     for (unsigned i = 0; i < 2; i++)
5374       Ops[i] = EmitScalarExpr(E->getArg(i));
5375     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5376     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5377     StringRef Name = FD->getName();
5378     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5379   }
5380
5381   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5382       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5383       getContext().getTypeSize(E->getType()) == 128) {
5384     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5385                                        ? Intrinsic::aarch64_ldaxp
5386                                        : Intrinsic::aarch64_ldxp);
5387
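    // Rough shape of the IR emitted below (a sketch, not verbatim output):
    //   %pair = call { i64, i64 } @llvm.aarch64.ldxp(i8* %addr)
    // followed by zero-extending both halves to i128, shifting one left by 64,
    // OR'ing them together and bitcasting to the 128-bit result type.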
5388     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5389     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5390                                     "ldxp");
5391
5392     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5393     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5394     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5395     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5396     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5397
5398     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5399     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5400     Val = Builder.CreateOr(Val, Val1);
5401     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5402   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5403              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5404     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5405
5406     QualType Ty = E->getType();
5407     llvm::Type *RealResTy = ConvertType(Ty);
5408     llvm::Type *PtrTy = llvm::IntegerType::get(
5409         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5410     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5411
5412     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5413                                        ? Intrinsic::aarch64_ldaxr
5414                                        : Intrinsic::aarch64_ldxr,
5415                                    PtrTy);
5416     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5417
5418     if (RealResTy->isPointerTy())
5419       return Builder.CreateIntToPtr(Val, RealResTy);
5420
5421     llvm::Type *IntResTy = llvm::IntegerType::get(
5422         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5423     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5424     return Builder.CreateBitCast(Val, RealResTy);
5425   }
5426
5427   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5428        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5429       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5430     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5431                                        ? Intrinsic::aarch64_stlxp
5432                                        : Intrinsic::aarch64_stxp);
5433     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5434
5435     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5436     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5437
5438     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5439     llvm::Value *Val = Builder.CreateLoad(Tmp);
5440
5441     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5442     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5443     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5444                                          Int8PtrTy);
5445     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5446   }
5447
5448   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5449       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5450     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5451     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5452
5453     QualType Ty = E->getArg(0)->getType();
5454     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5455                                                  getContext().getTypeSize(Ty));
5456     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5457
5458     if (StoreVal->getType()->isPointerTy())
5459       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5460     else {
5461       llvm::Type *IntTy = llvm::IntegerType::get(
5462           getLLVMContext(),
5463           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5464       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5465       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5466     }
5467
5468     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5469                                        ? Intrinsic::aarch64_stlxr
5470                                        : Intrinsic::aarch64_stxr,
5471                                    StoreAddr->getType());
5472     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5473   }
5474
5475   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5476     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5477     return Builder.CreateCall(F);
5478   }
5479
5480   // CRC32
5481   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5482   switch (BuiltinID) {
5483   case AArch64::BI__builtin_arm_crc32b:
5484     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5485   case AArch64::BI__builtin_arm_crc32cb:
5486     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5487   case AArch64::BI__builtin_arm_crc32h:
5488     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5489   case AArch64::BI__builtin_arm_crc32ch:
5490     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5491   case AArch64::BI__builtin_arm_crc32w:
5492     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5493   case AArch64::BI__builtin_arm_crc32cw:
5494     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5495   case AArch64::BI__builtin_arm_crc32d:
5496     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5497   case AArch64::BI__builtin_arm_crc32cd:
5498     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5499   }
5500
5501   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5502     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5503     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5504     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5505
5506     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5507     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
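    // The byte and halfword variants take a second argument narrower than the
    // intrinsic's i32 data operand, so it is zero-extended here; for the word
    // and doubleword variants this is effectively a no-op bitcast.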
5508
5509     return Builder.CreateCall(F, {Arg0, Arg1});
5510   }
5511
5512   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5513       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5514       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5515       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5516       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5517       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5518
5519     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5520                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5521                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5522
5523     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5524                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5525
5526     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5527                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5528
5529     llvm::Type *ValueType;
5530     llvm::Type *RegisterType = Int64Ty;
5531     if (IsPointerBuiltin) {
5532       ValueType = VoidPtrTy;
5533     } else if (Is64Bit) {
5534       ValueType = Int64Ty;
5535     } else {
5536       ValueType = Int32Ty;
5537     }
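    // Unlike the 32-bit ARM path, RegisterType is always Int64Ty here; the
    // 32-bit and pointer variants are handled purely through ValueType.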
5538
5539     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5540   }
5541
5542   // Find out if any arguments are required to be integer constant
5543   // expressions.
5544   unsigned ICEArguments = 0;
5545   ASTContext::GetBuiltinTypeError Error;
5546   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5547   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5548
5549   llvm::SmallVector<Value*, 4> Ops;
5550   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5551     if ((ICEArguments & (1 << i)) == 0) {
5552       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5553     } else {
5554       // If this is required to be a constant, constant fold it so that we know
5555       // that the generated intrinsic gets a ConstantInt.
5556       llvm::APSInt Result;
5557       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5558       assert(IsConst && "Constant arg isn't actually constant?");
5559       (void)IsConst;
5560       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5561     }
5562   }
5563
5564   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5565   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5566       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5567
5568   if (Builtin) {
5569     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5570     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5571     assert(Result && "SISD intrinsic should have been handled");
5572     return Result;
5573   }
5574
5575   llvm::APSInt Result;
5576   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5577   NeonTypeFlags Type(0);
5578   if (Arg->isIntegerConstantExpr(Result, getContext()))
5579     // Determine the type of this overloaded NEON intrinsic.
5580     Type = NeonTypeFlags(Result.getZExtValue());
5581
5582   bool usgn = Type.isUnsigned();
5583   bool quad = Type.isQuad();
5584
5585   // Handle non-overloaded intrinsics first.
5586   switch (BuiltinID) {
5587   default: break;
5588   case NEON::BI__builtin_neon_vldrq_p128: {
5589     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5590     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5591     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5592     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5593                                      CharUnits::fromQuantity(16));
5594   }
5595   case NEON::BI__builtin_neon_vstrq_p128: {
5596     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5597     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5598     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5599   }
5600   case NEON::BI__builtin_neon_vcvts_u32_f32:
5601   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5602     usgn = true;
5603     // FALL THROUGH
5604   case NEON::BI__builtin_neon_vcvts_s32_f32:
5605   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5606     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5607     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5608     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5609     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5610     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5611     if (usgn)
5612       return Builder.CreateFPToUI(Ops[0], InTy);
5613     return Builder.CreateFPToSI(Ops[0], InTy);
5614   }
5615   case NEON::BI__builtin_neon_vcvts_f32_u32:
5616   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5617     usgn = true;
5618     // FALL THROUGH
5619   case NEON::BI__builtin_neon_vcvts_f32_s32:
5620   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5622     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5623     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5624     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5625     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5626     if (usgn)
5627       return Builder.CreateUIToFP(Ops[0], FTy);
5628     return Builder.CreateSIToFP(Ops[0], FTy);
5629   }
5630   case NEON::BI__builtin_neon_vpaddd_s64: {
5631     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5632     Value *Vec = EmitScalarExpr(E->getArg(0));
5633     // The vector is v2i64, so make sure it's bitcast to that.
5634     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5635     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5636     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5637     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5638     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5639     // Pairwise addition of a v2i64 into a scalar i64.
5640     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5641   }
5642   case NEON::BI__builtin_neon_vpaddd_f64: {
5643     llvm::Type *Ty =
5644       llvm::VectorType::get(DoubleTy, 2);
5645     Value *Vec = EmitScalarExpr(E->getArg(0));
5646     // The vector is v2f64, so make sure it's bitcast to that.
5647     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5648     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5649     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5650     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5651     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5652     // Pairwise addition of a v2f64 into a scalar f64.
5653     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5654   }
5655   case NEON::BI__builtin_neon_vpadds_f32: {
5656     llvm::Type *Ty =
5657       llvm::VectorType::get(FloatTy, 2);
5658     Value *Vec = EmitScalarExpr(E->getArg(0));
5659     // The vector is v2f32, so make sure it's bitcast to that.
5660     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5661     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5662     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5663     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5664     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5665     // Pairwise addition of a v2f32 into a scalar f32.
5666     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5667   }
5668   case NEON::BI__builtin_neon_vceqzd_s64:
5669   case NEON::BI__builtin_neon_vceqzd_f64:
5670   case NEON::BI__builtin_neon_vceqzs_f32:
5671     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5672     return EmitAArch64CompareBuiltinExpr(
5673         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5674         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5675   case NEON::BI__builtin_neon_vcgezd_s64:
5676   case NEON::BI__builtin_neon_vcgezd_f64:
5677   case NEON::BI__builtin_neon_vcgezs_f32:
5678     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5679     return EmitAArch64CompareBuiltinExpr(
5680         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5681         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5682   case NEON::BI__builtin_neon_vclezd_s64:
5683   case NEON::BI__builtin_neon_vclezd_f64:
5684   case NEON::BI__builtin_neon_vclezs_f32:
5685     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5686     return EmitAArch64CompareBuiltinExpr(
5687         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5688         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5689   case NEON::BI__builtin_neon_vcgtzd_s64:
5690   case NEON::BI__builtin_neon_vcgtzd_f64:
5691   case NEON::BI__builtin_neon_vcgtzs_f32:
5692     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5693     return EmitAArch64CompareBuiltinExpr(
5694         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5695         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5696   case NEON::BI__builtin_neon_vcltzd_s64:
5697   case NEON::BI__builtin_neon_vcltzd_f64:
5698   case NEON::BI__builtin_neon_vcltzs_f32:
5699     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5700     return EmitAArch64CompareBuiltinExpr(
5701         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5702         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5703
5704   case NEON::BI__builtin_neon_vceqzd_u64: {
5705     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5706     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5707     Ops[0] =
5708         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5709     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5710   }
5711   case NEON::BI__builtin_neon_vceqd_f64:
5712   case NEON::BI__builtin_neon_vcled_f64:
5713   case NEON::BI__builtin_neon_vcltd_f64:
5714   case NEON::BI__builtin_neon_vcged_f64:
5715   case NEON::BI__builtin_neon_vcgtd_f64: {
5716     llvm::CmpInst::Predicate P;
5717     switch (BuiltinID) {
5718     default: llvm_unreachable("missing builtin ID in switch!");
5719     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5720     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5721     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5722     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5723     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5724     }
5725     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5726     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5727     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5728     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5729     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5730   }
5731   case NEON::BI__builtin_neon_vceqs_f32:
5732   case NEON::BI__builtin_neon_vcles_f32:
5733   case NEON::BI__builtin_neon_vclts_f32:
5734   case NEON::BI__builtin_neon_vcges_f32:
5735   case NEON::BI__builtin_neon_vcgts_f32: {
5736     llvm::CmpInst::Predicate P;
5737     switch (BuiltinID) {
5738     default: llvm_unreachable("missing builtin ID in switch!");
5739     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5740     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5741     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5742     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5743     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5744     }
5745     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5746     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5747     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5748     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5749     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5750   }
5751   case NEON::BI__builtin_neon_vceqd_s64:
5752   case NEON::BI__builtin_neon_vceqd_u64:
5753   case NEON::BI__builtin_neon_vcgtd_s64:
5754   case NEON::BI__builtin_neon_vcgtd_u64:
5755   case NEON::BI__builtin_neon_vcltd_s64:
5756   case NEON::BI__builtin_neon_vcltd_u64:
5757   case NEON::BI__builtin_neon_vcged_u64:
5758   case NEON::BI__builtin_neon_vcged_s64:
5759   case NEON::BI__builtin_neon_vcled_u64:
5760   case NEON::BI__builtin_neon_vcled_s64: {
5761     llvm::CmpInst::Predicate P;
5762     switch (BuiltinID) {
5763     default: llvm_unreachable("missing builtin ID in switch!");
5764     case NEON::BI__builtin_neon_vceqd_s64:
5765     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5766     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5767     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5768     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5769     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5770     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5771     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5772     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5773     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5774     }
5775     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5776     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5777     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5778     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5779     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5780   }
5781   case NEON::BI__builtin_neon_vtstd_s64:
5782   case NEON::BI__builtin_neon_vtstd_u64: {
5783     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5784     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5785     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5786     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5787     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5788                                 llvm::Constant::getNullValue(Int64Ty));
5789     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5790   }
5791   case NEON::BI__builtin_neon_vset_lane_i8:
5792   case NEON::BI__builtin_neon_vset_lane_i16:
5793   case NEON::BI__builtin_neon_vset_lane_i32:
5794   case NEON::BI__builtin_neon_vset_lane_i64:
5795   case NEON::BI__builtin_neon_vset_lane_f32:
5796   case NEON::BI__builtin_neon_vsetq_lane_i8:
5797   case NEON::BI__builtin_neon_vsetq_lane_i16:
5798   case NEON::BI__builtin_neon_vsetq_lane_i32:
5799   case NEON::BI__builtin_neon_vsetq_lane_i64:
5800   case NEON::BI__builtin_neon_vsetq_lane_f32:
5801     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5802     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5803   case NEON::BI__builtin_neon_vset_lane_f64:
5804     // The vector type needs a cast for the v1f64 variant.
5805     Ops[1] = Builder.CreateBitCast(Ops[1],
5806                                    llvm::VectorType::get(DoubleTy, 1));
5807     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5808     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5809   case NEON::BI__builtin_neon_vsetq_lane_f64:
5810     // The vector type needs a cast for the v2f64 variant.
5811     Ops[1] = Builder.CreateBitCast(Ops[1],
5812         llvm::VectorType::get(DoubleTy, 2));
5813     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5814     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5815
5816   case NEON::BI__builtin_neon_vget_lane_i8:
5817   case NEON::BI__builtin_neon_vdupb_lane_i8:
5818     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5819     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5820                                         "vget_lane");
5821   case NEON::BI__builtin_neon_vgetq_lane_i8:
5822   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5823     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5824     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5825                                         "vgetq_lane");
5826   case NEON::BI__builtin_neon_vget_lane_i16:
5827   case NEON::BI__builtin_neon_vduph_lane_i16:
5828     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5829     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5830                                         "vget_lane");
5831   case NEON::BI__builtin_neon_vgetq_lane_i16:
5832   case NEON::BI__builtin_neon_vduph_laneq_i16:
5833     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5834     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5835                                         "vgetq_lane");
5836   case NEON::BI__builtin_neon_vget_lane_i32:
5837   case NEON::BI__builtin_neon_vdups_lane_i32:
5838     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5839     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5840                                         "vget_lane");
5841   case NEON::BI__builtin_neon_vdups_lane_f32:
5842     Ops[0] = Builder.CreateBitCast(Ops[0],
5843         llvm::VectorType::get(FloatTy, 2));
5844     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5845                                         "vdups_lane");
5846   case NEON::BI__builtin_neon_vgetq_lane_i32:
5847   case NEON::BI__builtin_neon_vdups_laneq_i32:
5848     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5849     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5850                                         "vgetq_lane");
5851   case NEON::BI__builtin_neon_vget_lane_i64:
5852   case NEON::BI__builtin_neon_vdupd_lane_i64:
5853     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5854     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5855                                         "vget_lane");
5856   case NEON::BI__builtin_neon_vdupd_lane_f64:
5857     Ops[0] = Builder.CreateBitCast(Ops[0],
5858         llvm::VectorType::get(DoubleTy, 1));
5859     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5860                                         "vdupd_lane");
5861   case NEON::BI__builtin_neon_vgetq_lane_i64:
5862   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5863     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5864     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5865                                         "vgetq_lane");
5866   case NEON::BI__builtin_neon_vget_lane_f32:
5867     Ops[0] = Builder.CreateBitCast(Ops[0],
5868         llvm::VectorType::get(FloatTy, 2));
5869     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5870                                         "vget_lane");
5871   case NEON::BI__builtin_neon_vget_lane_f64:
5872     Ops[0] = Builder.CreateBitCast(Ops[0],
5873         llvm::VectorType::get(DoubleTy, 1));
5874     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5875                                         "vget_lane");
5876   case NEON::BI__builtin_neon_vgetq_lane_f32:
5877   case NEON::BI__builtin_neon_vdups_laneq_f32:
5878     Ops[0] = Builder.CreateBitCast(Ops[0],
5879         llvm::VectorType::get(FloatTy, 4));
5880     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5881                                         "vgetq_lane");
5882   case NEON::BI__builtin_neon_vgetq_lane_f64:
5883   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5884     Ops[0] = Builder.CreateBitCast(Ops[0],
5885         llvm::VectorType::get(DoubleTy, 2));
5886     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5887                                         "vgetq_lane");
5888   case NEON::BI__builtin_neon_vaddd_s64:
5889   case NEON::BI__builtin_neon_vaddd_u64:
5890     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5891   case NEON::BI__builtin_neon_vsubd_s64:
5892   case NEON::BI__builtin_neon_vsubd_u64:
5893     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5894   case NEON::BI__builtin_neon_vqdmlalh_s16:
5895   case NEON::BI__builtin_neon_vqdmlslh_s16: {
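    // The operands are wrapped into 4 x i16 vectors (vectorWrapScalar16),
    // presumably because sqdmull has no scalar i16 form; lane 0 of the widened
    // result is then extracted and fed to the saturating add/sub.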
5896     SmallVector<Value *, 2> ProductOps;
5897     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5898     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5899     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5900     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5901                           ProductOps, "vqdmlXl");
5902     Constant *CI = ConstantInt::get(SizeTy, 0);
5903     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5904
5905     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5906                                         ? Intrinsic::aarch64_neon_sqadd
5907                                         : Intrinsic::aarch64_neon_sqsub;
5908     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5909   }
5910   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5911     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5912     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5913     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5914                         Ops, "vqshlu_n");
5915   }
5916   case NEON::BI__builtin_neon_vqshld_n_u64:
5917   case NEON::BI__builtin_neon_vqshld_n_s64: {
5918     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5919                                    ? Intrinsic::aarch64_neon_uqshl
5920                                    : Intrinsic::aarch64_neon_sqshl;
5921     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5922     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5923     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5924   }
5925   case NEON::BI__builtin_neon_vrshrd_n_u64:
5926   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5927     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5928                                    ? Intrinsic::aarch64_neon_urshl
5929                                    : Intrinsic::aarch64_neon_srshl;
5930     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5931     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5932     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5933     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5934   }
5935   case NEON::BI__builtin_neon_vrsrad_n_u64:
5936   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5937     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5938                                    ? Intrinsic::aarch64_neon_urshl
5939                                    : Intrinsic::aarch64_neon_srshl;
5940     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5941     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5942     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5943                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5944     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5945   }
5946   case NEON::BI__builtin_neon_vshld_n_s64:
5947   case NEON::BI__builtin_neon_vshld_n_u64: {
5948     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5949     return Builder.CreateShl(
5950         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5951   }
5952   case NEON::BI__builtin_neon_vshrd_n_s64: {
5953     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5954     return Builder.CreateAShr(
5955         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5956                                                    Amt->getZExtValue())),
5957         "shrd_n");
5958   }
5959   case NEON::BI__builtin_neon_vshrd_n_u64: {
5960     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5961     uint64_t ShiftAmt = Amt->getZExtValue();
5962     // Right-shifting an unsigned value by its size yields 0.
5963     if (ShiftAmt == 64)
5964       return ConstantInt::get(Int64Ty, 0);
5965     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5966                               "shrd_n");
5967   }
5968   case NEON::BI__builtin_neon_vsrad_n_s64: {
5969     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5970     Ops[1] = Builder.CreateAShr(
5971         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5972                                                    Amt->getZExtValue())),
5973         "shrd_n");
5974     return Builder.CreateAdd(Ops[0], Ops[1]);
5975   }
5976   case NEON::BI__builtin_neon_vsrad_n_u64: {
5977     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5978     uint64_t ShiftAmt = Amt->getZExtValue();
5979     // Right-shifting an unsigned value by its size yields 0.
5980     // As Op + 0 = Op, return Ops[0] directly.
5981     if (ShiftAmt == 64)
5982       return Ops[0];
5983     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5984                                 "shrd_n");
5985     return Builder.CreateAdd(Ops[0], Ops[1]);
5986   }
5987   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5988   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5989   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5990   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5991     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5992                                           "lane");
5993     SmallVector<Value *, 2> ProductOps;
5994     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5995     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5996     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5997     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5998                           ProductOps, "vqdmlXl");
5999     Constant *CI = ConstantInt::get(SizeTy, 0);
6000     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6001     Ops.pop_back();
6002
6003     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6004                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6005                           ? Intrinsic::aarch64_neon_sqadd
6006                           : Intrinsic::aarch64_neon_sqsub;
6007     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6008   }
6009   case NEON::BI__builtin_neon_vqdmlals_s32:
6010   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6011     SmallVector<Value *, 2> ProductOps;
6012     ProductOps.push_back(Ops[1]);
6013     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6014     Ops[1] =
6015         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6016                      ProductOps, "vqdmlXl");
6017
6018     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6019                                         ? Intrinsic::aarch64_neon_sqadd
6020                                         : Intrinsic::aarch64_neon_sqsub;
6021     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6022   }
6023   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6024   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6025   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6026   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6027     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6028                                           "lane");
6029     SmallVector<Value *, 2> ProductOps;
6030     ProductOps.push_back(Ops[1]);
6031     ProductOps.push_back(Ops[2]);
6032     Ops[1] =
6033         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6034                      ProductOps, "vqdmlXl");
6035     Ops.pop_back();
6036
6037     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6038                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6039                           ? Intrinsic::aarch64_neon_sqadd
6040                           : Intrinsic::aarch64_neon_sqsub;
6041     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6042   }
6043   }
6044
6045   llvm::VectorType *VTy = GetNeonType(this, Type);
6046   llvm::Type *Ty = VTy;
6047   if (!Ty)
6048     return nullptr;
6049
6050   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6051   // defer to common code if it's been added to our special map.
6052   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6053                                    AArch64SIMDIntrinsicsProvenSorted);
6054
6055   if (Builtin)
6056     return EmitCommonNeonBuiltinExpr(
6057         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6058         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6059         /*never use addresses*/ Address::invalid(), Address::invalid());
6060
6061   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6062     return V;
6063
6064   unsigned Int;
6065   switch (BuiltinID) {
6066   default: return nullptr;
6067   case NEON::BI__builtin_neon_vbsl_v:
6068   case NEON::BI__builtin_neon_vbslq_v: {
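         // Bitwise select: (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]), done on the
         // integer form of the vectors so that FP element types work too.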
6069     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6070     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6071     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6072     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6073
6074     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6075     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6076     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6077     return Builder.CreateBitCast(Ops[0], Ty);
6078   }
6079   case NEON::BI__builtin_neon_vfma_lane_v:
6080   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6081     // The ARM builtins (and instructions) have the addend as the first
6082     // operand, but the 'fma' intrinsics have it last. Swap it around here.
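         // e.g. vfma_lane_f32(a, b, v, l) computes a[i] + b[i] * v[l] in each
         // lane: v[l] is splatted below and the call becomes fma(b, splat, a).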
6083     Value *Addend = Ops[0];
6084     Value *Multiplicand = Ops[1];
6085     Value *LaneSource = Ops[2];
6086     Ops[0] = Multiplicand;
6087     Ops[1] = LaneSource;
6088     Ops[2] = Addend;
6089
6090     // Now adjust things to handle the lane access.
6091     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6092       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6093       VTy;
6094     llvm::Constant *cst = cast<Constant>(Ops[3]);
6095     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6096     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6097     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6098
6099     Ops.pop_back();
6100     Int = Intrinsic::fma;
6101     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6102   }
6103   case NEON::BI__builtin_neon_vfma_laneq_v: {
6104     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6105     // v1f64 fma should be mapped to Neon scalar f64 fma
6106     if (VTy && VTy->getElementType() == DoubleTy) {
6107       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6108       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6109       llvm::Type *VTy = GetNeonType(this,
6110         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6111       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6112       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6113       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6114       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6115       return Builder.CreateBitCast(Result, Ty);
6116     }
6117     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6118     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6119     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6120
6121     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6122                                             VTy->getNumElements() * 2);
6123     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6124     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6125                                                cast<ConstantInt>(Ops[3]));
6126     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6127
6128     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6129   }
6130   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6131     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6132     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6133     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6134
6135     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6136     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6137     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6138   }
6139   case NEON::BI__builtin_neon_vfmas_lane_f32:
6140   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6141   case NEON::BI__builtin_neon_vfmad_lane_f64:
6142   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6143     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6144     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6145     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6146     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6147     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6148   }
6149   case NEON::BI__builtin_neon_vmull_v:
6150     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6151     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6152     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6153     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6154   case NEON::BI__builtin_neon_vmax_v:
6155   case NEON::BI__builtin_neon_vmaxq_v:
6156     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6157     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6158     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6159     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6160   case NEON::BI__builtin_neon_vmin_v:
6161   case NEON::BI__builtin_neon_vminq_v:
6162     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6163     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6164     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6165     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6166   case NEON::BI__builtin_neon_vabd_v:
6167   case NEON::BI__builtin_neon_vabdq_v:
6168     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6169     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6170     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6171     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
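       // Pairwise add-accumulate (vpadal): lowered as a pairwise long add
       // (saddlp/uaddlp) of the second operand followed by a plain vector add
       // with the accumulator.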
6172   case NEON::BI__builtin_neon_vpadal_v:
6173   case NEON::BI__builtin_neon_vpadalq_v: {
6174     unsigned ArgElts = VTy->getNumElements();
6175     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6176     unsigned BitWidth = EltTy->getBitWidth();
6177     llvm::Type *ArgTy = llvm::VectorType::get(
6178         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6179     llvm::Type* Tys[2] = { VTy, ArgTy };
6180     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6181     SmallVector<llvm::Value*, 1> TmpOps;
6182     TmpOps.push_back(Ops[1]);
6183     Function *F = CGM.getIntrinsic(Int, Tys);
6184     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6185     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6186     return Builder.CreateAdd(tmp, addend);
6187   }
6188   case NEON::BI__builtin_neon_vpmin_v:
6189   case NEON::BI__builtin_neon_vpminq_v:
6190     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6191     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6192     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6193     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6194   case NEON::BI__builtin_neon_vpmax_v:
6195   case NEON::BI__builtin_neon_vpmaxq_v:
6196     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6197     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6198     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6199     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6200   case NEON::BI__builtin_neon_vminnm_v:
6201   case NEON::BI__builtin_neon_vminnmq_v:
6202     Int = Intrinsic::aarch64_neon_fminnm;
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6204   case NEON::BI__builtin_neon_vmaxnm_v:
6205   case NEON::BI__builtin_neon_vmaxnmq_v:
6206     Int = Intrinsic::aarch64_neon_fmaxnm;
6207     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6208   case NEON::BI__builtin_neon_vrecpss_f32: {
6209     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6210     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6211                         Ops, "vrecps");
6212   }
6213   case NEON::BI__builtin_neon_vrecpsd_f64: {
6214     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6215     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6216                         Ops, "vrecps");
6217   }
6218   case NEON::BI__builtin_neon_vqshrun_n_v:
6219     Int = Intrinsic::aarch64_neon_sqshrun;
6220     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6221   case NEON::BI__builtin_neon_vqrshrun_n_v:
6222     Int = Intrinsic::aarch64_neon_sqrshrun;
6223     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6224   case NEON::BI__builtin_neon_vqshrn_n_v:
6225     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6226     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6227   case NEON::BI__builtin_neon_vrshrn_n_v:
6228     Int = Intrinsic::aarch64_neon_rshrn;
6229     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6230   case NEON::BI__builtin_neon_vqrshrn_n_v:
6231     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6232     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
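       // Floating-point round-to-integral: vrnda -> llvm.round (ties away
       // from zero), vrndi -> llvm.nearbyint, vrndm -> llvm.floor, vrndn ->
       // frintn (ties to even), vrndp -> llvm.ceil, vrndx -> llvm.rint,
       // vrnd -> llvm.trunc (toward zero).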
6233   case NEON::BI__builtin_neon_vrnda_v:
6234   case NEON::BI__builtin_neon_vrndaq_v: {
6235     Int = Intrinsic::round;
6236     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6237   }
6238   case NEON::BI__builtin_neon_vrndi_v:
6239   case NEON::BI__builtin_neon_vrndiq_v: {
6240     Int = Intrinsic::nearbyint;
6241     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6242   }
6243   case NEON::BI__builtin_neon_vrndm_v:
6244   case NEON::BI__builtin_neon_vrndmq_v: {
6245     Int = Intrinsic::floor;
6246     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6247   }
6248   case NEON::BI__builtin_neon_vrndn_v:
6249   case NEON::BI__builtin_neon_vrndnq_v: {
6250     Int = Intrinsic::aarch64_neon_frintn;
6251     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6252   }
6253   case NEON::BI__builtin_neon_vrndp_v:
6254   case NEON::BI__builtin_neon_vrndpq_v: {
6255     Int = Intrinsic::ceil;
6256     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6257   }
6258   case NEON::BI__builtin_neon_vrndx_v:
6259   case NEON::BI__builtin_neon_vrndxq_v: {
6260     Int = Intrinsic::rint;
6261     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6262   }
6263   case NEON::BI__builtin_neon_vrnd_v:
6264   case NEON::BI__builtin_neon_vrndq_v: {
6265     Int = Intrinsic::trunc;
6266     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6267   }
6268   case NEON::BI__builtin_neon_vceqz_v:
6269   case NEON::BI__builtin_neon_vceqzq_v:
6270     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6271                                          ICmpInst::ICMP_EQ, "vceqz");
6272   case NEON::BI__builtin_neon_vcgez_v:
6273   case NEON::BI__builtin_neon_vcgezq_v:
6274     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6275                                          ICmpInst::ICMP_SGE, "vcgez");
6276   case NEON::BI__builtin_neon_vclez_v:
6277   case NEON::BI__builtin_neon_vclezq_v:
6278     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6279                                          ICmpInst::ICMP_SLE, "vclez");
6280   case NEON::BI__builtin_neon_vcgtz_v:
6281   case NEON::BI__builtin_neon_vcgtzq_v:
6282     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6283                                          ICmpInst::ICMP_SGT, "vcgtz");
6284   case NEON::BI__builtin_neon_vcltz_v:
6285   case NEON::BI__builtin_neon_vcltzq_v:
6286     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6287                                          ICmpInst::ICMP_SLT, "vcltz");
6288   case NEON::BI__builtin_neon_vcvt_f64_v:
6289   case NEON::BI__builtin_neon_vcvtq_f64_v:
6290     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6291     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6292     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6293                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6294   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6295     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6296            "unexpected vcvt_f64_f32 builtin");
6297     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6298     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6299
6300     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6301   }
6302   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6303     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6304            "unexpected vcvt_f32_f64 builtin");
6305     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6306     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6307
6308     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6309   }
6310   case NEON::BI__builtin_neon_vcvt_s32_v:
6311   case NEON::BI__builtin_neon_vcvt_u32_v:
6312   case NEON::BI__builtin_neon_vcvt_s64_v:
6313   case NEON::BI__builtin_neon_vcvt_u64_v:
6314   case NEON::BI__builtin_neon_vcvtq_s32_v:
6315   case NEON::BI__builtin_neon_vcvtq_u32_v:
6316   case NEON::BI__builtin_neon_vcvtq_s64_v:
6317   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6318     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6319     if (usgn)
6320       return Builder.CreateFPToUI(Ops[0], Ty);
6321     return Builder.CreateFPToSI(Ops[0], Ty);
6322   }
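       // FP-to-integer conversions with an explicit rounding mode: fcvta*
       // rounds to nearest, ties away from zero; fcvtm* toward minus
       // infinity; fcvtn* to nearest, ties to even; fcvtp* toward plus
       // infinity.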
6323   case NEON::BI__builtin_neon_vcvta_s32_v:
6324   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6325   case NEON::BI__builtin_neon_vcvta_u32_v:
6326   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6327   case NEON::BI__builtin_neon_vcvta_s64_v:
6328   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6329   case NEON::BI__builtin_neon_vcvta_u64_v:
6330   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6331     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6332     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6333     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6334   }
6335   case NEON::BI__builtin_neon_vcvtm_s32_v:
6336   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6337   case NEON::BI__builtin_neon_vcvtm_u32_v:
6338   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6339   case NEON::BI__builtin_neon_vcvtm_s64_v:
6340   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6341   case NEON::BI__builtin_neon_vcvtm_u64_v:
6342   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6343     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6344     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6345     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6346   }
6347   case NEON::BI__builtin_neon_vcvtn_s32_v:
6348   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6349   case NEON::BI__builtin_neon_vcvtn_u32_v:
6350   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6351   case NEON::BI__builtin_neon_vcvtn_s64_v:
6352   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6353   case NEON::BI__builtin_neon_vcvtn_u64_v:
6354   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6355     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6356     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6357     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6358   }
6359   case NEON::BI__builtin_neon_vcvtp_s32_v:
6360   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6361   case NEON::BI__builtin_neon_vcvtp_u32_v:
6362   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6363   case NEON::BI__builtin_neon_vcvtp_s64_v:
6364   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6365   case NEON::BI__builtin_neon_vcvtp_u64_v:
6366   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6367     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6368     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6369     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6370   }
6371   case NEON::BI__builtin_neon_vmulx_v:
6372   case NEON::BI__builtin_neon_vmulxq_v: {
6373     Int = Intrinsic::aarch64_neon_fmulx;
6374     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6375   }
6376   case NEON::BI__builtin_neon_vmul_lane_v:
6377   case NEON::BI__builtin_neon_vmul_laneq_v: {
6378     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6379     bool Quad = false;
6380     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6381       Quad = true;
6382     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6383     llvm::Type *VTy = GetNeonType(this,
6384       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6385     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6386     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6387     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6388     return Builder.CreateBitCast(Result, Ty);
6389   }
6390   case NEON::BI__builtin_neon_vnegd_s64:
6391     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6392   case NEON::BI__builtin_neon_vpmaxnm_v:
6393   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6394     Int = Intrinsic::aarch64_neon_fmaxnmp;
6395     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6396   }
6397   case NEON::BI__builtin_neon_vpminnm_v:
6398   case NEON::BI__builtin_neon_vpminnmq_v: {
6399     Int = Intrinsic::aarch64_neon_fminnmp;
6400     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6401   }
6402   case NEON::BI__builtin_neon_vsqrt_v:
6403   case NEON::BI__builtin_neon_vsqrtq_v: {
6404     Int = Intrinsic::sqrt;
6405     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6406     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6407   }
6408   case NEON::BI__builtin_neon_vrbit_v:
6409   case NEON::BI__builtin_neon_vrbitq_v: {
6410     Int = Intrinsic::aarch64_neon_rbit;
6411     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6412   }
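       // Across-vector reductions (vaddv/vmaxv/vminv): the aarch64.neon.*
       // reduction intrinsics return an i32, which is truncated back to the
       // element width of the source vector.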
6413   case NEON::BI__builtin_neon_vaddv_u8:
6414     // FIXME: These are handled by the AArch64 scalar code.
6415     usgn = true;
6416     // FALLTHROUGH
6417   case NEON::BI__builtin_neon_vaddv_s8: {
6418     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6419     Ty = Int32Ty;
6420     VTy = llvm::VectorType::get(Int8Ty, 8);
6421     llvm::Type *Tys[2] = { Ty, VTy };
6422     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6423     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6424     return Builder.CreateTrunc(Ops[0], Int8Ty);
6425   }
6426   case NEON::BI__builtin_neon_vaddv_u16:
6427     usgn = true;
6428     // FALLTHROUGH
6429   case NEON::BI__builtin_neon_vaddv_s16: {
6430     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6431     Ty = Int32Ty;
6432     VTy = llvm::VectorType::get(Int16Ty, 4);
6433     llvm::Type *Tys[2] = { Ty, VTy };
6434     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6435     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6436     return Builder.CreateTrunc(Ops[0], Int16Ty);
6437   }
6438   case NEON::BI__builtin_neon_vaddvq_u8:
6439     usgn = true;
6440     // FALLTHROUGH
6441   case NEON::BI__builtin_neon_vaddvq_s8: {
6442     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6443     Ty = Int32Ty;
6444     VTy = llvm::VectorType::get(Int8Ty, 16);
6445     llvm::Type *Tys[2] = { Ty, VTy };
6446     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6447     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6448     return Builder.CreateTrunc(Ops[0], Int8Ty);
6449   }
6450   case NEON::BI__builtin_neon_vaddvq_u16:
6451     usgn = true;
6452     // FALLTHROUGH
6453   case NEON::BI__builtin_neon_vaddvq_s16: {
6454     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6455     Ty = Int32Ty;
6456     VTy = llvm::VectorType::get(Int16Ty, 8);
6457     llvm::Type *Tys[2] = { Ty, VTy };
6458     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6459     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6460     return Builder.CreateTrunc(Ops[0], Int16Ty);
6461   }
6462   case NEON::BI__builtin_neon_vmaxv_u8: {
6463     Int = Intrinsic::aarch64_neon_umaxv;
6464     Ty = Int32Ty;
6465     VTy = llvm::VectorType::get(Int8Ty, 8);
6466     llvm::Type *Tys[2] = { Ty, VTy };
6467     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6468     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6469     return Builder.CreateTrunc(Ops[0], Int8Ty);
6470   }
6471   case NEON::BI__builtin_neon_vmaxv_u16: {
6472     Int = Intrinsic::aarch64_neon_umaxv;
6473     Ty = Int32Ty;
6474     VTy = llvm::VectorType::get(Int16Ty, 4);
6475     llvm::Type *Tys[2] = { Ty, VTy };
6476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6477     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6478     return Builder.CreateTrunc(Ops[0], Int16Ty);
6479   }
6480   case NEON::BI__builtin_neon_vmaxvq_u8: {
6481     Int = Intrinsic::aarch64_neon_umaxv;
6482     Ty = Int32Ty;
6483     VTy = llvm::VectorType::get(Int8Ty, 16);
6484     llvm::Type *Tys[2] = { Ty, VTy };
6485     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6486     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6487     return Builder.CreateTrunc(Ops[0], Int8Ty);
6488   }
6489   case NEON::BI__builtin_neon_vmaxvq_u16: {
6490     Int = Intrinsic::aarch64_neon_umaxv;
6491     Ty = Int32Ty;
6492     VTy = llvm::VectorType::get(Int16Ty, 8);
6493     llvm::Type *Tys[2] = { Ty, VTy };
6494     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6495     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6496     return Builder.CreateTrunc(Ops[0], Int16Ty);
6497   }
6498   case NEON::BI__builtin_neon_vmaxv_s8: {
6499     Int = Intrinsic::aarch64_neon_smaxv;
6500     Ty = Int32Ty;
6501     VTy = llvm::VectorType::get(Int8Ty, 8);
6502     llvm::Type *Tys[2] = { Ty, VTy };
6503     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6504     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6505     return Builder.CreateTrunc(Ops[0], Int8Ty);
6506   }
6507   case NEON::BI__builtin_neon_vmaxv_s16: {
6508     Int = Intrinsic::aarch64_neon_smaxv;
6509     Ty = Int32Ty;
6510     VTy = llvm::VectorType::get(Int16Ty, 4);
6511     llvm::Type *Tys[2] = { Ty, VTy };
6512     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6513     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6514     return Builder.CreateTrunc(Ops[0], Int16Ty);
6515   }
6516   case NEON::BI__builtin_neon_vmaxvq_s8: {
6517     Int = Intrinsic::aarch64_neon_smaxv;
6518     Ty = Int32Ty;
6519     VTy = llvm::VectorType::get(Int8Ty, 16);
6520     llvm::Type *Tys[2] = { Ty, VTy };
6521     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6522     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6523     return Builder.CreateTrunc(Ops[0], Int8Ty);
6524   }
6525   case NEON::BI__builtin_neon_vmaxvq_s16: {
6526     Int = Intrinsic::aarch64_neon_smaxv;
6527     Ty = Int32Ty;
6528     VTy = llvm::VectorType::get(Int16Ty, 8);
6529     llvm::Type *Tys[2] = { Ty, VTy };
6530     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6531     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6532     return Builder.CreateTrunc(Ops[0], Int16Ty);
6533   }
6534   case NEON::BI__builtin_neon_vminv_u8: {
6535     Int = Intrinsic::aarch64_neon_uminv;
6536     Ty = Int32Ty;
6537     VTy = llvm::VectorType::get(Int8Ty, 8);
6538     llvm::Type *Tys[2] = { Ty, VTy };
6539     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6540     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6541     return Builder.CreateTrunc(Ops[0], Int8Ty);
6542   }
6543   case NEON::BI__builtin_neon_vminv_u16: {
6544     Int = Intrinsic::aarch64_neon_uminv;
6545     Ty = Int32Ty;
6546     VTy = llvm::VectorType::get(Int16Ty, 4);
6547     llvm::Type *Tys[2] = { Ty, VTy };
6548     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6549     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6550     return Builder.CreateTrunc(Ops[0], Int16Ty);
6551   }
6552   case NEON::BI__builtin_neon_vminvq_u8: {
6553     Int = Intrinsic::aarch64_neon_uminv;
6554     Ty = Int32Ty;
6555     VTy = llvm::VectorType::get(Int8Ty, 16);
6556     llvm::Type *Tys[2] = { Ty, VTy };
6557     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6558     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6559     return Builder.CreateTrunc(Ops[0], Int8Ty);
6560   }
6561   case NEON::BI__builtin_neon_vminvq_u16: {
6562     Int = Intrinsic::aarch64_neon_uminv;
6563     Ty = Int32Ty;
6564     VTy = llvm::VectorType::get(Int16Ty, 8);
6565     llvm::Type *Tys[2] = { Ty, VTy };
6566     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6567     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6568     return Builder.CreateTrunc(Ops[0], Int16Ty);
6569   }
6570   case NEON::BI__builtin_neon_vminv_s8: {
6571     Int = Intrinsic::aarch64_neon_sminv;
6572     Ty = Int32Ty;
6573     VTy = llvm::VectorType::get(Int8Ty, 8);
6574     llvm::Type *Tys[2] = { Ty, VTy };
6575     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6576     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6577     return Builder.CreateTrunc(Ops[0], Int8Ty);
6578   }
6579   case NEON::BI__builtin_neon_vminv_s16: {
6580     Int = Intrinsic::aarch64_neon_sminv;
6581     Ty = Int32Ty;
6582     VTy = llvm::VectorType::get(Int16Ty, 4);
6583     llvm::Type *Tys[2] = { Ty, VTy };
6584     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6585     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6586     return Builder.CreateTrunc(Ops[0], Int16Ty);
6587   }
6588   case NEON::BI__builtin_neon_vminvq_s8: {
6589     Int = Intrinsic::aarch64_neon_sminv;
6590     Ty = Int32Ty;
6591     VTy = llvm::VectorType::get(Int8Ty, 16);
6592     llvm::Type *Tys[2] = { Ty, VTy };
6593     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6594     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6595     return Builder.CreateTrunc(Ops[0], Int8Ty);
6596   }
6597   case NEON::BI__builtin_neon_vminvq_s16: {
6598     Int = Intrinsic::aarch64_neon_sminv;
6599     Ty = Int32Ty;
6600     VTy = llvm::VectorType::get(Int16Ty, 8);
6601     llvm::Type *Tys[2] = { Ty, VTy };
6602     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6603     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6604     return Builder.CreateTrunc(Ops[0], Int16Ty);
6605   }
6606   case NEON::BI__builtin_neon_vmul_n_f64: {
6607     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6608     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6609     return Builder.CreateFMul(Ops[0], RHS);
6610   }
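       // Long across-vector addition (vaddlv): the intrinsic also returns an
       // i32; for 8-bit sources the builtin's result type is 16 bits wide, so
       // it is truncated to i16.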
6611   case NEON::BI__builtin_neon_vaddlv_u8: {
6612     Int = Intrinsic::aarch64_neon_uaddlv;
6613     Ty = Int32Ty;
6614     VTy = llvm::VectorType::get(Int8Ty, 8);
6615     llvm::Type *Tys[2] = { Ty, VTy };
6616     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6617     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6618     return Builder.CreateTrunc(Ops[0], Int16Ty);
6619   }
6620   case NEON::BI__builtin_neon_vaddlv_u16: {
6621     Int = Intrinsic::aarch64_neon_uaddlv;
6622     Ty = Int32Ty;
6623     VTy = llvm::VectorType::get(Int16Ty, 4);
6624     llvm::Type *Tys[2] = { Ty, VTy };
6625     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6626     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6627   }
6628   case NEON::BI__builtin_neon_vaddlvq_u8: {
6629     Int = Intrinsic::aarch64_neon_uaddlv;
6630     Ty = Int32Ty;
6631     VTy = llvm::VectorType::get(Int8Ty, 16);
6632     llvm::Type *Tys[2] = { Ty, VTy };
6633     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6634     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6635     return Builder.CreateTrunc(Ops[0], Int16Ty);
6636   }
6637   case NEON::BI__builtin_neon_vaddlvq_u16: {
6638     Int = Intrinsic::aarch64_neon_uaddlv;
6639     Ty = Int32Ty;
6640     VTy = llvm::VectorType::get(Int16Ty, 8);
6641     llvm::Type *Tys[2] = { Ty, VTy };
6642     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6643     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6644   }
6645   case NEON::BI__builtin_neon_vaddlv_s8: {
6646     Int = Intrinsic::aarch64_neon_saddlv;
6647     Ty = Int32Ty;
6648     VTy = llvm::VectorType::get(Int8Ty, 8);
6649     llvm::Type *Tys[2] = { Ty, VTy };
6650     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6652     return Builder.CreateTrunc(Ops[0], Int16Ty);
6653   }
6654   case NEON::BI__builtin_neon_vaddlv_s16: {
6655     Int = Intrinsic::aarch64_neon_saddlv;
6656     Ty = Int32Ty;
6657     VTy = llvm::VectorType::get(Int16Ty, 4);
6658     llvm::Type *Tys[2] = { Ty, VTy };
6659     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6660     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6661   }
6662   case NEON::BI__builtin_neon_vaddlvq_s8: {
6663     Int = Intrinsic::aarch64_neon_saddlv;
6664     Ty = Int32Ty;
6665     VTy = llvm::VectorType::get(Int8Ty, 16);
6666     llvm::Type *Tys[2] = { Ty, VTy };
6667     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6668     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6669     return Builder.CreateTrunc(Ops[0], Int16Ty);
6670   }
6671   case NEON::BI__builtin_neon_vaddlvq_s16: {
6672     Int = Intrinsic::aarch64_neon_saddlv;
6673     Ty = Int32Ty;
6674     VTy = llvm::VectorType::get(Int16Ty, 8);
6675     llvm::Type *Tys[2] = { Ty, VTy };
6676     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6677     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6678   }
6679   case NEON::BI__builtin_neon_vsri_n_v:
6680   case NEON::BI__builtin_neon_vsriq_n_v: {
6681     Int = Intrinsic::aarch64_neon_vsri;
6682     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6683     return EmitNeonCall(Intrin, Ops, "vsri_n");
6684   }
6685   case NEON::BI__builtin_neon_vsli_n_v:
6686   case NEON::BI__builtin_neon_vsliq_n_v: {
6687     Int = Intrinsic::aarch64_neon_vsli;
6688     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6689     return EmitNeonCall(Intrin, Ops, "vsli_n");
6690   }
6691   case NEON::BI__builtin_neon_vsra_n_v:
6692   case NEON::BI__builtin_neon_vsraq_n_v:
6693     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6694     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6695     return Builder.CreateAdd(Ops[0], Ops[1]);
6696   case NEON::BI__builtin_neon_vrsra_n_v:
6697   case NEON::BI__builtin_neon_vrsraq_n_v: {
6698     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6699     SmallVector<llvm::Value*,2> TmpOps;
6700     TmpOps.push_back(Ops[1]);
6701     TmpOps.push_back(Ops[2]);
6702     Function* F = CGM.getIntrinsic(Int, Ty);
6703     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6704     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6705     return Builder.CreateAdd(Ops[0], tmp);
6706   }
6707     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6708     // of an Align parameter here.
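       // vld1_xN loads N consecutive vectors from contiguous memory: the
       // intrinsic returns a struct of N vectors, which is then stored through
       // the result pointer passed in Ops[0].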
6709   case NEON::BI__builtin_neon_vld1_x2_v:
6710   case NEON::BI__builtin_neon_vld1q_x2_v:
6711   case NEON::BI__builtin_neon_vld1_x3_v:
6712   case NEON::BI__builtin_neon_vld1q_x3_v:
6713   case NEON::BI__builtin_neon_vld1_x4_v:
6714   case NEON::BI__builtin_neon_vld1q_x4_v: {
6715     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6716     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6717     llvm::Type *Tys[2] = { VTy, PTy };
6718     unsigned Int;
6719     switch (BuiltinID) {
6720     case NEON::BI__builtin_neon_vld1_x2_v:
6721     case NEON::BI__builtin_neon_vld1q_x2_v:
6722       Int = Intrinsic::aarch64_neon_ld1x2;
6723       break;
6724     case NEON::BI__builtin_neon_vld1_x3_v:
6725     case NEON::BI__builtin_neon_vld1q_x3_v:
6726       Int = Intrinsic::aarch64_neon_ld1x3;
6727       break;
6728     case NEON::BI__builtin_neon_vld1_x4_v:
6729     case NEON::BI__builtin_neon_vld1q_x4_v:
6730       Int = Intrinsic::aarch64_neon_ld1x4;
6731       break;
6732     }
6733     Function *F = CGM.getIntrinsic(Int, Tys);
6734     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6735     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6736     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6737     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6738   }
6739   case NEON::BI__builtin_neon_vst1_x2_v:
6740   case NEON::BI__builtin_neon_vst1q_x2_v:
6741   case NEON::BI__builtin_neon_vst1_x3_v:
6742   case NEON::BI__builtin_neon_vst1q_x3_v:
6743   case NEON::BI__builtin_neon_vst1_x4_v:
6744   case NEON::BI__builtin_neon_vst1q_x4_v: {
6745     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6746     llvm::Type *Tys[2] = { VTy, PTy };
6747     unsigned Int;
6748     switch (BuiltinID) {
6749     case NEON::BI__builtin_neon_vst1_x2_v:
6750     case NEON::BI__builtin_neon_vst1q_x2_v:
6751       Int = Intrinsic::aarch64_neon_st1x2;
6752       break;
6753     case NEON::BI__builtin_neon_vst1_x3_v:
6754     case NEON::BI__builtin_neon_vst1q_x3_v:
6755       Int = Intrinsic::aarch64_neon_st1x3;
6756       break;
6757     case NEON::BI__builtin_neon_vst1_x4_v:
6758     case NEON::BI__builtin_neon_vst1q_x4_v:
6759       Int = Intrinsic::aarch64_neon_st1x4;
6760       break;
6761     }
6762     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6763     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6764   }
6765   case NEON::BI__builtin_neon_vld1_v:
6766   case NEON::BI__builtin_neon_vld1q_v: {
6767     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6768     auto Alignment = CharUnits::fromQuantity(
6769         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6770     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6771   }
6772   case NEON::BI__builtin_neon_vst1_v:
6773   case NEON::BI__builtin_neon_vst1q_v:
6774     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6775     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6776     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6777   case NEON::BI__builtin_neon_vld1_lane_v:
6778   case NEON::BI__builtin_neon_vld1q_lane_v: {
6779     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6780     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6781     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6782     auto Alignment = CharUnits::fromQuantity(
6783         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6784     Ops[0] =
6785         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6786     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6787   }
6788   case NEON::BI__builtin_neon_vld1_dup_v:
6789   case NEON::BI__builtin_neon_vld1q_dup_v: {
6790     Value *V = UndefValue::get(Ty);
6791     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6792     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6793     auto Alignment = CharUnits::fromQuantity(
6794         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6795     Ops[0] =
6796         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6797     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6798     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6799     return EmitNeonSplat(Ops[0], CI);
6800   }
6801   case NEON::BI__builtin_neon_vst1_lane_v:
6802   case NEON::BI__builtin_neon_vst1q_lane_v:
6803     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6804     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6805     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6806     return Builder.CreateDefaultAlignedStore(Ops[1],
6807                                              Builder.CreateBitCast(Ops[0], Ty));
6808   case NEON::BI__builtin_neon_vld2_v:
6809   case NEON::BI__builtin_neon_vld2q_v: {
6810     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6811     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6812     llvm::Type *Tys[2] = { VTy, PTy };
6813     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6814     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6815     Ops[0] = Builder.CreateBitCast(Ops[0],
6816                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6817     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6818   }
6819   case NEON::BI__builtin_neon_vld3_v:
6820   case NEON::BI__builtin_neon_vld3q_v: {
6821     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6822     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6823     llvm::Type *Tys[2] = { VTy, PTy };
6824     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6825     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6826     Ops[0] = Builder.CreateBitCast(Ops[0],
6827                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6828     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6829   }
6830   case NEON::BI__builtin_neon_vld4_v:
6831   case NEON::BI__builtin_neon_vld4q_v: {
6832     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6833     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6834     llvm::Type *Tys[2] = { VTy, PTy };
6835     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6836     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6837     Ops[0] = Builder.CreateBitCast(Ops[0],
6838                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6839     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6840   }
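       // vldN_dup: the ld2r/ld3r/ld4r intrinsics load one element per
       // structure and replicate it across every lane of each result vector.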
6841   case NEON::BI__builtin_neon_vld2_dup_v:
6842   case NEON::BI__builtin_neon_vld2q_dup_v: {
6843     llvm::Type *PTy =
6844       llvm::PointerType::getUnqual(VTy->getElementType());
6845     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6846     llvm::Type *Tys[2] = { VTy, PTy };
6847     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6848     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6849     Ops[0] = Builder.CreateBitCast(Ops[0],
6850                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6851     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6852   }
6853   case NEON::BI__builtin_neon_vld3_dup_v:
6854   case NEON::BI__builtin_neon_vld3q_dup_v: {
6855     llvm::Type *PTy =
6856       llvm::PointerType::getUnqual(VTy->getElementType());
6857     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6858     llvm::Type *Tys[2] = { VTy, PTy };
6859     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6860     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6861     Ops[0] = Builder.CreateBitCast(Ops[0],
6862                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6863     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6864   }
6865   case NEON::BI__builtin_neon_vld4_dup_v:
6866   case NEON::BI__builtin_neon_vld4q_dup_v: {
6867     llvm::Type *PTy =
6868       llvm::PointerType::getUnqual(VTy->getElementType());
6869     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6870     llvm::Type *Tys[2] = { VTy, PTy };
6871     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6872     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6873     Ops[0] = Builder.CreateBitCast(Ops[0],
6874                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6875     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6876   }
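       // vldN_lane: loads one element per structure into the given lane of
       // the existing vectors; the lane index is zero-extended to i64 and the
       // pointer becomes the last intrinsic operand.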
6877   case NEON::BI__builtin_neon_vld2_lane_v:
6878   case NEON::BI__builtin_neon_vld2q_lane_v: {
6879     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6880     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6881     Ops.push_back(Ops[1]);
6882     Ops.erase(Ops.begin()+1);
6883     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6884     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6885     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6886     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6887     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6888     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6889     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6890   }
6891   case NEON::BI__builtin_neon_vld3_lane_v:
6892   case NEON::BI__builtin_neon_vld3q_lane_v: {
6893     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6894     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6895     Ops.push_back(Ops[1]);
6896     Ops.erase(Ops.begin()+1);
6897     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6898     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6899     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6900     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6901     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6902     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6903     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6904     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6905   }
6906   case NEON::BI__builtin_neon_vld4_lane_v:
6907   case NEON::BI__builtin_neon_vld4q_lane_v: {
6908     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6909     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6910     Ops.push_back(Ops[1]);
6911     Ops.erase(Ops.begin()+1);
6912     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6913     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6914     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6915     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6916     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6917     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6918     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6919     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6920     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6921   }
6922   case NEON::BI__builtin_neon_vst2_v:
6923   case NEON::BI__builtin_neon_vst2q_v: {
6924     Ops.push_back(Ops[0]);
6925     Ops.erase(Ops.begin());
6926     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6927     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6928                         Ops, "");
6929   }
6930   case NEON::BI__builtin_neon_vst2_lane_v:
6931   case NEON::BI__builtin_neon_vst2q_lane_v: {
6932     Ops.push_back(Ops[0]);
6933     Ops.erase(Ops.begin());
6934     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6935     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6936     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6937                         Ops, "");
6938   }
6939   case NEON::BI__builtin_neon_vst3_v:
6940   case NEON::BI__builtin_neon_vst3q_v: {
6941     Ops.push_back(Ops[0]);
6942     Ops.erase(Ops.begin());
6943     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6944     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6945                         Ops, "");
6946   }
6947   case NEON::BI__builtin_neon_vst3_lane_v:
6948   case NEON::BI__builtin_neon_vst3q_lane_v: {
6949     Ops.push_back(Ops[0]);
6950     Ops.erase(Ops.begin());
6951     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6952     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6953     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6954                         Ops, "");
6955   }
6956   case NEON::BI__builtin_neon_vst4_v:
6957   case NEON::BI__builtin_neon_vst4q_v: {
6958     Ops.push_back(Ops[0]);
6959     Ops.erase(Ops.begin());
6960     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6961     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6962                         Ops, "");
6963   }
6964   case NEON::BI__builtin_neon_vst4_lane_v:
6965   case NEON::BI__builtin_neon_vst4q_lane_v: {
6966     Ops.push_back(Ops[0]);
6967     Ops.erase(Ops.begin());
6968     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6969     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6970     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6971                         Ops, "");
6972   }
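       // vtrn/vuzp/vzip return two vectors: each half is built with a
       // shufflevector and stored to consecutive slots of the result pointer
       // in Ops[0].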
6973   case NEON::BI__builtin_neon_vtrn_v:
6974   case NEON::BI__builtin_neon_vtrnq_v: {
6975     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6976     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6977     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6978     Value *SV = nullptr;
6979
6980     for (unsigned vi = 0; vi != 2; ++vi) {
6981       SmallVector<uint32_t, 16> Indices;
6982       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6983         Indices.push_back(i+vi);
6984         Indices.push_back(i+e+vi);
6985       }
6986       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6987       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6988       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6989     }
6990     return SV;
6991   }
6992   case NEON::BI__builtin_neon_vuzp_v:
6993   case NEON::BI__builtin_neon_vuzpq_v: {
6994     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6995     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6996     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6997     Value *SV = nullptr;
6998
6999     for (unsigned vi = 0; vi != 2; ++vi) {
7000       SmallVector<uint32_t, 16> Indices;
7001       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7002         Indices.push_back(2*i+vi);
7003
7004       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7005       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7006       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7007     }
7008     return SV;
7009   }
7010   case NEON::BI__builtin_neon_vzip_v:
7011   case NEON::BI__builtin_neon_vzipq_v: {
7012     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7013     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7014     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7015     Value *SV = nullptr;
7016
7017     for (unsigned vi = 0; vi != 2; ++vi) {
7018       SmallVector<uint32_t, 16> Indices;
7019       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7020         Indices.push_back((i + vi*e) >> 1);
7021         Indices.push_back(((i + vi*e) >> 1)+e);
7022       }
7023       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7024       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7025       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7026     }
7027     return SV;
7028   }
7029   case NEON::BI__builtin_neon_vqtbl1q_v: {
7030     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7031                         Ops, "vtbl1");
7032   }
7033   case NEON::BI__builtin_neon_vqtbl2q_v: {
7034     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7035                         Ops, "vtbl2");
7036   }
7037   case NEON::BI__builtin_neon_vqtbl3q_v: {
7038     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7039                         Ops, "vtbl3");
7040   }
7041   case NEON::BI__builtin_neon_vqtbl4q_v: {
7042     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7043                         Ops, "vtbl4");
7044   }
7045   case NEON::BI__builtin_neon_vqtbx1q_v: {
7046     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7047                         Ops, "vtbx1");
7048   }
7049   case NEON::BI__builtin_neon_vqtbx2q_v: {
7050     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7051                         Ops, "vtbx2");
7052   }
7053   case NEON::BI__builtin_neon_vqtbx3q_v: {
7054     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7055                         Ops, "vtbx3");
7056   }
7057   case NEON::BI__builtin_neon_vqtbx4q_v: {
7058     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7059                         Ops, "vtbx4");
7060   }
7061   case NEON::BI__builtin_neon_vsqadd_v:
7062   case NEON::BI__builtin_neon_vsqaddq_v: {
7063     Int = Intrinsic::aarch64_neon_usqadd;
7064     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7065   }
7066   case NEON::BI__builtin_neon_vuqadd_v:
7067   case NEON::BI__builtin_neon_vuqaddq_v: {
7068     Int = Intrinsic::aarch64_neon_suqadd;
7069     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7070   }
7071   }
7072 }
7073
7074 llvm::Value *CodeGenFunction::
7075 BuildVector(ArrayRef<llvm::Value*> Ops) {
7076   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7077          "Not a power-of-two sized vector!");
7078   bool AllConstants = true;
7079   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7080     AllConstants &= isa<Constant>(Ops[i]);
7081
7082   // If this is a constant vector, create a ConstantVector.
7083   if (AllConstants) {
7084     SmallVector<llvm::Constant*, 16> CstOps;
7085     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7086       CstOps.push_back(cast<Constant>(Ops[i]));
7087     return llvm::ConstantVector::get(CstOps);
7088   }
7089
7090   // Otherwise, insertelement the values to build the vector.
7091   Value *Result =
7092     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7093
7094   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7095     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7096
7097   return Result;
7098 }
7099
7100 // Convert the mask from an integer type to a vector of i1.
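     // e.g. an i8 mask with NumElts == 4 is bitcast to <8 x i1> and then
     // shuffled down to its low 4 elements.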
7101 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7102                               unsigned NumElts) {
7103
7104   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7105                          cast<IntegerType>(Mask->getType())->getBitWidth());
7106   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7107
7108   // If we have fewer than 8 elements, the starting mask was an i8 and
7109   // we need to extract down to the right number of elements.
7110   if (NumElts < 8) {
7111     uint32_t Indices[4];
7112     for (unsigned i = 0; i != NumElts; ++i)
7113       Indices[i] = i;
7114     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7115                                              makeArrayRef(Indices, NumElts),
7116                                              "extract");
7117   }
7118   return MaskVec;
7119 }
7120
7121 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7122                                  SmallVectorImpl<Value *> &Ops,
7123                                  unsigned Align) {
7124   // Cast the pointer to the right type.
7125   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7126                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7127
7128   // If the mask is all ones just emit a regular store.
7129   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7130     if (C->isAllOnesValue())
7131       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7132
7133   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7134                                    Ops[1]->getType()->getVectorNumElements());
7135
7136   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7137 }
7138
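     // Lower a masked-load builtin the same way: an all-ones mask becomes a plain
     // aligned load, anything else becomes an llvm.masked.load with Ops[1] as the
     // pass-through value for the disabled lanes.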
7139 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7140                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7141   // Cast the pointer to the right type.
7142   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7143                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7144
7145   // If the mask is all ones just emit a regular load.
7146   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7147     if (C->isAllOnesValue())
7148       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7149
7150   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7151                                    Ops[1]->getType()->getVectorNumElements());
7152
7153   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7154 }
7155
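     // Broadcast a loaded subvector across a wider destination by shuffling it
     // with a repeating mask. Illustrative sketch: a 128-bit source broadcast
     // into a 256-bit <8 x float> destination uses the mask <0,1,2,3,0,1,2,3>.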
7156 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7157                                         SmallVectorImpl<Value *> &Ops,
7158                                         llvm::Type *DstTy,
7159                                         unsigned SrcSizeInBits,
7160                                         unsigned Align) {
7161   // Load the subvector.
7162   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7163
7164   // Create broadcast mask.
7165   unsigned NumDstElts = DstTy->getVectorNumElements();
7166   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7167
7168   SmallVector<uint32_t, 8> Mask;
7169   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7170     for (unsigned j = 0; j != NumSrcElts; ++j)
7171       Mask.push_back(j);
7172
7173   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7174 }
7175
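     // Emit a mask-controlled blend: an all-ones constant mask short-circuits to
     // Op0, otherwise the mask is converted to <N x i1> and lowered as a select
     // between Op0 (mask bit set) and Op1 (mask bit clear).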
7176 static Value *EmitX86Select(CodeGenFunction &CGF,
7177                             Value *Mask, Value *Op0, Value *Op1) {
7178
7179   // If the mask is all ones just return the first argument.
7180   if (const auto *C = dyn_cast<Constant>(Mask))
7181     if (C->isAllOnesValue())
7182       return Op0;
7183
7184   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7185
7186   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7187 }
7188
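     // Lower the AVX-512 integer compare builtins: the immediate condition code
     // picks the icmp predicate (CC 3 and CC 7 fold straight to all-zeros and
     // all-ones), the result is ANDed with the incoming mask, padded out to at
     // least 8 lanes, and bitcast back to an integer mask.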
7189 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7190                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7191   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7192   Value *Cmp;
7193
7194   if (CC == 3) {
7195     Cmp = Constant::getNullValue(
7196                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7197   } else if (CC == 7) {
7198     Cmp = Constant::getAllOnesValue(
7199                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7200   } else {
7201     ICmpInst::Predicate Pred;
7202     switch (CC) {
7203     default: llvm_unreachable("Unknown condition code");
7204     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7205     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7206     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7207     case 4: Pred = ICmpInst::ICMP_NE;  break;
7208     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7209     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7210     }
7211     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7212   }
7213
7214   const auto *C = dyn_cast<Constant>(Ops.back());
7215   if (!C || !C->isAllOnesValue())
7216     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7217
7218   if (NumElts < 8) {
7219     uint32_t Indices[8];
7220     for (unsigned i = 0; i != NumElts; ++i)
7221       Indices[i] = i;
7222     for (unsigned i = NumElts; i != 8; ++i)
7223       Indices[i] = i % NumElts + NumElts;
7224     Cmp = CGF.Builder.CreateShuffleVector(
7225         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7226   }
7227   return CGF.Builder.CreateBitCast(Cmp,
7228                                    IntegerType::get(CGF.getLLVMContext(),
7229                                                     std::max(NumElts, 8U)));
7230 }
7231
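     // Emit a vector min/max as a compare-and-select pair. Roughly, for a signed
     // max (an illustrative sketch):
     //   %c = icmp sgt <16 x i8> %a, %b
     //   %r = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
     // The masked four-operand forms then blend the result through EmitX86Select.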
7232 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7233                             ArrayRef<Value *> Ops) {
7234   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7235   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7236
7237   if (Ops.size() == 2)
7238     return Res;
7239
7240   assert(Ops.size() == 4);
7241   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7242 }
7243
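     // Sign-extend an integer mask into a full vector (the cvtmask2*/vpmovm2*
     // builtins): each i1 lane becomes all-ones or all-zeros in the destination
     // element type.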
7244 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7245                               llvm::Type *DstTy) {
7246   unsigned NumberOfElements = DstTy->getVectorNumElements();
7247   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7248   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7249 }
7250
7251 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7252                                            const CallExpr *E) {
7253   SmallVector<Value*, 4> Ops;
7254
7255   // Find out if any arguments are required to be integer constant expressions.
7256   unsigned ICEArguments = 0;
7257   ASTContext::GetBuiltinTypeError Error;
7258   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7259   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7260
7261   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7262     // If this is a normal argument, just emit it as a scalar.
7263     if ((ICEArguments & (1 << i)) == 0) {
7264       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7265       continue;
7266     }
7267
7268     // If this is required to be a constant, constant fold it so that we know
7269     // that the generated intrinsic gets a ConstantInt.
7270     llvm::APSInt Result;
7271     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7272     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7273     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7274   }
7275
7276   // These exist so that the builtin that takes an immediate can be bounds
7277   // checked by clang to avoid passing bad immediates to the backend. Since
7278   // AVX has a larger immediate range than SSE we would need separate builtins
7279   // to do the different bounds checking. Rather than create a clang-specific
7280   // SSE-only builtin, this implements eight separate builtins to match the
7281   // gcc implementation.
7282   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7283     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7284     llvm::Function *F = CGM.getIntrinsic(ID);
7285     return Builder.CreateCall(F, Ops);
7286   };
7287
7288   // For the vector forms of FP comparisons, translate the builtins directly to
7289   // IR.
7290   // TODO: The builtins could be removed if the SSE header files used vector
7291   // extension comparisons directly (vector ordered/unordered may need
7292   // additional support via __builtin_isnan()).
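       // Roughly (an illustrative sketch for a v4f32 ordered-less-than compare):
       //   %c = fcmp olt <4 x float> %a, %b
       //   %s = sext <4 x i1> %c to <4 x i32>
       //   %r = bitcast <4 x i32> %s to <4 x float>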
7293   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7294     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7295     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7296     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7297     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7298     return Builder.CreateBitCast(Sext, FPVecTy);
7299   };
7300
7301   switch (BuiltinID) {
7302   default: return nullptr;
7303   case X86::BI__builtin_cpu_supports: {
7304     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7305     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7306
7307     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7308     // based mapping.
7309     // Processor features and mapping to processor feature value.
7310     enum X86Features {
7311       CMOV = 0,
7312       MMX,
7313       POPCNT,
7314       SSE,
7315       SSE2,
7316       SSE3,
7317       SSSE3,
7318       SSE4_1,
7319       SSE4_2,
7320       AVX,
7321       AVX2,
7322       SSE4_A,
7323       FMA4,
7324       XOP,
7325       FMA,
7326       AVX512F,
7327       BMI,
7328       BMI2,
7329       AES,
7330       PCLMUL,
7331       AVX512VL,
7332       AVX512BW,
7333       AVX512DQ,
7334       AVX512CD,
7335       AVX512ER,
7336       AVX512PF,
7337       AVX512VBMI,
7338       AVX512IFMA,
7339       AVX5124VNNIW, // TODO implement this fully
7340       AVX5124FMAPS, // TODO implement this fully
7341       AVX512VPOPCNTDQ,
7342       MAX
7343     };
7344
7345     X86Features Feature =
7346         StringSwitch<X86Features>(FeatureStr)
7347             .Case("cmov", X86Features::CMOV)
7348             .Case("mmx", X86Features::MMX)
7349             .Case("popcnt", X86Features::POPCNT)
7350             .Case("sse", X86Features::SSE)
7351             .Case("sse2", X86Features::SSE2)
7352             .Case("sse3", X86Features::SSE3)
7353             .Case("ssse3", X86Features::SSSE3)
7354             .Case("sse4.1", X86Features::SSE4_1)
7355             .Case("sse4.2", X86Features::SSE4_2)
7356             .Case("avx", X86Features::AVX)
7357             .Case("avx2", X86Features::AVX2)
7358             .Case("sse4a", X86Features::SSE4_A)
7359             .Case("fma4", X86Features::FMA4)
7360             .Case("xop", X86Features::XOP)
7361             .Case("fma", X86Features::FMA)
7362             .Case("avx512f", X86Features::AVX512F)
7363             .Case("bmi", X86Features::BMI)
7364             .Case("bmi2", X86Features::BMI2)
7365             .Case("aes", X86Features::AES)
7366             .Case("pclmul", X86Features::PCLMUL)
7367             .Case("avx512vl", X86Features::AVX512VL)
7368             .Case("avx512bw", X86Features::AVX512BW)
7369             .Case("avx512dq", X86Features::AVX512DQ)
7370             .Case("avx512cd", X86Features::AVX512CD)
7371             .Case("avx512er", X86Features::AVX512ER)
7372             .Case("avx512pf", X86Features::AVX512PF)
7373             .Case("avx512vbmi", X86Features::AVX512VBMI)
7374             .Case("avx512ifma", X86Features::AVX512IFMA)
7375             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7376             .Default(X86Features::MAX);
7377     assert(Feature != X86Features::MAX && "Invalid feature!");
7378
7379     // Matching the struct layout from the compiler-rt/libgcc structure that is
7380     // filled in:
7381     // unsigned int __cpu_vendor;
7382     // unsigned int __cpu_type;
7383     // unsigned int __cpu_subtype;
7384     // unsigned int __cpu_features[1];
7385     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7386                                             llvm::ArrayType::get(Int32Ty, 1));
7387
7388     // Grab the global __cpu_model.
7389     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7390
7391     // Grab the first (0th) element of the __cpu_features array (field index 3)
7392     // of the __cpu_model global, whose layout is STy.
7393     Value *Idxs[] = {
7394       ConstantInt::get(Int32Ty, 0),
7395       ConstantInt::get(Int32Ty, 3),
7396       ConstantInt::get(Int32Ty, 0)
7397     };
7398     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7399     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7400                                                 CharUnits::fromQuantity(4));
7401
7402     // Check the value of the bit corresponding to the feature requested.
7403     Value *Bitset = Builder.CreateAnd(
7404         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7405     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7406   }
7407   case X86::BI_mm_prefetch: {
7408     Value *Address = Ops[0];
7409     Value *RW = ConstantInt::get(Int32Ty, 0);
7410     Value *Locality = Ops[1];
7411     Value *Data = ConstantInt::get(Int32Ty, 1);
7412     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7413     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7414   }
7415   case X86::BI_mm_clflush: {
7416     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7417                               Ops[0]);
7418   }
7419   case X86::BI_mm_lfence: {
7420     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7421   }
7422   case X86::BI_mm_mfence: {
7423     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7424   }
7425   case X86::BI_mm_sfence: {
7426     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7427   }
7428   case X86::BI_mm_pause: {
7429     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7430   }
7431   case X86::BI__rdtsc: {
7432     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7433   }
7434   case X86::BI__builtin_ia32_undef128:
7435   case X86::BI__builtin_ia32_undef256:
7436   case X86::BI__builtin_ia32_undef512:
7437     // The x86 definition of "undef" is not the same as the LLVM definition
7438     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7439     // IR optimizer and backend.
7440     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7441     // value, we should use that here instead of a zero.
7442     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7443   case X86::BI__builtin_ia32_vec_init_v8qi:
7444   case X86::BI__builtin_ia32_vec_init_v4hi:
7445   case X86::BI__builtin_ia32_vec_init_v2si:
7446     return Builder.CreateBitCast(BuildVector(Ops),
7447                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7448   case X86::BI__builtin_ia32_vec_ext_v2si:
7449     return Builder.CreateExtractElement(Ops[0],
7450                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7451   case X86::BI_mm_setcsr:
7452   case X86::BI__builtin_ia32_ldmxcsr: {
7453     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7454     Builder.CreateStore(Ops[0], Tmp);
7455     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7456                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7457   }
7458   case X86::BI_mm_getcsr:
7459   case X86::BI__builtin_ia32_stmxcsr: {
7460     Address Tmp = CreateMemTemp(E->getType());
7461     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7462                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7463     return Builder.CreateLoad(Tmp, "stmxcsr");
7464   }
7465   case X86::BI__builtin_ia32_xsave:
7466   case X86::BI__builtin_ia32_xsave64:
7467   case X86::BI__builtin_ia32_xrstor:
7468   case X86::BI__builtin_ia32_xrstor64:
7469   case X86::BI__builtin_ia32_xsaveopt:
7470   case X86::BI__builtin_ia32_xsaveopt64:
7471   case X86::BI__builtin_ia32_xrstors:
7472   case X86::BI__builtin_ia32_xrstors64:
7473   case X86::BI__builtin_ia32_xsavec:
7474   case X86::BI__builtin_ia32_xsavec64:
7475   case X86::BI__builtin_ia32_xsaves:
7476   case X86::BI__builtin_ia32_xsaves64: {
7477     Intrinsic::ID ID;
7478 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7479     case X86::BI__builtin_ia32_##NAME: \
7480       ID = Intrinsic::x86_##NAME; \
7481       break
7482     switch (BuiltinID) {
7483     default: llvm_unreachable("Unsupported intrinsic!");
7484     INTRINSIC_X86_XSAVE_ID(xsave);
7485     INTRINSIC_X86_XSAVE_ID(xsave64);
7486     INTRINSIC_X86_XSAVE_ID(xrstor);
7487     INTRINSIC_X86_XSAVE_ID(xrstor64);
7488     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7489     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7490     INTRINSIC_X86_XSAVE_ID(xrstors);
7491     INTRINSIC_X86_XSAVE_ID(xrstors64);
7492     INTRINSIC_X86_XSAVE_ID(xsavec);
7493     INTRINSIC_X86_XSAVE_ID(xsavec64);
7494     INTRINSIC_X86_XSAVE_ID(xsaves);
7495     INTRINSIC_X86_XSAVE_ID(xsaves64);
7496     }
7497 #undef INTRINSIC_X86_XSAVE_ID
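         // The XSAVE-family instructions take the feature mask in EDX:EAX, so
         // split the 64-bit mask operand into its high and low 32-bit halves
         // before calling the intrinsic.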
7498     Value *Mhi = Builder.CreateTrunc(
7499       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7500     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7501     Ops[1] = Mhi;
7502     Ops.push_back(Mlo);
7503     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7504   }
7505   case X86::BI__builtin_ia32_storedqudi128_mask:
7506   case X86::BI__builtin_ia32_storedqusi128_mask:
7507   case X86::BI__builtin_ia32_storedquhi128_mask:
7508   case X86::BI__builtin_ia32_storedquqi128_mask:
7509   case X86::BI__builtin_ia32_storeupd128_mask:
7510   case X86::BI__builtin_ia32_storeups128_mask:
7511   case X86::BI__builtin_ia32_storedqudi256_mask:
7512   case X86::BI__builtin_ia32_storedqusi256_mask:
7513   case X86::BI__builtin_ia32_storedquhi256_mask:
7514   case X86::BI__builtin_ia32_storedquqi256_mask:
7515   case X86::BI__builtin_ia32_storeupd256_mask:
7516   case X86::BI__builtin_ia32_storeups256_mask:
7517   case X86::BI__builtin_ia32_storedqudi512_mask:
7518   case X86::BI__builtin_ia32_storedqusi512_mask:
7519   case X86::BI__builtin_ia32_storedquhi512_mask:
7520   case X86::BI__builtin_ia32_storedquqi512_mask:
7521   case X86::BI__builtin_ia32_storeupd512_mask:
7522   case X86::BI__builtin_ia32_storeups512_mask:
7523     return EmitX86MaskedStore(*this, Ops, 1);
7524
7525   case X86::BI__builtin_ia32_storess128_mask:
7526   case X86::BI__builtin_ia32_storesd128_mask: {
7527     return EmitX86MaskedStore(*this, Ops, 16);
7528   }
7529   case X86::BI__builtin_ia32_vpopcntd_512:
7530   case X86::BI__builtin_ia32_vpopcntq_512: {
7531     llvm::Type *ResultType = ConvertType(E->getType());
7532     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7533     return Builder.CreateCall(F, Ops);
7534   }
7535   case X86::BI__builtin_ia32_cvtmask2b128:
7536   case X86::BI__builtin_ia32_cvtmask2b256:
7537   case X86::BI__builtin_ia32_cvtmask2b512:
7538   case X86::BI__builtin_ia32_cvtmask2w128:
7539   case X86::BI__builtin_ia32_cvtmask2w256:
7540   case X86::BI__builtin_ia32_cvtmask2w512:
7541   case X86::BI__builtin_ia32_cvtmask2d128:
7542   case X86::BI__builtin_ia32_cvtmask2d256:
7543   case X86::BI__builtin_ia32_cvtmask2d512:
7544   case X86::BI__builtin_ia32_cvtmask2q128:
7545   case X86::BI__builtin_ia32_cvtmask2q256:
7546   case X86::BI__builtin_ia32_cvtmask2q512:
7547     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7548
7549   case X86::BI__builtin_ia32_movdqa32store128_mask:
7550   case X86::BI__builtin_ia32_movdqa64store128_mask:
7551   case X86::BI__builtin_ia32_storeaps128_mask:
7552   case X86::BI__builtin_ia32_storeapd128_mask:
7553   case X86::BI__builtin_ia32_movdqa32store256_mask:
7554   case X86::BI__builtin_ia32_movdqa64store256_mask:
7555   case X86::BI__builtin_ia32_storeaps256_mask:
7556   case X86::BI__builtin_ia32_storeapd256_mask:
7557   case X86::BI__builtin_ia32_movdqa32store512_mask:
7558   case X86::BI__builtin_ia32_movdqa64store512_mask:
7559   case X86::BI__builtin_ia32_storeaps512_mask:
7560   case X86::BI__builtin_ia32_storeapd512_mask: {
7561     unsigned Align =
7562       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7563     return EmitX86MaskedStore(*this, Ops, Align);
7564   }
7565   case X86::BI__builtin_ia32_loadups128_mask:
7566   case X86::BI__builtin_ia32_loadups256_mask:
7567   case X86::BI__builtin_ia32_loadups512_mask:
7568   case X86::BI__builtin_ia32_loadupd128_mask:
7569   case X86::BI__builtin_ia32_loadupd256_mask:
7570   case X86::BI__builtin_ia32_loadupd512_mask:
7571   case X86::BI__builtin_ia32_loaddquqi128_mask:
7572   case X86::BI__builtin_ia32_loaddquqi256_mask:
7573   case X86::BI__builtin_ia32_loaddquqi512_mask:
7574   case X86::BI__builtin_ia32_loaddquhi128_mask:
7575   case X86::BI__builtin_ia32_loaddquhi256_mask:
7576   case X86::BI__builtin_ia32_loaddquhi512_mask:
7577   case X86::BI__builtin_ia32_loaddqusi128_mask:
7578   case X86::BI__builtin_ia32_loaddqusi256_mask:
7579   case X86::BI__builtin_ia32_loaddqusi512_mask:
7580   case X86::BI__builtin_ia32_loaddqudi128_mask:
7581   case X86::BI__builtin_ia32_loaddqudi256_mask:
7582   case X86::BI__builtin_ia32_loaddqudi512_mask:
7583     return EmitX86MaskedLoad(*this, Ops, 1);
7584
7585   case X86::BI__builtin_ia32_loadss128_mask:
7586   case X86::BI__builtin_ia32_loadsd128_mask:
7587     return EmitX86MaskedLoad(*this, Ops, 16);
7588
7589   case X86::BI__builtin_ia32_loadaps128_mask:
7590   case X86::BI__builtin_ia32_loadaps256_mask:
7591   case X86::BI__builtin_ia32_loadaps512_mask:
7592   case X86::BI__builtin_ia32_loadapd128_mask:
7593   case X86::BI__builtin_ia32_loadapd256_mask:
7594   case X86::BI__builtin_ia32_loadapd512_mask:
7595   case X86::BI__builtin_ia32_movdqa32load128_mask:
7596   case X86::BI__builtin_ia32_movdqa32load256_mask:
7597   case X86::BI__builtin_ia32_movdqa32load512_mask:
7598   case X86::BI__builtin_ia32_movdqa64load128_mask:
7599   case X86::BI__builtin_ia32_movdqa64load256_mask:
7600   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7601     unsigned Align =
7602       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7603     return EmitX86MaskedLoad(*this, Ops, Align);
7604   }
7605
7606   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7607   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7608     llvm::Type *DstTy = ConvertType(E->getType());
7609     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7610   }
7611
7612   case X86::BI__builtin_ia32_storehps:
7613   case X86::BI__builtin_ia32_storelps: {
7614     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7615     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7616
7617     // cast val to v2i64
7618     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7619
7620     // extract (0, 1)
7621     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7622     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7623     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7624
7625     // cast pointer to i64 & store
7626     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7627     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7628   }
7629   case X86::BI__builtin_ia32_palignr128:
7630   case X86::BI__builtin_ia32_palignr256:
7631   case X86::BI__builtin_ia32_palignr512_mask: {
7632     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7633
7634     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7635     assert(NumElts % 16 == 0);
7636
7637     // If palignr is shifting the pair of vectors more than the size of two
7638     // lanes, emit zero.
7639     if (ShiftVal >= 32)
7640       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7641
7642     // If palignr is shifting the pair of input vectors more than one lane,
7643     // but less than two lanes, convert to shifting in zeroes.
7644     if (ShiftVal > 16) {
7645       ShiftVal -= 16;
7646       Ops[1] = Ops[0];
7647       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7648     }
7649
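         // Illustrative sketch: for the 128-bit form with ShiftVal == 4 the mask
         // below is <4,5,...,19>, i.e. the concatenation of the two sources
         // shifted right by four bytes.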
7650     uint32_t Indices[64];
7651     // 256-bit and wider palignr operate on 128-bit lanes, so handle each lane.
7652     for (unsigned l = 0; l != NumElts; l += 16) {
7653       for (unsigned i = 0; i != 16; ++i) {
7654         unsigned Idx = ShiftVal + i;
7655         if (Idx >= 16)
7656           Idx += NumElts - 16; // End of lane, switch operand.
7657         Indices[l + i] = Idx + l;
7658       }
7659     }
7660
7661     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7662                                                makeArrayRef(Indices, NumElts),
7663                                                "palignr");
7664
7665     // If this isn't a masked builtin, just return the align operation.
7666     if (Ops.size() == 3)
7667       return Align;
7668
7669     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7670   }
7671
7672   case X86::BI__builtin_ia32_movnti:
7673   case X86::BI__builtin_ia32_movnti64:
7674   case X86::BI__builtin_ia32_movntsd:
7675   case X86::BI__builtin_ia32_movntss: {
7676     llvm::MDNode *Node = llvm::MDNode::get(
7677         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7678
7679     Value *Ptr = Ops[0];
7680     Value *Src = Ops[1];
7681
7682     // Extract the 0'th element of the source vector.
7683     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7684         BuiltinID == X86::BI__builtin_ia32_movntss)
7685       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7686
7687     // Convert the type of the pointer to a pointer to the stored type.
7688     Value *BC = Builder.CreateBitCast(
7689         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7690
7691     // Unaligned nontemporal store of the scalar value.
7692     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7693     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7694     SI->setAlignment(1);
7695     return SI;
7696   }
7697
7698   case X86::BI__builtin_ia32_selectb_128:
7699   case X86::BI__builtin_ia32_selectb_256:
7700   case X86::BI__builtin_ia32_selectb_512:
7701   case X86::BI__builtin_ia32_selectw_128:
7702   case X86::BI__builtin_ia32_selectw_256:
7703   case X86::BI__builtin_ia32_selectw_512:
7704   case X86::BI__builtin_ia32_selectd_128:
7705   case X86::BI__builtin_ia32_selectd_256:
7706   case X86::BI__builtin_ia32_selectd_512:
7707   case X86::BI__builtin_ia32_selectq_128:
7708   case X86::BI__builtin_ia32_selectq_256:
7709   case X86::BI__builtin_ia32_selectq_512:
7710   case X86::BI__builtin_ia32_selectps_128:
7711   case X86::BI__builtin_ia32_selectps_256:
7712   case X86::BI__builtin_ia32_selectps_512:
7713   case X86::BI__builtin_ia32_selectpd_128:
7714   case X86::BI__builtin_ia32_selectpd_256:
7715   case X86::BI__builtin_ia32_selectpd_512:
7716     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7717   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7718   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7719   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7720   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7721   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7722   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7723   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7724   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7725   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7726   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7727   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7728   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7729     return EmitX86MaskedCompare(*this, 0, false, Ops);
7730   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7731   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7732   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7733   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7734   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7735   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7736   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7737   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7738   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7739   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7740   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7741   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7742     return EmitX86MaskedCompare(*this, 6, true, Ops);
7743   case X86::BI__builtin_ia32_cmpb128_mask:
7744   case X86::BI__builtin_ia32_cmpb256_mask:
7745   case X86::BI__builtin_ia32_cmpb512_mask:
7746   case X86::BI__builtin_ia32_cmpw128_mask:
7747   case X86::BI__builtin_ia32_cmpw256_mask:
7748   case X86::BI__builtin_ia32_cmpw512_mask:
7749   case X86::BI__builtin_ia32_cmpd128_mask:
7750   case X86::BI__builtin_ia32_cmpd256_mask:
7751   case X86::BI__builtin_ia32_cmpd512_mask:
7752   case X86::BI__builtin_ia32_cmpq128_mask:
7753   case X86::BI__builtin_ia32_cmpq256_mask:
7754   case X86::BI__builtin_ia32_cmpq512_mask: {
7755     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7756     return EmitX86MaskedCompare(*this, CC, true, Ops);
7757   }
7758   case X86::BI__builtin_ia32_ucmpb128_mask:
7759   case X86::BI__builtin_ia32_ucmpb256_mask:
7760   case X86::BI__builtin_ia32_ucmpb512_mask:
7761   case X86::BI__builtin_ia32_ucmpw128_mask:
7762   case X86::BI__builtin_ia32_ucmpw256_mask:
7763   case X86::BI__builtin_ia32_ucmpw512_mask:
7764   case X86::BI__builtin_ia32_ucmpd128_mask:
7765   case X86::BI__builtin_ia32_ucmpd256_mask:
7766   case X86::BI__builtin_ia32_ucmpd512_mask:
7767   case X86::BI__builtin_ia32_ucmpq128_mask:
7768   case X86::BI__builtin_ia32_ucmpq256_mask:
7769   case X86::BI__builtin_ia32_ucmpq512_mask: {
7770     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7771     return EmitX86MaskedCompare(*this, CC, false, Ops);
7772   }
7773
7774   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7775   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7776   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7777   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7778   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7779   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7780     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7781     return EmitX86Select(*this, Ops[2],
7782                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7783                          Ops[1]);
7784   }
7785
7786   case X86::BI__builtin_ia32_pmaxsb128:
7787   case X86::BI__builtin_ia32_pmaxsw128:
7788   case X86::BI__builtin_ia32_pmaxsd128:
7789   case X86::BI__builtin_ia32_pmaxsq128_mask:
7790   case X86::BI__builtin_ia32_pmaxsb256:
7791   case X86::BI__builtin_ia32_pmaxsw256:
7792   case X86::BI__builtin_ia32_pmaxsd256:
7793   case X86::BI__builtin_ia32_pmaxsq256_mask:
7794   case X86::BI__builtin_ia32_pmaxsb512_mask:
7795   case X86::BI__builtin_ia32_pmaxsw512_mask:
7796   case X86::BI__builtin_ia32_pmaxsd512_mask:
7797   case X86::BI__builtin_ia32_pmaxsq512_mask:
7798     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7799   case X86::BI__builtin_ia32_pmaxub128:
7800   case X86::BI__builtin_ia32_pmaxuw128:
7801   case X86::BI__builtin_ia32_pmaxud128:
7802   case X86::BI__builtin_ia32_pmaxuq128_mask:
7803   case X86::BI__builtin_ia32_pmaxub256:
7804   case X86::BI__builtin_ia32_pmaxuw256:
7805   case X86::BI__builtin_ia32_pmaxud256:
7806   case X86::BI__builtin_ia32_pmaxuq256_mask:
7807   case X86::BI__builtin_ia32_pmaxub512_mask:
7808   case X86::BI__builtin_ia32_pmaxuw512_mask:
7809   case X86::BI__builtin_ia32_pmaxud512_mask:
7810   case X86::BI__builtin_ia32_pmaxuq512_mask:
7811     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7812   case X86::BI__builtin_ia32_pminsb128:
7813   case X86::BI__builtin_ia32_pminsw128:
7814   case X86::BI__builtin_ia32_pminsd128:
7815   case X86::BI__builtin_ia32_pminsq128_mask:
7816   case X86::BI__builtin_ia32_pminsb256:
7817   case X86::BI__builtin_ia32_pminsw256:
7818   case X86::BI__builtin_ia32_pminsd256:
7819   case X86::BI__builtin_ia32_pminsq256_mask:
7820   case X86::BI__builtin_ia32_pminsb512_mask:
7821   case X86::BI__builtin_ia32_pminsw512_mask:
7822   case X86::BI__builtin_ia32_pminsd512_mask:
7823   case X86::BI__builtin_ia32_pminsq512_mask:
7824     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7825   case X86::BI__builtin_ia32_pminub128:
7826   case X86::BI__builtin_ia32_pminuw128:
7827   case X86::BI__builtin_ia32_pminud128:
7828   case X86::BI__builtin_ia32_pminuq128_mask:
7829   case X86::BI__builtin_ia32_pminub256:
7830   case X86::BI__builtin_ia32_pminuw256:
7831   case X86::BI__builtin_ia32_pminud256:
7832   case X86::BI__builtin_ia32_pminuq256_mask:
7833   case X86::BI__builtin_ia32_pminub512_mask:
7834   case X86::BI__builtin_ia32_pminuw512_mask:
7835   case X86::BI__builtin_ia32_pminud512_mask:
7836   case X86::BI__builtin_ia32_pminuq512_mask:
7837     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7838
7839   // 3DNow!
7840   case X86::BI__builtin_ia32_pswapdsf:
7841   case X86::BI__builtin_ia32_pswapdsi: {
7842     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7843     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7844     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7845     return Builder.CreateCall(F, Ops, "pswapd");
7846   }
7847   case X86::BI__builtin_ia32_rdrand16_step:
7848   case X86::BI__builtin_ia32_rdrand32_step:
7849   case X86::BI__builtin_ia32_rdrand64_step:
7850   case X86::BI__builtin_ia32_rdseed16_step:
7851   case X86::BI__builtin_ia32_rdseed32_step:
7852   case X86::BI__builtin_ia32_rdseed64_step: {
7853     Intrinsic::ID ID;
7854     switch (BuiltinID) {
7855     default: llvm_unreachable("Unsupported intrinsic!");
7856     case X86::BI__builtin_ia32_rdrand16_step:
7857       ID = Intrinsic::x86_rdrand_16;
7858       break;
7859     case X86::BI__builtin_ia32_rdrand32_step:
7860       ID = Intrinsic::x86_rdrand_32;
7861       break;
7862     case X86::BI__builtin_ia32_rdrand64_step:
7863       ID = Intrinsic::x86_rdrand_64;
7864       break;
7865     case X86::BI__builtin_ia32_rdseed16_step:
7866       ID = Intrinsic::x86_rdseed_16;
7867       break;
7868     case X86::BI__builtin_ia32_rdseed32_step:
7869       ID = Intrinsic::x86_rdseed_32;
7870       break;
7871     case X86::BI__builtin_ia32_rdseed64_step:
7872       ID = Intrinsic::x86_rdseed_64;
7873       break;
7874     }
7875
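         // The rdrand/rdseed intrinsics return a {value, success} pair: store the
         // random value through the pointer operand and return the success flag.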
7876     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7877     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7878                                       Ops[0]);
7879     return Builder.CreateExtractValue(Call, 1);
7880   }
7881
7882   // SSE packed comparison intrinsics
7883   case X86::BI__builtin_ia32_cmpeqps:
7884   case X86::BI__builtin_ia32_cmpeqpd:
7885     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7886   case X86::BI__builtin_ia32_cmpltps:
7887   case X86::BI__builtin_ia32_cmpltpd:
7888     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7889   case X86::BI__builtin_ia32_cmpleps:
7890   case X86::BI__builtin_ia32_cmplepd:
7891     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7892   case X86::BI__builtin_ia32_cmpunordps:
7893   case X86::BI__builtin_ia32_cmpunordpd:
7894     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7895   case X86::BI__builtin_ia32_cmpneqps:
7896   case X86::BI__builtin_ia32_cmpneqpd:
7897     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7898   case X86::BI__builtin_ia32_cmpnltps:
7899   case X86::BI__builtin_ia32_cmpnltpd:
7900     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7901   case X86::BI__builtin_ia32_cmpnleps:
7902   case X86::BI__builtin_ia32_cmpnlepd:
7903     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7904   case X86::BI__builtin_ia32_cmpordps:
7905   case X86::BI__builtin_ia32_cmpordpd:
7906     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7907   case X86::BI__builtin_ia32_cmpps:
7908   case X86::BI__builtin_ia32_cmpps256:
7909   case X86::BI__builtin_ia32_cmppd:
7910   case X86::BI__builtin_ia32_cmppd256: {
7911     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7912     // If this is one of the SSE immediates, we can use native IR.
7913     if (CC < 8) {
7914       FCmpInst::Predicate Pred;
7915       switch (CC) {
7916       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7917       case 1: Pred = FCmpInst::FCMP_OLT; break;
7918       case 2: Pred = FCmpInst::FCMP_OLE; break;
7919       case 3: Pred = FCmpInst::FCMP_UNO; break;
7920       case 4: Pred = FCmpInst::FCMP_UNE; break;
7921       case 5: Pred = FCmpInst::FCMP_UGE; break;
7922       case 6: Pred = FCmpInst::FCMP_UGT; break;
7923       case 7: Pred = FCmpInst::FCMP_ORD; break;
7924       }
7925       return getVectorFCmpIR(Pred);
7926     }
7927
7928     // We can't handle 8-31 immediates with native IR, so use the intrinsic,
7929     // except for the predicates that always produce a constant result.
7930     Intrinsic::ID ID;
7931     switch (BuiltinID) {
7932     default: llvm_unreachable("Unsupported intrinsic!");
7933     case X86::BI__builtin_ia32_cmpps:
7934       ID = Intrinsic::x86_sse_cmp_ps;
7935       break;
7936     case X86::BI__builtin_ia32_cmpps256:
7937       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
7938       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
7939       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7940          Value *Constant = (CC == 0xf || CC == 0x1f) ?
7941                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
7942                 llvm::Constant::getNullValue(Builder.getInt32Ty());
7943          Value *Vec = Builder.CreateVectorSplat(
7944                         Ops[0]->getType()->getVectorNumElements(), Constant);
7945          return Builder.CreateBitCast(Vec, Ops[0]->getType());
7946       }
7947       ID = Intrinsic::x86_avx_cmp_ps_256;
7948       break;
7949     case X86::BI__builtin_ia32_cmppd:
7950       ID = Intrinsic::x86_sse2_cmp_pd;
7951       break;
7952     case X86::BI__builtin_ia32_cmppd256:
7953       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
7954       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
7955       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7956          Value *Constant = (CC == 0xf || CC == 0x1f) ?
7957                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
7958                 llvm::Constant::getNullValue(Builder.getInt64Ty());
7959          Value *Vec = Builder.CreateVectorSplat(
7960                         Ops[0]->getType()->getVectorNumElements(), Constant);
7961          return Builder.CreateBitCast(Vec, Ops[0]->getType());
7962       }
7963       ID = Intrinsic::x86_avx_cmp_pd_256;
7964       break;
7965     }
7966
7967     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7968   }
7969
7970   // SSE scalar comparison intrinsics
7971   case X86::BI__builtin_ia32_cmpeqss:
7972     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7973   case X86::BI__builtin_ia32_cmpltss:
7974     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7975   case X86::BI__builtin_ia32_cmpless:
7976     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7977   case X86::BI__builtin_ia32_cmpunordss:
7978     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7979   case X86::BI__builtin_ia32_cmpneqss:
7980     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7981   case X86::BI__builtin_ia32_cmpnltss:
7982     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7983   case X86::BI__builtin_ia32_cmpnless:
7984     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7985   case X86::BI__builtin_ia32_cmpordss:
7986     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7987   case X86::BI__builtin_ia32_cmpeqsd:
7988     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7989   case X86::BI__builtin_ia32_cmpltsd:
7990     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7991   case X86::BI__builtin_ia32_cmplesd:
7992     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7993   case X86::BI__builtin_ia32_cmpunordsd:
7994     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7995   case X86::BI__builtin_ia32_cmpneqsd:
7996     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7997   case X86::BI__builtin_ia32_cmpnltsd:
7998     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7999   case X86::BI__builtin_ia32_cmpnlesd:
8000     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8001   case X86::BI__builtin_ia32_cmpordsd:
8002     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8003
8004   case X86::BI__emul:
8005   case X86::BI__emulu: {
8006     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8007     bool isSigned = (BuiltinID == X86::BI__emul);
8008     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8009     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8010     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8011   }
8012   case X86::BI__mulh:
8013   case X86::BI__umulh:
8014   case X86::BI_mul128:
8015   case X86::BI_umul128: {
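         // Widen both operands to 128 bits, multiply, and shift to recover the
         // high 64 bits. __mulh/__umulh return the high half directly; _mul128
         // and _umul128 store the high half through the third (out-pointer)
         // argument and return the low half.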
8016     llvm::Type *ResType = ConvertType(E->getType());
8017     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8018
8019     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8020     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8021     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8022
8023     Value *MulResult, *HigherBits;
8024     if (IsSigned) {
8025       MulResult = Builder.CreateNSWMul(LHS, RHS);
8026       HigherBits = Builder.CreateAShr(MulResult, 64);
8027     } else {
8028       MulResult = Builder.CreateNUWMul(LHS, RHS);
8029       HigherBits = Builder.CreateLShr(MulResult, 64);
8030     }
8031     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8032
8033     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8034       return HigherBits;
8035
8036     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8037     Builder.CreateStore(HigherBits, HighBitsAddress);
8038     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8039   }
8040
8041   case X86::BI__faststorefence: {
8042     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8043                                llvm::SyncScope::System);
8044   }
8045   case X86::BI_ReadWriteBarrier:
8046   case X86::BI_ReadBarrier:
8047   case X86::BI_WriteBarrier: {
8048     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8049                                llvm::SyncScope::SingleThread);
8050   }
8051   case X86::BI_BitScanForward:
8052   case X86::BI_BitScanForward64:
8053     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8054   case X86::BI_BitScanReverse:
8055   case X86::BI_BitScanReverse64:
8056     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8057
8058   case X86::BI_InterlockedAnd64:
8059     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8060   case X86::BI_InterlockedExchange64:
8061     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8062   case X86::BI_InterlockedExchangeAdd64:
8063     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8064   case X86::BI_InterlockedExchangeSub64:
8065     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8066   case X86::BI_InterlockedOr64:
8067     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8068   case X86::BI_InterlockedXor64:
8069     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8070   case X86::BI_InterlockedDecrement64:
8071     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8072   case X86::BI_InterlockedIncrement64:
8073     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8074
8075   case X86::BI_AddressOfReturnAddress: {
8076     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8077     return Builder.CreateCall(F);
8078   }
8079   case X86::BI__stosb: {
8080     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8081     // instruction, but it will create a memset that won't be optimized away.
8082     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8083   }
8084   case X86::BI__ud2:
8085     // llvm.trap makes a ud2a instruction on x86.
8086     return EmitTrapCall(Intrinsic::trap);
8087   case X86::BI__int2c: {
8088     // This syscall signals a driver assertion failure in x86 NT kernels.
8089     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8090     llvm::InlineAsm *IA =
8091         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8092     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8093         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8094         llvm::Attribute::NoReturn);
8095     CallSite CS = Builder.CreateCall(IA);
8096     CS.setAttributes(NoReturnAttr);
8097     return CS.getInstruction();
8098   }
8099   case X86::BI__readfsbyte:
8100   case X86::BI__readfsword:
8101   case X86::BI__readfsdword:
8102   case X86::BI__readfsqword: {
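         // FS-relative reads: on x86, LLVM address space 257 addresses the FS
         // segment (256 is GS), and the load is volatile so it is not merged or
         // removed.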
8103     llvm::Type *IntTy = ConvertType(E->getType());
8104     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8105                                         llvm::PointerType::get(IntTy, 257));
8106     LoadInst *Load = Builder.CreateAlignedLoad(
8107         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8108     Load->setVolatile(true);
8109     return Load;
8110   }
8111   case X86::BI__readgsbyte:
8112   case X86::BI__readgsword:
8113   case X86::BI__readgsdword:
8114   case X86::BI__readgsqword: {
8115     llvm::Type *IntTy = ConvertType(E->getType());
8116     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8117                                         llvm::PointerType::get(IntTy, 256));
8118     LoadInst *Load = Builder.CreateAlignedLoad(
8119         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8120     Load->setVolatile(true);
8121     return Load;
8122   }
8123   }
8124 }
8125
8126
8127 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8128                                            const CallExpr *E) {
8129   SmallVector<Value*, 4> Ops;
8130
8131   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8132     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8133
8134   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8135
8136   switch (BuiltinID) {
8137   default: return nullptr;
8138
8139   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8140   // call __builtin_readcyclecounter.
8141   case PPC::BI__builtin_ppc_get_timebase:
8142     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8143
8144   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
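       // For everything except lxvl/lxvll the builtin takes (offset, base), so
       // form the address as a byte GEP off the base pointer and drop the now
       // redundant operand before calling the target intrinsic.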
8145   case PPC::BI__builtin_altivec_lvx:
8146   case PPC::BI__builtin_altivec_lvxl:
8147   case PPC::BI__builtin_altivec_lvebx:
8148   case PPC::BI__builtin_altivec_lvehx:
8149   case PPC::BI__builtin_altivec_lvewx:
8150   case PPC::BI__builtin_altivec_lvsl:
8151   case PPC::BI__builtin_altivec_lvsr:
8152   case PPC::BI__builtin_vsx_lxvd2x:
8153   case PPC::BI__builtin_vsx_lxvw4x:
8154   case PPC::BI__builtin_vsx_lxvd2x_be:
8155   case PPC::BI__builtin_vsx_lxvw4x_be:
8156   case PPC::BI__builtin_vsx_lxvl:
8157   case PPC::BI__builtin_vsx_lxvll:
8158   {
8159     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8160        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8161       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8162     }else {
8163       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8164       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8165       Ops.pop_back();
8166     }
8167
8168     switch (BuiltinID) {
8169     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8170     case PPC::BI__builtin_altivec_lvx:
8171       ID = Intrinsic::ppc_altivec_lvx;
8172       break;
8173     case PPC::BI__builtin_altivec_lvxl:
8174       ID = Intrinsic::ppc_altivec_lvxl;
8175       break;
8176     case PPC::BI__builtin_altivec_lvebx:
8177       ID = Intrinsic::ppc_altivec_lvebx;
8178       break;
8179     case PPC::BI__builtin_altivec_lvehx:
8180       ID = Intrinsic::ppc_altivec_lvehx;
8181       break;
8182     case PPC::BI__builtin_altivec_lvewx:
8183       ID = Intrinsic::ppc_altivec_lvewx;
8184       break;
8185     case PPC::BI__builtin_altivec_lvsl:
8186       ID = Intrinsic::ppc_altivec_lvsl;
8187       break;
8188     case PPC::BI__builtin_altivec_lvsr:
8189       ID = Intrinsic::ppc_altivec_lvsr;
8190       break;
8191     case PPC::BI__builtin_vsx_lxvd2x:
8192       ID = Intrinsic::ppc_vsx_lxvd2x;
8193       break;
8194     case PPC::BI__builtin_vsx_lxvw4x:
8195       ID = Intrinsic::ppc_vsx_lxvw4x;
8196       break;
8197     case PPC::BI__builtin_vsx_lxvd2x_be:
8198       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8199       break;
8200     case PPC::BI__builtin_vsx_lxvw4x_be:
8201       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8202       break;
8203     case PPC::BI__builtin_vsx_lxvl:
8204       ID = Intrinsic::ppc_vsx_lxvl;
8205       break;
8206     case PPC::BI__builtin_vsx_lxvll:
8207       ID = Intrinsic::ppc_vsx_lxvll;
8208       break;
8209     }
8210     llvm::Function *F = CGM.getIntrinsic(ID);
8211     return Builder.CreateCall(F, Ops, "");
8212   }
8213
8214   // vec_st, vec_xst_be
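       // Same addressing scheme as the loads above: everything except
       // stxvl/stxvll takes (value, offset, base), so compute a byte GEP for the
       // address and drop the trailing base operand.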
8215   case PPC::BI__builtin_altivec_stvx:
8216   case PPC::BI__builtin_altivec_stvxl:
8217   case PPC::BI__builtin_altivec_stvebx:
8218   case PPC::BI__builtin_altivec_stvehx:
8219   case PPC::BI__builtin_altivec_stvewx:
8220   case PPC::BI__builtin_vsx_stxvd2x:
8221   case PPC::BI__builtin_vsx_stxvw4x:
8222   case PPC::BI__builtin_vsx_stxvd2x_be:
8223   case PPC::BI__builtin_vsx_stxvw4x_be:
8224   case PPC::BI__builtin_vsx_stxvl:
8225   case PPC::BI__builtin_vsx_stxvll:
8226   {
8227     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8228       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8229       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8230     }else {
8231       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8232       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8233       Ops.pop_back();
8234     }
8235
8236     switch (BuiltinID) {
8237     default: llvm_unreachable("Unsupported st intrinsic!");
8238     case PPC::BI__builtin_altivec_stvx:
8239       ID = Intrinsic::ppc_altivec_stvx;
8240       break;
8241     case PPC::BI__builtin_altivec_stvxl:
8242       ID = Intrinsic::ppc_altivec_stvxl;
8243       break;
8244     case PPC::BI__builtin_altivec_stvebx:
8245       ID = Intrinsic::ppc_altivec_stvebx;
8246       break;
8247     case PPC::BI__builtin_altivec_stvehx:
8248       ID = Intrinsic::ppc_altivec_stvehx;
8249       break;
8250     case PPC::BI__builtin_altivec_stvewx:
8251       ID = Intrinsic::ppc_altivec_stvewx;
8252       break;
8253     case PPC::BI__builtin_vsx_stxvd2x:
8254       ID = Intrinsic::ppc_vsx_stxvd2x;
8255       break;
8256     case PPC::BI__builtin_vsx_stxvw4x:
8257       ID = Intrinsic::ppc_vsx_stxvw4x;
8258       break;
8259     case PPC::BI__builtin_vsx_stxvd2x_be:
8260       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8261       break;
8262     case PPC::BI__builtin_vsx_stxvw4x_be:
8263       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8264       break;
8265     case PPC::BI__builtin_vsx_stxvl:
8266       ID = Intrinsic::ppc_vsx_stxvl;
8267       break;
8268     case PPC::BI__builtin_vsx_stxvll:
8269       ID = Intrinsic::ppc_vsx_stxvll;
8270       break;
8271     }
8272     llvm::Function *F = CGM.getIntrinsic(ID);
8273     return Builder.CreateCall(F, Ops, "");
8274   }
8275   // Square root
8276   case PPC::BI__builtin_vsx_xvsqrtsp:
8277   case PPC::BI__builtin_vsx_xvsqrtdp: {
8278     llvm::Type *ResultType = ConvertType(E->getType());
8279     Value *X = EmitScalarExpr(E->getArg(0));
8280     ID = Intrinsic::sqrt;
8281     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8282     return Builder.CreateCall(F, X);
8283   }
8284   // Count leading zeros
8285   case PPC::BI__builtin_altivec_vclzb:
8286   case PPC::BI__builtin_altivec_vclzh:
8287   case PPC::BI__builtin_altivec_vclzw:
8288   case PPC::BI__builtin_altivec_vclzd: {
8289     llvm::Type *ResultType = ConvertType(E->getType());
8290     Value *X = EmitScalarExpr(E->getArg(0));
8291     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8292     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8293     return Builder.CreateCall(F, {X, Undef});
8294   }
8295   case PPC::BI__builtin_altivec_vctzb:
8296   case PPC::BI__builtin_altivec_vctzh:
8297   case PPC::BI__builtin_altivec_vctzw:
8298   case PPC::BI__builtin_altivec_vctzd: {
8299     llvm::Type *ResultType = ConvertType(E->getType());
8300     Value *X = EmitScalarExpr(E->getArg(0));
8301     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8302     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8303     return Builder.CreateCall(F, {X, Undef});
8304   }
8305   case PPC::BI__builtin_altivec_vpopcntb:
8306   case PPC::BI__builtin_altivec_vpopcnth:
8307   case PPC::BI__builtin_altivec_vpopcntw:
8308   case PPC::BI__builtin_altivec_vpopcntd: {
8309     llvm::Type *ResultType = ConvertType(E->getType());
8310     Value *X = EmitScalarExpr(E->getArg(0));
8311     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8312     return Builder.CreateCall(F, X);
8313   }
8314   // Copy sign
8315   case PPC::BI__builtin_vsx_xvcpsgnsp:
8316   case PPC::BI__builtin_vsx_xvcpsgndp: {
8317     llvm::Type *ResultType = ConvertType(E->getType());
8318     Value *X = EmitScalarExpr(E->getArg(0));
8319     Value *Y = EmitScalarExpr(E->getArg(1));
8320     ID = Intrinsic::copysign;
8321     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8322     return Builder.CreateCall(F, {X, Y});
8323   }
8324   // Rounding/truncation
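       // Each of these maps onto a generic rounding intrinsic: *im -> llvm.floor,
       // *ip -> llvm.ceil, *iz -> llvm.trunc, *ic -> llvm.nearbyint, and the
       // plain *i forms -> llvm.round.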
8325   case PPC::BI__builtin_vsx_xvrspip:
8326   case PPC::BI__builtin_vsx_xvrdpip:
8327   case PPC::BI__builtin_vsx_xvrdpim:
8328   case PPC::BI__builtin_vsx_xvrspim:
8329   case PPC::BI__builtin_vsx_xvrdpi:
8330   case PPC::BI__builtin_vsx_xvrspi:
8331   case PPC::BI__builtin_vsx_xvrdpic:
8332   case PPC::BI__builtin_vsx_xvrspic:
8333   case PPC::BI__builtin_vsx_xvrdpiz:
8334   case PPC::BI__builtin_vsx_xvrspiz: {
8335     llvm::Type *ResultType = ConvertType(E->getType());
8336     Value *X = EmitScalarExpr(E->getArg(0));
8337     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8338         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8339       ID = Intrinsic::floor;
8340     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8341              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8342       ID = Intrinsic::round;
8343     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8344              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8345       ID = Intrinsic::nearbyint;
8346     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8347              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8348       ID = Intrinsic::ceil;
8349     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8350              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8351       ID = Intrinsic::trunc;
8352     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8353     return Builder.CreateCall(F, X);
8354   }
8355
8356   // Absolute value
8357   case PPC::BI__builtin_vsx_xvabsdp:
8358   case PPC::BI__builtin_vsx_xvabssp: {
8359     llvm::Type *ResultType = ConvertType(E->getType());
8360     Value *X = EmitScalarExpr(E->getArg(0));
8361     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8362     return Builder.CreateCall(F, X);
8363   }
8364
8365   // FMA variations
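       // All of these lower to llvm.fma plus explicit negations: madd is
       // fma(x, y, z), msub is fma(x, y, -z), and the nmadd/nmsub forms negate
       // the whole result.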
8366   case PPC::BI__builtin_vsx_xvmaddadp:
8367   case PPC::BI__builtin_vsx_xvmaddasp:
8368   case PPC::BI__builtin_vsx_xvnmaddadp:
8369   case PPC::BI__builtin_vsx_xvnmaddasp:
8370   case PPC::BI__builtin_vsx_xvmsubadp:
8371   case PPC::BI__builtin_vsx_xvmsubasp:
8372   case PPC::BI__builtin_vsx_xvnmsubadp:
8373   case PPC::BI__builtin_vsx_xvnmsubasp: {
8374     llvm::Type *ResultType = ConvertType(E->getType());
8375     Value *X = EmitScalarExpr(E->getArg(0));
8376     Value *Y = EmitScalarExpr(E->getArg(1));
8377     Value *Z = EmitScalarExpr(E->getArg(2));
8378     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8379     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8380     switch (BuiltinID) {
8381       case PPC::BI__builtin_vsx_xvmaddadp:
8382       case PPC::BI__builtin_vsx_xvmaddasp:
8383         return Builder.CreateCall(F, {X, Y, Z});
8384       case PPC::BI__builtin_vsx_xvnmaddadp:
8385       case PPC::BI__builtin_vsx_xvnmaddasp:
8386         return Builder.CreateFSub(Zero,
8387                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8388       case PPC::BI__builtin_vsx_xvmsubadp:
8389       case PPC::BI__builtin_vsx_xvmsubasp:
8390         return Builder.CreateCall(F,
8391                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8392       case PPC::BI__builtin_vsx_xvnmsubadp:
8393       case PPC::BI__builtin_vsx_xvnmsubasp:
8394         Value *FsubRes =
8395           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8396         return Builder.CreateFSub(Zero, FsubRes, "sub");
8397     }
8398     llvm_unreachable("Unknown FMA operation");
8399     return nullptr; // Suppress no-return warning
8400   }
8401
8402   case PPC::BI__builtin_vsx_insertword: {
8403     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8404
8405     // Third argument is a compile time constant int. It must be clamped
8406     // to the range [0, 12].
8407     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8408     assert(ArgCI &&
8409            "Third arg to xxinsertw intrinsic must be constant integer");
8410     const int64_t MaxIndex = 12;
8411     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8412
8413     // The builtin semantics don't exactly match the xxinsertw instruction's
8414     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8415     // word from the first argument, and inserts it in the second argument. The
8416     // instruction extracts the word from its second input register and inserts
8417     // it into its first input register, so swap the first and second arguments.
8418     std::swap(Ops[0], Ops[1]);
8419
8420     // Need to cast the second argument from a vector of unsigned int to a
8421     // vector of long long.
8422     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8423
8424     if (getTarget().isLittleEndian()) {
8425       // Create a shuffle mask of (1, 0)
8426       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8427                                    ConstantInt::get(Int32Ty, 0)
8428                                  };
8429       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8430
8431       // Reverse the double words in the vector we will extract from.
8432       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8433       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8434
8435       // Reverse the index.
8436       Index = MaxIndex - Index;
8437     }
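    // Illustrative example: with Index == 4 on a little endian target, the
    // doublewords of Ops[0] are swapped above and the index is mirrored to
    // MaxIndex - 4 == 8, matching the big endian byte numbering used by the
    // xxinsertw instruction itself.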
8438
8439     // Intrinsic expects the first arg to be a vector of int.
8440     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8441     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8442     return Builder.CreateCall(F, Ops);
8443   }
8444
8445   case PPC::BI__builtin_vsx_extractuword: {
8446     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8447
8448     // Intrinsic expects the first argument to be a vector of doublewords.
8449     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8450
8451     // The second argument is a compile time constant int that needs to
8452     // be clamped to the range [0, 12].
8453     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8454     assert(ArgCI &&
8455            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8456     const int64_t MaxIndex = 12;
8457     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8458
8459     if (getTarget().isLittleEndian()) {
8460       // Reverse the index.
8461       Index = MaxIndex - Index;
8462       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8463
8464       // Emit the call, then reverse the double words of the result vector.
8465       Value *Call = Builder.CreateCall(F, Ops);
8466
8467       // Create a shuffle mask of (1, 0)
8468       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8469                                    ConstantInt::get(Int32Ty, 0)
8470                                  };
8471       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8472
8473       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8474       return ShuffleCall;
8475     } else {
8476       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8477       return Builder.CreateCall(F, Ops);
8478     }
8479   }
8480
8481   case PPC::BI__builtin_vsx_xxpermdi: {
8482     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8483     assert(ArgCI && "Third arg must be constant integer!");
8484
8485     unsigned Index = ArgCI->getZExtValue();
8486     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8487     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8488
8489     // Element zero comes from the first input vector and element one comes from
8490     // the second. The element indices within each vector are numbered in big
8491     // endian order, so the shuffle mask must be adjusted for this on little
8492     // endian platforms (i.e. the index is complemented and the source order
8492     // reversed).
8493     unsigned ElemIdx0;
8494     unsigned ElemIdx1;
8495     if (getTarget().isLittleEndian()) {
8496       ElemIdx0 = (~Index & 1) + 2;
8497       ElemIdx1 = (~Index & 2) >> 1;
8498     } else { // BigEndian
8499       ElemIdx0 = (Index & 2) >> 1;
8500       ElemIdx1 = 2 + (Index & 1);
8501     }
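    // Illustrative example: for Index == 1, big endian selects shuffle
    // elements {0, 3} (doubleword 0 of Ops[0] and doubleword 1 of Ops[1]),
    // while little endian selects {2, 1} after complementing the index and
    // swapping the source order.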
8502
8503     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8504                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8505     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8506
8507     Value *ShuffleCall =
8508         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8509     QualType BIRetType = E->getType();
8510     auto RetTy = ConvertType(BIRetType);
8511     return Builder.CreateBitCast(ShuffleCall, RetTy);
8512   }
8513
8514   case PPC::BI__builtin_vsx_xxsldwi: {
8515     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8516     assert(ArgCI && "Third argument must be a compile time constant");
8517     unsigned Index = ArgCI->getZExtValue() & 0x3;
8518     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8519     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8520
8521     // Create a shuffle mask
8522     unsigned ElemIdx0;
8523     unsigned ElemIdx1;
8524     unsigned ElemIdx2;
8525     unsigned ElemIdx3;
8526     if (getTarget().isLittleEndian()) {
8527       // Little endian element N comes from element 8+N-Index of the
8528       // concatenated wide vector (of course, using modulo arithmetic on
8529       // the total number of elements).
8530       ElemIdx0 = (8 - Index) % 8;
8531       ElemIdx1 = (9 - Index) % 8;
8532       ElemIdx2 = (10 - Index) % 8;
8533       ElemIdx3 = (11 - Index) % 8;
8534     } else {
8535       // Big endian ElemIdx<N> = Index + N
8536       ElemIdx0 = Index;
8537       ElemIdx1 = Index + 1;
8538       ElemIdx2 = Index + 2;
8539       ElemIdx3 = Index + 3;
8540     }
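    // Illustrative example: for Index == 1 the big endian mask is
    // {1, 2, 3, 4}, while the little endian mask wraps around to
    // {7, 0, 1, 2} because elements are taken modulo 8 from the other end
    // of the concatenated vector.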
8541
8542     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8543                                 ConstantInt::get(Int32Ty, ElemIdx1),
8544                                 ConstantInt::get(Int32Ty, ElemIdx2),
8545                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8546
8547     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8548     Value *ShuffleCall =
8549         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8550     QualType BIRetType = E->getType();
8551     auto RetTy = ConvertType(BIRetType);
8552     return Builder.CreateBitCast(ShuffleCall, RetTy);
8553   }
8554   }
8555 }
8556
8557 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8558                                               const CallExpr *E) {
8559   switch (BuiltinID) {
8560   case AMDGPU::BI__builtin_amdgcn_div_scale:
8561   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8562     // Translate from the intrinsic's struct return to the builtin's out
8563     // argument.
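    // The amdgcn.div.scale intrinsic returns a {result, flag} pair, whereas
    // the builtin takes a bool out-pointer as its fourth argument, so the
    // flag element is extracted, zero-extended and stored through that
    // pointer while the numeric result is returned directly.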
8564
8565     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8566
8567     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8568     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8569     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8570
8571     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8572                                            X->getType());
8573
8574     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8575
8576     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8577     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8578
8579     llvm::Type *RealFlagType
8580       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8581
8582     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8583     Builder.CreateStore(FlagExt, FlagOutPtr);
8584     return Result;
8585   }
8586   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8587   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8588     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8589     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8590     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8591     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8592
8593     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8594                                       Src0->getType());
8595     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8596     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8597   }
8598
8599   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8600     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8601   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8602     llvm::SmallVector<llvm::Value *, 5> Args;
8603     for (unsigned I = 0; I != 5; ++I)
8604       Args.push_back(EmitScalarExpr(E->getArg(I)));
8605     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8606                                     Args[0]->getType());
8607     return Builder.CreateCall(F, Args);
8608   }
8609   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8610   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8611   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8612     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8613   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8614   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8615     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8616   case AMDGPU::BI__builtin_amdgcn_rcp:
8617   case AMDGPU::BI__builtin_amdgcn_rcpf:
8618   case AMDGPU::BI__builtin_amdgcn_rcph:
8619     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8620   case AMDGPU::BI__builtin_amdgcn_rsq:
8621   case AMDGPU::BI__builtin_amdgcn_rsqf:
8622   case AMDGPU::BI__builtin_amdgcn_rsqh:
8623     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8624   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8625   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8626     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8627   case AMDGPU::BI__builtin_amdgcn_sinf:
8628   case AMDGPU::BI__builtin_amdgcn_sinh:
8629     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8630   case AMDGPU::BI__builtin_amdgcn_cosf:
8631   case AMDGPU::BI__builtin_amdgcn_cosh:
8632     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8633   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8634     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8635   case AMDGPU::BI__builtin_amdgcn_ldexp:
8636   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8637   case AMDGPU::BI__builtin_amdgcn_ldexph:
8638     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8639   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8640   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8641   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8642     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8643   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8644   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8645     Value *Src0 = EmitScalarExpr(E->getArg(0));
8646     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8647                                 { Builder.getInt32Ty(), Src0->getType() });
8648     return Builder.CreateCall(F, Src0);
8649   }
8650   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8651     Value *Src0 = EmitScalarExpr(E->getArg(0));
8652     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8653                                 { Builder.getInt16Ty(), Src0->getType() });
8654     return Builder.CreateCall(F, Src0);
8655   }
8656   case AMDGPU::BI__builtin_amdgcn_fract:
8657   case AMDGPU::BI__builtin_amdgcn_fractf:
8658   case AMDGPU::BI__builtin_amdgcn_fracth:
8659     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8660   case AMDGPU::BI__builtin_amdgcn_lerp:
8661     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8662   case AMDGPU::BI__builtin_amdgcn_uicmp:
8663   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8664   case AMDGPU::BI__builtin_amdgcn_sicmp:
8665   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8666     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8667   case AMDGPU::BI__builtin_amdgcn_fcmp:
8668   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8669     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8670   case AMDGPU::BI__builtin_amdgcn_class:
8671   case AMDGPU::BI__builtin_amdgcn_classf:
8672   case AMDGPU::BI__builtin_amdgcn_classh:
8673     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8674   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8675   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8676     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8677   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8678     CallInst *CI = cast<CallInst>(
8679       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8680     CI->setConvergent();
8681     return CI;
8682   }
8683
8684   // amdgcn workitem
8685   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8686     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8687   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8688     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8689   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8690     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8691
8692   // r600 intrinsics
8693   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8694   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8695     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8696   case AMDGPU::BI__builtin_r600_read_tidig_x:
8697     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8698   case AMDGPU::BI__builtin_r600_read_tidig_y:
8699     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8700   case AMDGPU::BI__builtin_r600_read_tidig_z:
8701     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8702   default:
8703     return nullptr;
8704   }
8705 }
8706
8707 /// Handle a SystemZ function in which the final argument is a pointer
8708 /// to an int that receives the post-instruction CC value.  At the LLVM level
8709 /// this is represented as a function that returns a {result, cc} pair.
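/// For example, __builtin_s390_vceqbs(a, b, &cc) is emitted as a call to the
/// llvm.s390.vceqbs intrinsic; element 1 of the returned pair is stored to
/// *cc and element 0 becomes the value returned to the caller.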
8710 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8711                                          unsigned IntrinsicID,
8712                                          const CallExpr *E) {
8713   unsigned NumArgs = E->getNumArgs() - 1;
8714   SmallVector<Value *, 8> Args(NumArgs);
8715   for (unsigned I = 0; I < NumArgs; ++I)
8716     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8717   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8718   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8719   Value *Call = CGF.Builder.CreateCall(F, Args);
8720   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8721   CGF.Builder.CreateStore(CC, CCPtr);
8722   return CGF.Builder.CreateExtractValue(Call, 0);
8723 }
8724
8725 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8726                                                const CallExpr *E) {
8727   switch (BuiltinID) {
8728   case SystemZ::BI__builtin_tbegin: {
8729     Value *TDB = EmitScalarExpr(E->getArg(0));
8730     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8731     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8732     return Builder.CreateCall(F, {TDB, Control});
8733   }
8734   case SystemZ::BI__builtin_tbegin_nofloat: {
8735     Value *TDB = EmitScalarExpr(E->getArg(0));
8736     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8737     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8738     return Builder.CreateCall(F, {TDB, Control});
8739   }
8740   case SystemZ::BI__builtin_tbeginc: {
8741     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8742     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8743     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8744     return Builder.CreateCall(F, {TDB, Control});
8745   }
8746   case SystemZ::BI__builtin_tabort: {
8747     Value *Data = EmitScalarExpr(E->getArg(0));
8748     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8749     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8750   }
8751   case SystemZ::BI__builtin_non_tx_store: {
8752     Value *Address = EmitScalarExpr(E->getArg(0));
8753     Value *Data = EmitScalarExpr(E->getArg(1));
8754     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8755     return Builder.CreateCall(F, {Data, Address});
8756   }
8757
8758   // Vector builtins.  Note that most vector builtins are mapped automatically
8759   // to target-specific LLVM intrinsics.  The ones handled specially here can
8760   // be represented via standard LLVM IR, which is preferable since it
8761   // enables common LLVM optimizations.
8762
8763   case SystemZ::BI__builtin_s390_vpopctb:
8764   case SystemZ::BI__builtin_s390_vpopcth:
8765   case SystemZ::BI__builtin_s390_vpopctf:
8766   case SystemZ::BI__builtin_s390_vpopctg: {
8767     llvm::Type *ResultType = ConvertType(E->getType());
8768     Value *X = EmitScalarExpr(E->getArg(0));
8769     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8770     return Builder.CreateCall(F, X);
8771   }
8772
8773   case SystemZ::BI__builtin_s390_vclzb:
8774   case SystemZ::BI__builtin_s390_vclzh:
8775   case SystemZ::BI__builtin_s390_vclzf:
8776   case SystemZ::BI__builtin_s390_vclzg: {
8777     llvm::Type *ResultType = ConvertType(E->getType());
8778     Value *X = EmitScalarExpr(E->getArg(0));
8779     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8780     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8781     return Builder.CreateCall(F, {X, Undef});
8782   }
8783
8784   case SystemZ::BI__builtin_s390_vctzb:
8785   case SystemZ::BI__builtin_s390_vctzh:
8786   case SystemZ::BI__builtin_s390_vctzf:
8787   case SystemZ::BI__builtin_s390_vctzg: {
8788     llvm::Type *ResultType = ConvertType(E->getType());
8789     Value *X = EmitScalarExpr(E->getArg(0));
8790     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8791     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8792     return Builder.CreateCall(F, {X, Undef});
8793   }
8794
8795   case SystemZ::BI__builtin_s390_vfsqsb:
8796   case SystemZ::BI__builtin_s390_vfsqdb: {
8797     llvm::Type *ResultType = ConvertType(E->getType());
8798     Value *X = EmitScalarExpr(E->getArg(0));
8799     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8800     return Builder.CreateCall(F, X);
8801   }
8802   case SystemZ::BI__builtin_s390_vfmasb:
8803   case SystemZ::BI__builtin_s390_vfmadb: {
8804     llvm::Type *ResultType = ConvertType(E->getType());
8805     Value *X = EmitScalarExpr(E->getArg(0));
8806     Value *Y = EmitScalarExpr(E->getArg(1));
8807     Value *Z = EmitScalarExpr(E->getArg(2));
8808     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8809     return Builder.CreateCall(F, {X, Y, Z});
8810   }
8811   case SystemZ::BI__builtin_s390_vfmssb:
8812   case SystemZ::BI__builtin_s390_vfmsdb: {
8813     llvm::Type *ResultType = ConvertType(E->getType());
8814     Value *X = EmitScalarExpr(E->getArg(0));
8815     Value *Y = EmitScalarExpr(E->getArg(1));
8816     Value *Z = EmitScalarExpr(E->getArg(2));
8817     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8818     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8819     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8820   }
8821   case SystemZ::BI__builtin_s390_vfnmasb:
8822   case SystemZ::BI__builtin_s390_vfnmadb: {
8823     llvm::Type *ResultType = ConvertType(E->getType());
8824     Value *X = EmitScalarExpr(E->getArg(0));
8825     Value *Y = EmitScalarExpr(E->getArg(1));
8826     Value *Z = EmitScalarExpr(E->getArg(2));
8827     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8828     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8829     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
8830   }
8831   case SystemZ::BI__builtin_s390_vfnmssb:
8832   case SystemZ::BI__builtin_s390_vfnmsdb: {
8833     llvm::Type *ResultType = ConvertType(E->getType());
8834     Value *X = EmitScalarExpr(E->getArg(0));
8835     Value *Y = EmitScalarExpr(E->getArg(1));
8836     Value *Z = EmitScalarExpr(E->getArg(2));
8837     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8838     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8839     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
8840     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
8841   }
8842   case SystemZ::BI__builtin_s390_vflpsb:
8843   case SystemZ::BI__builtin_s390_vflpdb: {
8844     llvm::Type *ResultType = ConvertType(E->getType());
8845     Value *X = EmitScalarExpr(E->getArg(0));
8846     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8847     return Builder.CreateCall(F, X);
8848   }
8849   case SystemZ::BI__builtin_s390_vflnsb:
8850   case SystemZ::BI__builtin_s390_vflndb: {
8851     llvm::Type *ResultType = ConvertType(E->getType());
8852     Value *X = EmitScalarExpr(E->getArg(0));
8853     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8854     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8855     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8856   }
8857   case SystemZ::BI__builtin_s390_vfisb:
8858   case SystemZ::BI__builtin_s390_vfidb: {
8859     llvm::Type *ResultType = ConvertType(E->getType());
8860     Value *X = EmitScalarExpr(E->getArg(0));
8861     // Constant-fold the M4 and M5 mask arguments.
8862     llvm::APSInt M4, M5;
8863     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8864     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8865     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8866     (void)IsConstM4; (void)IsConstM5;
8867     // Check whether this instance can be represented via an LLVM standard
8868     // intrinsic.  We only support some combinations of M4 and M5.
8869     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8870     switch (M4.getZExtValue()) {
8871     default: break;
8872     case 0:  // IEEE-inexact exception allowed
8873       switch (M5.getZExtValue()) {
8874       default: break;
8875       case 0: ID = Intrinsic::rint; break;
8876       }
8877       break;
8878     case 4:  // IEEE-inexact exception suppressed
8879       switch (M5.getZExtValue()) {
8880       default: break;
8881       case 0: ID = Intrinsic::nearbyint; break;
8882       case 1: ID = Intrinsic::round; break;
8883       case 5: ID = Intrinsic::trunc; break;
8884       case 6: ID = Intrinsic::ceil; break;
8885       case 7: ID = Intrinsic::floor; break;
8886       }
8887       break;
8888     }
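    // For example, M4 == 4 with M5 == 5 (inexact suppressed, round toward
    // zero) maps to llvm.trunc; unsupported combinations fall through to the
    // target-specific s390.vfisb/vfidb intrinsics below.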
8889     if (ID != Intrinsic::not_intrinsic) {
8890       Function *F = CGM.getIntrinsic(ID, ResultType);
8891       return Builder.CreateCall(F, X);
8892     }
8893     switch (BuiltinID) {
8894       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
8895       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
8896       default: llvm_unreachable("Unknown BuiltinID");
8897     }
8898     Function *F = CGM.getIntrinsic(ID);
8899     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8900     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8901     return Builder.CreateCall(F, {X, M4Value, M5Value});
8902   }
8903   case SystemZ::BI__builtin_s390_vfmaxsb:
8904   case SystemZ::BI__builtin_s390_vfmaxdb: {
8905     llvm::Type *ResultType = ConvertType(E->getType());
8906     Value *X = EmitScalarExpr(E->getArg(0));
8907     Value *Y = EmitScalarExpr(E->getArg(1));
8908     // Constant-fold the M4 mask argument.
8909     llvm::APSInt M4;
8910     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
8911     assert(IsConstM4 && "Constant arg isn't actually constant?");
8912     (void)IsConstM4;
8913     // Check whether this instance can be represented via an LLVM standard
8914     // intrinsic.  We only support some values of M4.
8915     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8916     switch (M4.getZExtValue()) {
8917     default: break;
8918     case 4: ID = Intrinsic::maxnum; break;
8919     }
8920     if (ID != Intrinsic::not_intrinsic) {
8921       Function *F = CGM.getIntrinsic(ID, ResultType);
8922       return Builder.CreateCall(F, {X, Y});
8923     }
8924     switch (BuiltinID) {
8925       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
8926       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
8927       default: llvm_unreachable("Unknown BuiltinID");
8928     }
8929     Function *F = CGM.getIntrinsic(ID);
8930     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8931     return Builder.CreateCall(F, {X, Y, M4Value});
8932   }
8933   case SystemZ::BI__builtin_s390_vfminsb:
8934   case SystemZ::BI__builtin_s390_vfmindb: {
8935     llvm::Type *ResultType = ConvertType(E->getType());
8936     Value *X = EmitScalarExpr(E->getArg(0));
8937     Value *Y = EmitScalarExpr(E->getArg(1));
8938     // Constant-fold the M4 mask argument.
8939     llvm::APSInt M4;
8940     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
8941     assert(IsConstM4 && "Constant arg isn't actually constant?");
8942     (void)IsConstM4;
8943     // Check whether this instance can be represented via an LLVM standard
8944     // intrinsic.  We only support some values of M4.
8945     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8946     switch (M4.getZExtValue()) {
8947     default: break;
8948     case 4: ID = Intrinsic::minnum; break;
8949     }
8950     if (ID != Intrinsic::not_intrinsic) {
8951       Function *F = CGM.getIntrinsic(ID, ResultType);
8952       return Builder.CreateCall(F, {X, Y});
8953     }
8954     switch (BuiltinID) {
8955       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
8956       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
8957       default: llvm_unreachable("Unknown BuiltinID");
8958     }
8959     Function *F = CGM.getIntrinsic(ID);
8960     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8961     return Builder.CreateCall(F, {X, Y, M4Value});
8962   }
8963
8964   // Vector intrinsics that output the post-instruction CC value.
8965
8966 #define INTRINSIC_WITH_CC(NAME) \
8967     case SystemZ::BI__builtin_##NAME: \
8968       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8969
8970   INTRINSIC_WITH_CC(s390_vpkshs);
8971   INTRINSIC_WITH_CC(s390_vpksfs);
8972   INTRINSIC_WITH_CC(s390_vpksgs);
8973
8974   INTRINSIC_WITH_CC(s390_vpklshs);
8975   INTRINSIC_WITH_CC(s390_vpklsfs);
8976   INTRINSIC_WITH_CC(s390_vpklsgs);
8977
8978   INTRINSIC_WITH_CC(s390_vceqbs);
8979   INTRINSIC_WITH_CC(s390_vceqhs);
8980   INTRINSIC_WITH_CC(s390_vceqfs);
8981   INTRINSIC_WITH_CC(s390_vceqgs);
8982
8983   INTRINSIC_WITH_CC(s390_vchbs);
8984   INTRINSIC_WITH_CC(s390_vchhs);
8985   INTRINSIC_WITH_CC(s390_vchfs);
8986   INTRINSIC_WITH_CC(s390_vchgs);
8987
8988   INTRINSIC_WITH_CC(s390_vchlbs);
8989   INTRINSIC_WITH_CC(s390_vchlhs);
8990   INTRINSIC_WITH_CC(s390_vchlfs);
8991   INTRINSIC_WITH_CC(s390_vchlgs);
8992
8993   INTRINSIC_WITH_CC(s390_vfaebs);
8994   INTRINSIC_WITH_CC(s390_vfaehs);
8995   INTRINSIC_WITH_CC(s390_vfaefs);
8996
8997   INTRINSIC_WITH_CC(s390_vfaezbs);
8998   INTRINSIC_WITH_CC(s390_vfaezhs);
8999   INTRINSIC_WITH_CC(s390_vfaezfs);
9000
9001   INTRINSIC_WITH_CC(s390_vfeebs);
9002   INTRINSIC_WITH_CC(s390_vfeehs);
9003   INTRINSIC_WITH_CC(s390_vfeefs);
9004
9005   INTRINSIC_WITH_CC(s390_vfeezbs);
9006   INTRINSIC_WITH_CC(s390_vfeezhs);
9007   INTRINSIC_WITH_CC(s390_vfeezfs);
9008
9009   INTRINSIC_WITH_CC(s390_vfenebs);
9010   INTRINSIC_WITH_CC(s390_vfenehs);
9011   INTRINSIC_WITH_CC(s390_vfenefs);
9012
9013   INTRINSIC_WITH_CC(s390_vfenezbs);
9014   INTRINSIC_WITH_CC(s390_vfenezhs);
9015   INTRINSIC_WITH_CC(s390_vfenezfs);
9016
9017   INTRINSIC_WITH_CC(s390_vistrbs);
9018   INTRINSIC_WITH_CC(s390_vistrhs);
9019   INTRINSIC_WITH_CC(s390_vistrfs);
9020
9021   INTRINSIC_WITH_CC(s390_vstrcbs);
9022   INTRINSIC_WITH_CC(s390_vstrchs);
9023   INTRINSIC_WITH_CC(s390_vstrcfs);
9024
9025   INTRINSIC_WITH_CC(s390_vstrczbs);
9026   INTRINSIC_WITH_CC(s390_vstrczhs);
9027   INTRINSIC_WITH_CC(s390_vstrczfs);
9028
9029   INTRINSIC_WITH_CC(s390_vfcesbs);
9030   INTRINSIC_WITH_CC(s390_vfcedbs);
9031   INTRINSIC_WITH_CC(s390_vfchsbs);
9032   INTRINSIC_WITH_CC(s390_vfchdbs);
9033   INTRINSIC_WITH_CC(s390_vfchesbs);
9034   INTRINSIC_WITH_CC(s390_vfchedbs);
9035
9036   INTRINSIC_WITH_CC(s390_vftcisb);
9037   INTRINSIC_WITH_CC(s390_vftcidb);
9038
9039 #undef INTRINSIC_WITH_CC
9040
9041   default:
9042     return nullptr;
9043   }
9044 }
9045
9046 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9047                                              const CallExpr *E) {
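  // Helpers shared by the cases below: MakeLdg emits an llvm.nvvm.ldg.global.*
  // load overloaded on the pointee and pointer types, passing the natural
  // pointee alignment as the second operand; MakeScopedAtomic emits a scoped
  // NVVM atomic intrinsic with the same overload pattern, taking its value
  // operand from the builtin's second argument.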
9048   auto MakeLdg = [&](unsigned IntrinsicID) {
9049     Value *Ptr = EmitScalarExpr(E->getArg(0));
9050     clang::CharUnits Align =
9051         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9052     return Builder.CreateCall(
9053         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9054                                        Ptr->getType()}),
9055         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9056   };
9057   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9058     Value *Ptr = EmitScalarExpr(E->getArg(0));
9059     return Builder.CreateCall(
9060         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9061                                        Ptr->getType()}),
9062         {Ptr, EmitScalarExpr(E->getArg(1))});
9063   };
9064   switch (BuiltinID) {
9065   case NVPTX::BI__nvvm_atom_add_gen_i:
9066   case NVPTX::BI__nvvm_atom_add_gen_l:
9067   case NVPTX::BI__nvvm_atom_add_gen_ll:
9068     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9069
9070   case NVPTX::BI__nvvm_atom_sub_gen_i:
9071   case NVPTX::BI__nvvm_atom_sub_gen_l:
9072   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9073     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9074
9075   case NVPTX::BI__nvvm_atom_and_gen_i:
9076   case NVPTX::BI__nvvm_atom_and_gen_l:
9077   case NVPTX::BI__nvvm_atom_and_gen_ll:
9078     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9079
9080   case NVPTX::BI__nvvm_atom_or_gen_i:
9081   case NVPTX::BI__nvvm_atom_or_gen_l:
9082   case NVPTX::BI__nvvm_atom_or_gen_ll:
9083     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9084
9085   case NVPTX::BI__nvvm_atom_xor_gen_i:
9086   case NVPTX::BI__nvvm_atom_xor_gen_l:
9087   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9088     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9089
9090   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9091   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9092   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9093     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9094
9095   case NVPTX::BI__nvvm_atom_max_gen_i:
9096   case NVPTX::BI__nvvm_atom_max_gen_l:
9097   case NVPTX::BI__nvvm_atom_max_gen_ll:
9098     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9099
9100   case NVPTX::BI__nvvm_atom_max_gen_ui:
9101   case NVPTX::BI__nvvm_atom_max_gen_ul:
9102   case NVPTX::BI__nvvm_atom_max_gen_ull:
9103     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9104
9105   case NVPTX::BI__nvvm_atom_min_gen_i:
9106   case NVPTX::BI__nvvm_atom_min_gen_l:
9107   case NVPTX::BI__nvvm_atom_min_gen_ll:
9108     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9109
9110   case NVPTX::BI__nvvm_atom_min_gen_ui:
9111   case NVPTX::BI__nvvm_atom_min_gen_ul:
9112   case NVPTX::BI__nvvm_atom_min_gen_ull:
9113     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9114
9115   case NVPTX::BI__nvvm_atom_cas_gen_i:
9116   case NVPTX::BI__nvvm_atom_cas_gen_l:
9117   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9118     // __nvvm_atom_cas_gen_* should return the old value rather than the
9119     // success flag.
9120     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9121
9122   case NVPTX::BI__nvvm_atom_add_gen_f: {
9123     Value *Ptr = EmitScalarExpr(E->getArg(0));
9124     Value *Val = EmitScalarExpr(E->getArg(1));
9125     // atomicrmw only deals with integer arguments, so we need to use
9126     // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
9127     Value *FnALAF32 =
9128         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9129     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9130   }
9131
9132   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9133     Value *Ptr = EmitScalarExpr(E->getArg(0));
9134     Value *Val = EmitScalarExpr(E->getArg(1));
9135     Value *FnALI32 =
9136         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9137     return Builder.CreateCall(FnALI32, {Ptr, Val});
9138   }
9139
9140   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9141     Value *Ptr = EmitScalarExpr(E->getArg(0));
9142     Value *Val = EmitScalarExpr(E->getArg(1));
9143     Value *FnALD32 =
9144         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9145     return Builder.CreateCall(FnALD32, {Ptr, Val});
9146   }
9147
9148   case NVPTX::BI__nvvm_ldg_c:
9149   case NVPTX::BI__nvvm_ldg_c2:
9150   case NVPTX::BI__nvvm_ldg_c4:
9151   case NVPTX::BI__nvvm_ldg_s:
9152   case NVPTX::BI__nvvm_ldg_s2:
9153   case NVPTX::BI__nvvm_ldg_s4:
9154   case NVPTX::BI__nvvm_ldg_i:
9155   case NVPTX::BI__nvvm_ldg_i2:
9156   case NVPTX::BI__nvvm_ldg_i4:
9157   case NVPTX::BI__nvvm_ldg_l:
9158   case NVPTX::BI__nvvm_ldg_ll:
9159   case NVPTX::BI__nvvm_ldg_ll2:
9160   case NVPTX::BI__nvvm_ldg_uc:
9161   case NVPTX::BI__nvvm_ldg_uc2:
9162   case NVPTX::BI__nvvm_ldg_uc4:
9163   case NVPTX::BI__nvvm_ldg_us:
9164   case NVPTX::BI__nvvm_ldg_us2:
9165   case NVPTX::BI__nvvm_ldg_us4:
9166   case NVPTX::BI__nvvm_ldg_ui:
9167   case NVPTX::BI__nvvm_ldg_ui2:
9168   case NVPTX::BI__nvvm_ldg_ui4:
9169   case NVPTX::BI__nvvm_ldg_ul:
9170   case NVPTX::BI__nvvm_ldg_ull:
9171   case NVPTX::BI__nvvm_ldg_ull2:
9172     // PTX Interoperability section 2.2: "For a vector with an even number of
9173     // elements, its alignment is set to number of elements times the alignment
9174     // of its member: n*alignof(t)."
9175     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9176   case NVPTX::BI__nvvm_ldg_f:
9177   case NVPTX::BI__nvvm_ldg_f2:
9178   case NVPTX::BI__nvvm_ldg_f4:
9179   case NVPTX::BI__nvvm_ldg_d:
9180   case NVPTX::BI__nvvm_ldg_d2:
9181     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9182
9183   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9184   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9185   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9186     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9187   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9188   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9189   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9190     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9191   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9192   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9193     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9194   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9195   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9196     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9197   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9198   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9199   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9200     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9201   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9202   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9203   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9204     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9205   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9206   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9207   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9208   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9209   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9210   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9211     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9212   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9213   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9214   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9215   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9216   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9217   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9218     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9219   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9220   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9221   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9222   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9223   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9224   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9225     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9226   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9227   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9228   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9229   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9230   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9231   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9232     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9233   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9234     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9235   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9236     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9237   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9238     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9239   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9240     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9241   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9242   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9243   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9244     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9245   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9246   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9247   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9248     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9249   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9250   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9251   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9252     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9253   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9254   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9255   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9256     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9257   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9258   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9259   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9260     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9261   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9262   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9263   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9264     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9265   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9266   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9267   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9268     Value *Ptr = EmitScalarExpr(E->getArg(0));
9269     return Builder.CreateCall(
9270         CGM.getIntrinsic(
9271             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9272             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9273         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9274   }
9275   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9276   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9277   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9278     Value *Ptr = EmitScalarExpr(E->getArg(0));
9279     return Builder.CreateCall(
9280         CGM.getIntrinsic(
9281             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9282             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9283         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9284   }
9285   default:
9286     return nullptr;
9287   }
9288 }
9289
9290 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9291                                                    const CallExpr *E) {
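  // current_memory and grow_memory operate on WebAssembly linear memory,
  // which is measured in 64KiB pages; throw passes the exception tag and the
  // thrown object to the wasm.throw intrinsic, and rethrow takes no operands.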
9292   switch (BuiltinID) {
9293   case WebAssembly::BI__builtin_wasm_current_memory: {
9294     llvm::Type *ResultType = ConvertType(E->getType());
9295     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9296     return Builder.CreateCall(Callee);
9297   }
9298   case WebAssembly::BI__builtin_wasm_grow_memory: {
9299     Value *X = EmitScalarExpr(E->getArg(0));
9300     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9301     return Builder.CreateCall(Callee, X);
9302   }
9303   case WebAssembly::BI__builtin_wasm_throw: {
9304     Value *Tag = EmitScalarExpr(E->getArg(0));
9305     Value *Obj = EmitScalarExpr(E->getArg(1));
9306     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9307     return Builder.CreateCall(Callee, {Tag, Obj});
9308   }
9309   case WebAssembly::BI__builtin_wasm_rethrow: {
9310     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9311     return Builder.CreateCall(Callee);
9312   }
9313
9314   default:
9315     return nullptr;
9316   }
9317 }