]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
MFV r328255: 8972 zfs holds: In scripted mode, do not pad columns with spaces
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TargetParser.h"
36 #include <sstream>
37
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm;
41
/// Restrict \p Value to the interval [\p Low, \p High].
///
/// The lower bound is applied first and the upper bound second, so if the
/// bounds are inverted (Low > High) the result is always \p High.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  const int64_t RaisedToLow = Value < Low ? Low : Value;
  return RaisedToLow < High ? RaisedToLow : High;
}
46
47 /// getBuiltinLibFunction - Given a builtin id for a function like
48 /// "__builtin_fabsf", return a Function* for "fabsf".
49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
50                                                      unsigned BuiltinID) {
51   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
52
53   // Get the name, skip over the __builtin_ prefix (if necessary).
54   StringRef Name;
55   GlobalDecl D(FD);
56
57   // If the builtin has been declared explicitly with an assembler label,
58   // use the mangled name. This differs from the plain label on platforms
59   // that prefix labels.
60   if (FD->hasAttr<AsmLabelAttr>())
61     Name = getMangledName(D);
62   else
63     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
64
65   llvm::FunctionType *Ty =
66     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
67
68   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
69 }
70
71 /// Emit the conversions required to turn the given value into an
72 /// integer of the given size.
73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
74                         QualType T, llvm::IntegerType *IntType) {
75   V = CGF.EmitToMemory(V, T);
76
77   if (V->getType()->isPointerTy())
78     return CGF.Builder.CreatePtrToInt(V, IntType);
79
80   assert(V->getType() == IntType);
81   return V;
82 }
83
84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
85                           QualType T, llvm::Type *ResultType) {
86   V = CGF.EmitFromMemory(V, T);
87
88   if (ResultType->isPointerTy())
89     return CGF.Builder.CreateIntToPtr(V, ResultType);
90
91   assert(V->getType() == ResultType);
92   return V;
93 }
94
95 /// Utility to insert an atomic instruction based on Instrinsic::ID
96 /// and the expression node.
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
98                                     llvm::AtomicRMWInst::BinOp Kind,
99                                     const CallExpr *E) {
100   QualType T = E->getType();
101   assert(E->getArg(0)->getType()->isPointerType());
102   assert(CGF.getContext().hasSameUnqualifiedType(T,
103                                   E->getArg(0)->getType()->getPointeeType()));
104   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
105
106   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
107   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
108
109   llvm::IntegerType *IntType =
110     llvm::IntegerType::get(CGF.getLLVMContext(),
111                            CGF.getContext().getTypeSize(T));
112   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
113
114   llvm::Value *Args[2];
115   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
116   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
117   llvm::Type *ValueType = Args[1]->getType();
118   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
119
120   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
121       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
122   return EmitFromInt(CGF, Result, T, ValueType);
123 }
124
125 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
126   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
127   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
128
129   // Convert the type of the pointer to a pointer to the stored type.
130   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
131   Value *BC = CGF.Builder.CreateBitCast(
132       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
133   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
134   LV.setNontemporal(true);
135   CGF.EmitStoreOfScalar(Val, LV, false);
136   return nullptr;
137 }
138
139 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
140   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
141
142   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
143   LV.setNontemporal(true);
144   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
145 }
146
147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
148                                llvm::AtomicRMWInst::BinOp Kind,
149                                const CallExpr *E) {
150   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
151 }
152
153 /// Utility to insert an atomic instruction based Instrinsic::ID and
154 /// the expression node, where the return value is the result of the
155 /// operation.
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
157                                    llvm::AtomicRMWInst::BinOp Kind,
158                                    const CallExpr *E,
159                                    Instruction::BinaryOps Op,
160                                    bool Invert = false) {
161   QualType T = E->getType();
162   assert(E->getArg(0)->getType()->isPointerType());
163   assert(CGF.getContext().hasSameUnqualifiedType(T,
164                                   E->getArg(0)->getType()->getPointeeType()));
165   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
166
167   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
168   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
169
170   llvm::IntegerType *IntType =
171     llvm::IntegerType::get(CGF.getLLVMContext(),
172                            CGF.getContext().getTypeSize(T));
173   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
174
175   llvm::Value *Args[2];
176   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
177   llvm::Type *ValueType = Args[1]->getType();
178   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
179   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
180
181   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
182       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
183   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
184   if (Invert)
185     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
186                                      llvm::ConstantInt::get(IntType, -1));
187   Result = EmitFromInt(CGF, Result, T, ValueType);
188   return RValue::get(Result);
189 }
190
191 /// @brief Utility to insert an atomic cmpxchg instruction.
192 ///
193 /// @param CGF The current codegen function.
194 /// @param E   Builtin call expression to convert to cmpxchg.
195 ///            arg0 - address to operate on
196 ///            arg1 - value to compare with
197 ///            arg2 - new value
198 /// @param ReturnBool Specifies whether to return success flag of
199 ///                   cmpxchg result or the old value.
200 ///
201 /// @returns result of cmpxchg, according to ReturnBool
202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
203                                      bool ReturnBool) {
204   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
205   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
206   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
207
208   llvm::IntegerType *IntType = llvm::IntegerType::get(
209       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
210   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
211
212   Value *Args[3];
213   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
214   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
215   llvm::Type *ValueType = Args[1]->getType();
216   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
217   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
218
219   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
220       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
221       llvm::AtomicOrdering::SequentiallyConsistent);
222   if (ReturnBool)
223     // Extract boolean success flag and zext it to int.
224     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
225                                   CGF.ConvertType(E->getType()));
226   else
227     // Extract old value and emit it using the same type as compare value.
228     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
229                        ValueType);
230 }
231
232 // Emit a simple mangled intrinsic that has 1 argument and a return type
233 // matching the argument type.
234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
235                                const CallExpr *E,
236                                unsigned IntrinsicID) {
237   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238
239   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240   return CGF.Builder.CreateCall(F, Src0);
241 }
242
243 // Emit an intrinsic that has 2 operands of the same type as its result.
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
245                                 const CallExpr *E,
246                                 unsigned IntrinsicID) {
247   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249
250   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
251   return CGF.Builder.CreateCall(F, { Src0, Src1 });
252 }
253
254 // Emit an intrinsic that has 3 operands of the same type as its result.
255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
256                                  const CallExpr *E,
257                                  unsigned IntrinsicID) {
258   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
259   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
260   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
261
262   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
264 }
265
266 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
268                                const CallExpr *E,
269                                unsigned IntrinsicID) {
270   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
271   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
272
273   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
274   return CGF.Builder.CreateCall(F, {Src0, Src1});
275 }
276
277 /// EmitFAbs - Emit a call to @llvm.fabs().
278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
279   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
280   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
281   Call->setDoesNotAccessMemory();
282   return Call;
283 }
284
285 /// Emit the computation of the sign bit for a floating point value. Returns
286 /// the i1 sign bit value.
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
288   LLVMContext &C = CGF.CGM.getLLVMContext();
289
290   llvm::Type *Ty = V->getType();
291   int Width = Ty->getPrimitiveSizeInBits();
292   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
293   V = CGF.Builder.CreateBitCast(V, IntTy);
294   if (Ty->isPPC_FP128Ty()) {
295     // We want the sign bit of the higher-order double. The bitcast we just
296     // did works as if the double-double was stored to memory and then
297     // read as an i128. The "store" will put the higher-order double in the
298     // lower address in both little- and big-Endian modes, but the "load"
299     // will treat those bits as a different part of the i128: the low bits in
300     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
301     // we need to shift the high bits down to the low before truncating.
302     Width >>= 1;
303     if (CGF.getTarget().isBigEndian()) {
304       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
305       V = CGF.Builder.CreateLShr(V, ShiftCst);
306     }
307     // We are truncating value in order to extract the higher-order
308     // double, which we will be using to extract the sign from.
309     IntTy = llvm::IntegerType::get(C, Width);
310     V = CGF.Builder.CreateTrunc(V, IntTy);
311   }
312   Value *Zero = llvm::Constant::getNullValue(IntTy);
313   return CGF.Builder.CreateICmpSLT(V, Zero);
314 }
315
316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
317                               const CallExpr *E, llvm::Constant *calleeValue) {
318   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
319   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
320 }
321
322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
323 /// depending on IntrinsicID.
324 ///
325 /// \arg CGF The current codegen function.
326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
327 /// \arg X The first argument to the llvm.*.with.overflow.*.
328 /// \arg Y The second argument to the llvm.*.with.overflow.*.
329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
330 /// \returns The result (i.e. sum/product) returned by the intrinsic.
331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
332                                           const llvm::Intrinsic::ID IntrinsicID,
333                                           llvm::Value *X, llvm::Value *Y,
334                                           llvm::Value *&Carry) {
335   // Make sure we have integers of the same width.
336   assert(X->getType() == Y->getType() &&
337          "Arguments must be the same type. (Did you forget to make sure both "
338          "arguments have the same integer width?)");
339
340   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
341   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
342   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
343   return CGF.Builder.CreateExtractValue(Tmp, 0);
344 }
345
346 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
347                                 unsigned IntrinsicID,
348                                 int low, int high) {
349     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
350     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
351     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
352     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
353     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
354     return Call;
355 }
356
namespace {
  /// Description of an integer type by its bit width and signedness.
  struct WidthAndSignedness {
    /// Width of the type in bits.
    unsigned Width;
    /// True if the type is a signed integer type.
    bool Signed;
  };
}
363
364 static WidthAndSignedness
365 getIntegerWidthAndSignedness(const clang::ASTContext &context,
366                              const clang::QualType Type) {
367   assert(Type->isIntegerType() && "Given type is not an integer.");
368   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
369   bool Signed = Type->isSignedIntegerType();
370   return {Width, Signed};
371 }
372
373 // Given one or more integer types, this function produces an integer type that
374 // encompasses them: any value in one of the given types could be expressed in
375 // the encompassing type.
376 static struct WidthAndSignedness
377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
378   assert(Types.size() > 0 && "Empty list of types.");
379
380   // If any of the given types is signed, we must return a signed type.
381   bool Signed = false;
382   for (const auto &Type : Types) {
383     Signed |= Type.Signed;
384   }
385
386   // The encompassing type must have a width greater than or equal to the width
387   // of the specified types.  Aditionally, if the encompassing type is signed,
388   // its width must be strictly greater than the width of any unsigned types
389   // given.
390   unsigned Width = 0;
391   for (const auto &Type : Types) {
392     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
393     if (Width < MinWidth) {
394       Width = MinWidth;
395     }
396   }
397
398   return {Width, Signed};
399 }
400
401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
402   llvm::Type *DestType = Int8PtrTy;
403   if (ArgValue->getType() != DestType)
404     ArgValue =
405         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
406
407   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
408   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
409 }
410
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct. That is the case when the two
/// types are equal, when going from type 0 to type 1, and when going from
/// type 3 to type 2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;

  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
419
420 static llvm::Value *
421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
422   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
423 }
424
425 llvm::Value *
426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
427                                                  llvm::IntegerType *ResType,
428                                                  llvm::Value *EmittedE) {
429   uint64_t ObjectSize;
430   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
431     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
432   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
433 }
434
435 /// Returns a Value corresponding to the size of the given expression.
436 /// This Value may be either of the following:
437 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
438 ///     it)
439 ///   - A call to the @llvm.objectsize intrinsic
440 ///
441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
442 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
444 llvm::Value *
445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
446                                        llvm::IntegerType *ResType,
447                                        llvm::Value *EmittedE) {
448   // We need to reference an argument if the pointer is a parameter with the
449   // pass_object_size attribute.
450   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
451     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
452     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
453     if (Param != nullptr && PS != nullptr &&
454         areBOSTypesCompatible(PS->getType(), Type)) {
455       auto Iter = SizeArguments.find(Param);
456       assert(Iter != SizeArguments.end());
457
458       const ImplicitParamDecl *D = Iter->second;
459       auto DIter = LocalDeclMap.find(D);
460       assert(DIter != LocalDeclMap.end());
461
462       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
463                               getContext().getSizeType(), E->getLocStart());
464     }
465   }
466
467   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
468   // evaluate E for side-effects. In either case, we shouldn't lower to
469   // @llvm.objectsize.
470   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
471     return getDefaultBuiltinObjectSizeResult(Type, ResType);
472
473   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
474   assert(Ptr->getType()->isPointerTy() &&
475          "Non-pointer passed to __builtin_object_size?");
476
477   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
478
479   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
480   Value *Min = Builder.getInt1((Type & 2) != 0);
481   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
482   Value *NullIsUnknown = Builder.getTrue();
483   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
484 }
485
// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
// handle them here. Each enumerator names one MSVC intrinsic (or family of
// intrinsics) that has a shared lowering.
enum class CodeGenFunction::MSVCIntrin {
  // Bit scans.
  _BitScanForward,
  _BitScanReverse,
  // Interlocked (atomic) operations.
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  // Process termination request.
  __fastfail,
};
502
503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
504                                             const CallExpr *E) {
505   switch (BuiltinID) {
506   case MSVCIntrin::_BitScanForward:
507   case MSVCIntrin::_BitScanReverse: {
508     Value *ArgValue = EmitScalarExpr(E->getArg(1));
509
510     llvm::Type *ArgType = ArgValue->getType();
511     llvm::Type *IndexType =
512       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
513     llvm::Type *ResultType = ConvertType(E->getType());
514
515     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
516     Value *ResZero = llvm::Constant::getNullValue(ResultType);
517     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
518
519     BasicBlock *Begin = Builder.GetInsertBlock();
520     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
521     Builder.SetInsertPoint(End);
522     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
523
524     Builder.SetInsertPoint(Begin);
525     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
526     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
527     Builder.CreateCondBr(IsZero, End, NotZero);
528     Result->addIncoming(ResZero, Begin);
529
530     Builder.SetInsertPoint(NotZero);
531     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
532
533     if (BuiltinID == MSVCIntrin::_BitScanForward) {
534       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
535       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
536       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
537       Builder.CreateStore(ZeroCount, IndexAddress, false);
538     } else {
539       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
540       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
541
542       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
543       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
544       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
545       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
546       Builder.CreateStore(Index, IndexAddress, false);
547     }
548     Builder.CreateBr(End);
549     Result->addIncoming(ResOne, NotZero);
550
551     Builder.SetInsertPoint(End);
552     return Result;
553   }
554   case MSVCIntrin::_InterlockedAnd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
556   case MSVCIntrin::_InterlockedExchange:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
558   case MSVCIntrin::_InterlockedExchangeAdd:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
560   case MSVCIntrin::_InterlockedExchangeSub:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
562   case MSVCIntrin::_InterlockedOr:
563     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
564   case MSVCIntrin::_InterlockedXor:
565     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
566
567   case MSVCIntrin::_interlockedbittestandset: {
568     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
569     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
570     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
571         AtomicRMWInst::Or, Addr,
572         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
573         llvm::AtomicOrdering::SequentiallyConsistent);
574     // Shift the relevant bit to the least significant position, truncate to
575     // the result type, and test the low bit.
576     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
577     llvm::Value *Truncated =
578         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
579     return Builder.CreateAnd(Truncated,
580                              ConstantInt::get(Truncated->getType(), 1));
581   }
582
583   case MSVCIntrin::_InterlockedDecrement: {
584     llvm::Type *IntTy = ConvertType(E->getType());
585     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
586       AtomicRMWInst::Sub,
587       EmitScalarExpr(E->getArg(0)),
588       ConstantInt::get(IntTy, 1),
589       llvm::AtomicOrdering::SequentiallyConsistent);
590     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
591   }
592   case MSVCIntrin::_InterlockedIncrement: {
593     llvm::Type *IntTy = ConvertType(E->getType());
594     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
595       AtomicRMWInst::Add,
596       EmitScalarExpr(E->getArg(0)),
597       ConstantInt::get(IntTy, 1),
598       llvm::AtomicOrdering::SequentiallyConsistent);
599     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
600   }
601
602   case MSVCIntrin::__fastfail: {
603     // Request immediate process termination from the kernel. The instruction
604     // sequences to do this are documented on MSDN:
605     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
606     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
607     StringRef Asm, Constraints;
608     switch (ISA) {
609     default:
610       ErrorUnsupported(E, "__fastfail call for this architecture");
611       break;
612     case llvm::Triple::x86:
613     case llvm::Triple::x86_64:
614       Asm = "int $$0x29";
615       Constraints = "{cx}";
616       break;
617     case llvm::Triple::thumb:
618       Asm = "udf #251";
619       Constraints = "{r0}";
620       break;
621     }
622     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
623     llvm::InlineAsm *IA =
624         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
625     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
626         getLLVMContext(), llvm::AttributeList::FunctionIndex,
627         llvm::Attribute::NoReturn);
628     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
629     CS.setAttributes(NoReturnAttr);
630     return CS.getInstruction();
631   }
632   }
633   llvm_unreachable("Incorrect MSVC intrinsic!");
634 }
635
636 namespace {
637 // ARC cleanup for __builtin_os_log_format
638 struct CallObjCArcUse final : EHScopeStack::Cleanup {
639   CallObjCArcUse(llvm::Value *object) : object(object) {}
640   llvm::Value *object;
641
642   void Emit(CodeGenFunction &CGF, Flags flags) override {
643     CGF.EmitARCIntrinsicUse(object);
644   }
645 };
646 }
647
648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
649                                                  BuiltinCheckKind Kind) {
650   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
651           && "Unsupported builtin check kind");
652
653   Value *ArgValue = EmitScalarExpr(E);
654   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
655     return ArgValue;
656
657   SanitizerScope SanScope(this);
658   Value *Cond = Builder.CreateICmpNE(
659       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
660   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
661             SanitizerHandler::InvalidBuiltin,
662             {EmitCheckSourceLocation(E->getExprLoc()),
663              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
664             None);
665   return ArgValue;
666 }
667
668 /// Get the argument type for arguments to os_log_helper.
669 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
670   QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
671   return C.getCanonicalType(UnsignedTy);
672 }
673
674 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
675     const analyze_os_log::OSLogBufferLayout &Layout,
676     CharUnits BufferAlignment) {
677   ASTContext &Ctx = getContext();
678
679   llvm::SmallString<64> Name;
680   {
681     raw_svector_ostream OS(Name);
682     OS << "__os_log_helper";
683     OS << "_" << BufferAlignment.getQuantity();
684     OS << "_" << int(Layout.getSummaryByte());
685     OS << "_" << int(Layout.getNumArgsByte());
686     for (const auto &Item : Layout.Items)
687       OS << "_" << int(Item.getSizeByte()) << "_"
688          << int(Item.getDescriptorByte());
689   }
690
691   if (llvm::Function *F = CGM.getModule().getFunction(Name))
692     return F;
693
694   llvm::SmallVector<ImplicitParamDecl, 4> Params;
695   Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
696                       Ctx.VoidPtrTy, ImplicitParamDecl::Other);
697
698   for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
699     char Size = Layout.Items[I].getSizeByte();
700     if (!Size)
701       continue;
702
703     Params.emplace_back(
704         Ctx, nullptr, SourceLocation(),
705         &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
706         getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
707   }
708
709   FunctionArgList Args;
710   for (auto &P : Params)
711     Args.push_back(&P);
712
713   // The helper function has linkonce_odr linkage to enable the linker to merge
714   // identical functions. To ensure the merging always happens, 'noinline' is
715   // attached to the function when compiling with -Oz.
716   const CGFunctionInfo &FI =
717       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
718   llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
719   llvm::Function *Fn = llvm::Function::Create(
720       FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
721   Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
722   CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
723   CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
724
725   // Attach 'noinline' at -Oz.
726   if (CGM.getCodeGenOpts().OptimizeSize == 2)
727     Fn->addFnAttr(llvm::Attribute::NoInline);
728
729   auto NL = ApplyDebugLocation::CreateEmpty(*this);
730   IdentifierInfo *II = &Ctx.Idents.get(Name);
731   FunctionDecl *FD = FunctionDecl::Create(
732       Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
733       Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
734
735   StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
736
737   // Create a scope with an artificial location for the body of this function.
738   auto AL = ApplyDebugLocation::CreateArtificial(*this);
739
740   CharUnits Offset;
741   Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
742                   BufferAlignment);
743   Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
744                       Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
745   Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
746                       Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
747
748   unsigned I = 1;
749   for (const auto &Item : Layout.Items) {
750     Builder.CreateStore(
751         Builder.getInt8(Item.getDescriptorByte()),
752         Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
753     Builder.CreateStore(
754         Builder.getInt8(Item.getSizeByte()),
755         Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
756
757     CharUnits Size = Item.size();
758     if (!Size.getQuantity())
759       continue;
760
761     Address Arg = GetAddrOfLocalVar(&Params[I]);
762     Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
763     Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
764                                  "argDataCast");
765     Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
766     Offset += Size;
767     ++I;
768   }
769
770   FinishFunction();
771
772   return Fn;
773 }
774
775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
776   assert(E.getNumArgs() >= 2 &&
777          "__builtin_os_log_format takes at least 2 arguments");
778   ASTContext &Ctx = getContext();
779   analyze_os_log::OSLogBufferLayout Layout;
780   analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
781   Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
782   llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
783
784   // Ignore argument 1, the format string. It is not currently used.
785   CallArgList Args;
786   Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
787
788   for (const auto &Item : Layout.Items) {
789     int Size = Item.getSizeByte();
790     if (!Size)
791       continue;
792
793     llvm::Value *ArgVal;
794
795     if (const Expr *TheExpr = Item.getExpr()) {
796       ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
797
798       // Check if this is a retainable type.
799       if (TheExpr->getType()->isObjCRetainableType()) {
800         assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
801                "Only scalar can be a ObjC retainable type");
802         // Check if the object is constant, if not, save it in
803         // RetainableOperands.
804         if (!isa<Constant>(ArgVal))
805           RetainableOperands.push_back(ArgVal);
806       }
807     } else {
808       ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
809     }
810
811     unsigned ArgValSize =
812         CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
813     llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
814                                                      ArgValSize);
815     ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
816     CanQualType ArgTy = getOSLogArgType(Ctx, Size);
817     // If ArgVal has type x86_fp80, zero-extend ArgVal.
818     ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
819     Args.add(RValue::get(ArgVal), ArgTy);
820   }
821
822   const CGFunctionInfo &FI =
823       CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
824   llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
825       Layout, BufAddr.getAlignment());
826   EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
827
828   // Push a clang.arc.use cleanup for each object in RetainableOperands. The
829   // cleanup will cause the use to appear after the final log call, keeping
830   // the object valid while it’s held in the log buffer.  Note that if there’s
831   // a release cleanup on the object, it will already be active; since
832   // cleanups are emitted in reverse order, the use will occur before the
833   // object is released.
834   if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
835       CGM.getCodeGenOpts().OptimizationLevel != 0)
836     for (llvm::Value *Object : RetainableOperands)
837       pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
838
839   return RValue::get(BufAddr.getPointer());
840 }
841
842 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
843 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
844                                        WidthAndSignedness Op1Info,
845                                        WidthAndSignedness Op2Info,
846                                        WidthAndSignedness ResultInfo) {
847   return BuiltinID == Builtin::BI__builtin_mul_overflow &&
848          Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
849          Op1Info.Signed != Op2Info.Signed;
850 }
851
852 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
853 /// the generic checked-binop irgen.
854 static RValue
855 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
856                              WidthAndSignedness Op1Info, const clang::Expr *Op2,
857                              WidthAndSignedness Op2Info,
858                              const clang::Expr *ResultArg, QualType ResultQTy,
859                              WidthAndSignedness ResultInfo) {
860   assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
861                                     Op2Info, ResultInfo) &&
862          "Not a mixed-sign multipliction we can specialize");
863
864   // Emit the signed and unsigned operands.
865   const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
866   const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
867   llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
868   llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
869
870   llvm::Type *OpTy = Signed->getType();
871   llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
872   Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
873   llvm::Type *ResTy = ResultPtr.getElementType();
874
875   // Take the absolute value of the signed operand.
876   llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
877   llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
878   llvm::Value *AbsSigned =
879       CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
880
881   // Perform a checked unsigned multiplication.
882   llvm::Value *UnsignedOverflow;
883   llvm::Value *UnsignedResult =
884       EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
885                             Unsigned, UnsignedOverflow);
886
887   llvm::Value *Overflow, *Result;
888   if (ResultInfo.Signed) {
889     // Signed overflow occurs if the result is greater than INT_MAX or lesser
890     // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
891     auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
892                       .zextOrSelf(Op1Info.Width);
893     llvm::Value *MaxResult =
894         CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
895                               CGF.Builder.CreateZExt(IsNegative, OpTy));
896     llvm::Value *SignedOverflow =
897         CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
898     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
899
900     // Prepare the signed result (possibly by negating it).
901     llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
902     llvm::Value *SignedResult =
903         CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
904     Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
905   } else {
906     // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
907     llvm::Value *Underflow = CGF.Builder.CreateAnd(
908         IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
909     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
910     if (ResultInfo.Width < Op1Info.Width) {
911       auto IntMax =
912           llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
913       llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
914           UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
915       Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
916     }
917
918     Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
919   }
920   assert(Overflow && Result && "Missing overflow or result");
921
922   bool isVolatile =
923       ResultArg->getType()->getPointeeType().isVolatileQualified();
924   CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
925                           isVolatile);
926   return RValue::get(Overflow);
927 }
928
929 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
930                                         unsigned BuiltinID, const CallExpr *E,
931                                         ReturnValueSlot ReturnValue) {
932   // See if we can constant fold this builtin.  If so, don't emit it at all.
933   Expr::EvalResult Result;
934   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
935       !Result.hasSideEffects()) {
936     if (Result.Val.isInt())
937       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
938                                                 Result.Val.getInt()));
939     if (Result.Val.isFloat())
940       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
941                                                Result.Val.getFloat()));
942   }
943
944   // There are LLVM math intrinsics/instructions corresponding to math library
945   // functions except the LLVM op will never set errno while the math library
946   // might. Also, math builtins have the same semantics as their math library
947   // twins. Thus, we can transform math library and builtin calls to their
948   // LLVM counterparts if the call is marked 'const' (known to never set errno).
949   if (FD->hasAttr<ConstAttr>()) {
950     switch (BuiltinID) {
951     case Builtin::BIceil:
952     case Builtin::BIceilf:
953     case Builtin::BIceill:
954     case Builtin::BI__builtin_ceil:
955     case Builtin::BI__builtin_ceilf:
956     case Builtin::BI__builtin_ceill:
957       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
958
959     case Builtin::BIcopysign:
960     case Builtin::BIcopysignf:
961     case Builtin::BIcopysignl:
962     case Builtin::BI__builtin_copysign:
963     case Builtin::BI__builtin_copysignf:
964     case Builtin::BI__builtin_copysignl:
965       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
966
967     case Builtin::BIcos:
968     case Builtin::BIcosf:
969     case Builtin::BIcosl:
970     case Builtin::BI__builtin_cos:
971     case Builtin::BI__builtin_cosf:
972     case Builtin::BI__builtin_cosl:
973       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
974
975     case Builtin::BIexp:
976     case Builtin::BIexpf:
977     case Builtin::BIexpl:
978     case Builtin::BI__builtin_exp:
979     case Builtin::BI__builtin_expf:
980     case Builtin::BI__builtin_expl:
981       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
982
983     case Builtin::BIexp2:
984     case Builtin::BIexp2f:
985     case Builtin::BIexp2l:
986     case Builtin::BI__builtin_exp2:
987     case Builtin::BI__builtin_exp2f:
988     case Builtin::BI__builtin_exp2l:
989       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
990
991     case Builtin::BIfabs:
992     case Builtin::BIfabsf:
993     case Builtin::BIfabsl:
994     case Builtin::BI__builtin_fabs:
995     case Builtin::BI__builtin_fabsf:
996     case Builtin::BI__builtin_fabsl:
997       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
998
999     case Builtin::BIfloor:
1000     case Builtin::BIfloorf:
1001     case Builtin::BIfloorl:
1002     case Builtin::BI__builtin_floor:
1003     case Builtin::BI__builtin_floorf:
1004     case Builtin::BI__builtin_floorl:
1005       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1006
1007     case Builtin::BIfma:
1008     case Builtin::BIfmaf:
1009     case Builtin::BIfmal:
1010     case Builtin::BI__builtin_fma:
1011     case Builtin::BI__builtin_fmaf:
1012     case Builtin::BI__builtin_fmal:
1013       return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1014
1015     case Builtin::BIfmax:
1016     case Builtin::BIfmaxf:
1017     case Builtin::BIfmaxl:
1018     case Builtin::BI__builtin_fmax:
1019     case Builtin::BI__builtin_fmaxf:
1020     case Builtin::BI__builtin_fmaxl:
1021       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1022
1023     case Builtin::BIfmin:
1024     case Builtin::BIfminf:
1025     case Builtin::BIfminl:
1026     case Builtin::BI__builtin_fmin:
1027     case Builtin::BI__builtin_fminf:
1028     case Builtin::BI__builtin_fminl:
1029       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1030
1031     // fmod() is a special-case. It maps to the frem instruction rather than an
1032     // LLVM intrinsic.
1033     case Builtin::BIfmod:
1034     case Builtin::BIfmodf:
1035     case Builtin::BIfmodl:
1036     case Builtin::BI__builtin_fmod:
1037     case Builtin::BI__builtin_fmodf:
1038     case Builtin::BI__builtin_fmodl: {
1039       Value *Arg1 = EmitScalarExpr(E->getArg(0));
1040       Value *Arg2 = EmitScalarExpr(E->getArg(1));
1041       return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1042     }
1043
1044     case Builtin::BIlog:
1045     case Builtin::BIlogf:
1046     case Builtin::BIlogl:
1047     case Builtin::BI__builtin_log:
1048     case Builtin::BI__builtin_logf:
1049     case Builtin::BI__builtin_logl:
1050       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1051
1052     case Builtin::BIlog10:
1053     case Builtin::BIlog10f:
1054     case Builtin::BIlog10l:
1055     case Builtin::BI__builtin_log10:
1056     case Builtin::BI__builtin_log10f:
1057     case Builtin::BI__builtin_log10l:
1058       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1059
1060     case Builtin::BIlog2:
1061     case Builtin::BIlog2f:
1062     case Builtin::BIlog2l:
1063     case Builtin::BI__builtin_log2:
1064     case Builtin::BI__builtin_log2f:
1065     case Builtin::BI__builtin_log2l:
1066       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1067
1068     case Builtin::BInearbyint:
1069     case Builtin::BInearbyintf:
1070     case Builtin::BInearbyintl:
1071     case Builtin::BI__builtin_nearbyint:
1072     case Builtin::BI__builtin_nearbyintf:
1073     case Builtin::BI__builtin_nearbyintl:
1074       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1075
1076     case Builtin::BIpow:
1077     case Builtin::BIpowf:
1078     case Builtin::BIpowl:
1079     case Builtin::BI__builtin_pow:
1080     case Builtin::BI__builtin_powf:
1081     case Builtin::BI__builtin_powl:
1082       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1083
1084     case Builtin::BIrint:
1085     case Builtin::BIrintf:
1086     case Builtin::BIrintl:
1087     case Builtin::BI__builtin_rint:
1088     case Builtin::BI__builtin_rintf:
1089     case Builtin::BI__builtin_rintl:
1090       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1091
1092     case Builtin::BIround:
1093     case Builtin::BIroundf:
1094     case Builtin::BIroundl:
1095     case Builtin::BI__builtin_round:
1096     case Builtin::BI__builtin_roundf:
1097     case Builtin::BI__builtin_roundl:
1098       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1099
1100     case Builtin::BIsin:
1101     case Builtin::BIsinf:
1102     case Builtin::BIsinl:
1103     case Builtin::BI__builtin_sin:
1104     case Builtin::BI__builtin_sinf:
1105     case Builtin::BI__builtin_sinl:
1106       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1107
1108     case Builtin::BIsqrt:
1109     case Builtin::BIsqrtf:
1110     case Builtin::BIsqrtl:
1111     case Builtin::BI__builtin_sqrt:
1112     case Builtin::BI__builtin_sqrtf:
1113     case Builtin::BI__builtin_sqrtl:
1114       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1115
1116     case Builtin::BItrunc:
1117     case Builtin::BItruncf:
1118     case Builtin::BItruncl:
1119     case Builtin::BI__builtin_trunc:
1120     case Builtin::BI__builtin_truncf:
1121     case Builtin::BI__builtin_truncl:
1122       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1123
1124     default:
1125       break;
1126     }
1127   }
1128
1129   switch (BuiltinID) {
1130   default: break;
1131   case Builtin::BI__builtin___CFStringMakeConstantString:
1132   case Builtin::BI__builtin___NSStringMakeConstantString:
1133     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1134   case Builtin::BI__builtin_stdarg_start:
1135   case Builtin::BI__builtin_va_start:
1136   case Builtin::BI__va_start:
1137   case Builtin::BI__builtin_va_end:
1138     return RValue::get(
1139         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1140                            ? EmitScalarExpr(E->getArg(0))
1141                            : EmitVAListRef(E->getArg(0)).getPointer(),
1142                        BuiltinID != Builtin::BI__builtin_va_end));
1143   case Builtin::BI__builtin_va_copy: {
1144     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1145     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1146
1147     llvm::Type *Type = Int8PtrTy;
1148
1149     DstPtr = Builder.CreateBitCast(DstPtr, Type);
1150     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1151     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1152                                           {DstPtr, SrcPtr}));
1153   }
1154   case Builtin::BI__builtin_abs:
1155   case Builtin::BI__builtin_labs:
1156   case Builtin::BI__builtin_llabs: {
1157     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1158
1159     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
1160     Value *CmpResult =
1161     Builder.CreateICmpSGE(ArgValue,
1162                           llvm::Constant::getNullValue(ArgValue->getType()),
1163                                                             "abscond");
1164     Value *Result =
1165       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
1166
1167     return RValue::get(Result);
1168   }
1169   case Builtin::BI__builtin_conj:
1170   case Builtin::BI__builtin_conjf:
1171   case Builtin::BI__builtin_conjl: {
1172     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1173     Value *Real = ComplexVal.first;
1174     Value *Imag = ComplexVal.second;
1175     Value *Zero =
1176       Imag->getType()->isFPOrFPVectorTy()
1177         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1178         : llvm::Constant::getNullValue(Imag->getType());
1179
1180     Imag = Builder.CreateFSub(Zero, Imag, "sub");
1181     return RValue::getComplex(std::make_pair(Real, Imag));
1182   }
1183   case Builtin::BI__builtin_creal:
1184   case Builtin::BI__builtin_crealf:
1185   case Builtin::BI__builtin_creall:
1186   case Builtin::BIcreal:
1187   case Builtin::BIcrealf:
1188   case Builtin::BIcreall: {
1189     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1190     return RValue::get(ComplexVal.first);
1191   }
1192
1193   case Builtin::BI__builtin_cimag:
1194   case Builtin::BI__builtin_cimagf:
1195   case Builtin::BI__builtin_cimagl:
1196   case Builtin::BIcimag:
1197   case Builtin::BIcimagf:
1198   case Builtin::BIcimagl: {
1199     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1200     return RValue::get(ComplexVal.second);
1201   }
1202
1203   case Builtin::BI__builtin_ctzs:
1204   case Builtin::BI__builtin_ctz:
1205   case Builtin::BI__builtin_ctzl:
1206   case Builtin::BI__builtin_ctzll: {
1207     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1208
1209     llvm::Type *ArgType = ArgValue->getType();
1210     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1211
1212     llvm::Type *ResultType = ConvertType(E->getType());
1213     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1214     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1215     if (Result->getType() != ResultType)
1216       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1217                                      "cast");
1218     return RValue::get(Result);
1219   }
1220   case Builtin::BI__builtin_clzs:
1221   case Builtin::BI__builtin_clz:
1222   case Builtin::BI__builtin_clzl:
1223   case Builtin::BI__builtin_clzll: {
1224     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1225
1226     llvm::Type *ArgType = ArgValue->getType();
1227     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1228
1229     llvm::Type *ResultType = ConvertType(E->getType());
1230     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1231     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1232     if (Result->getType() != ResultType)
1233       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1234                                      "cast");
1235     return RValue::get(Result);
1236   }
1237   case Builtin::BI__builtin_ffs:
1238   case Builtin::BI__builtin_ffsl:
1239   case Builtin::BI__builtin_ffsll: {
1240     // ffs(x) -> x ? cttz(x) + 1 : 0
1241     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1242
1243     llvm::Type *ArgType = ArgValue->getType();
1244     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1245
1246     llvm::Type *ResultType = ConvertType(E->getType());
1247     Value *Tmp =
1248         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1249                           llvm::ConstantInt::get(ArgType, 1));
1250     Value *Zero = llvm::Constant::getNullValue(ArgType);
1251     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1252     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1253     if (Result->getType() != ResultType)
1254       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1255                                      "cast");
1256     return RValue::get(Result);
1257   }
1258   case Builtin::BI__builtin_parity:
1259   case Builtin::BI__builtin_parityl:
1260   case Builtin::BI__builtin_parityll: {
1261     // parity(x) -> ctpop(x) & 1
1262     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1263
1264     llvm::Type *ArgType = ArgValue->getType();
1265     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1266
1267     llvm::Type *ResultType = ConvertType(E->getType());
1268     Value *Tmp = Builder.CreateCall(F, ArgValue);
1269     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1270     if (Result->getType() != ResultType)
1271       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1272                                      "cast");
1273     return RValue::get(Result);
1274   }
1275   case Builtin::BI__popcnt16:
1276   case Builtin::BI__popcnt:
1277   case Builtin::BI__popcnt64:
1278   case Builtin::BI__builtin_popcount:
1279   case Builtin::BI__builtin_popcountl:
1280   case Builtin::BI__builtin_popcountll: {
1281     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1282
1283     llvm::Type *ArgType = ArgValue->getType();
1284     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1285
1286     llvm::Type *ResultType = ConvertType(E->getType());
1287     Value *Result = Builder.CreateCall(F, ArgValue);
1288     if (Result->getType() != ResultType)
1289       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1290                                      "cast");
1291     return RValue::get(Result);
1292   }
1293   case Builtin::BI_rotr8:
1294   case Builtin::BI_rotr16:
1295   case Builtin::BI_rotr:
1296   case Builtin::BI_lrotr:
1297   case Builtin::BI_rotr64: {
1298     Value *Val = EmitScalarExpr(E->getArg(0));
1299     Value *Shift = EmitScalarExpr(E->getArg(1));
1300
1301     llvm::Type *ArgType = Val->getType();
1302     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1303     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1304     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1305     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1306
1307     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1308     Shift = Builder.CreateAnd(Shift, Mask);
1309     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1310
1311     Value *RightShifted = Builder.CreateLShr(Val, Shift);
1312     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1313     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1314
1315     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1316     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1317     return RValue::get(Result);
1318   }
1319   case Builtin::BI_rotl8:
1320   case Builtin::BI_rotl16:
1321   case Builtin::BI_rotl:
1322   case Builtin::BI_lrotl:
1323   case Builtin::BI_rotl64: {
1324     Value *Val = EmitScalarExpr(E->getArg(0));
1325     Value *Shift = EmitScalarExpr(E->getArg(1));
1326
1327     llvm::Type *ArgType = Val->getType();
1328     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1329     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1330     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1331     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1332
1333     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1334     Shift = Builder.CreateAnd(Shift, Mask);
1335     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1336
1337     Value *LeftShifted = Builder.CreateShl(Val, Shift);
1338     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1339     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1340
1341     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1342     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1343     return RValue::get(Result);
1344   }
1345   case Builtin::BI__builtin_unpredictable: {
1346     // Always return the argument of __builtin_unpredictable. LLVM does not
1347     // handle this builtin. Metadata for this builtin should be added directly
1348     // to instructions such as branches or switches that use it.
1349     return RValue::get(EmitScalarExpr(E->getArg(0)));
1350   }
1351   case Builtin::BI__builtin_expect: {
1352     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1353     llvm::Type *ArgType = ArgValue->getType();
1354
1355     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1356     // Don't generate llvm.expect on -O0 as the backend won't use it for
1357     // anything.
1358     // Note, we still IRGen ExpectedValue because it could have side-effects.
1359     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1360       return RValue::get(ArgValue);
1361
1362     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1363     Value *Result =
1364         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1365     return RValue::get(Result);
1366   }
1367   case Builtin::BI__builtin_assume_aligned: {
1368     Value *PtrValue = EmitScalarExpr(E->getArg(0));
1369     Value *OffsetValue =
1370       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1371
1372     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1373     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1374     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1375
1376     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1377     return RValue::get(PtrValue);
1378   }
1379   case Builtin::BI__assume:
1380   case Builtin::BI__builtin_assume: {
1381     if (E->getArg(0)->HasSideEffects(getContext()))
1382       return RValue::get(nullptr);
1383
1384     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1385     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1386     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1387   }
1388   case Builtin::BI__builtin_bswap16:
1389   case Builtin::BI__builtin_bswap32:
1390   case Builtin::BI__builtin_bswap64: {
1391     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1392   }
1393   case Builtin::BI__builtin_bitreverse8:
1394   case Builtin::BI__builtin_bitreverse16:
1395   case Builtin::BI__builtin_bitreverse32:
1396   case Builtin::BI__builtin_bitreverse64: {
1397     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1398   }
1399   case Builtin::BI__builtin_object_size: {
1400     unsigned Type =
1401         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1402     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1403
1404     // We pass this builtin onto the optimizer so that it can figure out the
1405     // object size in more complex cases.
1406     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1407                                              /*EmittedE=*/nullptr));
1408   }
1409   case Builtin::BI__builtin_prefetch: {
1410     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1411     // FIXME: Technically these constants should of type 'int', yes?
1412     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1413       llvm::ConstantInt::get(Int32Ty, 0);
1414     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1415       llvm::ConstantInt::get(Int32Ty, 3);
1416     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1417     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1418     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1419   }
1420   case Builtin::BI__builtin_readcyclecounter: {
1421     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1422     return RValue::get(Builder.CreateCall(F));
1423   }
1424   case Builtin::BI__builtin___clear_cache: {
1425     Value *Begin = EmitScalarExpr(E->getArg(0));
1426     Value *End = EmitScalarExpr(E->getArg(1));
1427     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1428     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1429   }
1430   case Builtin::BI__builtin_trap:
1431     return RValue::get(EmitTrapCall(Intrinsic::trap));
1432   case Builtin::BI__debugbreak:
1433     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1434   case Builtin::BI__builtin_unreachable: {
1435     EmitUnreachable(E->getExprLoc());
1436
1437     // We do need to preserve an insertion point.
1438     EmitBlock(createBasicBlock("unreachable.cont"));
1439
1440     return RValue::get(nullptr);
1441   }
1442
1443   case Builtin::BI__builtin_powi:
1444   case Builtin::BI__builtin_powif:
1445   case Builtin::BI__builtin_powil: {
1446     Value *Base = EmitScalarExpr(E->getArg(0));
1447     Value *Exponent = EmitScalarExpr(E->getArg(1));
1448     llvm::Type *ArgType = Base->getType();
1449     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1450     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1451   }
1452
1453   case Builtin::BI__builtin_isgreater:
1454   case Builtin::BI__builtin_isgreaterequal:
1455   case Builtin::BI__builtin_isless:
1456   case Builtin::BI__builtin_islessequal:
1457   case Builtin::BI__builtin_islessgreater:
1458   case Builtin::BI__builtin_isunordered: {
1459     // Ordered comparisons: we know the arguments to these are matching scalar
1460     // floating point values.
1461     Value *LHS = EmitScalarExpr(E->getArg(0));
1462     Value *RHS = EmitScalarExpr(E->getArg(1));
1463
1464     switch (BuiltinID) {
1465     default: llvm_unreachable("Unknown ordered comparison");
1466     case Builtin::BI__builtin_isgreater:
1467       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1468       break;
1469     case Builtin::BI__builtin_isgreaterequal:
1470       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1471       break;
1472     case Builtin::BI__builtin_isless:
1473       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1474       break;
1475     case Builtin::BI__builtin_islessequal:
1476       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1477       break;
1478     case Builtin::BI__builtin_islessgreater:
1479       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1480       break;
1481     case Builtin::BI__builtin_isunordered:
1482       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1483       break;
1484     }
1485     // ZExt bool to int type.
1486     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1487   }
1488   case Builtin::BI__builtin_isnan: {
1489     Value *V = EmitScalarExpr(E->getArg(0));
1490     V = Builder.CreateFCmpUNO(V, V, "cmp");
1491     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1492   }
1493
1494   case Builtin::BIfinite:
1495   case Builtin::BI__finite:
1496   case Builtin::BIfinitef:
1497   case Builtin::BI__finitef:
1498   case Builtin::BIfinitel:
1499   case Builtin::BI__finitel:
1500   case Builtin::BI__builtin_isinf:
1501   case Builtin::BI__builtin_isfinite: {
1502     // isinf(x)    --> fabs(x) == infinity
1503     // isfinite(x) --> fabs(x) != infinity
1504     // x != NaN via the ordered compare in either case.
1505     Value *V = EmitScalarExpr(E->getArg(0));
1506     Value *Fabs = EmitFAbs(*this, V);
1507     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1508     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1509                                   ? CmpInst::FCMP_OEQ
1510                                   : CmpInst::FCMP_ONE;
1511     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1512     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1513   }
1514
1515   case Builtin::BI__builtin_isinf_sign: {
1516     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1517     Value *Arg = EmitScalarExpr(E->getArg(0));
1518     Value *AbsArg = EmitFAbs(*this, Arg);
1519     Value *IsInf = Builder.CreateFCmpOEQ(
1520         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1521     Value *IsNeg = EmitSignBit(*this, Arg);
1522
1523     llvm::Type *IntTy = ConvertType(E->getType());
1524     Value *Zero = Constant::getNullValue(IntTy);
1525     Value *One = ConstantInt::get(IntTy, 1);
1526     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1527     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1528     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1529     return RValue::get(Result);
1530   }
1531
1532   case Builtin::BI__builtin_isnormal: {
1533     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1534     Value *V = EmitScalarExpr(E->getArg(0));
1535     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1536
1537     Value *Abs = EmitFAbs(*this, V);
1538     Value *IsLessThanInf =
1539       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1540     APFloat Smallest = APFloat::getSmallestNormalized(
1541                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1542     Value *IsNormal =
1543       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1544                             "isnormal");
1545     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1546     V = Builder.CreateAnd(V, IsNormal, "and");
1547     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1548   }
1549
1550   case Builtin::BI__builtin_fpclassify: {
1551     Value *V = EmitScalarExpr(E->getArg(5));
1552     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1553
1554     // Create Result
1555     BasicBlock *Begin = Builder.GetInsertBlock();
1556     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1557     Builder.SetInsertPoint(End);
1558     PHINode *Result =
1559       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1560                         "fpclassify_result");
1561
1562     // if (V==0) return FP_ZERO
1563     Builder.SetInsertPoint(Begin);
1564     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1565                                           "iszero");
1566     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1567     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1568     Builder.CreateCondBr(IsZero, End, NotZero);
1569     Result->addIncoming(ZeroLiteral, Begin);
1570
1571     // if (V != V) return FP_NAN
1572     Builder.SetInsertPoint(NotZero);
1573     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1574     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1575     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1576     Builder.CreateCondBr(IsNan, End, NotNan);
1577     Result->addIncoming(NanLiteral, NotZero);
1578
1579     // if (fabs(V) == infinity) return FP_INFINITY
1580     Builder.SetInsertPoint(NotNan);
1581     Value *VAbs = EmitFAbs(*this, V);
1582     Value *IsInf =
1583       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1584                             "isinf");
1585     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1586     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1587     Builder.CreateCondBr(IsInf, End, NotInf);
1588     Result->addIncoming(InfLiteral, NotNan);
1589
1590     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1591     Builder.SetInsertPoint(NotInf);
1592     APFloat Smallest = APFloat::getSmallestNormalized(
1593         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1594     Value *IsNormal =
1595       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1596                             "isnormal");
1597     Value *NormalResult =
1598       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1599                            EmitScalarExpr(E->getArg(3)));
1600     Builder.CreateBr(End);
1601     Result->addIncoming(NormalResult, NotInf);
1602
1603     // return Result
1604     Builder.SetInsertPoint(End);
1605     return RValue::get(Result);
1606   }
1607
1608   case Builtin::BIalloca:
1609   case Builtin::BI_alloca:
1610   case Builtin::BI__builtin_alloca: {
1611     Value *Size = EmitScalarExpr(E->getArg(0));
1612     const TargetInfo &TI = getContext().getTargetInfo();
1613     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1614     unsigned SuitableAlignmentInBytes =
1615         CGM.getContext()
1616             .toCharUnitsFromBits(TI.getSuitableAlign())
1617             .getQuantity();
1618     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1619     AI->setAlignment(SuitableAlignmentInBytes);
1620     return RValue::get(AI);
1621   }
1622
1623   case Builtin::BI__builtin_alloca_with_align: {
1624     Value *Size = EmitScalarExpr(E->getArg(0));
1625     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1626     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1627     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1628     unsigned AlignmentInBytes =
1629         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1630     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1631     AI->setAlignment(AlignmentInBytes);
1632     return RValue::get(AI);
1633   }
1634
1635   case Builtin::BIbzero:
1636   case Builtin::BI__builtin_bzero: {
1637     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1638     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1639     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1640                         E->getArg(0)->getExprLoc(), FD, 0);
1641     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1642     return RValue::get(nullptr);
1643   }
1644   case Builtin::BImemcpy:
1645   case Builtin::BI__builtin_memcpy: {
1646     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1647     Address Src = EmitPointerWithAlignment(E->getArg(1));
1648     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1649     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1650                         E->getArg(0)->getExprLoc(), FD, 0);
1651     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1652                         E->getArg(1)->getExprLoc(), FD, 1);
1653     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1654     return RValue::get(Dest.getPointer());
1655   }
1656
1657   case Builtin::BI__builtin_char_memchr:
1658     BuiltinID = Builtin::BI__builtin_memchr;
1659     break;
1660
1661   case Builtin::BI__builtin___memcpy_chk: {
1662     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1663     llvm::APSInt Size, DstSize;
1664     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1665         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1666       break;
1667     if (Size.ugt(DstSize))
1668       break;
1669     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1670     Address Src = EmitPointerWithAlignment(E->getArg(1));
1671     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1672     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1673     return RValue::get(Dest.getPointer());
1674   }
1675
1676   case Builtin::BI__builtin_objc_memmove_collectable: {
1677     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1678     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1679     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1680     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1681                                                   DestAddr, SrcAddr, SizeVal);
1682     return RValue::get(DestAddr.getPointer());
1683   }
1684
1685   case Builtin::BI__builtin___memmove_chk: {
1686     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1687     llvm::APSInt Size, DstSize;
1688     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1689         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1690       break;
1691     if (Size.ugt(DstSize))
1692       break;
1693     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1694     Address Src = EmitPointerWithAlignment(E->getArg(1));
1695     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1696     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1697     return RValue::get(Dest.getPointer());
1698   }
1699
1700   case Builtin::BImemmove:
1701   case Builtin::BI__builtin_memmove: {
1702     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1703     Address Src = EmitPointerWithAlignment(E->getArg(1));
1704     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1705     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1706                         E->getArg(0)->getExprLoc(), FD, 0);
1707     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1708                         E->getArg(1)->getExprLoc(), FD, 1);
1709     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1710     return RValue::get(Dest.getPointer());
1711   }
1712   case Builtin::BImemset:
1713   case Builtin::BI__builtin_memset: {
1714     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1715     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1716                                          Builder.getInt8Ty());
1717     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1718     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1719                         E->getArg(0)->getExprLoc(), FD, 0);
1720     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1721     return RValue::get(Dest.getPointer());
1722   }
1723   case Builtin::BI__builtin___memset_chk: {
1724     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1725     llvm::APSInt Size, DstSize;
1726     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1727         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1728       break;
1729     if (Size.ugt(DstSize))
1730       break;
1731     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1732     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1733                                          Builder.getInt8Ty());
1734     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1735     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1736     return RValue::get(Dest.getPointer());
1737   }
1738   case Builtin::BI__builtin_dwarf_cfa: {
1739     // The offset in bytes from the first argument to the CFA.
1740     //
1741     // Why on earth is this in the frontend?  Is there any reason at
1742     // all that the backend can't reasonably determine this while
1743     // lowering llvm.eh.dwarf.cfa()?
1744     //
1745     // TODO: If there's a satisfactory reason, add a target hook for
1746     // this instead of hard-coding 0, which is correct for most targets.
1747     int32_t Offset = 0;
1748
1749     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1750     return RValue::get(Builder.CreateCall(F,
1751                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1752   }
1753   case Builtin::BI__builtin_return_address: {
1754     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1755                                                    getContext().UnsignedIntTy);
1756     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1757     return RValue::get(Builder.CreateCall(F, Depth));
1758   }
1759   case Builtin::BI_ReturnAddress: {
1760     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1761     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1762   }
1763   case Builtin::BI__builtin_frame_address: {
1764     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1765                                                    getContext().UnsignedIntTy);
1766     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1767     return RValue::get(Builder.CreateCall(F, Depth));
1768   }
1769   case Builtin::BI__builtin_extract_return_addr: {
1770     Value *Address = EmitScalarExpr(E->getArg(0));
1771     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1772     return RValue::get(Result);
1773   }
1774   case Builtin::BI__builtin_frob_return_addr: {
1775     Value *Address = EmitScalarExpr(E->getArg(0));
1776     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1777     return RValue::get(Result);
1778   }
1779   case Builtin::BI__builtin_dwarf_sp_column: {
1780     llvm::IntegerType *Ty
1781       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1782     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1783     if (Column == -1) {
1784       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1785       return RValue::get(llvm::UndefValue::get(Ty));
1786     }
1787     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1788   }
1789   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1790     Value *Address = EmitScalarExpr(E->getArg(0));
1791     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1792       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1793     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1794   }
1795   case Builtin::BI__builtin_eh_return: {
1796     Value *Int = EmitScalarExpr(E->getArg(0));
1797     Value *Ptr = EmitScalarExpr(E->getArg(1));
1798
1799     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1800     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1801            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1802     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1803                                   ? Intrinsic::eh_return_i32
1804                                   : Intrinsic::eh_return_i64);
1805     Builder.CreateCall(F, {Int, Ptr});
1806     Builder.CreateUnreachable();
1807
1808     // We do need to preserve an insertion point.
1809     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1810
1811     return RValue::get(nullptr);
1812   }
1813   case Builtin::BI__builtin_unwind_init: {
1814     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1815     return RValue::get(Builder.CreateCall(F));
1816   }
1817   case Builtin::BI__builtin_extend_pointer: {
1818     // Extends a pointer to the size of an _Unwind_Word, which is
1819     // uint64_t on all platforms.  Generally this gets poked into a
1820     // register and eventually used as an address, so if the
1821     // addressing registers are wider than pointers and the platform
1822     // doesn't implicitly ignore high-order bits when doing
1823     // addressing, we need to make sure we zext / sext based on
1824     // the platform's expectations.
1825     //
1826     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1827
1828     // Cast the pointer to intptr_t.
1829     Value *Ptr = EmitScalarExpr(E->getArg(0));
1830     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1831
1832     // If that's 64 bits, we're done.
1833     if (IntPtrTy->getBitWidth() == 64)
1834       return RValue::get(Result);
1835
1836     // Otherwise, ask the codegen data what to do.
1837     if (getTargetHooks().extendPointerWithSExt())
1838       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1839     else
1840       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1841   }
1842   case Builtin::BI__builtin_setjmp: {
1843     // Buffer is a void**.
1844     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1845
1846     // Store the frame pointer to the setjmp buffer.
1847     Value *FrameAddr =
1848       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1849                          ConstantInt::get(Int32Ty, 0));
1850     Builder.CreateStore(FrameAddr, Buf);
1851
1852     // Store the stack pointer to the setjmp buffer.
1853     Value *StackAddr =
1854         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1855     Address StackSaveSlot =
1856       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1857     Builder.CreateStore(StackAddr, StackSaveSlot);
1858
1859     // Call LLVM's EH setjmp, which is lightweight.
1860     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1861     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1862     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1863   }
1864   case Builtin::BI__builtin_longjmp: {
1865     Value *Buf = EmitScalarExpr(E->getArg(0));
1866     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1867
1868     // Call LLVM's EH longjmp, which is lightweight.
1869     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1870
1871     // longjmp doesn't return; mark this as unreachable.
1872     Builder.CreateUnreachable();
1873
1874     // We do need to preserve an insertion point.
1875     EmitBlock(createBasicBlock("longjmp.cont"));
1876
1877     return RValue::get(nullptr);
1878   }
1879   case Builtin::BI__sync_fetch_and_add:
1880   case Builtin::BI__sync_fetch_and_sub:
1881   case Builtin::BI__sync_fetch_and_or:
1882   case Builtin::BI__sync_fetch_and_and:
1883   case Builtin::BI__sync_fetch_and_xor:
1884   case Builtin::BI__sync_fetch_and_nand:
1885   case Builtin::BI__sync_add_and_fetch:
1886   case Builtin::BI__sync_sub_and_fetch:
1887   case Builtin::BI__sync_and_and_fetch:
1888   case Builtin::BI__sync_or_and_fetch:
1889   case Builtin::BI__sync_xor_and_fetch:
1890   case Builtin::BI__sync_nand_and_fetch:
1891   case Builtin::BI__sync_val_compare_and_swap:
1892   case Builtin::BI__sync_bool_compare_and_swap:
1893   case Builtin::BI__sync_lock_test_and_set:
1894   case Builtin::BI__sync_lock_release:
1895   case Builtin::BI__sync_swap:
1896     llvm_unreachable("Shouldn't make it through sema");
1897   case Builtin::BI__sync_fetch_and_add_1:
1898   case Builtin::BI__sync_fetch_and_add_2:
1899   case Builtin::BI__sync_fetch_and_add_4:
1900   case Builtin::BI__sync_fetch_and_add_8:
1901   case Builtin::BI__sync_fetch_and_add_16:
1902     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1903   case Builtin::BI__sync_fetch_and_sub_1:
1904   case Builtin::BI__sync_fetch_and_sub_2:
1905   case Builtin::BI__sync_fetch_and_sub_4:
1906   case Builtin::BI__sync_fetch_and_sub_8:
1907   case Builtin::BI__sync_fetch_and_sub_16:
1908     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1909   case Builtin::BI__sync_fetch_and_or_1:
1910   case Builtin::BI__sync_fetch_and_or_2:
1911   case Builtin::BI__sync_fetch_and_or_4:
1912   case Builtin::BI__sync_fetch_and_or_8:
1913   case Builtin::BI__sync_fetch_and_or_16:
1914     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1915   case Builtin::BI__sync_fetch_and_and_1:
1916   case Builtin::BI__sync_fetch_and_and_2:
1917   case Builtin::BI__sync_fetch_and_and_4:
1918   case Builtin::BI__sync_fetch_and_and_8:
1919   case Builtin::BI__sync_fetch_and_and_16:
1920     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1921   case Builtin::BI__sync_fetch_and_xor_1:
1922   case Builtin::BI__sync_fetch_and_xor_2:
1923   case Builtin::BI__sync_fetch_and_xor_4:
1924   case Builtin::BI__sync_fetch_and_xor_8:
1925   case Builtin::BI__sync_fetch_and_xor_16:
1926     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1927   case Builtin::BI__sync_fetch_and_nand_1:
1928   case Builtin::BI__sync_fetch_and_nand_2:
1929   case Builtin::BI__sync_fetch_and_nand_4:
1930   case Builtin::BI__sync_fetch_and_nand_8:
1931   case Builtin::BI__sync_fetch_and_nand_16:
1932     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1933
1934   // Clang extensions: not overloaded yet.
1935   case Builtin::BI__sync_fetch_and_min:
1936     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1937   case Builtin::BI__sync_fetch_and_max:
1938     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1939   case Builtin::BI__sync_fetch_and_umin:
1940     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1941   case Builtin::BI__sync_fetch_and_umax:
1942     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1943
1944   case Builtin::BI__sync_add_and_fetch_1:
1945   case Builtin::BI__sync_add_and_fetch_2:
1946   case Builtin::BI__sync_add_and_fetch_4:
1947   case Builtin::BI__sync_add_and_fetch_8:
1948   case Builtin::BI__sync_add_and_fetch_16:
1949     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1950                                 llvm::Instruction::Add);
1951   case Builtin::BI__sync_sub_and_fetch_1:
1952   case Builtin::BI__sync_sub_and_fetch_2:
1953   case Builtin::BI__sync_sub_and_fetch_4:
1954   case Builtin::BI__sync_sub_and_fetch_8:
1955   case Builtin::BI__sync_sub_and_fetch_16:
1956     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1957                                 llvm::Instruction::Sub);
1958   case Builtin::BI__sync_and_and_fetch_1:
1959   case Builtin::BI__sync_and_and_fetch_2:
1960   case Builtin::BI__sync_and_and_fetch_4:
1961   case Builtin::BI__sync_and_and_fetch_8:
1962   case Builtin::BI__sync_and_and_fetch_16:
1963     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1964                                 llvm::Instruction::And);
1965   case Builtin::BI__sync_or_and_fetch_1:
1966   case Builtin::BI__sync_or_and_fetch_2:
1967   case Builtin::BI__sync_or_and_fetch_4:
1968   case Builtin::BI__sync_or_and_fetch_8:
1969   case Builtin::BI__sync_or_and_fetch_16:
1970     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1971                                 llvm::Instruction::Or);
1972   case Builtin::BI__sync_xor_and_fetch_1:
1973   case Builtin::BI__sync_xor_and_fetch_2:
1974   case Builtin::BI__sync_xor_and_fetch_4:
1975   case Builtin::BI__sync_xor_and_fetch_8:
1976   case Builtin::BI__sync_xor_and_fetch_16:
1977     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1978                                 llvm::Instruction::Xor);
1979   case Builtin::BI__sync_nand_and_fetch_1:
1980   case Builtin::BI__sync_nand_and_fetch_2:
1981   case Builtin::BI__sync_nand_and_fetch_4:
1982   case Builtin::BI__sync_nand_and_fetch_8:
1983   case Builtin::BI__sync_nand_and_fetch_16:
1984     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1985                                 llvm::Instruction::And, true);
1986
1987   case Builtin::BI__sync_val_compare_and_swap_1:
1988   case Builtin::BI__sync_val_compare_and_swap_2:
1989   case Builtin::BI__sync_val_compare_and_swap_4:
1990   case Builtin::BI__sync_val_compare_and_swap_8:
1991   case Builtin::BI__sync_val_compare_and_swap_16:
1992     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1993
1994   case Builtin::BI__sync_bool_compare_and_swap_1:
1995   case Builtin::BI__sync_bool_compare_and_swap_2:
1996   case Builtin::BI__sync_bool_compare_and_swap_4:
1997   case Builtin::BI__sync_bool_compare_and_swap_8:
1998   case Builtin::BI__sync_bool_compare_and_swap_16:
1999     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2000
2001   case Builtin::BI__sync_swap_1:
2002   case Builtin::BI__sync_swap_2:
2003   case Builtin::BI__sync_swap_4:
2004   case Builtin::BI__sync_swap_8:
2005   case Builtin::BI__sync_swap_16:
2006     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2007
2008   case Builtin::BI__sync_lock_test_and_set_1:
2009   case Builtin::BI__sync_lock_test_and_set_2:
2010   case Builtin::BI__sync_lock_test_and_set_4:
2011   case Builtin::BI__sync_lock_test_and_set_8:
2012   case Builtin::BI__sync_lock_test_and_set_16:
2013     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2014
2015   case Builtin::BI__sync_lock_release_1:
2016   case Builtin::BI__sync_lock_release_2:
2017   case Builtin::BI__sync_lock_release_4:
2018   case Builtin::BI__sync_lock_release_8:
2019   case Builtin::BI__sync_lock_release_16: {
2020     Value *Ptr = EmitScalarExpr(E->getArg(0));
2021     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2022     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2023     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2024                                              StoreSize.getQuantity() * 8);
2025     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2026     llvm::StoreInst *Store =
2027       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2028                                  StoreSize);
2029     Store->setAtomic(llvm::AtomicOrdering::Release);
2030     return RValue::get(nullptr);
2031   }
2032
2033   case Builtin::BI__sync_synchronize: {
2034     // We assume this is supposed to correspond to a C++0x-style
2035     // sequentially-consistent fence (i.e. this is only usable for
2036     // synchronization, not device I/O or anything like that). This intrinsic
2037     // is really badly designed in the sense that in theory, there isn't
2038     // any way to safely use it... but in practice, it mostly works
2039     // to use it with non-atomic loads and stores to get acquire/release
2040     // semantics.
2041     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2042     return RValue::get(nullptr);
2043   }
2044
2045   case Builtin::BI__builtin_nontemporal_load:
2046     return RValue::get(EmitNontemporalLoad(*this, E));
2047   case Builtin::BI__builtin_nontemporal_store:
2048     return RValue::get(EmitNontemporalStore(*this, E));
2049   case Builtin::BI__c11_atomic_is_lock_free:
2050   case Builtin::BI__atomic_is_lock_free: {
2051     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2052     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2053     // _Atomic(T) is always properly-aligned.
2054     const char *LibCallName = "__atomic_is_lock_free";
2055     CallArgList Args;
2056     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2057              getContext().getSizeType());
2058     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2059       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2060                getContext().VoidPtrTy);
2061     else
2062       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2063                getContext().VoidPtrTy);
2064     const CGFunctionInfo &FuncInfo =
2065         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2066     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2067     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2068     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2069                     ReturnValueSlot(), Args);
2070   }
2071
2072   case Builtin::BI__atomic_test_and_set: {
2073     // Look at the argument type to determine whether this is a volatile
2074     // operation. The parameter type is always volatile.
2075     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2076     bool Volatile =
2077         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2078
2079     Value *Ptr = EmitScalarExpr(E->getArg(0));
2080     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2081     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2082     Value *NewVal = Builder.getInt8(1);
2083     Value *Order = EmitScalarExpr(E->getArg(1));
2084     if (isa<llvm::ConstantInt>(Order)) {
2085       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2086       AtomicRMWInst *Result = nullptr;
2087       switch (ord) {
2088       case 0:  // memory_order_relaxed
2089       default: // invalid order
2090         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2091                                          llvm::AtomicOrdering::Monotonic);
2092         break;
2093       case 1: // memory_order_consume
2094       case 2: // memory_order_acquire
2095         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2096                                          llvm::AtomicOrdering::Acquire);
2097         break;
2098       case 3: // memory_order_release
2099         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2100                                          llvm::AtomicOrdering::Release);
2101         break;
2102       case 4: // memory_order_acq_rel
2103
2104         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2105                                          llvm::AtomicOrdering::AcquireRelease);
2106         break;
2107       case 5: // memory_order_seq_cst
2108         Result = Builder.CreateAtomicRMW(
2109             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2110             llvm::AtomicOrdering::SequentiallyConsistent);
2111         break;
2112       }
2113       Result->setVolatile(Volatile);
2114       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2115     }
2116
2117     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2118
2119     llvm::BasicBlock *BBs[5] = {
2120       createBasicBlock("monotonic", CurFn),
2121       createBasicBlock("acquire", CurFn),
2122       createBasicBlock("release", CurFn),
2123       createBasicBlock("acqrel", CurFn),
2124       createBasicBlock("seqcst", CurFn)
2125     };
2126     llvm::AtomicOrdering Orders[5] = {
2127         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2128         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2129         llvm::AtomicOrdering::SequentiallyConsistent};
2130
2131     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2132     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2133
2134     Builder.SetInsertPoint(ContBB);
2135     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2136
2137     for (unsigned i = 0; i < 5; ++i) {
2138       Builder.SetInsertPoint(BBs[i]);
2139       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2140                                                    Ptr, NewVal, Orders[i]);
2141       RMW->setVolatile(Volatile);
2142       Result->addIncoming(RMW, BBs[i]);
2143       Builder.CreateBr(ContBB);
2144     }
2145
2146     SI->addCase(Builder.getInt32(0), BBs[0]);
2147     SI->addCase(Builder.getInt32(1), BBs[1]);
2148     SI->addCase(Builder.getInt32(2), BBs[1]);
2149     SI->addCase(Builder.getInt32(3), BBs[2]);
2150     SI->addCase(Builder.getInt32(4), BBs[3]);
2151     SI->addCase(Builder.getInt32(5), BBs[4]);
2152
2153     Builder.SetInsertPoint(ContBB);
2154     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2155   }
2156
2157   case Builtin::BI__atomic_clear: {
2158     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2159     bool Volatile =
2160         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2161
2162     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2163     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2164     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2165     Value *NewVal = Builder.getInt8(0);
2166     Value *Order = EmitScalarExpr(E->getArg(1));
2167     if (isa<llvm::ConstantInt>(Order)) {
2168       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2169       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2170       switch (ord) {
2171       case 0:  // memory_order_relaxed
2172       default: // invalid order
2173         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2174         break;
2175       case 3:  // memory_order_release
2176         Store->setOrdering(llvm::AtomicOrdering::Release);
2177         break;
2178       case 5:  // memory_order_seq_cst
2179         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2180         break;
2181       }
2182       return RValue::get(nullptr);
2183     }
2184
2185     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2186
2187     llvm::BasicBlock *BBs[3] = {
2188       createBasicBlock("monotonic", CurFn),
2189       createBasicBlock("release", CurFn),
2190       createBasicBlock("seqcst", CurFn)
2191     };
2192     llvm::AtomicOrdering Orders[3] = {
2193         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2194         llvm::AtomicOrdering::SequentiallyConsistent};
2195
2196     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2197     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2198
2199     for (unsigned i = 0; i < 3; ++i) {
2200       Builder.SetInsertPoint(BBs[i]);
2201       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2202       Store->setOrdering(Orders[i]);
2203       Builder.CreateBr(ContBB);
2204     }
2205
2206     SI->addCase(Builder.getInt32(0), BBs[0]);
2207     SI->addCase(Builder.getInt32(3), BBs[1]);
2208     SI->addCase(Builder.getInt32(5), BBs[2]);
2209
2210     Builder.SetInsertPoint(ContBB);
2211     return RValue::get(nullptr);
2212   }
2213
2214   case Builtin::BI__atomic_thread_fence:
2215   case Builtin::BI__atomic_signal_fence:
2216   case Builtin::BI__c11_atomic_thread_fence:
2217   case Builtin::BI__c11_atomic_signal_fence: {
2218     llvm::SyncScope::ID SSID;
2219     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2220         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2221       SSID = llvm::SyncScope::SingleThread;
2222     else
2223       SSID = llvm::SyncScope::System;
2224     Value *Order = EmitScalarExpr(E->getArg(0));
2225     if (isa<llvm::ConstantInt>(Order)) {
2226       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2227       switch (ord) {
2228       case 0:  // memory_order_relaxed
2229       default: // invalid order
2230         break;
2231       case 1:  // memory_order_consume
2232       case 2:  // memory_order_acquire
2233         Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2234         break;
2235       case 3:  // memory_order_release
2236         Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2237         break;
2238       case 4:  // memory_order_acq_rel
2239         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2240         break;
2241       case 5:  // memory_order_seq_cst
2242         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2243         break;
2244       }
2245       return RValue::get(nullptr);
2246     }
2247
2248     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2249     AcquireBB = createBasicBlock("acquire", CurFn);
2250     ReleaseBB = createBasicBlock("release", CurFn);
2251     AcqRelBB = createBasicBlock("acqrel", CurFn);
2252     SeqCstBB = createBasicBlock("seqcst", CurFn);
2253     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2254
2255     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2256     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2257
2258     Builder.SetInsertPoint(AcquireBB);
2259     Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2260     Builder.CreateBr(ContBB);
2261     SI->addCase(Builder.getInt32(1), AcquireBB);
2262     SI->addCase(Builder.getInt32(2), AcquireBB);
2263
2264     Builder.SetInsertPoint(ReleaseBB);
2265     Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2266     Builder.CreateBr(ContBB);
2267     SI->addCase(Builder.getInt32(3), ReleaseBB);
2268
2269     Builder.SetInsertPoint(AcqRelBB);
2270     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2271     Builder.CreateBr(ContBB);
2272     SI->addCase(Builder.getInt32(4), AcqRelBB);
2273
2274     Builder.SetInsertPoint(SeqCstBB);
2275     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2276     Builder.CreateBr(ContBB);
2277     SI->addCase(Builder.getInt32(5), SeqCstBB);
2278
2279     Builder.SetInsertPoint(ContBB);
2280     return RValue::get(nullptr);
2281   }
2282
2283   case Builtin::BI__builtin_signbit:
2284   case Builtin::BI__builtin_signbitf:
2285   case Builtin::BI__builtin_signbitl: {
2286     return RValue::get(
2287         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2288                            ConvertType(E->getType())));
2289   }
2290   case Builtin::BI__annotation: {
2291     // Re-encode each wide string to UTF8 and make an MDString.
2292     SmallVector<Metadata *, 1> Strings;
2293     for (const Expr *Arg : E->arguments()) {
2294       const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2295       assert(Str->getCharByteWidth() == 2);
2296       StringRef WideBytes = Str->getBytes();
2297       std::string StrUtf8;
2298       if (!convertUTF16ToUTF8String(
2299               makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2300         CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2301         continue;
2302       }
2303       Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2304     }
2305
2306     // Build an MDTuple of MDStrings and emit the intrinsic call.
2307     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2308     MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2309     Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2310     return RValue::getIgnored();
2311   }
2312   case Builtin::BI__builtin_annotation: {
2313     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2314     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2315                                       AnnVal->getType());
2316
2317     // Get the annotation string, go through casts. Sema requires this to be a
2318     // non-wide string literal, potentially cast, so the cast<> is safe.
2319     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2320     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2321     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2322   }
2323   case Builtin::BI__builtin_addcb:
2324   case Builtin::BI__builtin_addcs:
2325   case Builtin::BI__builtin_addc:
2326   case Builtin::BI__builtin_addcl:
2327   case Builtin::BI__builtin_addcll:
2328   case Builtin::BI__builtin_subcb:
2329   case Builtin::BI__builtin_subcs:
2330   case Builtin::BI__builtin_subc:
2331   case Builtin::BI__builtin_subcl:
2332   case Builtin::BI__builtin_subcll: {
2333
2334     // We translate all of these builtins from expressions of the form:
2335     //   int x = ..., y = ..., carryin = ..., carryout, result;
2336     //   result = __builtin_addc(x, y, carryin, &carryout);
2337     //
2338     // to LLVM IR of the form:
2339     //
2340     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2341     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2342     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
2343     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2344     //                                                       i32 %carryin)
2345     //   %result = extractvalue {i32, i1} %tmp2, 0
2346     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
2347     //   %tmp3 = or i1 %carry1, %carry2
2348     //   %tmp4 = zext i1 %tmp3 to i32
2349     //   store i32 %tmp4, i32* %carryout
2350
2351     // Scalarize our inputs.
2352     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2353     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2354     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2355     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2356
2357     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2358     llvm::Intrinsic::ID IntrinsicId;
2359     switch (BuiltinID) {
2360     default: llvm_unreachable("Unknown multiprecision builtin id.");
2361     case Builtin::BI__builtin_addcb:
2362     case Builtin::BI__builtin_addcs:
2363     case Builtin::BI__builtin_addc:
2364     case Builtin::BI__builtin_addcl:
2365     case Builtin::BI__builtin_addcll:
2366       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2367       break;
2368     case Builtin::BI__builtin_subcb:
2369     case Builtin::BI__builtin_subcs:
2370     case Builtin::BI__builtin_subc:
2371     case Builtin::BI__builtin_subcl:
2372     case Builtin::BI__builtin_subcll:
2373       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2374       break;
2375     }
2376
2377     // Construct our resulting LLVM IR expression.
2378     llvm::Value *Carry1;
2379     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2380                                               X, Y, Carry1);
2381     llvm::Value *Carry2;
2382     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2383                                               Sum1, Carryin, Carry2);
2384     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2385                                                X->getType());
2386     Builder.CreateStore(CarryOut, CarryOutPtr);
2387     return RValue::get(Sum2);
2388   }
2389
2390   case Builtin::BI__builtin_add_overflow:
2391   case Builtin::BI__builtin_sub_overflow:
2392   case Builtin::BI__builtin_mul_overflow: {
2393     const clang::Expr *LeftArg = E->getArg(0);
2394     const clang::Expr *RightArg = E->getArg(1);
2395     const clang::Expr *ResultArg = E->getArg(2);
2396
2397     clang::QualType ResultQTy =
2398         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2399
2400     WidthAndSignedness LeftInfo =
2401         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2402     WidthAndSignedness RightInfo =
2403         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2404     WidthAndSignedness ResultInfo =
2405         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2406
2407     // Handle mixed-sign multiplication as a special case, because adding
2408     // runtime or backend support for our generic irgen would be too expensive.
2409     if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
2410       return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
2411                                           RightInfo, ResultArg, ResultQTy,
2412                                           ResultInfo);
2413
2414     WidthAndSignedness EncompassingInfo =
2415         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2416
2417     llvm::Type *EncompassingLLVMTy =
2418         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2419
2420     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2421
2422     llvm::Intrinsic::ID IntrinsicId;
2423     switch (BuiltinID) {
2424     default:
2425       llvm_unreachable("Unknown overflow builtin id.");
2426     case Builtin::BI__builtin_add_overflow:
2427       IntrinsicId = EncompassingInfo.Signed
2428                         ? llvm::Intrinsic::sadd_with_overflow
2429                         : llvm::Intrinsic::uadd_with_overflow;
2430       break;
2431     case Builtin::BI__builtin_sub_overflow:
2432       IntrinsicId = EncompassingInfo.Signed
2433                         ? llvm::Intrinsic::ssub_with_overflow
2434                         : llvm::Intrinsic::usub_with_overflow;
2435       break;
2436     case Builtin::BI__builtin_mul_overflow:
2437       IntrinsicId = EncompassingInfo.Signed
2438                         ? llvm::Intrinsic::smul_with_overflow
2439                         : llvm::Intrinsic::umul_with_overflow;
2440       break;
2441     }
2442
2443     llvm::Value *Left = EmitScalarExpr(LeftArg);
2444     llvm::Value *Right = EmitScalarExpr(RightArg);
2445     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2446
2447     // Extend each operand to the encompassing type.
2448     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2449     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2450
2451     // Perform the operation on the extended values.
2452     llvm::Value *Overflow, *Result;
2453     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2454
2455     if (EncompassingInfo.Width > ResultInfo.Width) {
2456       // The encompassing type is wider than the result type, so we need to
2457       // truncate it.
2458       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2459
2460       // To see if the truncation caused an overflow, we will extend
2461       // the result and then compare it to the original result.
2462       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2463           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2464       llvm::Value *TruncationOverflow =
2465           Builder.CreateICmpNE(Result, ResultTruncExt);
2466
2467       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2468       Result = ResultTrunc;
2469     }
2470
2471     // Finally, store the result using the pointer.
2472     bool isVolatile =
2473       ResultArg->getType()->getPointeeType().isVolatileQualified();
2474     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2475
2476     return RValue::get(Overflow);
2477   }
2478
2479   case Builtin::BI__builtin_uadd_overflow:
2480   case Builtin::BI__builtin_uaddl_overflow:
2481   case Builtin::BI__builtin_uaddll_overflow:
2482   case Builtin::BI__builtin_usub_overflow:
2483   case Builtin::BI__builtin_usubl_overflow:
2484   case Builtin::BI__builtin_usubll_overflow:
2485   case Builtin::BI__builtin_umul_overflow:
2486   case Builtin::BI__builtin_umull_overflow:
2487   case Builtin::BI__builtin_umulll_overflow:
2488   case Builtin::BI__builtin_sadd_overflow:
2489   case Builtin::BI__builtin_saddl_overflow:
2490   case Builtin::BI__builtin_saddll_overflow:
2491   case Builtin::BI__builtin_ssub_overflow:
2492   case Builtin::BI__builtin_ssubl_overflow:
2493   case Builtin::BI__builtin_ssubll_overflow:
2494   case Builtin::BI__builtin_smul_overflow:
2495   case Builtin::BI__builtin_smull_overflow:
2496   case Builtin::BI__builtin_smulll_overflow: {
2497
2498     // We translate all of these builtins directly to the relevant LLVM IR node.
2499
2500     // Scalarize our inputs.
2501     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2502     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2503     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2504
2505     // Decide which of the overflow intrinsics we are lowering to:
2506     llvm::Intrinsic::ID IntrinsicId;
2507     switch (BuiltinID) {
2508     default: llvm_unreachable("Unknown overflow builtin id.");
2509     case Builtin::BI__builtin_uadd_overflow:
2510     case Builtin::BI__builtin_uaddl_overflow:
2511     case Builtin::BI__builtin_uaddll_overflow:
2512       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2513       break;
2514     case Builtin::BI__builtin_usub_overflow:
2515     case Builtin::BI__builtin_usubl_overflow:
2516     case Builtin::BI__builtin_usubll_overflow:
2517       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2518       break;
2519     case Builtin::BI__builtin_umul_overflow:
2520     case Builtin::BI__builtin_umull_overflow:
2521     case Builtin::BI__builtin_umulll_overflow:
2522       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2523       break;
2524     case Builtin::BI__builtin_sadd_overflow:
2525     case Builtin::BI__builtin_saddl_overflow:
2526     case Builtin::BI__builtin_saddll_overflow:
2527       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2528       break;
2529     case Builtin::BI__builtin_ssub_overflow:
2530     case Builtin::BI__builtin_ssubl_overflow:
2531     case Builtin::BI__builtin_ssubll_overflow:
2532       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2533       break;
2534     case Builtin::BI__builtin_smul_overflow:
2535     case Builtin::BI__builtin_smull_overflow:
2536     case Builtin::BI__builtin_smulll_overflow:
2537       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2538       break;
2539     }
2540
2541
2542     llvm::Value *Carry;
2543     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2544     Builder.CreateStore(Sum, SumOutPtr);
2545
2546     return RValue::get(Carry);
2547   }
2548   case Builtin::BI__builtin_addressof:
2549     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2550   case Builtin::BI__builtin_operator_new:
2551     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2552                                     E->getArg(0), false);
2553   case Builtin::BI__builtin_operator_delete:
2554     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2555                                     E->getArg(0), true);
2556   case Builtin::BI__noop:
2557     // __noop always evaluates to an integer literal zero.
2558     return RValue::get(ConstantInt::get(IntTy, 0));
2559   case Builtin::BI__builtin_call_with_static_chain: {
2560     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2561     const Expr *Chain = E->getArg(1);
2562     return EmitCall(Call->getCallee()->getType(),
2563                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2564                     EmitScalarExpr(Chain));
2565   }
2566   case Builtin::BI_InterlockedExchange8:
2567   case Builtin::BI_InterlockedExchange16:
2568   case Builtin::BI_InterlockedExchange:
2569   case Builtin::BI_InterlockedExchangePointer:
2570     return RValue::get(
2571         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2572   case Builtin::BI_InterlockedCompareExchangePointer: {
2573     llvm::Type *RTy;
2574     llvm::IntegerType *IntType =
2575       IntegerType::get(getLLVMContext(),
2576                        getContext().getTypeSize(E->getType()));
2577     llvm::Type *IntPtrType = IntType->getPointerTo();
2578
2579     llvm::Value *Destination =
2580       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2581
2582     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2583     RTy = Exchange->getType();
2584     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2585
2586     llvm::Value *Comparand =
2587       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2588
2589     auto Result =
2590         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2591                                     AtomicOrdering::SequentiallyConsistent,
2592                                     AtomicOrdering::SequentiallyConsistent);
2593     Result->setVolatile(true);
2594
2595     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2596                                                                          0),
2597                                               RTy));
2598   }
2599   case Builtin::BI_InterlockedCompareExchange8:
2600   case Builtin::BI_InterlockedCompareExchange16:
2601   case Builtin::BI_InterlockedCompareExchange:
2602   case Builtin::BI_InterlockedCompareExchange64: {
2603     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2604         EmitScalarExpr(E->getArg(0)),
2605         EmitScalarExpr(E->getArg(2)),
2606         EmitScalarExpr(E->getArg(1)),
2607         AtomicOrdering::SequentiallyConsistent,
2608         AtomicOrdering::SequentiallyConsistent);
2609       CXI->setVolatile(true);
2610       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2611   }
2612   case Builtin::BI_InterlockedIncrement16:
2613   case Builtin::BI_InterlockedIncrement:
2614     return RValue::get(
2615         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2616   case Builtin::BI_InterlockedDecrement16:
2617   case Builtin::BI_InterlockedDecrement:
2618     return RValue::get(
2619         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2620   case Builtin::BI_InterlockedAnd8:
2621   case Builtin::BI_InterlockedAnd16:
2622   case Builtin::BI_InterlockedAnd:
2623     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2624   case Builtin::BI_InterlockedExchangeAdd8:
2625   case Builtin::BI_InterlockedExchangeAdd16:
2626   case Builtin::BI_InterlockedExchangeAdd:
2627     return RValue::get(
2628         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2629   case Builtin::BI_InterlockedExchangeSub8:
2630   case Builtin::BI_InterlockedExchangeSub16:
2631   case Builtin::BI_InterlockedExchangeSub:
2632     return RValue::get(
2633         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2634   case Builtin::BI_InterlockedOr8:
2635   case Builtin::BI_InterlockedOr16:
2636   case Builtin::BI_InterlockedOr:
2637     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2638   case Builtin::BI_InterlockedXor8:
2639   case Builtin::BI_InterlockedXor16:
2640   case Builtin::BI_InterlockedXor:
2641     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2642   case Builtin::BI_interlockedbittestandset:
2643     return RValue::get(
2644         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2645
2646   case Builtin::BI__exception_code:
2647   case Builtin::BI_exception_code:
2648     return RValue::get(EmitSEHExceptionCode());
2649   case Builtin::BI__exception_info:
2650   case Builtin::BI_exception_info:
2651     return RValue::get(EmitSEHExceptionInfo());
2652   case Builtin::BI__abnormal_termination:
2653   case Builtin::BI_abnormal_termination:
2654     return RValue::get(EmitSEHAbnormalTermination());
2655   case Builtin::BI_setjmpex: {
2656     if (getTarget().getTriple().isOSMSVCRT()) {
2657       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2658       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2659           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2660           llvm::Attribute::ReturnsTwice);
2661       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2662           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2663           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2664       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2665           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2666       llvm::Value *FrameAddr =
2667           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2668                              ConstantInt::get(Int32Ty, 0));
2669       llvm::Value *Args[] = {Buf, FrameAddr};
2670       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2671       CS.setAttributes(ReturnsTwiceAttr);
2672       return RValue::get(CS.getInstruction());
2673     }
2674     break;
2675   }
2676   case Builtin::BI_setjmp: {
2677     if (getTarget().getTriple().isOSMSVCRT()) {
2678       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2679           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2680           llvm::Attribute::ReturnsTwice);
2681       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2682           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2683       llvm::CallSite CS;
2684       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2685         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2686         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2687             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2688             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2689         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2690         llvm::Value *Args[] = {Buf, Count};
2691         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2692       } else {
2693         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2694         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2695             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2696             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2697         llvm::Value *FrameAddr =
2698             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2699                                ConstantInt::get(Int32Ty, 0));
2700         llvm::Value *Args[] = {Buf, FrameAddr};
2701         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2702       }
2703       CS.setAttributes(ReturnsTwiceAttr);
2704       return RValue::get(CS.getInstruction());
2705     }
2706     break;
2707   }
2708
2709   case Builtin::BI__GetExceptionInfo: {
2710     if (llvm::GlobalVariable *GV =
2711             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2712       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2713     break;
2714   }
2715
2716   case Builtin::BI__fastfail:
2717     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2718
2719   case Builtin::BI__builtin_coro_size: {
2720     auto & Context = getContext();
2721     auto SizeTy = Context.getSizeType();
2722     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2723     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2724     return RValue::get(Builder.CreateCall(F));
2725   }
2726
2727   case Builtin::BI__builtin_coro_id:
2728     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2729   case Builtin::BI__builtin_coro_promise:
2730     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2731   case Builtin::BI__builtin_coro_resume:
2732     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2733   case Builtin::BI__builtin_coro_frame:
2734     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2735   case Builtin::BI__builtin_coro_free:
2736     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2737   case Builtin::BI__builtin_coro_destroy:
2738     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2739   case Builtin::BI__builtin_coro_done:
2740     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2741   case Builtin::BI__builtin_coro_alloc:
2742     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2743   case Builtin::BI__builtin_coro_begin:
2744     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2745   case Builtin::BI__builtin_coro_end:
2746     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2747   case Builtin::BI__builtin_coro_suspend:
2748     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2749   case Builtin::BI__builtin_coro_param:
2750     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2751
2752   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2753   case Builtin::BIread_pipe:
2754   case Builtin::BIwrite_pipe: {
2755     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2756           *Arg1 = EmitScalarExpr(E->getArg(1));
2757     CGOpenCLRuntime OpenCLRT(CGM);
2758     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2759     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2760
2761     // Type of the generic packet parameter.
2762     unsigned GenericAS =
2763         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2764     llvm::Type *I8PTy = llvm::PointerType::get(
2765         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2766
2767     // Testing which overloaded version we should generate the call for.
2768     if (2U == E->getNumArgs()) {
2769       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2770                                                              : "__write_pipe_2";
2771       // Creating a generic function type to be able to call with any builtin or
2772       // user defined type.
2773       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2774       llvm::FunctionType *FTy = llvm::FunctionType::get(
2775           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2776       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2777       return RValue::get(
2778           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2779                              {Arg0, BCast, PacketSize, PacketAlign}));
2780     } else {
2781       assert(4 == E->getNumArgs() &&
2782              "Illegal number of parameters to pipe function");
2783       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2784                                                              : "__write_pipe_4";
2785
2786       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2787                               Int32Ty, Int32Ty};
2788       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2789             *Arg3 = EmitScalarExpr(E->getArg(3));
2790       llvm::FunctionType *FTy = llvm::FunctionType::get(
2791           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2792       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2793       // We know the third argument is an integer type, but we may need to cast
2794       // it to i32.
2795       if (Arg2->getType() != Int32Ty)
2796         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2797       return RValue::get(Builder.CreateCall(
2798           CGM.CreateRuntimeFunction(FTy, Name),
2799           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2800     }
2801   }
2802   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2803   // functions
2804   case Builtin::BIreserve_read_pipe:
2805   case Builtin::BIreserve_write_pipe:
2806   case Builtin::BIwork_group_reserve_read_pipe:
2807   case Builtin::BIwork_group_reserve_write_pipe:
2808   case Builtin::BIsub_group_reserve_read_pipe:
2809   case Builtin::BIsub_group_reserve_write_pipe: {
2810     // Composing the mangled name for the function.
2811     const char *Name;
2812     if (BuiltinID == Builtin::BIreserve_read_pipe)
2813       Name = "__reserve_read_pipe";
2814     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2815       Name = "__reserve_write_pipe";
2816     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2817       Name = "__work_group_reserve_read_pipe";
2818     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2819       Name = "__work_group_reserve_write_pipe";
2820     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2821       Name = "__sub_group_reserve_read_pipe";
2822     else
2823       Name = "__sub_group_reserve_write_pipe";
2824
2825     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2826           *Arg1 = EmitScalarExpr(E->getArg(1));
2827     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2828     CGOpenCLRuntime OpenCLRT(CGM);
2829     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2830     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2831
2832     // Building the generic function prototype.
2833     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2834     llvm::FunctionType *FTy = llvm::FunctionType::get(
2835         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2836     // We know the second argument is an integer type, but we may need to cast
2837     // it to i32.
2838     if (Arg1->getType() != Int32Ty)
2839       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2840     return RValue::get(
2841         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2842                            {Arg0, Arg1, PacketSize, PacketAlign}));
2843   }
2844   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2845   // functions
2846   case Builtin::BIcommit_read_pipe:
2847   case Builtin::BIcommit_write_pipe:
2848   case Builtin::BIwork_group_commit_read_pipe:
2849   case Builtin::BIwork_group_commit_write_pipe:
2850   case Builtin::BIsub_group_commit_read_pipe:
2851   case Builtin::BIsub_group_commit_write_pipe: {
2852     const char *Name;
2853     if (BuiltinID == Builtin::BIcommit_read_pipe)
2854       Name = "__commit_read_pipe";
2855     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2856       Name = "__commit_write_pipe";
2857     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2858       Name = "__work_group_commit_read_pipe";
2859     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2860       Name = "__work_group_commit_write_pipe";
2861     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2862       Name = "__sub_group_commit_read_pipe";
2863     else
2864       Name = "__sub_group_commit_write_pipe";
2865
2866     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2867           *Arg1 = EmitScalarExpr(E->getArg(1));
2868     CGOpenCLRuntime OpenCLRT(CGM);
2869     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2870     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2871
2872     // Building the generic function prototype.
2873     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2874     llvm::FunctionType *FTy =
2875         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2876                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2877
2878     return RValue::get(
2879         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2880                            {Arg0, Arg1, PacketSize, PacketAlign}));
2881   }
2882   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2883   case Builtin::BIget_pipe_num_packets:
2884   case Builtin::BIget_pipe_max_packets: {
2885     const char *Name;
2886     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2887       Name = "__get_pipe_num_packets";
2888     else
2889       Name = "__get_pipe_max_packets";
2890
2891     // Building the generic function prototype.
2892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2893     CGOpenCLRuntime OpenCLRT(CGM);
2894     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2895     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2896     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2897     llvm::FunctionType *FTy = llvm::FunctionType::get(
2898         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2899
2900     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2901                                           {Arg0, PacketSize, PacketAlign}));
2902   }
2903
2904   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2905   case Builtin::BIto_global:
2906   case Builtin::BIto_local:
2907   case Builtin::BIto_private: {
2908     auto Arg0 = EmitScalarExpr(E->getArg(0));
2909     auto NewArgT = llvm::PointerType::get(Int8Ty,
2910       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2911     auto NewRetT = llvm::PointerType::get(Int8Ty,
2912       CGM.getContext().getTargetAddressSpace(
2913         E->getType()->getPointeeType().getAddressSpace()));
2914     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2915     llvm::Value *NewArg;
2916     if (Arg0->getType()->getPointerAddressSpace() !=
2917         NewArgT->getPointerAddressSpace())
2918       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2919     else
2920       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2921     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2922     auto NewCall =
2923         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2924     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2925       ConvertType(E->getType())));
2926   }
2927
2928   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2929   // It contains four different overload formats specified in Table 6.13.17.1.
2930   case Builtin::BIenqueue_kernel: {
2931     StringRef Name; // Generated function call name
2932     unsigned NumArgs = E->getNumArgs();
2933
2934     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2935     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2936         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2937
2938     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2939     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2940     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2941     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2942     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2943
2944     if (NumArgs == 4) {
2945       // The most basic form of the call with parameters:
2946       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2947       Name = "__enqueue_kernel_basic";
2948       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
2949                               GenericVoidPtrTy};
2950       llvm::FunctionType *FTy = llvm::FunctionType::get(
2951           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2952
2953       auto Info =
2954           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
2955       llvm::Value *Kernel =
2956           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
2957       llvm::Value *Block =
2958           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
2959
2960       AttrBuilder B;
2961       B.addAttribute(Attribute::ByVal);
2962       llvm::AttributeList ByValAttrSet =
2963           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2964
2965       auto RTCall =
2966           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2967                              {Queue, Flags, Range, Kernel, Block});
2968       RTCall->setAttributes(ByValAttrSet);
2969       return RValue::get(RTCall);
2970     }
2971     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2972
2973     // Create a temporary array to hold the sizes of local pointer arguments
2974     // for the block. \p First is the position of the first size argument.
2975     auto CreateArrayForSizeVar = [=](unsigned First) {
2976       auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2977       auto *Arr = Builder.CreateAlloca(AT);
2978       llvm::Value *Ptr;
2979       // Each of the following arguments specifies the size of the corresponding
2980       // argument passed to the enqueued block.
2981       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2982       for (unsigned I = First; I < NumArgs; ++I) {
2983         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2984         auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2985         if (I == First)
2986           Ptr = GEP;
2987         auto *V =
2988             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2989         Builder.CreateAlignedStore(
2990             V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2991       }
2992       return Ptr;
2993     };
2994
2995     // Could have events and/or vaargs.
2996     if (E->getArg(3)->getType()->isBlockPointerType()) {
2997       // No events passed, but has variadic arguments.
2998       Name = "__enqueue_kernel_vaargs";
2999       auto Info =
3000           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3001       llvm::Value *Kernel =
3002           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3003       auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3004       auto *PtrToSizeArray = CreateArrayForSizeVar(4);
3005
3006       // Create a vector of the arguments, as well as a constant value to
3007       // express to the runtime the number of variadic arguments.
3008       std::vector<llvm::Value *> Args = {
3009           Queue,  Flags, Range,
3010           Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3011           PtrToSizeArray};
3012       std::vector<llvm::Type *> ArgTys = {
3013           QueueTy,          IntTy,            RangeTy,
3014           GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
3015           PtrToSizeArray->getType()};
3016
3017       llvm::FunctionType *FTy = llvm::FunctionType::get(
3018           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3019       return RValue::get(
3020           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3021                              llvm::ArrayRef<llvm::Value *>(Args)));
3022     }
3023     // Any calls now have event arguments passed.
3024     if (NumArgs >= 7) {
3025       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3026       llvm::Type *EventPtrTy = EventTy->getPointerTo(
3027           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3028
3029       llvm::Value *NumEvents =
3030           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3031       llvm::Value *EventList =
3032           E->getArg(4)->getType()->isArrayType()
3033               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3034               : EmitScalarExpr(E->getArg(4));
3035       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3036       // Convert to generic address space.
3037       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3038       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3039       auto Info =
3040           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3041       llvm::Value *Kernel =
3042           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3043       llvm::Value *Block =
3044           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3045
3046       std::vector<llvm::Type *> ArgTys = {
3047           QueueTy,    Int32Ty,    RangeTy,          Int32Ty,
3048           EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3049
3050       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range,  NumEvents,
3051                                          EventList, ClkEvent, Kernel, Block};
3052
3053       if (NumArgs == 7) {
3054         // Has events but no variadics.
3055         Name = "__enqueue_kernel_basic_events";
3056         llvm::FunctionType *FTy = llvm::FunctionType::get(
3057             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3058         return RValue::get(
3059             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3060                                llvm::ArrayRef<llvm::Value *>(Args)));
3061       }
3062       // Has event info and variadics
3063       // Pass the number of variadics to the runtime function too.
3064       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3065       ArgTys.push_back(Int32Ty);
3066       Name = "__enqueue_kernel_events_vaargs";
3067
3068       auto *PtrToSizeArray = CreateArrayForSizeVar(7);
3069       Args.push_back(PtrToSizeArray);
3070       ArgTys.push_back(PtrToSizeArray->getType());
3071
3072       llvm::FunctionType *FTy = llvm::FunctionType::get(
3073           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3074       return RValue::get(
3075           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3076                              llvm::ArrayRef<llvm::Value *>(Args)));
3077     }
3078     LLVM_FALLTHROUGH;
3079   }
3080   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3081   // parameter.
3082   case Builtin::BIget_kernel_work_group_size: {
3083     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3084         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3085     auto Info =
3086         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3087     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3088     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3089     return RValue::get(Builder.CreateCall(
3090         CGM.CreateRuntimeFunction(
3091             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3092                                     false),
3093             "__get_kernel_work_group_size_impl"),
3094         {Kernel, Arg}));
3095   }
3096   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3097     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3098         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3099     auto Info =
3100         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3101     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3102     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3103     return RValue::get(Builder.CreateCall(
3104         CGM.CreateRuntimeFunction(
3105             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3106                                     false),
3107             "__get_kernel_preferred_work_group_multiple_impl"),
3108         {Kernel, Arg}));
3109   }
3110   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3111   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3112     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3113         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3114     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3115     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3116     auto Info =
3117         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3118     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3119     Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3120     const char *Name =
3121         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3122             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3123             : "__get_kernel_sub_group_count_for_ndrange_impl";
3124     return RValue::get(Builder.CreateCall(
3125         CGM.CreateRuntimeFunction(
3126             llvm::FunctionType::get(
3127                 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3128                 false),
3129             Name),
3130         {NDRange, Kernel, Block}));
3131   }
3132
3133   case Builtin::BI__builtin_store_half:
3134   case Builtin::BI__builtin_store_halff: {
3135     Value *Val = EmitScalarExpr(E->getArg(0));
3136     Address Address = EmitPointerWithAlignment(E->getArg(1));
3137     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3138     return RValue::get(Builder.CreateStore(HalfVal, Address));
3139   }
3140   case Builtin::BI__builtin_load_half: {
3141     Address Address = EmitPointerWithAlignment(E->getArg(0));
3142     Value *HalfVal = Builder.CreateLoad(Address);
3143     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3144   }
3145   case Builtin::BI__builtin_load_halff: {
3146     Address Address = EmitPointerWithAlignment(E->getArg(0));
3147     Value *HalfVal = Builder.CreateLoad(Address);
3148     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3149   }
3150   case Builtin::BIprintf:
3151     if (getTarget().getTriple().isNVPTX())
3152       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3153     break;
3154   case Builtin::BI__builtin_canonicalize:
3155   case Builtin::BI__builtin_canonicalizef:
3156   case Builtin::BI__builtin_canonicalizel:
3157     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3158
3159   case Builtin::BI__builtin_thread_pointer: {
3160     if (!getContext().getTargetInfo().isTLSSupported())
3161       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3162     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3163     break;
3164   }
3165   case Builtin::BI__builtin_os_log_format:
3166     return emitBuiltinOSLogFormat(*E);
3167
3168   case Builtin::BI__builtin_os_log_format_buffer_size: {
3169     analyze_os_log::OSLogBufferLayout Layout;
3170     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3171     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3172                                         Layout.size().getQuantity()));
3173   }
3174
3175   case Builtin::BI__xray_customevent: {
3176     if (!ShouldXRayInstrumentFunction())
3177       return RValue::getIgnored();
3178     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3179       if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3180         return RValue::getIgnored();
3181
3182     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3183     auto FTy = F->getFunctionType();
3184     auto Arg0 = E->getArg(0);
3185     auto Arg0Val = EmitScalarExpr(Arg0);
3186     auto Arg0Ty = Arg0->getType();
3187     auto PTy0 = FTy->getParamType(0);
3188     if (PTy0 != Arg0Val->getType()) {
3189       if (Arg0Ty->isArrayType())
3190         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3191       else
3192         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3193     }
3194     auto Arg1 = EmitScalarExpr(E->getArg(1));
3195     auto PTy1 = FTy->getParamType(1);
3196     if (PTy1 != Arg1->getType())
3197       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3198     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3199   }
3200
3201   case Builtin::BI__builtin_ms_va_start:
3202   case Builtin::BI__builtin_ms_va_end:
3203     return RValue::get(
3204         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3205                        BuiltinID == Builtin::BI__builtin_ms_va_start));
3206
3207   case Builtin::BI__builtin_ms_va_copy: {
3208     // Lower this manually. We can't reliably determine whether or not any
3209     // given va_copy() is for a Win64 va_list from the calling convention
3210     // alone, because it's legal to do this from a System V ABI function.
3211     // With opaque pointer types, we won't have enough information in LLVM
3212     // IR to determine this from the argument types, either. Best to do it
3213     // now, while we have enough information.
3214     Address DestAddr = EmitMSVAListRef(E->getArg(0));
3215     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3216
3217     llvm::Type *BPP = Int8PtrPtrTy;
3218
3219     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3220                        DestAddr.getAlignment());
3221     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3222                       SrcAddr.getAlignment());
3223
3224     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3225     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3226   }
3227   }
3228
3229   // If this is an alias for a lib function (e.g. __builtin_sin), emit
3230   // the call using the normal call path, but using the unmangled
3231   // version of the function name.
3232   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3233     return emitLibraryCall(*this, FD, E,
3234                            CGM.getBuiltinLibFunction(FD, BuiltinID));
3235
3236   // If this is a predefined lib function (e.g. malloc), emit the call
3237   // using exactly the normal call path.
3238   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3239     return emitLibraryCall(*this, FD, E,
3240                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3241
3242   // Check that a call to a target specific builtin has the correct target
3243   // features.
3244   // This is down here to avoid non-target specific builtins, however, if
3245   // generic builtins start to require generic target features then we
3246   // can move this up to the beginning of the function.
3247   checkTargetFeatures(E, FD);
3248
3249   // See if we have a target specific intrinsic.
3250   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3251   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3252   StringRef Prefix =
3253       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3254   if (!Prefix.empty()) {
3255     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3256     // NOTE we dont need to perform a compatibility flag check here since the
3257     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
3258     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
3259     if (IntrinsicID == Intrinsic::not_intrinsic)
3260       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3261   }
3262
3263   if (IntrinsicID != Intrinsic::not_intrinsic) {
3264     SmallVector<Value*, 16> Args;
3265
3266     // Find out if any arguments are required to be integer constant
3267     // expressions.
3268     unsigned ICEArguments = 0;
3269     ASTContext::GetBuiltinTypeError Error;
3270     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3271     assert(Error == ASTContext::GE_None && "Should not codegen an error");
3272
3273     Function *F = CGM.getIntrinsic(IntrinsicID);
3274     llvm::FunctionType *FTy = F->getFunctionType();
3275
3276     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3277       Value *ArgValue;
3278       // If this is a normal argument, just emit it as a scalar.
3279       if ((ICEArguments & (1 << i)) == 0) {
3280         ArgValue = EmitScalarExpr(E->getArg(i));
3281       } else {
3282         // If this is required to be a constant, constant fold it so that we
3283         // know that the generated intrinsic gets a ConstantInt.
3284         llvm::APSInt Result;
3285         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3286         assert(IsConst && "Constant arg isn't actually constant?");
3287         (void)IsConst;
3288         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3289       }
3290
3291       // If the intrinsic arg type is different from the builtin arg type
3292       // we need to do a bit cast.
3293       llvm::Type *PTy = FTy->getParamType(i);
3294       if (PTy != ArgValue->getType()) {
3295         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3296                "Must be able to losslessly bit cast to param");
3297         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3298       }
3299
3300       Args.push_back(ArgValue);
3301     }
3302
3303     Value *V = Builder.CreateCall(F, Args);
3304     QualType BuiltinRetType = E->getType();
3305
3306     llvm::Type *RetTy = VoidTy;
3307     if (!BuiltinRetType->isVoidType())
3308       RetTy = ConvertType(BuiltinRetType);
3309
3310     if (RetTy != V->getType()) {
3311       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3312              "Must be able to losslessly bit cast result type");
3313       V = Builder.CreateBitCast(V, RetTy);
3314     }
3315
3316     return RValue::get(V);
3317   }
3318
3319   // See if we have a target specific builtin that needs to be lowered.
3320   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3321     return RValue::get(V);
3322
3323   ErrorUnsupported(E, "builtin function");
3324
3325   // Unknown builtin, for now just dump it out and return undef.
3326   return GetUndefRValue(E->getType());
3327 }
3328
3329 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3330                                         unsigned BuiltinID, const CallExpr *E,
3331                                         llvm::Triple::ArchType Arch) {
3332   switch (Arch) {
3333   case llvm::Triple::arm:
3334   case llvm::Triple::armeb:
3335   case llvm::Triple::thumb:
3336   case llvm::Triple::thumbeb:
3337     return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
3338   case llvm::Triple::aarch64:
3339   case llvm::Triple::aarch64_be:
3340     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
3341   case llvm::Triple::x86:
3342   case llvm::Triple::x86_64:
3343     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3344   case llvm::Triple::ppc:
3345   case llvm::Triple::ppc64:
3346   case llvm::Triple::ppc64le:
3347     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3348   case llvm::Triple::r600:
3349   case llvm::Triple::amdgcn:
3350     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3351   case llvm::Triple::systemz:
3352     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3353   case llvm::Triple::nvptx:
3354   case llvm::Triple::nvptx64:
3355     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3356   case llvm::Triple::wasm32:
3357   case llvm::Triple::wasm64:
3358     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3359   case llvm::Triple::hexagon:
3360     return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
3361   default:
3362     return nullptr;
3363   }
3364 }
3365
3366 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3367                                               const CallExpr *E) {
3368   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3369     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3370     return EmitTargetArchBuiltinExpr(
3371         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3372         getContext().getAuxTargetInfo()->getTriple().getArch());
3373   }
3374
3375   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3376                                    getTarget().getTriple().getArch());
3377 }
3378
3379 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3380                                      NeonTypeFlags TypeFlags,
3381                                      llvm::Triple::ArchType Arch,
3382                                      bool V1Ty=false) {
3383   int IsQuad = TypeFlags.isQuad();
3384   switch (TypeFlags.getEltType()) {
3385   case NeonTypeFlags::Int8:
3386   case NeonTypeFlags::Poly8:
3387     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3388   case NeonTypeFlags::Int16:
3389   case NeonTypeFlags::Poly16:
3390     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3391   case NeonTypeFlags::Float16:
3392     // FIXME: Only AArch64 backend can so far properly handle half types.
3393     // Remove else part once ARM backend support for half is complete.
3394     if (Arch == llvm::Triple::aarch64)
3395       return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
3396     else
3397       return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3398   case NeonTypeFlags::Int32:
3399     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3400   case NeonTypeFlags::Int64:
3401   case NeonTypeFlags::Poly64:
3402     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3403   case NeonTypeFlags::Poly128:
3404     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
3405     // There is a lot of i128 and f128 API missing.
3406     // so we use v16i8 to represent poly128 and get pattern matched.
3407     return llvm::VectorType::get(CGF->Int8Ty, 16);
3408   case NeonTypeFlags::Float32:
3409     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3410   case NeonTypeFlags::Float64:
3411     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3412   }
3413   llvm_unreachable("Unknown vector element type!");
3414 }
3415
3416 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3417                                           NeonTypeFlags IntTypeFlags) {
3418   int IsQuad = IntTypeFlags.isQuad();
3419   switch (IntTypeFlags.getEltType()) {
3420   case NeonTypeFlags::Int16:
3421     return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
3422   case NeonTypeFlags::Int32:
3423     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3424   case NeonTypeFlags::Int64:
3425     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3426   default:
3427     llvm_unreachable("Type can't be converted to floating-point!");
3428   }
3429 }
3430
3431 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3432   unsigned nElts = V->getType()->getVectorNumElements();
3433   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3434   return Builder.CreateShuffleVector(V, V, SV, "lane");
3435 }
3436
3437 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3438                                      const char *name,
3439                                      unsigned shift, bool rightshift) {
3440   unsigned j = 0;
3441   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3442        ai != ae; ++ai, ++j)
3443     if (shift > 0 && shift == j)
3444       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3445     else
3446       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3447
3448   return Builder.CreateCall(F, Ops, name);
3449 }
3450
3451 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3452                                             bool neg) {
3453   int SV = cast<ConstantInt>(V)->getSExtValue();
3454   return ConstantInt::get(Ty, neg ? -SV : SV);
3455 }
3456
3457 // \brief Right-shift a vector by a constant.
3458 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3459                                           llvm::Type *Ty, bool usgn,
3460                                           const char *name) {
3461   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3462
3463   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3464   int EltSize = VTy->getScalarSizeInBits();
3465
3466   Vec = Builder.CreateBitCast(Vec, Ty);
3467
3468   // lshr/ashr are undefined when the shift amount is equal to the vector
3469   // element size.
3470   if (ShiftAmt == EltSize) {
3471     if (usgn) {
3472       // Right-shifting an unsigned value by its size yields 0.
3473       return llvm::ConstantAggregateZero::get(VTy);
3474     } else {
3475       // Right-shifting a signed value by its size is equivalent
3476       // to a shift of size-1.
3477       --ShiftAmt;
3478       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3479     }
3480   }
3481
3482   Shift = EmitNeonShiftVector(Shift, Ty, false);
3483   if (usgn)
3484     return Builder.CreateLShr(Vec, Shift, name);
3485   else
3486     return Builder.CreateAShr(Vec, Shift, name);
3487 }
3488
// Bit flags stored in the TypeModifier column of the NEON intrinsic tables.
// Each basic flag occupies its own bit; the named combinations at the end are
// plain unions of the basic flags.
// NOTE(review): the meaning of each individual flag is defined by the code
// that consumes TypeModifier, which is not part of this enum.
enum {
  // Basic flags, one bit each.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Named combinations.
  Vectorize1ArgType  = Add1ArgType | VectorizeArgTypes,
  VectorRet          = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 = AddRetType | Add2ArgTypes | VectorizeRetType |
                       VectorizeArgTypes,
  FpCmpzModifiers    = AddRetType | VectorizeRetType | Add1ArgType |
                       InventFloatType
};
3510
namespace {
/// One row of a NEON builtin table. Rows are brace-initialised by the
/// NEONMAP0/1/2 macros, so the member order below must not change and the
/// struct must remain an aggregate.
struct NeonIntrinsicInfo {
  const char *NameHint;      ///< Stringised builtin base name.
  unsigned BuiltinID;        ///< NEON::BI__builtin_neon_<name> value.
  unsigned LLVMIntrinsic;    ///< Primary intrinsic ID (0 for NEONMAP0 rows).
  unsigned AltLLVMIntrinsic; ///< Second intrinsic ID (non-zero only for
                             ///< NEONMAP2 rows).
  unsigned TypeModifier;     ///< Bitwise OR of the type-modifier flags.

  /// Order a row against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }

  /// Order two rows by their builtin ID.
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return *this < TE.BuiltinID;
  }
};
} // end anonymous namespace
3527
// Row builders for the NeonIntrinsicInfo tables. Each expands to a brace
// initialiser in member order:
//   { name string, NEON builtin ID, primary intrinsic, alternate intrinsic,
//     type modifier }
//
// NEONMAP0: builtin with no table-driven intrinsic (all three trailing
//           fields are 0).
// NEONMAP1: builtin with one intrinsic; the alternate is 0.
// NEONMAP2: builtin with a primary and an alternate intrinsic.
#define NEONMAP0(NameBase)                                                     \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  {                                                                            \
    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,    \
        0, TypeModifier                                                        \
  }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
  {                                                                            \
    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,    \
        Intrinsic::AltLLVMIntrinsic, TypeModifier                              \
  }
3539
3540 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3541   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3542   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3543   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3544   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3545   NEONMAP0(vaddhn_v),
3546   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3547   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3548   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3549   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3550   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3551   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3552   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3553   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3554   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3555   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3556   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3557   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3558   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3559   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3560   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3561   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3562   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3563   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3564   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3565   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3566   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3567   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3568   NEONMAP0(vcvt_f32_v),
3569   NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3570   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3571   NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3572   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3573   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3574   NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3575   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3576   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3577   NEONMAP0(vcvt_s16_v),
3578   NEONMAP0(vcvt_s32_v),
3579   NEONMAP0(vcvt_s64_v),
3580   NEONMAP0(vcvt_u16_v),
3581   NEONMAP0(vcvt_u32_v),
3582   NEONMAP0(vcvt_u64_v),
3583   NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
3584   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3585   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3586   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3587   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3588   NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
3589   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3590   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3591   NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
3592   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3593   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3594   NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
3595   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3596   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3597   NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
3598   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3599   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3600   NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
3601   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3602   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3603   NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
3604   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3605   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3606   NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
3607   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3608   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3609   NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
3610   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3611   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3612   NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
3613   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3614   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3615   NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
3616   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3617   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3618   NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
3619   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3620   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3621   NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
3622   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3623   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3624   NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
3625   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3626   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3627   NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
3628   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3629   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3630   NEONMAP0(vcvtq_f32_v),
3631   NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3632   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3633   NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3634   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3635   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3636   NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3637   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3638   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3639   NEONMAP0(vcvtq_s16_v),
3640   NEONMAP0(vcvtq_s32_v),
3641   NEONMAP0(vcvtq_s64_v),
3642   NEONMAP0(vcvtq_u16_v),
3643   NEONMAP0(vcvtq_u32_v),
3644   NEONMAP0(vcvtq_u64_v),
3645   NEONMAP0(vext_v),
3646   NEONMAP0(vextq_v),
3647   NEONMAP0(vfma_v),
3648   NEONMAP0(vfmaq_v),
3649   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3650   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3651   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3652   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3653   NEONMAP0(vld1_dup_v),
3654   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3655   NEONMAP0(vld1q_dup_v),
3656   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3657   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3658   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3659   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3660   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3661   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3662   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3663   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3664   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3665   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3666   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3667   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3668   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3669   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3670   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3671   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3672   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3673   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3674   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3675   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3676   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3677   NEONMAP0(vmovl_v),
3678   NEONMAP0(vmovn_v),
3679   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3680   NEONMAP0(vmull_v),
3681   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3682   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3683   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3684   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3685   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3686   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3687   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3688   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3689   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3690   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3691   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3692   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3693   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3694   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3695   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3696   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3697   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3698   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3699   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3700   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3701   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3702   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3703   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3704   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3705   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3706   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3707   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3708   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3709   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3710   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3711   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3712   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3713   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3714   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3715   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3716   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3717   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3718   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3719   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3720   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3721   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3722   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3723   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3724   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3725   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3726   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3727   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3728   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3729   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3730   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3731   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3732   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3733   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3734   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3735   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3736   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3737   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3738   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3739   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3740   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3741   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3742   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3743   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3744   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3745   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3746   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3747   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3748   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3749   NEONMAP0(vshl_n_v),
3750   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3751   NEONMAP0(vshll_n_v),
3752   NEONMAP0(vshlq_n_v),
3753   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3754   NEONMAP0(vshr_n_v),
3755   NEONMAP0(vshrn_n_v),
3756   NEONMAP0(vshrq_n_v),
3757   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3758   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3759   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3760   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3761   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3762   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3763   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3764   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3765   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3766   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3767   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3768   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3769   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3770   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3771   NEONMAP0(vsubhn_v),
3772   NEONMAP0(vtrn_v),
3773   NEONMAP0(vtrnq_v),
3774   NEONMAP0(vtst_v),
3775   NEONMAP0(vtstq_v),
3776   NEONMAP0(vuzp_v),
3777   NEONMAP0(vuzpq_v),
3778   NEONMAP0(vzip_v),
3779   NEONMAP0(vzipq_v)
3780 };
3781
3782 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3783   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3784   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3785   NEONMAP0(vaddhn_v),
3786   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3787   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3788   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3789   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3790   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3791   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3792   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3793   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3794   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3795   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3796   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3797   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3798   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3799   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3800   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3801   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3802   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3803   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3804   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3805   NEONMAP0(vcvt_f16_v),
3806   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3807   NEONMAP0(vcvt_f32_v),
3808   NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3809   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3810   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3811   NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3812   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3813   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3814   NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3815   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3816   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3817   NEONMAP0(vcvtq_f16_v),
3818   NEONMAP0(vcvtq_f32_v),
3819   NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3820   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3821   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3822   NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3823   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3824   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3825   NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3826   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3827   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3828   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3829   NEONMAP0(vext_v),
3830   NEONMAP0(vextq_v),
3831   NEONMAP0(vfma_v),
3832   NEONMAP0(vfmaq_v),
3833   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3834   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3835   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3836   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3837   NEONMAP0(vmovl_v),
3838   NEONMAP0(vmovn_v),
3839   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3840   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3841   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3842   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3843   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3844   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3845   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3846   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3847   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3848   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3849   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3850   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3851   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3852   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3853   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3854   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3855   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3856   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3857   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3858   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3859   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3860   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3861   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3862   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3863   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3864   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3865   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3866   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3867   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3868   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3869   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3870   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3871   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3872   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3873   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3874   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3875   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3876   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3877   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3878   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3879   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3880   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3881   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3882   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3883   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3884   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3885   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3886   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3887   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3888   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3889   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3890   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3891   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3892   NEONMAP0(vshl_n_v),
3893   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3894   NEONMAP0(vshll_n_v),
3895   NEONMAP0(vshlq_n_v),
3896   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3897   NEONMAP0(vshr_n_v),
3898   NEONMAP0(vshrn_n_v),
3899   NEONMAP0(vshrq_n_v),
3900   NEONMAP0(vsubhn_v),
3901   NEONMAP0(vtst_v),
3902   NEONMAP0(vtstq_v),
3903 };
3904
3905 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3906   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3907   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3908   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3909   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3910   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3911   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3912   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3913   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3914   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3915   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3916   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3917   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3918   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3919   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3920   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3921   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3922   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3923   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3924   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3925   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3926   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3927   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3928   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3929   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3930   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3931   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3932   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3933   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3934   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3935   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3936   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3937   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3938   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3939   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3940   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3941   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3942   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3943   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3944   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3945   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3946   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3947   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3948   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3949   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3950   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3951   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3952   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3953   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3954   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3955   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3956   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3957   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3958   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3959   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3960   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3961   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3962   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3963   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3964   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3965   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3966   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3967   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3968   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3969   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3970   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3971   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3972   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3973   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3974   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3975   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3976   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3977   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3978   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3979   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3980   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3981   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3982   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3983   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3984   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3985   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3986   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3987   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3988   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3989   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3990   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3991   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3992   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3993   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3994   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3995   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3996   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3997   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3998   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3999   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4000   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4001   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4002   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4003   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4004   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4005   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4006   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4007   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4008   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4009   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4010   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4011   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4012   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4013   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4014   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4015   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4016   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4017   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4018   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4019   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4020   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4021   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4022   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4023   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4024   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4025   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4026   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4027   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4028   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4029   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4030   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4031   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4032   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4033   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4034   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4035   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4036   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4037   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4038   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4039   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4040   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4041   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4042   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4043   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4044   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4045   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4046   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4047   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4048   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4049   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4050   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4051   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4052   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4053   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4054   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4055   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4056   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4057   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4058   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4059   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4060   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4061   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4062   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4063   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4064   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4065   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4066   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4067   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4068   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4069   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4070   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4071   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4072   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4073   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4074   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4075   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4076   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4077   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4078   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4079   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4080   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4081   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4082   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4083   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4084   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4085   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4086   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4087   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4088   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4089   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4090   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4091   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4092   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4093   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4094   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4095   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4096   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4097   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
4098 };
4099
// The NEONMAP* helper macros exist only to build the intrinsic tables above;
// retire them so they cannot leak into the rest of the file.
4100 #undef NEONMAP0
4101 #undef NEONMAP1
4102 #undef NEONMAP2
4103
// One "already verified as sorted" flag per intrinsic table. Presumably each
// is passed as the MapProvenSorted argument of findNeonIntrinsicInMap so the
// sortedness assertion runs only once per table -- the call sites are outside
// this excerpt, TODO confirm.
4104 static bool NEONSIMDIntrinsicsProvenSorted = false;
4105
4106 static bool AArch64SIMDIntrinsicsProvenSorted = false;
4107 static bool AArch64SISDIntrinsicsProvenSorted = false;
4108
4109
4110 static const NeonIntrinsicInfo *
4111 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
4112                        unsigned BuiltinID, bool &MapProvenSorted) {
4113
4114 #ifndef NDEBUG
4115   if (!MapProvenSorted) {
4116     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4117     MapProvenSorted = true;
4118   }
4119 #endif
4120
4121   const NeonIntrinsicInfo *Builtin =
4122       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4123
4124   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4125     return Builtin;
4126
4127   return nullptr;
4128 }
4129
4130 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4131                                                    unsigned Modifier,
4132                                                    llvm::Type *ArgType,
4133                                                    const CallExpr *E) {
4134   int VectorSize = 0;
4135   if (Modifier & Use64BitVectors)
4136     VectorSize = 64;
4137   else if (Modifier & Use128BitVectors)
4138     VectorSize = 128;
4139
4140   // Return type.
4141   SmallVector<llvm::Type *, 3> Tys;
4142   if (Modifier & AddRetType) {
4143     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4144     if (Modifier & VectorizeRetType)
4145       Ty = llvm::VectorType::get(
4146           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4147
4148     Tys.push_back(Ty);
4149   }
4150
4151   // Arguments.
4152   if (Modifier & VectorizeArgTypes) {
4153     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4154     ArgType = llvm::VectorType::get(ArgType, Elts);
4155   }
4156
4157   if (Modifier & (Add1ArgType | Add2ArgTypes))
4158     Tys.push_back(ArgType);
4159
4160   if (Modifier & Add2ArgTypes)
4161     Tys.push_back(ArgType);
4162
4163   if (Modifier & InventFloatType)
4164     Tys.push_back(FloatTy);
4165
4166   return CGM.getIntrinsic(IntrinsicID, Tys);
4167 }
4168
4169 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4170                                             const NeonIntrinsicInfo &SISDInfo,
4171                                             SmallVectorImpl<Value *> &Ops,
4172                                             const CallExpr *E) {
4173   unsigned BuiltinID = SISDInfo.BuiltinID;
4174   unsigned int Int = SISDInfo.LLVMIntrinsic;
4175   unsigned Modifier = SISDInfo.TypeModifier;
4176   const char *s = SISDInfo.NameHint;
4177
4178   switch (BuiltinID) {
4179   case NEON::BI__builtin_neon_vcled_s64:
4180   case NEON::BI__builtin_neon_vcled_u64:
4181   case NEON::BI__builtin_neon_vcles_f32:
4182   case NEON::BI__builtin_neon_vcled_f64:
4183   case NEON::BI__builtin_neon_vcltd_s64:
4184   case NEON::BI__builtin_neon_vcltd_u64:
4185   case NEON::BI__builtin_neon_vclts_f32:
4186   case NEON::BI__builtin_neon_vcltd_f64:
4187   case NEON::BI__builtin_neon_vcales_f32:
4188   case NEON::BI__builtin_neon_vcaled_f64:
4189   case NEON::BI__builtin_neon_vcalts_f32:
4190   case NEON::BI__builtin_neon_vcaltd_f64:
4191     // Only one direction of comparisons actually exist, cmle is actually a cmge
4192     // with swapped operands. The table gives us the right intrinsic but we
4193     // still need to do the swap.
4194     std::swap(Ops[0], Ops[1]);
4195     break;
4196   }
4197
4198   assert(Int && "Generic code assumes a valid intrinsic");
4199
4200   // Determine the type(s) of this overloaded AArch64 intrinsic.
4201   const Expr *Arg = E->getArg(0);
4202   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
4203   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
4204
4205   int j = 0;
4206   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
4207   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4208        ai != ae; ++ai, ++j) {
4209     llvm::Type *ArgTy = ai->getType();
4210     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4211              ArgTy->getPrimitiveSizeInBits())
4212       continue;
4213
4214     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4215     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4216     // it before inserting.
4217     Ops[j] =
4218         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4219     Ops[j] =
4220         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4221   }
4222
4223   Value *Result = CGF.EmitNeonCall(F, Ops, s);
4224   llvm::Type *ResultType = CGF.ConvertType(E->getType());
4225   if (ResultType->getPrimitiveSizeInBits() <
4226       Result->getType()->getPrimitiveSizeInBits())
4227     return CGF.Builder.CreateExtractElement(Result, C0);
4228
4229   return CGF.Builder.CreateBitCast(Result, ResultType, s);
4230 }
4231
/// Emit IR for the NEON builtin \p BuiltinID of call \p E.
///
/// The call's last argument must be an integer constant; it is decoded as
/// NeonTypeFlags to obtain the vector type (VTy/Ty) and its signedness.
/// nullptr is returned when that argument is not a constant or when
/// GetNeonType yields no type.
///
/// \p LLVMIntrinsic is the default intrinsic; \p AltLLVMIntrinsic replaces it
/// when \p Modifier has UnsignedAlts set and the vector type is signed. The
/// switch below covers builtins that need custom lowering. Any builtin that
/// leaves the switch (via `default` or `break`) is emitted through
/// LookupNeonLLVMIntrinsic and its result is bitcast to the call's type.
///
/// \p Ops holds the already-emitted operands and is modified in place.
/// \p PtrOp0 / \p PtrOp1 supply the alignment operand of the load/store
/// intrinsics (and the load address for vld1_dup).
4232 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4233     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4234     const char *NameHint, unsigned Modifier, const CallExpr *E,
4235     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
4236     llvm::Triple::ArchType Arch) {
4237   // Get the last argument, which specifies the vector type.
4238   llvm::APSInt NeonTypeConst;
4239   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4240   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4241     return nullptr;
4242
4243   // Determine the type of this overloaded NEON intrinsic.
4244   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4245   bool Usgn = Type.isUnsigned();
4246   bool Quad = Type.isQuad();
4247
4248   llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
4249   llvm::Type *Ty = VTy;
4250   if (!Ty)
4251     return nullptr;
4252
       // Materialize an address's alignment as the i32 operand that the
       // load/store intrinsic calls below append to their operand lists.
4253   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4254     return Builder.getInt32(addr.getAlignment().getQuantity());
4255   };
4256
       // With UnsignedAlts, LLVMIntrinsic is the unsigned form and
       // AltLLVMIntrinsic the one used for every non-unsigned type.
4257   unsigned Int = LLVMIntrinsic;
4258   if ((Modifier & UnsignedAlts) && !Usgn)
4259     Int = AltLLVMIntrinsic;
4260
4261   switch (BuiltinID) {
4262   default: break;
4263   case NEON::BI__builtin_neon_vabs_v:
4264   case NEON::BI__builtin_neon_vabsq_v:
4265     if (VTy->getElementType()->isFloatingPointTy())
4266       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4267     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4268   case NEON::BI__builtin_neon_vaddhn_v: {
4269     llvm::VectorType *SrcTy =
4270         llvm::VectorType::getExtendedElementVectorType(VTy);
4271
4272     // %sum = add <4 x i32> %lhs, %rhs
4273     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4274     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4275     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4276
4277     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4278     Constant *ShiftAmt =
4279         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4280     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4281
4282     // %res = trunc <4 x i32> %high to <4 x i16>
4283     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4284   }
       // The "less" absolute compares are the "greater" ones with the operands
       // exchanged, so swap and share the code below.
4285   case NEON::BI__builtin_neon_vcale_v:
4286   case NEON::BI__builtin_neon_vcaleq_v:
4287   case NEON::BI__builtin_neon_vcalt_v:
4288   case NEON::BI__builtin_neon_vcaltq_v:
4289     std::swap(Ops[0], Ops[1]);
4290     LLVM_FALLTHROUGH;
4291   case NEON::BI__builtin_neon_vcage_v:
4292   case NEON::BI__builtin_neon_vcageq_v:
4293   case NEON::BI__builtin_neon_vcagt_v:
4294   case NEON::BI__builtin_neon_vcagtq_v: {
         // The intrinsic is overloaded on the result type (VTy) and on a
         // floating-point vector with the same lane width and lane count.
4295     llvm::Type *Ty;
4296     switch (VTy->getScalarSizeInBits()) {
4297     default: llvm_unreachable("unexpected type");
4298     case 32:
4299       Ty = FloatTy;
4300       break;
4301     case 64:
4302       Ty = DoubleTy;
4303       break;
4304     case 16:
4305       Ty = HalfTy;
4306       break;
4307     }
4308     llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
4309     llvm::Type *Tys[] = { VTy, VecFlt };
4310     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4311     return EmitNeonCall(F, Ops, NameHint);
4312   }
4313   case NEON::BI__builtin_neon_vclz_v:
4314   case NEON::BI__builtin_neon_vclzq_v:
4315     // We generate target-independent intrinsic, which needs a second argument
4316     // for whether or not clz of zero is undefined; on ARM it isn't.
4317     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
         // Leave the switch: the call itself is emitted by the generic code
         // after it.
4318     break;
4319   case NEON::BI__builtin_neon_vcvt_f32_v:
4320   case NEON::BI__builtin_neon_vcvtq_f32_v:
4321     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4322     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
4323     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4324                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4325   case NEON::BI__builtin_neon_vcvt_f16_v:
4326   case NEON::BI__builtin_neon_vcvtq_f16_v:
4327     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4328     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
4329     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4330                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4331   case NEON::BI__builtin_neon_vcvt_n_f16_v:
4332   case NEON::BI__builtin_neon_vcvt_n_f32_v:
4333   case NEON::BI__builtin_neon_vcvt_n_f64_v:
4334   case NEON::BI__builtin_neon_vcvtq_n_f16_v:
4335   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4336   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
         // Overloaded on { float result vector, source vector }. The
         // signed/unsigned choice is redone here from Usgn alone, without
         // consulting the UnsignedAlts modifier bit.
4337     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4338     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4339     Function *F = CGM.getIntrinsic(Int, Tys);
4340     return EmitNeonCall(F, Ops, "vcvt_n");
4341   }
4342   case NEON::BI__builtin_neon_vcvt_n_s16_v:
4343   case NEON::BI__builtin_neon_vcvt_n_s32_v:
4344   case NEON::BI__builtin_neon_vcvt_n_u16_v:
4345   case NEON::BI__builtin_neon_vcvt_n_u32_v:
4346   case NEON::BI__builtin_neon_vcvt_n_s64_v:
4347   case NEON::BI__builtin_neon_vcvt_n_u64_v:
4348   case NEON::BI__builtin_neon_vcvtq_n_s16_v:
4349   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4350   case NEON::BI__builtin_neon_vcvtq_n_u16_v:
4351   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4352   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4353   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4354     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4355     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4356     return EmitNeonCall(F, Ops, "vcvt_n");
4357   }
4358   case NEON::BI__builtin_neon_vcvt_s32_v:
4359   case NEON::BI__builtin_neon_vcvt_u32_v:
4360   case NEON::BI__builtin_neon_vcvt_s64_v:
4361   case NEON::BI__builtin_neon_vcvt_u64_v:
4362   case NEON::BI__builtin_neon_vcvt_s16_v:
4363   case NEON::BI__builtin_neon_vcvt_u16_v:
4364   case NEON::BI__builtin_neon_vcvtq_s32_v:
4365   case NEON::BI__builtin_neon_vcvtq_u32_v:
4366   case NEON::BI__builtin_neon_vcvtq_s64_v:
4367   case NEON::BI__builtin_neon_vcvtq_u64_v:
4368   case NEON::BI__builtin_neon_vcvtq_s16_v:
4369   case NEON::BI__builtin_neon_vcvtq_u16_v: {
4370     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4371     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4372                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4373   }
       // NOTE(review): vcvta_u16_v is absent from the list below although every
       // sibling family (vcvtaq, vcvtn, vcvtp, vcvtm) lists its u16 variant. If
       // that builtin exists it currently takes the generic path after the
       // switch -- verify whether a case label is missing here.
4374   case NEON::BI__builtin_neon_vcvta_s16_v:
4375   case NEON::BI__builtin_neon_vcvta_s32_v:
4376   case NEON::BI__builtin_neon_vcvta_s64_v:
4377   case NEON::BI__builtin_neon_vcvta_u32_v:
4378   case NEON::BI__builtin_neon_vcvta_u64_v:
4379   case NEON::BI__builtin_neon_vcvtaq_s16_v:
4380   case NEON::BI__builtin_neon_vcvtaq_s32_v:
4381   case NEON::BI__builtin_neon_vcvtaq_s64_v:
4382   case NEON::BI__builtin_neon_vcvtaq_u16_v:
4383   case NEON::BI__builtin_neon_vcvtaq_u32_v:
4384   case NEON::BI__builtin_neon_vcvtaq_u64_v:
4385   case NEON::BI__builtin_neon_vcvtn_s16_v:
4386   case NEON::BI__builtin_neon_vcvtn_s32_v:
4387   case NEON::BI__builtin_neon_vcvtn_s64_v:
4388   case NEON::BI__builtin_neon_vcvtn_u16_v:
4389   case NEON::BI__builtin_neon_vcvtn_u32_v:
4390   case NEON::BI__builtin_neon_vcvtn_u64_v:
4391   case NEON::BI__builtin_neon_vcvtnq_s16_v:
4392   case NEON::BI__builtin_neon_vcvtnq_s32_v:
4393   case NEON::BI__builtin_neon_vcvtnq_s64_v:
4394   case NEON::BI__builtin_neon_vcvtnq_u16_v:
4395   case NEON::BI__builtin_neon_vcvtnq_u32_v:
4396   case NEON::BI__builtin_neon_vcvtnq_u64_v:
4397   case NEON::BI__builtin_neon_vcvtp_s16_v:
4398   case NEON::BI__builtin_neon_vcvtp_s32_v:
4399   case NEON::BI__builtin_neon_vcvtp_s64_v:
4400   case NEON::BI__builtin_neon_vcvtp_u16_v:
4401   case NEON::BI__builtin_neon_vcvtp_u32_v:
4402   case NEON::BI__builtin_neon_vcvtp_u64_v:
4403   case NEON::BI__builtin_neon_vcvtpq_s16_v:
4404   case NEON::BI__builtin_neon_vcvtpq_s32_v:
4405   case NEON::BI__builtin_neon_vcvtpq_s64_v:
4406   case NEON::BI__builtin_neon_vcvtpq_u16_v:
4407   case NEON::BI__builtin_neon_vcvtpq_u32_v:
4408   case NEON::BI__builtin_neon_vcvtpq_u64_v:
4409   case NEON::BI__builtin_neon_vcvtm_s16_v:
4410   case NEON::BI__builtin_neon_vcvtm_s32_v:
4411   case NEON::BI__builtin_neon_vcvtm_s64_v:
4412   case NEON::BI__builtin_neon_vcvtm_u16_v:
4413   case NEON::BI__builtin_neon_vcvtm_u32_v:
4414   case NEON::BI__builtin_neon_vcvtm_u64_v:
4415   case NEON::BI__builtin_neon_vcvtmq_s16_v:
4416   case NEON::BI__builtin_neon_vcvtmq_s32_v:
4417   case NEON::BI__builtin_neon_vcvtmq_s64_v:
4418   case NEON::BI__builtin_neon_vcvtmq_u16_v:
4419   case NEON::BI__builtin_neon_vcvtmq_u32_v:
4420   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4421     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4422     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4423   }
4424   case NEON::BI__builtin_neon_vext_v:
4425   case NEON::BI__builtin_neon_vextq_v: {
         // Result lane i is lane i+CV of the two source vectors taken together,
         // expressed as a shuffle mask.
4426     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4427     SmallVector<uint32_t, 16> Indices;
4428     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4429       Indices.push_back(i+CV);
4430
4431     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4432     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4433     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4434   }
4435   case NEON::BI__builtin_neon_vfma_v:
4436   case NEON::BI__builtin_neon_vfmaq_v: {
4437     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4438     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4439     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4440     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4441
4442     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4443     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4444   }
4445   case NEON::BI__builtin_neon_vld1_v:
4446   case NEON::BI__builtin_neon_vld1q_v: {
4447     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4448     Ops.push_back(getAlignmentValue32(PtrOp0));
4449     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4450   }
4451   case NEON::BI__builtin_neon_vld2_v:
4452   case NEON::BI__builtin_neon_vld2q_v:
4453   case NEON::BI__builtin_neon_vld3_v:
4454   case NEON::BI__builtin_neon_vld3q_v:
4455   case NEON::BI__builtin_neon_vld4_v:
4456   case NEON::BI__builtin_neon_vld4q_v: {
         // Ops[1] is the address to load from (with PtrOp1's alignment). The
         // intrinsic's result is then stored through Ops[0], and the store is
         // what this case returns.
4457     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4458     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4459     Value *Align = getAlignmentValue32(PtrOp1);
4460     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4461     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4462     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4463     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4464   }
4465   case NEON::BI__builtin_neon_vld1_dup_v:
4466   case NEON::BI__builtin_neon_vld1q_dup_v: {
         // Load one element through PtrOp0, insert it into lane 0 of an undef
         // vector and splat that lane.
4467     Value *V = UndefValue::get(Ty);
4468     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4469     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4470     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4471     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4472     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4473     return EmitNeonSplat(Ops[0], CI);
4474   }
4475   case NEON::BI__builtin_neon_vld2_lane_v:
4476   case NEON::BI__builtin_neon_vld2q_lane_v:
4477   case NEON::BI__builtin_neon_vld3_lane_v:
4478   case NEON::BI__builtin_neon_vld3q_lane_v:
4479   case NEON::BI__builtin_neon_vld4_lane_v:
4480   case NEON::BI__builtin_neon_vld4q_lane_v: {
4481     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4482     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
         // Bitcast Ops[2] up to, but not including, the final operand
         // (presumably the lane index -- confirm against the builtin's
         // signature).
4483     for (unsigned I = 2; I < Ops.size() - 1; ++I)
4484       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4485     Ops.push_back(getAlignmentValue32(PtrOp1));
         // Ops[0] is the destination and is not an intrinsic operand; pass
         // everything after it, then store the result through Ops[0].
4486     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4487     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4488     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4489     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4490   }
4491   case NEON::BI__builtin_neon_vmovl_v: {
4492     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4493     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4494     if (Usgn)
4495       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4496     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4497   }
4498   case NEON::BI__builtin_neon_vmovn_v: {
4499     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4500     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4501     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4502   }
4503   case NEON::BI__builtin_neon_vmull_v:
4504     // FIXME: the integer vmull operations could be emitted in terms of pure
4505     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4506     // hoisting the exts outside loops. Until global ISel comes along that can
4507     // see through such movement this leads to bad CodeGen. So we need an
4508     // intrinsic for now.
4509     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4510     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4511     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4512   case NEON::BI__builtin_neon_vpadal_v:
4513   case NEON::BI__builtin_neon_vpadalq_v: {
4514     // The source operand type has twice as many elements of half the size.
4515     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4516     llvm::Type *EltTy =
4517       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4518     llvm::Type *NarrowTy =
4519       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4520     llvm::Type *Tys[2] = { Ty, NarrowTy };
4521     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4522   }
4523   case NEON::BI__builtin_neon_vpaddl_v:
4524   case NEON::BI__builtin_neon_vpaddlq_v: {
4525     // The source operand type has twice as many elements of half the size.
4526     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4527     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4528     llvm::Type *NarrowTy =
4529       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4530     llvm::Type *Tys[2] = { Ty, NarrowTy };
4531     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4532   }
4533   case NEON::BI__builtin_neon_vqdmlal_v:
4534   case NEON::BI__builtin_neon_vqdmlsl_v: {
         // Two steps: LLVMIntrinsic is applied to every operand after the
         // first, then AltLLVMIntrinsic combines Ops[0] with that result.
4535     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4536     Ops[1] =
4537         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4538     Ops.resize(2);
4539     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4540   }
4541   case NEON::BI__builtin_neon_vqshl_n_v:
4542   case NEON::BI__builtin_neon_vqshlq_n_v:
4543     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4544                         1, false);
4545   case NEON::BI__builtin_neon_vqshlu_n_v:
4546   case NEON::BI__builtin_neon_vqshluq_n_v:
4547     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4548                         1, false);
4549   case NEON::BI__builtin_neon_vrecpe_v:
4550   case NEON::BI__builtin_neon_vrecpeq_v:
4551   case NEON::BI__builtin_neon_vrsqrte_v:
4552   case NEON::BI__builtin_neon_vrsqrteq_v:
         // For these builtins the two table entries are the floating-point
         // form (LLVMIntrinsic) and the non-floating-point form
         // (AltLLVMIntrinsic).
4553     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4554     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4555
4556   case NEON::BI__builtin_neon_vrshr_n_v:
4557   case NEON::BI__builtin_neon_vrshrq_n_v:
4558     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4559                         1, true);
4560   case NEON::BI__builtin_neon_vshl_n_v:
4561   case NEON::BI__builtin_neon_vshlq_n_v:
4562     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4563     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4564                              "vshl_n");
4565   case NEON::BI__builtin_neon_vshll_n_v: {
4566     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4567     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4568     if (Usgn)
4569       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4570     else
4571       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4572     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4573     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4574   }
4575   case NEON::BI__builtin_neon_vshrn_n_v: {
4576     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4577     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4578     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4579     if (Usgn)
4580       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4581     else
4582       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4583     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4584   }
4585   case NEON::BI__builtin_neon_vshr_n_v:
4586   case NEON::BI__builtin_neon_vshrq_n_v:
4587     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4588   case NEON::BI__builtin_neon_vst1_v:
4589   case NEON::BI__builtin_neon_vst1q_v:
4590   case NEON::BI__builtin_neon_vst2_v:
4591   case NEON::BI__builtin_neon_vst2q_v:
4592   case NEON::BI__builtin_neon_vst3_v:
4593   case NEON::BI__builtin_neon_vst3q_v:
4594   case NEON::BI__builtin_neon_vst4_v:
4595   case NEON::BI__builtin_neon_vst4q_v:
4596   case NEON::BI__builtin_neon_vst2_lane_v:
4597   case NEON::BI__builtin_neon_vst2q_lane_v:
4598   case NEON::BI__builtin_neon_vst3_lane_v:
4599   case NEON::BI__builtin_neon_vst3q_lane_v:
4600   case NEON::BI__builtin_neon_vst4_lane_v:
4601   case NEON::BI__builtin_neon_vst4q_lane_v: {
         // All store forms share one shape: append PtrOp0's alignment and call
         // the intrinsic overloaded on { i8*, vector type }.
4602     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4603     Ops.push_back(getAlignmentValue32(PtrOp0));
4604     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4605   }
4606   case NEON::BI__builtin_neon_vsubhn_v: {
4607     llvm::VectorType *SrcTy =
4608         llvm::VectorType::getExtendedElementVectorType(VTy);
4609
4610     // %diff = sub <4 x i32> %lhs, %rhs
4611     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4612     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4613     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4614
4615     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4616     Constant *ShiftAmt =
4617         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4618     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4619
4620     // %res = trunc <4 x i32> %high to <4 x i16>
4621     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4622   }
       // vtrn/vuzp/vzip each produce two vectors. Both are built as shuffles of
       // Ops[1] and Ops[2] and stored to consecutive vector slots starting at
       // the pointer in Ops[0]; the value returned is the second store.
4623   case NEON::BI__builtin_neon_vtrn_v:
4624   case NEON::BI__builtin_neon_vtrnq_v: {
4625     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4626     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4627     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4628     Value *SV = nullptr;
4629
4630     for (unsigned vi = 0; vi != 2; ++vi) {
4631       SmallVector<uint32_t, 16> Indices;
4632       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4633         Indices.push_back(i+vi);
4634         Indices.push_back(i+e+vi);
4635       }
4636       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4637       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4638       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4639     }
4640     return SV;
4641   }
4642   case NEON::BI__builtin_neon_vtst_v:
4643   case NEON::BI__builtin_neon_vtstq_v: {
         // (a & b) != 0 per lane, sign-extended so a true lane has every bit
         // set.
4644     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4645     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4646     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4647     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4648                                 ConstantAggregateZero::get(Ty));
4649     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4650   }
4651   case NEON::BI__builtin_neon_vuzp_v:
4652   case NEON::BI__builtin_neon_vuzpq_v: {
4653     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4654     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4655     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4656     Value *SV = nullptr;
4657
4658     for (unsigned vi = 0; vi != 2; ++vi) {
4659       SmallVector<uint32_t, 16> Indices;
4660       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4661         Indices.push_back(2*i+vi);
4662
4663       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4664       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4665       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4666     }
4667     return SV;
4668   }
4669   case NEON::BI__builtin_neon_vzip_v:
4670   case NEON::BI__builtin_neon_vzipq_v: {
4671     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4672     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4673     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4674     Value *SV = nullptr;
4675
4676     for (unsigned vi = 0; vi != 2; ++vi) {
4677       SmallVector<uint32_t, 16> Indices;
4678       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4679         Indices.push_back((i + vi*e) >> 1);
4680         Indices.push_back(((i + vi*e) >> 1)+e);
4681       }
4682       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4683       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4684       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4685     }
4686     return SV;
4687   }
4688   }
4689
       // Generic path: reached by every builtin the switch did not return for.
4690   assert(Int && "Expected valid intrinsic number");
4691
4692   // Determine the type(s) of this overloaded AArch64 intrinsic.
4693   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4694
4695   Value *Result = EmitNeonCall(F, Ops, NameHint);
4696   llvm::Type *ResultType = ConvertType(E->getType());
4697   // AArch64 intrinsic one-element vector type cast to
4698   // scalar type expected by the builtin
4699   return Builder.CreateBitCast(Result, ResultType, NameHint);
4700 }
4701
4702 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4703     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4704     const CmpInst::Predicate Ip, const Twine &Name) {
4705   llvm::Type *OTy = Op->getType();
4706
4707   // FIXME: this is utterly horrific. We should not be looking at previous
4708   // codegen context to find out what needs doing. Unfortunately TableGen
4709   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4710   // (etc).
4711   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4712     OTy = BI->getOperand(0)->getType();
4713
4714   Op = Builder.CreateBitCast(Op, OTy);
4715   if (OTy->getScalarType()->isFloatingPointTy()) {
4716     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4717   } else {
4718     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4719   }
4720   return Builder.CreateSExt(Op, Ty, Name);
4721 }
4722
4723 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4724                                  Value *ExtOp, Value *IndexOp,
4725                                  llvm::Type *ResTy, unsigned IntID,
4726                                  const char *Name) {
4727   SmallVector<Value *, 2> TblOps;
4728   if (ExtOp)
4729     TblOps.push_back(ExtOp);
4730
4731   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4732   SmallVector<uint32_t, 16> Indices;
4733   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4734   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4735     Indices.push_back(2*i);
4736     Indices.push_back(2*i+1);
4737   }
4738
4739   int PairPos = 0, End = Ops.size() - 1;
4740   while (PairPos < End) {
4741     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4742                                                      Ops[PairPos+1], Indices,
4743                                                      Name));
4744     PairPos += 2;
4745   }
4746
4747   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4748   // of the 128-bit lookup table with zero.
4749   if (PairPos == End) {
4750     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4751     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4752                                                      ZeroTbl, Indices, Name));
4753   }
4754
4755   Function *TblF;
4756   TblOps.push_back(IndexOp);
4757   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4758
4759   return CGF.EmitNeonCall(TblF, TblOps, Name);
4760 }
4761
4762 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4763   unsigned Value;
4764   switch (BuiltinID) {
4765   default:
4766     return nullptr;
4767   case ARM::BI__builtin_arm_nop:
4768     Value = 0;
4769     break;
4770   case ARM::BI__builtin_arm_yield:
4771   case ARM::BI__yield:
4772     Value = 1;
4773     break;
4774   case ARM::BI__builtin_arm_wfe:
4775   case ARM::BI__wfe:
4776     Value = 2;
4777     break;
4778   case ARM::BI__builtin_arm_wfi:
4779   case ARM::BI__wfi:
4780     Value = 3;
4781     break;
4782   case ARM::BI__builtin_arm_sev:
4783   case ARM::BI__sev:
4784     Value = 4;
4785     break;
4786   case ARM::BI__builtin_arm_sevl:
4787   case ARM::BI__sevl:
4788     Value = 5;
4789     break;
4790   }
4791
4792   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4793                             llvm::ConstantInt::get(Int32Ty, Value));
4794 }
4795
4796 // Generates the IR for the read/write special register builtin,
4797 // ValueType is the type of the value that is to be written or read,
4798 // RegisterType is the type of the register being written to or read from.
4799 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4800                                          const CallExpr *E,
4801                                          llvm::Type *RegisterType,
4802                                          llvm::Type *ValueType,
4803                                          bool IsRead,
4804                                          StringRef SysReg = "") {
4805   // write and register intrinsics only support 32 and 64 bit operations.
4806   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4807           && "Unsupported size for register.");
4808
4809   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4810   CodeGen::CodeGenModule &CGM = CGF.CGM;
4811   LLVMContext &Context = CGM.getLLVMContext();
4812
4813   if (SysReg.empty()) {
4814     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4815     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4816   }
4817
4818   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4819   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4820   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4821
4822   llvm::Type *Types[] = { RegisterType };
4823
4824   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4825   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4826             && "Can't fit 64-bit value in 32-bit register");
4827
4828   if (IsRead) {
4829     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4830     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4831
4832     if (MixedTypes)
4833       // Read into 64 bit register and then truncate result to 32 bit.
4834       return Builder.CreateTrunc(Call, ValueType);
4835
4836     if (ValueType->isPointerTy())
4837       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4838       return Builder.CreateIntToPtr(Call, ValueType);
4839
4840     return Call;
4841   }
4842
4843   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4844   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4845   if (MixedTypes) {
4846     // Extend 32 bit write value to 64 bit to pass to write.
4847     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4848     return Builder.CreateCall(F, { Metadata, ArgValue });
4849   }
4850
4851   if (ValueType->isPointerTy()) {
4852     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4853     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4854     return Builder.CreateCall(F, { Metadata, ArgValue });
4855   }
4856
4857   return Builder.CreateCall(F, { Metadata, ArgValue });
4858 }
4859
4860 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4861 /// argument that specifies the vector type.
4862 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4863   switch (BuiltinID) {
4864   default: break;
4865   case NEON::BI__builtin_neon_vget_lane_i8:
4866   case NEON::BI__builtin_neon_vget_lane_i16:
4867   case NEON::BI__builtin_neon_vget_lane_i32:
4868   case NEON::BI__builtin_neon_vget_lane_i64:
4869   case NEON::BI__builtin_neon_vget_lane_f32:
4870   case NEON::BI__builtin_neon_vgetq_lane_i8:
4871   case NEON::BI__builtin_neon_vgetq_lane_i16:
4872   case NEON::BI__builtin_neon_vgetq_lane_i32:
4873   case NEON::BI__builtin_neon_vgetq_lane_i64:
4874   case NEON::BI__builtin_neon_vgetq_lane_f32:
4875   case NEON::BI__builtin_neon_vset_lane_i8:
4876   case NEON::BI__builtin_neon_vset_lane_i16:
4877   case NEON::BI__builtin_neon_vset_lane_i32:
4878   case NEON::BI__builtin_neon_vset_lane_i64:
4879   case NEON::BI__builtin_neon_vset_lane_f32:
4880   case NEON::BI__builtin_neon_vsetq_lane_i8:
4881   case NEON::BI__builtin_neon_vsetq_lane_i16:
4882   case NEON::BI__builtin_neon_vsetq_lane_i32:
4883   case NEON::BI__builtin_neon_vsetq_lane_i64:
4884   case NEON::BI__builtin_neon_vsetq_lane_f32:
4885   case NEON::BI__builtin_neon_vsha1h_u32:
4886   case NEON::BI__builtin_neon_vsha1cq_u32:
4887   case NEON::BI__builtin_neon_vsha1pq_u32:
4888   case NEON::BI__builtin_neon_vsha1mq_u32:
4889   case clang::ARM::BI_MoveToCoprocessor:
4890   case clang::ARM::BI_MoveToCoprocessor2:
4891     return false;
4892   }
4893   return true;
4894 }
4895
4896 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4897                                            const CallExpr *E,
4898                                            llvm::Triple::ArchType Arch) {
4899   if (auto Hint = GetValueForARMHint(BuiltinID))
4900     return Hint;
4901
4902   if (BuiltinID == ARM::BI__emit) {
4903     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4904     llvm::FunctionType *FTy =
4905         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4906
4907     APSInt Value;
4908     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4909       llvm_unreachable("Sema will ensure that the parameter is constant");
4910
4911     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4912
4913     llvm::InlineAsm *Emit =
4914         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4915                                  /*SideEffects=*/true)
4916                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4917                                  /*SideEffects=*/true);
4918
4919     return Builder.CreateCall(Emit);
4920   }
4921
4922   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4923     Value *Option = EmitScalarExpr(E->getArg(0));
4924     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4925   }
4926
4927   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4928     Value *Address = EmitScalarExpr(E->getArg(0));
4929     Value *RW      = EmitScalarExpr(E->getArg(1));
4930     Value *IsData  = EmitScalarExpr(E->getArg(2));
4931
4932     // Locality is not supported on ARM target
4933     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4934
4935     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4936     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4937   }
4938
4939   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4940     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4941     return Builder.CreateCall(
4942         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4943   }
4944
4945   if (BuiltinID == ARM::BI__clear_cache) {
4946     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4947     const FunctionDecl *FD = E->getDirectCallee();
4948     Value *Ops[2];
4949     for (unsigned i = 0; i < 2; i++)
4950       Ops[i] = EmitScalarExpr(E->getArg(i));
4951     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4952     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4953     StringRef Name = FD->getName();
4954     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4955   }
4956
4957   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4958       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4959     Function *F;
4960
4961     switch (BuiltinID) {
4962     default: llvm_unreachable("unexpected builtin");
4963     case ARM::BI__builtin_arm_mcrr:
4964       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4965       break;
4966     case ARM::BI__builtin_arm_mcrr2:
4967       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4968       break;
4969     }
4970
4971     // MCRR{2} instruction has 5 operands but
4972     // the intrinsic has 4 because Rt and Rt2
4973     // are represented as a single unsigned 64
4974     // bit integer in the intrinsic definition
4975     // but internally it's represented as 2 32
4976     // bit integers.
4977
4978     Value *Coproc = EmitScalarExpr(E->getArg(0));
4979     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4980     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4981     Value *CRm = EmitScalarExpr(E->getArg(3));
4982
4983     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4984     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4985     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4986     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4987
4988     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4989   }
4990
4991   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4992       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4993     Function *F;
4994
4995     switch (BuiltinID) {
4996     default: llvm_unreachable("unexpected builtin");
4997     case ARM::BI__builtin_arm_mrrc:
4998       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4999       break;
5000     case ARM::BI__builtin_arm_mrrc2:
5001       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
5002       break;
5003     }
5004
5005     Value *Coproc = EmitScalarExpr(E->getArg(0));
5006     Value *Opc1 = EmitScalarExpr(E->getArg(1));
5007     Value *CRm  = EmitScalarExpr(E->getArg(2));
5008     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
5009
5010     // Returns an unsigned 64 bit integer, represented
5011     // as two 32 bit integers.
5012
5013     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
5014     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
5015     Rt = Builder.CreateZExt(Rt, Int64Ty);
5016     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
5017
5018     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
5019     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
5020     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
5021
5022     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
5023   }
5024
5025   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
5026       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
5027         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
5028        getContext().getTypeSize(E->getType()) == 64) ||
5029       BuiltinID == ARM::BI__ldrexd) {
5030     Function *F;
5031
5032     switch (BuiltinID) {
5033     default: llvm_unreachable("unexpected builtin");
5034     case ARM::BI__builtin_arm_ldaex:
5035       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
5036       break;
5037     case ARM::BI__builtin_arm_ldrexd:
5038     case ARM::BI__builtin_arm_ldrex:
5039     case ARM::BI__ldrexd:
5040       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
5041       break;
5042     }
5043
5044     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5045     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5046                                     "ldrexd");
5047
5048     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5049     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5050     Val0 = Builder.CreateZExt(Val0, Int64Ty);
5051     Val1 = Builder.CreateZExt(Val1, Int64Ty);
5052
5053     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
5054     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5055     Val = Builder.CreateOr(Val, Val1);
5056     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5057   }
5058
5059   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
5060       BuiltinID == ARM::BI__builtin_arm_ldaex) {
5061     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5062
5063     QualType Ty = E->getType();
5064     llvm::Type *RealResTy = ConvertType(Ty);
5065     llvm::Type *PtrTy = llvm::IntegerType::get(
5066         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5067     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5068
5069     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
5070                                        ? Intrinsic::arm_ldaex
5071                                        : Intrinsic::arm_ldrex,
5072                                    PtrTy);
5073     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
5074
5075     if (RealResTy->isPointerTy())
5076       return Builder.CreateIntToPtr(Val, RealResTy);
5077     else {
5078       llvm::Type *IntResTy = llvm::IntegerType::get(
5079           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5080       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5081       return Builder.CreateBitCast(Val, RealResTy);
5082     }
5083   }
5084
5085   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
5086       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
5087         BuiltinID == ARM::BI__builtin_arm_strex) &&
5088        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
5089     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5090                                        ? Intrinsic::arm_stlexd
5091                                        : Intrinsic::arm_strexd);
5092     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
5093
5094     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5095     Value *Val = EmitScalarExpr(E->getArg(0));
5096     Builder.CreateStore(Val, Tmp);
5097
5098     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
5099     Val = Builder.CreateLoad(LdPtr);
5100
5101     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5102     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5103     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
5104     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
5105   }
5106
5107   if (BuiltinID == ARM::BI__builtin_arm_strex ||
5108       BuiltinID == ARM::BI__builtin_arm_stlex) {
5109     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5110     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5111
5112     QualType Ty = E->getArg(0)->getType();
5113     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5114                                                  getContext().getTypeSize(Ty));
5115     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5116
5117     if (StoreVal->getType()->isPointerTy())
5118       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
5119     else {
5120       llvm::Type *IntTy = llvm::IntegerType::get(
5121           getLLVMContext(),
5122           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5123       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5124       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
5125     }
5126
5127     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5128                                        ? Intrinsic::arm_stlex
5129                                        : Intrinsic::arm_strex,
5130                                    StoreAddr->getType());
5131     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
5132   }
5133
5134   switch (BuiltinID) {
5135   case ARM::BI__iso_volatile_load8:
5136   case ARM::BI__iso_volatile_load16:
5137   case ARM::BI__iso_volatile_load32:
5138   case ARM::BI__iso_volatile_load64: {
5139     Value *Ptr = EmitScalarExpr(E->getArg(0));
5140     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5141     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5142     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5143                                              LoadSize.getQuantity() * 8);
5144     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5145     llvm::LoadInst *Load =
5146       Builder.CreateAlignedLoad(Ptr, LoadSize);
5147     Load->setVolatile(true);
5148     return Load;
5149   }
5150   case ARM::BI__iso_volatile_store8:
5151   case ARM::BI__iso_volatile_store16:
5152   case ARM::BI__iso_volatile_store32:
5153   case ARM::BI__iso_volatile_store64: {
5154     Value *Ptr = EmitScalarExpr(E->getArg(0));
5155     Value *Value = EmitScalarExpr(E->getArg(1));
5156     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5157     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5158     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5159                                              StoreSize.getQuantity() * 8);
5160     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5161     llvm::StoreInst *Store =
5162       Builder.CreateAlignedStore(Value, Ptr,
5163                                  StoreSize);
5164     Store->setVolatile(true);
5165     return Store;
5166   }
5167   }
5168
5169   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
5170     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
5171     return Builder.CreateCall(F);
5172   }
5173
5174   // CRC32
5175   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5176   switch (BuiltinID) {
5177   case ARM::BI__builtin_arm_crc32b:
5178     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
5179   case ARM::BI__builtin_arm_crc32cb:
5180     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
5181   case ARM::BI__builtin_arm_crc32h:
5182     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
5183   case ARM::BI__builtin_arm_crc32ch:
5184     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
5185   case ARM::BI__builtin_arm_crc32w:
5186   case ARM::BI__builtin_arm_crc32d:
5187     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
5188   case ARM::BI__builtin_arm_crc32cw:
5189   case ARM::BI__builtin_arm_crc32cd:
5190     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
5191   }
5192
5193   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5194     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5195     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5196
5197     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
5198     // intrinsics, hence we need different codegen for these cases.
5199     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
5200         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
5201       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5202       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
5203       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
5204       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
5205
5206       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5207       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
5208       return Builder.CreateCall(F, {Res, Arg1b});
5209     } else {
5210       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
5211
5212       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5213       return Builder.CreateCall(F, {Arg0, Arg1});
5214     }
5215   }
5216
5217   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
5218       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5219       BuiltinID == ARM::BI__builtin_arm_rsrp ||
5220       BuiltinID == ARM::BI__builtin_arm_wsr ||
5221       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
5222       BuiltinID == ARM::BI__builtin_arm_wsrp) {
5223
5224     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
5225                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5226                   BuiltinID == ARM::BI__builtin_arm_rsrp;
5227
5228     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
5229                             BuiltinID == ARM::BI__builtin_arm_wsrp;
5230
5231     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5232                    BuiltinID == ARM::BI__builtin_arm_wsr64;
5233
5234     llvm::Type *ValueType;
5235     llvm::Type *RegisterType;
5236     if (IsPointerBuiltin) {
5237       ValueType = VoidPtrTy;
5238       RegisterType = Int32Ty;
5239     } else if (Is64Bit) {
5240       ValueType = RegisterType = Int64Ty;
5241     } else {
5242       ValueType = RegisterType = Int32Ty;
5243     }
5244
5245     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5246   }
5247
5248   // Find out if any arguments are required to be integer constant
5249   // expressions.
5250   unsigned ICEArguments = 0;
5251   ASTContext::GetBuiltinTypeError Error;
5252   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5253   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5254
5255   auto getAlignmentValue32 = [&](Address addr) -> Value* {
5256     return Builder.getInt32(addr.getAlignment().getQuantity());
5257   };
5258
5259   Address PtrOp0 = Address::invalid();
5260   Address PtrOp1 = Address::invalid();
5261   SmallVector<Value*, 4> Ops;
5262   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
5263   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5264   for (unsigned i = 0, e = NumArgs; i != e; i++) {
5265     if (i == 0) {
5266       switch (BuiltinID) {
5267       case NEON::BI__builtin_neon_vld1_v:
5268       case NEON::BI__builtin_neon_vld1q_v:
5269       case NEON::BI__builtin_neon_vld1q_lane_v:
5270       case NEON::BI__builtin_neon_vld1_lane_v:
5271       case NEON::BI__builtin_neon_vld1_dup_v:
5272       case NEON::BI__builtin_neon_vld1q_dup_v:
5273       case NEON::BI__builtin_neon_vst1_v:
5274       case NEON::BI__builtin_neon_vst1q_v:
5275       case NEON::BI__builtin_neon_vst1q_lane_v:
5276       case NEON::BI__builtin_neon_vst1_lane_v:
5277       case NEON::BI__builtin_neon_vst2_v:
5278       case NEON::BI__builtin_neon_vst2q_v:
5279       case NEON::BI__builtin_neon_vst2_lane_v:
5280       case NEON::BI__builtin_neon_vst2q_lane_v:
5281       case NEON::BI__builtin_neon_vst3_v:
5282       case NEON::BI__builtin_neon_vst3q_v:
5283       case NEON::BI__builtin_neon_vst3_lane_v:
5284       case NEON::BI__builtin_neon_vst3q_lane_v:
5285       case NEON::BI__builtin_neon_vst4_v:
5286       case NEON::BI__builtin_neon_vst4q_v:
5287       case NEON::BI__builtin_neon_vst4_lane_v:
5288       case NEON::BI__builtin_neon_vst4q_lane_v:
5289         // Get the alignment for the argument in addition to the value;
5290         // we'll use it later.
5291         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5292         Ops.push_back(PtrOp0.getPointer());
5293         continue;
5294       }
5295     }
5296     if (i == 1) {
5297       switch (BuiltinID) {
5298       case NEON::BI__builtin_neon_vld2_v:
5299       case NEON::BI__builtin_neon_vld2q_v:
5300       case NEON::BI__builtin_neon_vld3_v:
5301       case NEON::BI__builtin_neon_vld3q_v:
5302       case NEON::BI__builtin_neon_vld4_v:
5303       case NEON::BI__builtin_neon_vld4q_v:
5304       case NEON::BI__builtin_neon_vld2_lane_v:
5305       case NEON::BI__builtin_neon_vld2q_lane_v:
5306       case NEON::BI__builtin_neon_vld3_lane_v:
5307       case NEON::BI__builtin_neon_vld3q_lane_v:
5308       case NEON::BI__builtin_neon_vld4_lane_v:
5309       case NEON::BI__builtin_neon_vld4q_lane_v:
5310       case NEON::BI__builtin_neon_vld2_dup_v:
5311       case NEON::BI__builtin_neon_vld3_dup_v:
5312       case NEON::BI__builtin_neon_vld4_dup_v:
5313         // Get the alignment for the argument in addition to the value;
5314         // we'll use it later.
5315         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5316         Ops.push_back(PtrOp1.getPointer());
5317         continue;
5318       }
5319     }
5320
5321     if ((ICEArguments & (1 << i)) == 0) {
5322       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5323     } else {
5324       // If this is required to be a constant, constant fold it so that we know
5325       // that the generated intrinsic gets a ConstantInt.
5326       llvm::APSInt Result;
5327       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5328       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5329       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5330     }
5331   }
5332
5333   switch (BuiltinID) {
5334   default: break;
5335
5336   case NEON::BI__builtin_neon_vget_lane_i8:
5337   case NEON::BI__builtin_neon_vget_lane_i16:
5338   case NEON::BI__builtin_neon_vget_lane_i32:
5339   case NEON::BI__builtin_neon_vget_lane_i64:
5340   case NEON::BI__builtin_neon_vget_lane_f32:
5341   case NEON::BI__builtin_neon_vgetq_lane_i8:
5342   case NEON::BI__builtin_neon_vgetq_lane_i16:
5343   case NEON::BI__builtin_neon_vgetq_lane_i32:
5344   case NEON::BI__builtin_neon_vgetq_lane_i64:
5345   case NEON::BI__builtin_neon_vgetq_lane_f32:
5346     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5347
5348   case NEON::BI__builtin_neon_vset_lane_i8:
5349   case NEON::BI__builtin_neon_vset_lane_i16:
5350   case NEON::BI__builtin_neon_vset_lane_i32:
5351   case NEON::BI__builtin_neon_vset_lane_i64:
5352   case NEON::BI__builtin_neon_vset_lane_f32:
5353   case NEON::BI__builtin_neon_vsetq_lane_i8:
5354   case NEON::BI__builtin_neon_vsetq_lane_i16:
5355   case NEON::BI__builtin_neon_vsetq_lane_i32:
5356   case NEON::BI__builtin_neon_vsetq_lane_i64:
5357   case NEON::BI__builtin_neon_vsetq_lane_f32:
5358     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5359
5360   case NEON::BI__builtin_neon_vsha1h_u32:
5361     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5362                         "vsha1h");
5363   case NEON::BI__builtin_neon_vsha1cq_u32:
5364     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5365                         "vsha1h");
5366   case NEON::BI__builtin_neon_vsha1pq_u32:
5367     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5368                         "vsha1h");
5369   case NEON::BI__builtin_neon_vsha1mq_u32:
5370     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5371                         "vsha1h");
5372
5373   // The ARM _MoveToCoprocessor builtins put the input register value as
5374   // the first argument, but the LLVM intrinsic expects it as the third one.
5375   case ARM::BI_MoveToCoprocessor:
5376   case ARM::BI_MoveToCoprocessor2: {
5377     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5378                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5379     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5380                                   Ops[3], Ops[4], Ops[5]});
5381   }
5382   case ARM::BI_BitScanForward:
5383   case ARM::BI_BitScanForward64:
5384     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5385   case ARM::BI_BitScanReverse:
5386   case ARM::BI_BitScanReverse64:
5387     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5388
5389   case ARM::BI_InterlockedAnd64:
5390     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5391   case ARM::BI_InterlockedExchange64:
5392     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5393   case ARM::BI_InterlockedExchangeAdd64:
5394     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5395   case ARM::BI_InterlockedExchangeSub64:
5396     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5397   case ARM::BI_InterlockedOr64:
5398     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5399   case ARM::BI_InterlockedXor64:
5400     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5401   case ARM::BI_InterlockedDecrement64:
5402     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5403   case ARM::BI_InterlockedIncrement64:
5404     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5405   }
5406
5407   // Get the last argument, which specifies the vector type.
5408   assert(HasExtraArg);
5409   llvm::APSInt Result;
5410   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5411   if (!Arg->isIntegerConstantExpr(Result, getContext()))
5412     return nullptr;
5413
5414   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5415       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5416     // Determine the overloaded type of this builtin.
5417     llvm::Type *Ty;
5418     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5419       Ty = FloatTy;
5420     else
5421       Ty = DoubleTy;
5422
5423     // Determine whether this is an unsigned conversion or not.
5424     bool usgn = Result.getZExtValue() == 1;
5425     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5426
5427     // Call the appropriate intrinsic.
5428     Function *F = CGM.getIntrinsic(Int, Ty);
5429     return Builder.CreateCall(F, Ops, "vcvtr");
5430   }
5431
5432   // Determine the type of this overloaded NEON intrinsic.
5433   NeonTypeFlags Type(Result.getZExtValue());
5434   bool usgn = Type.isUnsigned();
5435   bool rightShift = false;
5436
5437   llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
5438   llvm::Type *Ty = VTy;
5439   if (!Ty)
5440     return nullptr;
5441
5442   // Many NEON builtins have identical semantics and uses in ARM and
5443   // AArch64. Emit these in a single function.
5444   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5445   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5446       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5447   if (Builtin)
5448     return EmitCommonNeonBuiltinExpr(
5449         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5450         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
5451
5452   unsigned Int;
5453   switch (BuiltinID) {
5454   default: return nullptr;
5455   case NEON::BI__builtin_neon_vld1q_lane_v:
5456     // Handle 64-bit integer elements as a special case.  Use shuffles of
5457     // one-element vectors to avoid poor code for i64 in the backend.
5458     if (VTy->getElementType()->isIntegerTy(64)) {
5459       // Extract the other lane.
5460       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5461       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5462       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5463       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5464       // Load the value as a one-element vector.
5465       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5466       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5467       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5468       Value *Align = getAlignmentValue32(PtrOp0);
5469       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5470       // Combine them.
5471       uint32_t Indices[] = {1 - Lane, Lane};
5472       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5473       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5474     }
5475     LLVM_FALLTHROUGH;
5476   case NEON::BI__builtin_neon_vld1_lane_v: {
5477     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5478     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5479     Value *Ld = Builder.CreateLoad(PtrOp0);
5480     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5481   }
5482   case NEON::BI__builtin_neon_vld2_dup_v:
5483   case NEON::BI__builtin_neon_vld3_dup_v:
5484   case NEON::BI__builtin_neon_vld4_dup_v: {
5485     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5486     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5487       switch (BuiltinID) {
5488       case NEON::BI__builtin_neon_vld2_dup_v:
5489         Int = Intrinsic::arm_neon_vld2;
5490         break;
5491       case NEON::BI__builtin_neon_vld3_dup_v:
5492         Int = Intrinsic::arm_neon_vld3;
5493         break;
5494       case NEON::BI__builtin_neon_vld4_dup_v:
5495         Int = Intrinsic::arm_neon_vld4;
5496         break;
5497       default: llvm_unreachable("unknown vld_dup intrinsic?");
5498       }
5499       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5500       Function *F = CGM.getIntrinsic(Int, Tys);
5501       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5502       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5503       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5504       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5505       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5506     }
5507     switch (BuiltinID) {
5508     case NEON::BI__builtin_neon_vld2_dup_v:
5509       Int = Intrinsic::arm_neon_vld2lane;
5510       break;
5511     case NEON::BI__builtin_neon_vld3_dup_v:
5512       Int = Intrinsic::arm_neon_vld3lane;
5513       break;
5514     case NEON::BI__builtin_neon_vld4_dup_v:
5515       Int = Intrinsic::arm_neon_vld4lane;
5516       break;
5517     default: llvm_unreachable("unknown vld_dup intrinsic?");
5518     }
5519     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5520     Function *F = CGM.getIntrinsic(Int, Tys);
5521     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5522
5523     SmallVector<Value*, 6> Args;
5524     Args.push_back(Ops[1]);
5525     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5526
5527     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5528     Args.push_back(CI);
5529     Args.push_back(getAlignmentValue32(PtrOp1));
5530
5531     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5532     // splat lane 0 to all elts in each vector of the result.
5533     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5534       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5535       Value *Elt = Builder.CreateBitCast(Val, Ty);
5536       Elt = EmitNeonSplat(Elt, CI);
5537       Elt = Builder.CreateBitCast(Elt, Val->getType());
5538       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5539     }
5540     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5541     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5542     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5543   }
5544   case NEON::BI__builtin_neon_vqrshrn_n_v:
5545     Int =
5546       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5547     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5548                         1, true);
5549   case NEON::BI__builtin_neon_vqrshrun_n_v:
5550     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5551                         Ops, "vqrshrun_n", 1, true);
5552   case NEON::BI__builtin_neon_vqshrn_n_v:
5553     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5554     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5555                         1, true);
5556   case NEON::BI__builtin_neon_vqshrun_n_v:
5557     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5558                         Ops, "vqshrun_n", 1, true);
5559   case NEON::BI__builtin_neon_vrecpe_v:
5560   case NEON::BI__builtin_neon_vrecpeq_v:
5561     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5562                         Ops, "vrecpe");
5563   case NEON::BI__builtin_neon_vrshrn_n_v:
5564     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5565                         Ops, "vrshrn_n", 1, true);
5566   case NEON::BI__builtin_neon_vrsra_n_v:
5567   case NEON::BI__builtin_neon_vrsraq_n_v:
5568     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5569     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5570     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5571     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5572     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5573     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5574   case NEON::BI__builtin_neon_vsri_n_v:
5575   case NEON::BI__builtin_neon_vsriq_n_v:
5576     rightShift = true;
5577     LLVM_FALLTHROUGH;
5578   case NEON::BI__builtin_neon_vsli_n_v:
5579   case NEON::BI__builtin_neon_vsliq_n_v:
5580     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5581     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5582                         Ops, "vsli_n");
5583   case NEON::BI__builtin_neon_vsra_n_v:
5584   case NEON::BI__builtin_neon_vsraq_n_v:
5585     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5586     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5587     return Builder.CreateAdd(Ops[0], Ops[1]);
5588   case NEON::BI__builtin_neon_vst1q_lane_v:
5589     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5590     // a one-element vector and avoid poor code for i64 in the backend.
5591     if (VTy->getElementType()->isIntegerTy(64)) {
5592       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5593       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5594       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5595       Ops[2] = getAlignmentValue32(PtrOp0);
5596       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5597       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5598                                                  Tys), Ops);
5599     }
5600     LLVM_FALLTHROUGH;
5601   case NEON::BI__builtin_neon_vst1_lane_v: {
5602     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5603     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5604     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5605     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5606     return St;
5607   }
5608   case NEON::BI__builtin_neon_vtbl1_v:
5609     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5610                         Ops, "vtbl1");
5611   case NEON::BI__builtin_neon_vtbl2_v:
5612     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5613                         Ops, "vtbl2");
5614   case NEON::BI__builtin_neon_vtbl3_v:
5615     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5616                         Ops, "vtbl3");
5617   case NEON::BI__builtin_neon_vtbl4_v:
5618     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5619                         Ops, "vtbl4");
5620   case NEON::BI__builtin_neon_vtbx1_v:
5621     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5622                         Ops, "vtbx1");
5623   case NEON::BI__builtin_neon_vtbx2_v:
5624     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5625                         Ops, "vtbx2");
5626   case NEON::BI__builtin_neon_vtbx3_v:
5627     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5628                         Ops, "vtbx3");
5629   case NEON::BI__builtin_neon_vtbx4_v:
5630     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5631                         Ops, "vtbx4");
5632   }
5633 }
5634
5635 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5636                                       const CallExpr *E,
5637                                       SmallVectorImpl<Value *> &Ops,
5638                                       llvm::Triple::ArchType Arch) {
5639   unsigned int Int = 0;
5640   const char *s = nullptr;
5641
5642   switch (BuiltinID) {
5643   default:
5644     return nullptr;
5645   case NEON::BI__builtin_neon_vtbl1_v:
5646   case NEON::BI__builtin_neon_vqtbl1_v:
5647   case NEON::BI__builtin_neon_vqtbl1q_v:
5648   case NEON::BI__builtin_neon_vtbl2_v:
5649   case NEON::BI__builtin_neon_vqtbl2_v:
5650   case NEON::BI__builtin_neon_vqtbl2q_v:
5651   case NEON::BI__builtin_neon_vtbl3_v:
5652   case NEON::BI__builtin_neon_vqtbl3_v:
5653   case NEON::BI__builtin_neon_vqtbl3q_v:
5654   case NEON::BI__builtin_neon_vtbl4_v:
5655   case NEON::BI__builtin_neon_vqtbl4_v:
5656   case NEON::BI__builtin_neon_vqtbl4q_v:
5657     break;
5658   case NEON::BI__builtin_neon_vtbx1_v:
5659   case NEON::BI__builtin_neon_vqtbx1_v:
5660   case NEON::BI__builtin_neon_vqtbx1q_v:
5661   case NEON::BI__builtin_neon_vtbx2_v:
5662   case NEON::BI__builtin_neon_vqtbx2_v:
5663   case NEON::BI__builtin_neon_vqtbx2q_v:
5664   case NEON::BI__builtin_neon_vtbx3_v:
5665   case NEON::BI__builtin_neon_vqtbx3_v:
5666   case NEON::BI__builtin_neon_vqtbx3q_v:
5667   case NEON::BI__builtin_neon_vtbx4_v:
5668   case NEON::BI__builtin_neon_vqtbx4_v:
5669   case NEON::BI__builtin_neon_vqtbx4q_v:
5670     break;
5671   }
5672
5673   assert(E->getNumArgs() >= 3);
5674
5675   // Get the last argument, which specifies the vector type.
5676   llvm::APSInt Result;
5677   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5678   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5679     return nullptr;
5680
5681   // Determine the type of this overloaded NEON intrinsic.
5682   NeonTypeFlags Type(Result.getZExtValue());
5683   llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
5684   if (!Ty)
5685     return nullptr;
5686
5687   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5688
5689   // AArch64 scalar builtins are not overloaded, they do not have an extra
5690   // argument that specifies the vector type, need to handle each case.
5691   switch (BuiltinID) {
5692   case NEON::BI__builtin_neon_vtbl1_v: {
5693     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5694                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5695                               "vtbl1");
5696   }
5697   case NEON::BI__builtin_neon_vtbl2_v: {
5698     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5699                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5700                               "vtbl1");
5701   }
5702   case NEON::BI__builtin_neon_vtbl3_v: {
5703     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5704                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5705                               "vtbl2");
5706   }
5707   case NEON::BI__builtin_neon_vtbl4_v: {
5708     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5709                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5710                               "vtbl2");
5711   }
5712   case NEON::BI__builtin_neon_vtbx1_v: {
5713     Value *TblRes =
5714         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5715                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5716
5717     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5718     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5719     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5720
5721     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5722     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5723     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5724   }
5725   case NEON::BI__builtin_neon_vtbx2_v: {
5726     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5727                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5728                               "vtbx1");
5729   }
5730   case NEON::BI__builtin_neon_vtbx3_v: {
5731     Value *TblRes =
5732         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5733                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5734
5735     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5736     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5737                                            TwentyFourV);
5738     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5739
5740     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5741     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5742     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5743   }
5744   case NEON::BI__builtin_neon_vtbx4_v: {
5745     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5746                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5747                               "vtbx2");
5748   }
5749   case NEON::BI__builtin_neon_vqtbl1_v:
5750   case NEON::BI__builtin_neon_vqtbl1q_v:
5751     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5752   case NEON::BI__builtin_neon_vqtbl2_v:
5753   case NEON::BI__builtin_neon_vqtbl2q_v: {
5754     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5755   case NEON::BI__builtin_neon_vqtbl3_v:
5756   case NEON::BI__builtin_neon_vqtbl3q_v:
5757     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5758   case NEON::BI__builtin_neon_vqtbl4_v:
5759   case NEON::BI__builtin_neon_vqtbl4q_v:
5760     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5761   case NEON::BI__builtin_neon_vqtbx1_v:
5762   case NEON::BI__builtin_neon_vqtbx1q_v:
5763     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5764   case NEON::BI__builtin_neon_vqtbx2_v:
5765   case NEON::BI__builtin_neon_vqtbx2q_v:
5766     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5767   case NEON::BI__builtin_neon_vqtbx3_v:
5768   case NEON::BI__builtin_neon_vqtbx3q_v:
5769     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5770   case NEON::BI__builtin_neon_vqtbx4_v:
5771   case NEON::BI__builtin_neon_vqtbx4q_v:
5772     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5773   }
5774   }
5775
5776   if (!Int)
5777     return nullptr;
5778
5779   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5780   return CGF.EmitNeonCall(F, Ops, s);
5781 }
5782
5783 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5784   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5785   Op = Builder.CreateBitCast(Op, Int16Ty);
5786   Value *V = UndefValue::get(VTy);
5787   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5788   Op = Builder.CreateInsertElement(V, Op, CI);
5789   return Op;
5790 }
5791
5792 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5793                                                const CallExpr *E,
5794                                                llvm::Triple::ArchType Arch) {
5795   unsigned HintID = static_cast<unsigned>(-1);
5796   switch (BuiltinID) {
5797   default: break;
5798   case AArch64::BI__builtin_arm_nop:
5799     HintID = 0;
5800     break;
5801   case AArch64::BI__builtin_arm_yield:
5802     HintID = 1;
5803     break;
5804   case AArch64::BI__builtin_arm_wfe:
5805     HintID = 2;
5806     break;
5807   case AArch64::BI__builtin_arm_wfi:
5808     HintID = 3;
5809     break;
5810   case AArch64::BI__builtin_arm_sev:
5811     HintID = 4;
5812     break;
5813   case AArch64::BI__builtin_arm_sevl:
5814     HintID = 5;
5815     break;
5816   }
5817
5818   if (HintID != static_cast<unsigned>(-1)) {
5819     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5820     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5821   }
5822
5823   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5824     Value *Address         = EmitScalarExpr(E->getArg(0));
5825     Value *RW              = EmitScalarExpr(E->getArg(1));
5826     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5827     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5828     Value *IsData          = EmitScalarExpr(E->getArg(4));
5829
5830     Value *Locality = nullptr;
5831     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5832       // Temporal fetch, needs to convert cache level to locality.
5833       Locality = llvm::ConstantInt::get(Int32Ty,
5834         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5835     } else {
5836       // Streaming fetch.
5837       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5838     }
5839
5840     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5841     // PLDL3STRM or PLDL2STRM.
5842     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5843     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5844   }
5845
5846   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5847     assert((getContext().getTypeSize(E->getType()) == 32) &&
5848            "rbit of unusual size!");
5849     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5850     return Builder.CreateCall(
5851         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5852   }
5853   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5854     assert((getContext().getTypeSize(E->getType()) == 64) &&
5855            "rbit of unusual size!");
5856     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5857     return Builder.CreateCall(
5858         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5859   }
5860
5861   if (BuiltinID == AArch64::BI__clear_cache) {
5862     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5863     const FunctionDecl *FD = E->getDirectCallee();
5864     Value *Ops[2];
5865     for (unsigned i = 0; i < 2; i++)
5866       Ops[i] = EmitScalarExpr(E->getArg(i));
5867     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5868     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5869     StringRef Name = FD->getName();
5870     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5871   }
5872
5873   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5874       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5875       getContext().getTypeSize(E->getType()) == 128) {
5876     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5877                                        ? Intrinsic::aarch64_ldaxp
5878                                        : Intrinsic::aarch64_ldxp);
5879
5880     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5881     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5882                                     "ldxp");
5883
5884     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5885     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5886     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5887     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5888     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5889
5890     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5891     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5892     Val = Builder.CreateOr(Val, Val1);
5893     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5894   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5895              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5896     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5897
5898     QualType Ty = E->getType();
5899     llvm::Type *RealResTy = ConvertType(Ty);
5900     llvm::Type *PtrTy = llvm::IntegerType::get(
5901         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5902     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5903
5904     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5905                                        ? Intrinsic::aarch64_ldaxr
5906                                        : Intrinsic::aarch64_ldxr,
5907                                    PtrTy);
5908     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5909
5910     if (RealResTy->isPointerTy())
5911       return Builder.CreateIntToPtr(Val, RealResTy);
5912
5913     llvm::Type *IntResTy = llvm::IntegerType::get(
5914         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5915     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5916     return Builder.CreateBitCast(Val, RealResTy);
5917   }
5918
5919   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5920        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5921       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5922     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5923                                        ? Intrinsic::aarch64_stlxp
5924                                        : Intrinsic::aarch64_stxp);
5925     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5926
5927     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5928     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5929
5930     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5931     llvm::Value *Val = Builder.CreateLoad(Tmp);
5932
5933     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5934     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5935     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5936                                          Int8PtrTy);
5937     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5938   }
5939
5940   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5941       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5942     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5943     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5944
5945     QualType Ty = E->getArg(0)->getType();
5946     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5947                                                  getContext().getTypeSize(Ty));
5948     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5949
5950     if (StoreVal->getType()->isPointerTy())
5951       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5952     else {
5953       llvm::Type *IntTy = llvm::IntegerType::get(
5954           getLLVMContext(),
5955           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5956       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5957       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5958     }
5959
5960     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5961                                        ? Intrinsic::aarch64_stlxr
5962                                        : Intrinsic::aarch64_stxr,
5963                                    StoreAddr->getType());
5964     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5965   }
5966
5967   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5968     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5969     return Builder.CreateCall(F);
5970   }
5971
5972   // CRC32
5973   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5974   switch (BuiltinID) {
5975   case AArch64::BI__builtin_arm_crc32b:
5976     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5977   case AArch64::BI__builtin_arm_crc32cb:
5978     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5979   case AArch64::BI__builtin_arm_crc32h:
5980     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5981   case AArch64::BI__builtin_arm_crc32ch:
5982     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5983   case AArch64::BI__builtin_arm_crc32w:
5984     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5985   case AArch64::BI__builtin_arm_crc32cw:
5986     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5987   case AArch64::BI__builtin_arm_crc32d:
5988     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5989   case AArch64::BI__builtin_arm_crc32cd:
5990     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5991   }
5992
5993   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5994     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5995     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5996     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5997
5998     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5999     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
6000
6001     return Builder.CreateCall(F, {Arg0, Arg1});
6002   }
6003
6004   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
6005       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
6006       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
6007       BuiltinID == AArch64::BI__builtin_arm_wsr ||
6008       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
6009       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
6010
6011     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
6012                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
6013                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
6014
6015     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
6016                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
6017
6018     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
6019                    BuiltinID != AArch64::BI__builtin_arm_wsr;
6020
6021     llvm::Type *ValueType;
6022     llvm::Type *RegisterType = Int64Ty;
6023     if (IsPointerBuiltin) {
6024       ValueType = VoidPtrTy;
6025     } else if (Is64Bit) {
6026       ValueType = Int64Ty;
6027     } else {
6028       ValueType = Int32Ty;
6029     }
6030
6031     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6032   }
6033
6034   // Find out if any arguments are required to be integer constant
6035   // expressions.
6036   unsigned ICEArguments = 0;
6037   ASTContext::GetBuiltinTypeError Error;
6038   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6039   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6040
6041   llvm::SmallVector<Value*, 4> Ops;
6042   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
6043     if ((ICEArguments & (1 << i)) == 0) {
6044       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6045     } else {
6046       // If this is required to be a constant, constant fold it so that we know
6047       // that the generated intrinsic gets a ConstantInt.
6048       llvm::APSInt Result;
6049       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6050       assert(IsConst && "Constant arg isn't actually constant?");
6051       (void)IsConst;
6052       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6053     }
6054   }
6055
6056   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
6057   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6058       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
6059
6060   if (Builtin) {
6061     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
6062     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
6063     assert(Result && "SISD intrinsic should have been handled");
6064     return Result;
6065   }
6066
6067   llvm::APSInt Result;
6068   const Expr *Arg = E->getArg(E->getNumArgs()-1);
6069   NeonTypeFlags Type(0);
6070   if (Arg->isIntegerConstantExpr(Result, getContext()))
6071     // Determine the type of this overloaded NEON intrinsic.
6072     Type = NeonTypeFlags(Result.getZExtValue());
6073
6074   bool usgn = Type.isUnsigned();
6075   bool quad = Type.isQuad();
6076
6077   // Handle non-overloaded intrinsics first.
6078   switch (BuiltinID) {
6079   default: break;
6080   case NEON::BI__builtin_neon_vldrq_p128: {
6081     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
6082     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
6083     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
6084     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
6085                                      CharUnits::fromQuantity(16));
6086   }
6087   case NEON::BI__builtin_neon_vstrq_p128: {
6088     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
6089     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
6090     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
6091   }
6092   case NEON::BI__builtin_neon_vcvts_u32_f32:
6093   case NEON::BI__builtin_neon_vcvtd_u64_f64:
6094     usgn = true;
6095     LLVM_FALLTHROUGH;
6096   case NEON::BI__builtin_neon_vcvts_s32_f32:
6097   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
6098     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6099     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6100     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
6101     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
6102     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
6103     if (usgn)
6104       return Builder.CreateFPToUI(Ops[0], InTy);
6105     return Builder.CreateFPToSI(Ops[0], InTy);
6106   }
6107   case NEON::BI__builtin_neon_vcvts_f32_u32:
6108   case NEON::BI__builtin_neon_vcvtd_f64_u64:
6109     usgn = true;
6110     LLVM_FALLTHROUGH;
6111   case NEON::BI__builtin_neon_vcvts_f32_s32:
6112   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
6113     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6114     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6115     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
6116     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
6117     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
6118     if (usgn)
6119       return Builder.CreateUIToFP(Ops[0], FTy);
6120     return Builder.CreateSIToFP(Ops[0], FTy);
6121   }
6122   case NEON::BI__builtin_neon_vpaddd_s64: {
6123     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
6124     Value *Vec = EmitScalarExpr(E->getArg(0));
6125     // The vector is v2i64, so make sure it's bitcast to that.
6126     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
6127     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6128     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6129     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6130     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6131     // Pairwise addition of a v2i64 into a scalar i64.
6132     return Builder.CreateAdd(Op0, Op1, "vpaddd");
6133   }
6134   case NEON::BI__builtin_neon_vpaddd_f64: {
6135     llvm::Type *Ty =
6136       llvm::VectorType::get(DoubleTy, 2);
6137     Value *Vec = EmitScalarExpr(E->getArg(0));
6138     // The vector is v2f64, so make sure it's bitcast to that.
6139     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
6140     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6141     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6142     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6143     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6144     // Pairwise addition of a v2f64 into a scalar f64.
6145     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6146   }
6147   case NEON::BI__builtin_neon_vpadds_f32: {
6148     llvm::Type *Ty =
6149       llvm::VectorType::get(FloatTy, 2);
6150     Value *Vec = EmitScalarExpr(E->getArg(0));
6151     // The vector is v2f32, so make sure it's bitcast to that.
6152     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
6153     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6154     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6155     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6156     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6157     // Pairwise addition of a v2f32 into a scalar f32.
6158     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6159   }
6160   case NEON::BI__builtin_neon_vceqzd_s64:
6161   case NEON::BI__builtin_neon_vceqzd_f64:
6162   case NEON::BI__builtin_neon_vceqzs_f32:
6163     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6164     return EmitAArch64CompareBuiltinExpr(
6165         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6166         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
6167   case NEON::BI__builtin_neon_vcgezd_s64:
6168   case NEON::BI__builtin_neon_vcgezd_f64:
6169   case NEON::BI__builtin_neon_vcgezs_f32:
6170     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6171     return EmitAArch64CompareBuiltinExpr(
6172         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6173         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
6174   case NEON::BI__builtin_neon_vclezd_s64:
6175   case NEON::BI__builtin_neon_vclezd_f64:
6176   case NEON::BI__builtin_neon_vclezs_f32:
6177     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6178     return EmitAArch64CompareBuiltinExpr(
6179         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6180         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
6181   case NEON::BI__builtin_neon_vcgtzd_s64:
6182   case NEON::BI__builtin_neon_vcgtzd_f64:
6183   case NEON::BI__builtin_neon_vcgtzs_f32:
6184     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6185     return EmitAArch64CompareBuiltinExpr(
6186         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6187         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
6188   case NEON::BI__builtin_neon_vcltzd_s64:
6189   case NEON::BI__builtin_neon_vcltzd_f64:
6190   case NEON::BI__builtin_neon_vcltzs_f32:
6191     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6192     return EmitAArch64CompareBuiltinExpr(
6193         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6194         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
6195
6196   case NEON::BI__builtin_neon_vceqzd_u64: {
6197     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6198     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6199     Ops[0] =
6200         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6201     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6202   }
6203   case NEON::BI__builtin_neon_vceqd_f64:
6204   case NEON::BI__builtin_neon_vcled_f64:
6205   case NEON::BI__builtin_neon_vcltd_f64:
6206   case NEON::BI__builtin_neon_vcged_f64:
6207   case NEON::BI__builtin_neon_vcgtd_f64: {
6208     llvm::CmpInst::Predicate P;
6209     switch (BuiltinID) {
6210     default: llvm_unreachable("missing builtin ID in switch!");
6211     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6212     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6213     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6214     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6215     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6216     }
6217     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6218     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6219     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6220     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6221     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6222   }
6223   case NEON::BI__builtin_neon_vceqs_f32:
6224   case NEON::BI__builtin_neon_vcles_f32:
6225   case NEON::BI__builtin_neon_vclts_f32:
6226   case NEON::BI__builtin_neon_vcges_f32:
6227   case NEON::BI__builtin_neon_vcgts_f32: {
6228     llvm::CmpInst::Predicate P;
6229     switch (BuiltinID) {
6230     default: llvm_unreachable("missing builtin ID in switch!");
6231     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6232     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6233     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6234     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6235     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6236     }
6237     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6238     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6239     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6240     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6241     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6242   }
6243   case NEON::BI__builtin_neon_vceqd_s64:
6244   case NEON::BI__builtin_neon_vceqd_u64:
6245   case NEON::BI__builtin_neon_vcgtd_s64:
6246   case NEON::BI__builtin_neon_vcgtd_u64:
6247   case NEON::BI__builtin_neon_vcltd_s64:
6248   case NEON::BI__builtin_neon_vcltd_u64:
6249   case NEON::BI__builtin_neon_vcged_u64:
6250   case NEON::BI__builtin_neon_vcged_s64:
6251   case NEON::BI__builtin_neon_vcled_u64:
6252   case NEON::BI__builtin_neon_vcled_s64: {
6253     llvm::CmpInst::Predicate P;
6254     switch (BuiltinID) {
6255     default: llvm_unreachable("missing builtin ID in switch!");
6256     case NEON::BI__builtin_neon_vceqd_s64:
6257     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6258     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6259     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6260     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6261     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6262     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6263     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6264     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6265     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6266     }
6267     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6268     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6269     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6270     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6271     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6272   }
6273   case NEON::BI__builtin_neon_vtstd_s64:
6274   case NEON::BI__builtin_neon_vtstd_u64: {
6275     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6276     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6277     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6278     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6279     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6280                                 llvm::Constant::getNullValue(Int64Ty));
6281     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6282   }
6283   case NEON::BI__builtin_neon_vset_lane_i8:
6284   case NEON::BI__builtin_neon_vset_lane_i16:
6285   case NEON::BI__builtin_neon_vset_lane_i32:
6286   case NEON::BI__builtin_neon_vset_lane_i64:
6287   case NEON::BI__builtin_neon_vset_lane_f32:
6288   case NEON::BI__builtin_neon_vsetq_lane_i8:
6289   case NEON::BI__builtin_neon_vsetq_lane_i16:
6290   case NEON::BI__builtin_neon_vsetq_lane_i32:
6291   case NEON::BI__builtin_neon_vsetq_lane_i64:
6292   case NEON::BI__builtin_neon_vsetq_lane_f32:
6293     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6294     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6295   case NEON::BI__builtin_neon_vset_lane_f64:
6296     // The vector type needs a cast for the v1f64 variant.
6297     Ops[1] = Builder.CreateBitCast(Ops[1],
6298                                    llvm::VectorType::get(DoubleTy, 1));
6299     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6300     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6301   case NEON::BI__builtin_neon_vsetq_lane_f64:
6302     // The vector type needs a cast for the v2f64 variant.
6303     Ops[1] = Builder.CreateBitCast(Ops[1],
6304         llvm::VectorType::get(DoubleTy, 2));
6305     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6306     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6307
6308   case NEON::BI__builtin_neon_vget_lane_i8:
6309   case NEON::BI__builtin_neon_vdupb_lane_i8:
6310     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
6311     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6312                                         "vget_lane");
6313   case NEON::BI__builtin_neon_vgetq_lane_i8:
6314   case NEON::BI__builtin_neon_vdupb_laneq_i8:
6315     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
6316     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6317                                         "vgetq_lane");
6318   case NEON::BI__builtin_neon_vget_lane_i16:
6319   case NEON::BI__builtin_neon_vduph_lane_i16:
6320     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
6321     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6322                                         "vget_lane");
6323   case NEON::BI__builtin_neon_vgetq_lane_i16:
6324   case NEON::BI__builtin_neon_vduph_laneq_i16:
6325     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
6326     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6327                                         "vgetq_lane");
6328   case NEON::BI__builtin_neon_vget_lane_i32:
6329   case NEON::BI__builtin_neon_vdups_lane_i32:
6330     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
6331     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6332                                         "vget_lane");
6333   case NEON::BI__builtin_neon_vdups_lane_f32:
6334     Ops[0] = Builder.CreateBitCast(Ops[0],
6335         llvm::VectorType::get(FloatTy, 2));
6336     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6337                                         "vdups_lane");
6338   case NEON::BI__builtin_neon_vgetq_lane_i32:
6339   case NEON::BI__builtin_neon_vdups_laneq_i32:
6340     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
6341     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6342                                         "vgetq_lane");
6343   case NEON::BI__builtin_neon_vget_lane_i64:
6344   case NEON::BI__builtin_neon_vdupd_lane_i64:
6345     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
6346     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6347                                         "vget_lane");
6348   case NEON::BI__builtin_neon_vdupd_lane_f64:
6349     Ops[0] = Builder.CreateBitCast(Ops[0],
6350         llvm::VectorType::get(DoubleTy, 1));
6351     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6352                                         "vdupd_lane");
6353   case NEON::BI__builtin_neon_vgetq_lane_i64:
6354   case NEON::BI__builtin_neon_vdupd_laneq_i64:
6355     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
6356     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6357                                         "vgetq_lane");
6358   case NEON::BI__builtin_neon_vget_lane_f32:
6359     Ops[0] = Builder.CreateBitCast(Ops[0],
6360         llvm::VectorType::get(FloatTy, 2));
6361     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6362                                         "vget_lane");
6363   case NEON::BI__builtin_neon_vget_lane_f64:
6364     Ops[0] = Builder.CreateBitCast(Ops[0],
6365         llvm::VectorType::get(DoubleTy, 1));
6366     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6367                                         "vget_lane");
6368   case NEON::BI__builtin_neon_vgetq_lane_f32:
6369   case NEON::BI__builtin_neon_vdups_laneq_f32:
6370     Ops[0] = Builder.CreateBitCast(Ops[0],
6371         llvm::VectorType::get(FloatTy, 4));
6372     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6373                                         "vgetq_lane");
6374   case NEON::BI__builtin_neon_vgetq_lane_f64:
6375   case NEON::BI__builtin_neon_vdupd_laneq_f64:
6376     Ops[0] = Builder.CreateBitCast(Ops[0],
6377         llvm::VectorType::get(DoubleTy, 2));
6378     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6379                                         "vgetq_lane");
6380   case NEON::BI__builtin_neon_vaddd_s64:
6381   case NEON::BI__builtin_neon_vaddd_u64:
6382     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6383   case NEON::BI__builtin_neon_vsubd_s64:
6384   case NEON::BI__builtin_neon_vsubd_u64:
6385     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6386   case NEON::BI__builtin_neon_vqdmlalh_s16:
6387   case NEON::BI__builtin_neon_vqdmlslh_s16: {
6388     SmallVector<Value *, 2> ProductOps;
6389     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6390     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6391     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6392     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6393                           ProductOps, "vqdmlXl");
6394     Constant *CI = ConstantInt::get(SizeTy, 0);
6395     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6396
6397     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6398                                         ? Intrinsic::aarch64_neon_sqadd
6399                                         : Intrinsic::aarch64_neon_sqsub;
6400     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6401   }
6402   case NEON::BI__builtin_neon_vqshlud_n_s64: {
6403     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6404     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6405     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6406                         Ops, "vqshlu_n");
6407   }
6408   case NEON::BI__builtin_neon_vqshld_n_u64:
6409   case NEON::BI__builtin_neon_vqshld_n_s64: {
6410     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6411                                    ? Intrinsic::aarch64_neon_uqshl
6412                                    : Intrinsic::aarch64_neon_sqshl;
6413     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6414     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6415     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6416   }
6417   case NEON::BI__builtin_neon_vrshrd_n_u64:
6418   case NEON::BI__builtin_neon_vrshrd_n_s64: {
6419     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6420                                    ? Intrinsic::aarch64_neon_urshl
6421                                    : Intrinsic::aarch64_neon_srshl;
6422     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6423     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6424     Ops[1] = ConstantInt::get(Int64Ty, -SV);
6425     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6426   }
6427   case NEON::BI__builtin_neon_vrsrad_n_u64:
6428   case NEON::BI__builtin_neon_vrsrad_n_s64: {
6429     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6430                                    ? Intrinsic::aarch64_neon_urshl
6431                                    : Intrinsic::aarch64_neon_srshl;
6432     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6433     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6434     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6435                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6436     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6437   }
6438   case NEON::BI__builtin_neon_vshld_n_s64:
6439   case NEON::BI__builtin_neon_vshld_n_u64: {
6440     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6441     return Builder.CreateShl(
6442         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6443   }
6444   case NEON::BI__builtin_neon_vshrd_n_s64: {
6445     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6446     return Builder.CreateAShr(
6447         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6448                                                    Amt->getZExtValue())),
6449         "shrd_n");
6450   }
6451   case NEON::BI__builtin_neon_vshrd_n_u64: {
6452     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6453     uint64_t ShiftAmt = Amt->getZExtValue();
6454     // Right-shifting an unsigned value by its size yields 0.
6455     if (ShiftAmt == 64)
6456       return ConstantInt::get(Int64Ty, 0);
6457     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6458                               "shrd_n");
6459   }
6460   case NEON::BI__builtin_neon_vsrad_n_s64: {
6461     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6462     Ops[1] = Builder.CreateAShr(
6463         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6464                                                    Amt->getZExtValue())),
6465         "shrd_n");
6466     return Builder.CreateAdd(Ops[0], Ops[1]);
6467   }
6468   case NEON::BI__builtin_neon_vsrad_n_u64: {
6469     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6470     uint64_t ShiftAmt = Amt->getZExtValue();
6471     // Right-shifting an unsigned value by its size yields 0.
6472     // As Op + 0 = Op, return Ops[0] directly.
6473     if (ShiftAmt == 64)
6474       return Ops[0];
6475     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6476                                 "shrd_n");
6477     return Builder.CreateAdd(Ops[0], Ops[1]);
6478   }
6479   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6480   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6481   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6482   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6483     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6484                                           "lane");
6485     SmallVector<Value *, 2> ProductOps;
6486     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6487     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6488     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6489     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6490                           ProductOps, "vqdmlXl");
6491     Constant *CI = ConstantInt::get(SizeTy, 0);
6492     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6493     Ops.pop_back();
6494
6495     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6496                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6497                           ? Intrinsic::aarch64_neon_sqadd
6498                           : Intrinsic::aarch64_neon_sqsub;
6499     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6500   }
6501   case NEON::BI__builtin_neon_vqdmlals_s32:
6502   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6503     SmallVector<Value *, 2> ProductOps;
6504     ProductOps.push_back(Ops[1]);
6505     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6506     Ops[1] =
6507         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6508                      ProductOps, "vqdmlXl");
6509
6510     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6511                                         ? Intrinsic::aarch64_neon_sqadd
6512                                         : Intrinsic::aarch64_neon_sqsub;
6513     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6514   }
6515   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6516   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6517   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6518   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6519     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6520                                           "lane");
6521     SmallVector<Value *, 2> ProductOps;
6522     ProductOps.push_back(Ops[1]);
6523     ProductOps.push_back(Ops[2]);
6524     Ops[1] =
6525         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6526                      ProductOps, "vqdmlXl");
6527     Ops.pop_back();
6528
6529     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6530                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6531                           ? Intrinsic::aarch64_neon_sqadd
6532                           : Intrinsic::aarch64_neon_sqsub;
6533     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6534   }
6535   }
6536
6537   llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
6538   llvm::Type *Ty = VTy;
6539   if (!Ty)
6540     return nullptr;
6541
6542   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6543   // defer to common code if it's been added to our special map.
6544   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6545                                    AArch64SIMDIntrinsicsProvenSorted);
6546
6547   if (Builtin)
6548     return EmitCommonNeonBuiltinExpr(
6549         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6550         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6551         /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6552
6553   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6554     return V;
6555
6556   unsigned Int;
6557   switch (BuiltinID) {
6558   default: return nullptr;
6559   case NEON::BI__builtin_neon_vbsl_v:
6560   case NEON::BI__builtin_neon_vbslq_v: {
6561     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6562     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6563     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6564     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6565
6566     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6567     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6568     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6569     return Builder.CreateBitCast(Ops[0], Ty);
6570   }
6571   case NEON::BI__builtin_neon_vfma_lane_v:
6572   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6573     // The ARM builtins (and instructions) have the addend as the first
6574     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6575     Value *Addend = Ops[0];
6576     Value *Multiplicand = Ops[1];
6577     Value *LaneSource = Ops[2];
6578     Ops[0] = Multiplicand;
6579     Ops[1] = LaneSource;
6580     Ops[2] = Addend;
6581
6582     // Now adjust things to handle the lane access.
6583     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6584       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6585       VTy;
6586     llvm::Constant *cst = cast<Constant>(Ops[3]);
6587     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6588     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6589     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6590
6591     Ops.pop_back();
6592     Int = Intrinsic::fma;
6593     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6594   }
6595   case NEON::BI__builtin_neon_vfma_laneq_v: {
6596     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6597     // v1f64 fma should be mapped to Neon scalar f64 fma
6598     if (VTy && VTy->getElementType() == DoubleTy) {
6599       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6600       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6601       llvm::Type *VTy = GetNeonType(this,
6602         NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
6603       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6604       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6605       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6606       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6607       return Builder.CreateBitCast(Result, Ty);
6608     }
6609     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6610     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6611     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6612
6613     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6614                                             VTy->getNumElements() * 2);
6615     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6616     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6617                                                cast<ConstantInt>(Ops[3]));
6618     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6619
6620     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6621   }
6622   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6623     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6624     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6625     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6626
6627     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6628     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6629     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6630   }
6631   case NEON::BI__builtin_neon_vfmah_lane_f16:
6632   case NEON::BI__builtin_neon_vfmas_lane_f32:
6633   case NEON::BI__builtin_neon_vfmah_laneq_f16:
6634   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6635   case NEON::BI__builtin_neon_vfmad_lane_f64:
6636   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6637     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6638     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6639     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6640     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6641     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6642   }
6643   case NEON::BI__builtin_neon_vmull_v:
6644     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6645     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6646     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6647     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6648   case NEON::BI__builtin_neon_vmax_v:
6649   case NEON::BI__builtin_neon_vmaxq_v:
6650     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6651     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6652     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6653     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6654   case NEON::BI__builtin_neon_vmin_v:
6655   case NEON::BI__builtin_neon_vminq_v:
6656     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6657     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6658     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6659     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6660   case NEON::BI__builtin_neon_vabd_v:
6661   case NEON::BI__builtin_neon_vabdq_v:
6662     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6663     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6664     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6665     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6666   case NEON::BI__builtin_neon_vpadal_v:
6667   case NEON::BI__builtin_neon_vpadalq_v: {
6668     unsigned ArgElts = VTy->getNumElements();
6669     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6670     unsigned BitWidth = EltTy->getBitWidth();
6671     llvm::Type *ArgTy = llvm::VectorType::get(
6672         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6673     llvm::Type* Tys[2] = { VTy, ArgTy };
6674     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6675     SmallVector<llvm::Value*, 1> TmpOps;
6676     TmpOps.push_back(Ops[1]);
6677     Function *F = CGM.getIntrinsic(Int, Tys);
6678     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6679     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6680     return Builder.CreateAdd(tmp, addend);
6681   }
6682   case NEON::BI__builtin_neon_vpmin_v:
6683   case NEON::BI__builtin_neon_vpminq_v:
6684     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6685     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6686     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6687     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6688   case NEON::BI__builtin_neon_vpmax_v:
6689   case NEON::BI__builtin_neon_vpmaxq_v:
6690     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6691     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6692     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6693     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6694   case NEON::BI__builtin_neon_vminnm_v:
6695   case NEON::BI__builtin_neon_vminnmq_v:
6696     Int = Intrinsic::aarch64_neon_fminnm;
6697     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6698   case NEON::BI__builtin_neon_vmaxnm_v:
6699   case NEON::BI__builtin_neon_vmaxnmq_v:
6700     Int = Intrinsic::aarch64_neon_fmaxnm;
6701     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6702   case NEON::BI__builtin_neon_vrecpss_f32: {
6703     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6704     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6705                         Ops, "vrecps");
6706   }
6707   case NEON::BI__builtin_neon_vrecpsd_f64: {
6708     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6709     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6710                         Ops, "vrecps");
6711   }
6712   case NEON::BI__builtin_neon_vqshrun_n_v:
6713     Int = Intrinsic::aarch64_neon_sqshrun;
6714     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6715   case NEON::BI__builtin_neon_vqrshrun_n_v:
6716     Int = Intrinsic::aarch64_neon_sqrshrun;
6717     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6718   case NEON::BI__builtin_neon_vqshrn_n_v:
6719     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6720     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6721   case NEON::BI__builtin_neon_vrshrn_n_v:
6722     Int = Intrinsic::aarch64_neon_rshrn;
6723     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6724   case NEON::BI__builtin_neon_vqrshrn_n_v:
6725     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6726     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6727   case NEON::BI__builtin_neon_vrnda_v:
6728   case NEON::BI__builtin_neon_vrndaq_v: {
6729     Int = Intrinsic::round;
6730     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6731   }
6732   case NEON::BI__builtin_neon_vrndi_v:
6733   case NEON::BI__builtin_neon_vrndiq_v: {
6734     Int = Intrinsic::nearbyint;
6735     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6736   }
6737   case NEON::BI__builtin_neon_vrndm_v:
6738   case NEON::BI__builtin_neon_vrndmq_v: {
6739     Int = Intrinsic::floor;
6740     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6741   }
6742   case NEON::BI__builtin_neon_vrndn_v:
6743   case NEON::BI__builtin_neon_vrndnq_v: {
6744     Int = Intrinsic::aarch64_neon_frintn;
6745     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6746   }
6747   case NEON::BI__builtin_neon_vrndp_v:
6748   case NEON::BI__builtin_neon_vrndpq_v: {
6749     Int = Intrinsic::ceil;
6750     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6751   }
6752   case NEON::BI__builtin_neon_vrndx_v:
6753   case NEON::BI__builtin_neon_vrndxq_v: {
6754     Int = Intrinsic::rint;
6755     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6756   }
6757   case NEON::BI__builtin_neon_vrnd_v:
6758   case NEON::BI__builtin_neon_vrndq_v: {
6759     Int = Intrinsic::trunc;
6760     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6761   }
6762   case NEON::BI__builtin_neon_vceqz_v:
6763   case NEON::BI__builtin_neon_vceqzq_v:
6764     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6765                                          ICmpInst::ICMP_EQ, "vceqz");
6766   case NEON::BI__builtin_neon_vcgez_v:
6767   case NEON::BI__builtin_neon_vcgezq_v:
6768     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6769                                          ICmpInst::ICMP_SGE, "vcgez");
6770   case NEON::BI__builtin_neon_vclez_v:
6771   case NEON::BI__builtin_neon_vclezq_v:
6772     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6773                                          ICmpInst::ICMP_SLE, "vclez");
6774   case NEON::BI__builtin_neon_vcgtz_v:
6775   case NEON::BI__builtin_neon_vcgtzq_v:
6776     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6777                                          ICmpInst::ICMP_SGT, "vcgtz");
6778   case NEON::BI__builtin_neon_vcltz_v:
6779   case NEON::BI__builtin_neon_vcltzq_v:
6780     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6781                                          ICmpInst::ICMP_SLT, "vcltz");
6782   case NEON::BI__builtin_neon_vcvt_f64_v:
6783   case NEON::BI__builtin_neon_vcvtq_f64_v:
6784     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6785     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
6786     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6787                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6788   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6789     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6790            "unexpected vcvt_f64_f32 builtin");
6791     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6792     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
6793
6794     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6795   }
6796   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6797     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6798            "unexpected vcvt_f32_f64 builtin");
6799     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6800     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
6801
6802     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6803   }
6804   case NEON::BI__builtin_neon_vcvt_s32_v:
6805   case NEON::BI__builtin_neon_vcvt_u32_v:
6806   case NEON::BI__builtin_neon_vcvt_s64_v:
6807   case NEON::BI__builtin_neon_vcvt_u64_v:
6808         case NEON::BI__builtin_neon_vcvt_s16_v:
6809         case NEON::BI__builtin_neon_vcvt_u16_v:
6810   case NEON::BI__builtin_neon_vcvtq_s32_v:
6811   case NEON::BI__builtin_neon_vcvtq_u32_v:
6812   case NEON::BI__builtin_neon_vcvtq_s64_v:
6813   case NEON::BI__builtin_neon_vcvtq_u64_v:
6814         case NEON::BI__builtin_neon_vcvtq_s16_v:
6815         case NEON::BI__builtin_neon_vcvtq_u16_v: {
6816     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6817     if (usgn)
6818       return Builder.CreateFPToUI(Ops[0], Ty);
6819     return Builder.CreateFPToSI(Ops[0], Ty);
6820   }
6821   case NEON::BI__builtin_neon_vcvta_s16_v:
6822   case NEON::BI__builtin_neon_vcvta_s32_v:
6823   case NEON::BI__builtin_neon_vcvtaq_s16_v:
6824   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6825   case NEON::BI__builtin_neon_vcvta_u32_v:
6826   case NEON::BI__builtin_neon_vcvtaq_u16_v:
6827   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6828   case NEON::BI__builtin_neon_vcvta_s64_v:
6829   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6830   case NEON::BI__builtin_neon_vcvta_u64_v:
6831   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6832     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6833     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6834     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6835   }
6836   case NEON::BI__builtin_neon_vcvtm_s16_v:
6837   case NEON::BI__builtin_neon_vcvtm_s32_v:
6838   case NEON::BI__builtin_neon_vcvtmq_s16_v:
6839   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6840   case NEON::BI__builtin_neon_vcvtm_u16_v:
6841   case NEON::BI__builtin_neon_vcvtm_u32_v:
6842   case NEON::BI__builtin_neon_vcvtmq_u16_v:
6843   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6844   case NEON::BI__builtin_neon_vcvtm_s64_v:
6845   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6846   case NEON::BI__builtin_neon_vcvtm_u64_v:
6847   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6848     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6849     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6850     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6851   }
6852   case NEON::BI__builtin_neon_vcvtn_s16_v:
6853   case NEON::BI__builtin_neon_vcvtn_s32_v:
6854   case NEON::BI__builtin_neon_vcvtnq_s16_v:
6855   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6856   case NEON::BI__builtin_neon_vcvtn_u16_v:
6857   case NEON::BI__builtin_neon_vcvtn_u32_v:
6858   case NEON::BI__builtin_neon_vcvtnq_u16_v:
6859   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6860   case NEON::BI__builtin_neon_vcvtn_s64_v:
6861   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6862   case NEON::BI__builtin_neon_vcvtn_u64_v:
6863   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6864     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6865     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6866     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6867   }
6868   case NEON::BI__builtin_neon_vcvtp_s16_v:
6869   case NEON::BI__builtin_neon_vcvtp_s32_v:
6870   case NEON::BI__builtin_neon_vcvtpq_s16_v:
6871   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6872   case NEON::BI__builtin_neon_vcvtp_u16_v:
6873   case NEON::BI__builtin_neon_vcvtp_u32_v:
6874   case NEON::BI__builtin_neon_vcvtpq_u16_v:
6875   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6876   case NEON::BI__builtin_neon_vcvtp_s64_v:
6877   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6878   case NEON::BI__builtin_neon_vcvtp_u64_v:
6879   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6880     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6881     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6882     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6883   }
6884   case NEON::BI__builtin_neon_vmulx_v:
6885   case NEON::BI__builtin_neon_vmulxq_v: {
6886     Int = Intrinsic::aarch64_neon_fmulx;
6887     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6888   }
6889   case NEON::BI__builtin_neon_vmul_lane_v:
6890   case NEON::BI__builtin_neon_vmul_laneq_v: {
6891     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6892     bool Quad = false;
6893     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6894       Quad = true;
6895     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6896     llvm::Type *VTy = GetNeonType(this,
6897       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
6898     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6899     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6900     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6901     return Builder.CreateBitCast(Result, Ty);
6902   }
6903   case NEON::BI__builtin_neon_vnegd_s64:
6904     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6905   case NEON::BI__builtin_neon_vpmaxnm_v:
6906   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6907     Int = Intrinsic::aarch64_neon_fmaxnmp;
6908     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6909   }
6910   case NEON::BI__builtin_neon_vpminnm_v:
6911   case NEON::BI__builtin_neon_vpminnmq_v: {
6912     Int = Intrinsic::aarch64_neon_fminnmp;
6913     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6914   }
6915   case NEON::BI__builtin_neon_vsqrt_v:
6916   case NEON::BI__builtin_neon_vsqrtq_v: {
6917     Int = Intrinsic::sqrt;
6918     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6919     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6920   }
6921   case NEON::BI__builtin_neon_vrbit_v:
6922   case NEON::BI__builtin_neon_vrbitq_v: {
6923     Int = Intrinsic::aarch64_neon_rbit;
6924     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6925   }
6926   case NEON::BI__builtin_neon_vaddv_u8:
6927     // FIXME: These are handled by the AArch64 scalar code.
6928     usgn = true;
6929     LLVM_FALLTHROUGH;
6930   case NEON::BI__builtin_neon_vaddv_s8: {
6931     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6932     Ty = Int32Ty;
6933     VTy = llvm::VectorType::get(Int8Ty, 8);
6934     llvm::Type *Tys[2] = { Ty, VTy };
6935     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6936     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6937     return Builder.CreateTrunc(Ops[0], Int8Ty);
6938   }
6939   case NEON::BI__builtin_neon_vaddv_u16:
6940     usgn = true;
6941     LLVM_FALLTHROUGH;
6942   case NEON::BI__builtin_neon_vaddv_s16: {
6943     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6944     Ty = Int32Ty;
6945     VTy = llvm::VectorType::get(Int16Ty, 4);
6946     llvm::Type *Tys[2] = { Ty, VTy };
6947     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6948     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6949     return Builder.CreateTrunc(Ops[0], Int16Ty);
6950   }
6951   case NEON::BI__builtin_neon_vaddvq_u8:
6952     usgn = true;
6953     LLVM_FALLTHROUGH;
6954   case NEON::BI__builtin_neon_vaddvq_s8: {
6955     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6956     Ty = Int32Ty;
6957     VTy = llvm::VectorType::get(Int8Ty, 16);
6958     llvm::Type *Tys[2] = { Ty, VTy };
6959     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6960     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6961     return Builder.CreateTrunc(Ops[0], Int8Ty);
6962   }
6963   case NEON::BI__builtin_neon_vaddvq_u16:
6964     usgn = true;
6965     LLVM_FALLTHROUGH;
6966   case NEON::BI__builtin_neon_vaddvq_s16: {
6967     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6968     Ty = Int32Ty;
6969     VTy = llvm::VectorType::get(Int16Ty, 8);
6970     llvm::Type *Tys[2] = { Ty, VTy };
6971     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6972     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6973     return Builder.CreateTrunc(Ops[0], Int16Ty);
6974   }
6975   case NEON::BI__builtin_neon_vmaxv_u8: {
6976     Int = Intrinsic::aarch64_neon_umaxv;
6977     Ty = Int32Ty;
6978     VTy = llvm::VectorType::get(Int8Ty, 8);
6979     llvm::Type *Tys[2] = { Ty, VTy };
6980     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6981     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6982     return Builder.CreateTrunc(Ops[0], Int8Ty);
6983   }
6984   case NEON::BI__builtin_neon_vmaxv_u16: {
6985     Int = Intrinsic::aarch64_neon_umaxv;
6986     Ty = Int32Ty;
6987     VTy = llvm::VectorType::get(Int16Ty, 4);
6988     llvm::Type *Tys[2] = { Ty, VTy };
6989     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6990     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6991     return Builder.CreateTrunc(Ops[0], Int16Ty);
6992   }
6993   case NEON::BI__builtin_neon_vmaxvq_u8: {
6994     Int = Intrinsic::aarch64_neon_umaxv;
6995     Ty = Int32Ty;
6996     VTy = llvm::VectorType::get(Int8Ty, 16);
6997     llvm::Type *Tys[2] = { Ty, VTy };
6998     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6999     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7000     return Builder.CreateTrunc(Ops[0], Int8Ty);
7001   }
7002   case NEON::BI__builtin_neon_vmaxvq_u16: {
7003     Int = Intrinsic::aarch64_neon_umaxv;
7004     Ty = Int32Ty;
7005     VTy = llvm::VectorType::get(Int16Ty, 8);
7006     llvm::Type *Tys[2] = { Ty, VTy };
7007     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7008     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7009     return Builder.CreateTrunc(Ops[0], Int16Ty);
7010   }
7011   case NEON::BI__builtin_neon_vmaxv_s8: {
7012     Int = Intrinsic::aarch64_neon_smaxv;
7013     Ty = Int32Ty;
7014     VTy = llvm::VectorType::get(Int8Ty, 8);
7015     llvm::Type *Tys[2] = { Ty, VTy };
7016     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7017     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7018     return Builder.CreateTrunc(Ops[0], Int8Ty);
7019   }
7020   case NEON::BI__builtin_neon_vmaxv_s16: {
7021     Int = Intrinsic::aarch64_neon_smaxv;
7022     Ty = Int32Ty;
7023     VTy = llvm::VectorType::get(Int16Ty, 4);
7024     llvm::Type *Tys[2] = { Ty, VTy };
7025     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7026     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7027     return Builder.CreateTrunc(Ops[0], Int16Ty);
7028   }
7029   case NEON::BI__builtin_neon_vmaxvq_s8: {
7030     Int = Intrinsic::aarch64_neon_smaxv;
7031     Ty = Int32Ty;
7032     VTy = llvm::VectorType::get(Int8Ty, 16);
7033     llvm::Type *Tys[2] = { Ty, VTy };
7034     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7035     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7036     return Builder.CreateTrunc(Ops[0], Int8Ty);
7037   }
7038   case NEON::BI__builtin_neon_vmaxvq_s16: {
7039     Int = Intrinsic::aarch64_neon_smaxv;
7040     Ty = Int32Ty;
7041     VTy = llvm::VectorType::get(Int16Ty, 8);
7042     llvm::Type *Tys[2] = { Ty, VTy };
7043     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7044     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7045     return Builder.CreateTrunc(Ops[0], Int16Ty);
7046   }
7047   case NEON::BI__builtin_neon_vmaxv_f16: {
7048     Int = Intrinsic::aarch64_neon_fmaxv;
7049     Ty = HalfTy;
7050     VTy = llvm::VectorType::get(HalfTy, 4);
7051     llvm::Type *Tys[2] = { Ty, VTy };
7052     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7053     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7054     return Builder.CreateTrunc(Ops[0], HalfTy);
7055   }
7056   case NEON::BI__builtin_neon_vmaxvq_f16: {
7057     Int = Intrinsic::aarch64_neon_fmaxv;
7058     Ty = HalfTy;
7059     VTy = llvm::VectorType::get(HalfTy, 8);
7060     llvm::Type *Tys[2] = { Ty, VTy };
7061     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7062     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7063     return Builder.CreateTrunc(Ops[0], HalfTy);
7064   }
7065   case NEON::BI__builtin_neon_vminv_u8: {
7066     Int = Intrinsic::aarch64_neon_uminv;
7067     Ty = Int32Ty;
7068     VTy = llvm::VectorType::get(Int8Ty, 8);
7069     llvm::Type *Tys[2] = { Ty, VTy };
7070     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7071     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7072     return Builder.CreateTrunc(Ops[0], Int8Ty);
7073   }
7074   case NEON::BI__builtin_neon_vminv_u16: {
7075     Int = Intrinsic::aarch64_neon_uminv;
7076     Ty = Int32Ty;
7077     VTy = llvm::VectorType::get(Int16Ty, 4);
7078     llvm::Type *Tys[2] = { Ty, VTy };
7079     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7080     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7081     return Builder.CreateTrunc(Ops[0], Int16Ty);
7082   }
7083   case NEON::BI__builtin_neon_vminvq_u8: {
7084     Int = Intrinsic::aarch64_neon_uminv;
7085     Ty = Int32Ty;
7086     VTy = llvm::VectorType::get(Int8Ty, 16);
7087     llvm::Type *Tys[2] = { Ty, VTy };
7088     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7089     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7090     return Builder.CreateTrunc(Ops[0], Int8Ty);
7091   }
7092   case NEON::BI__builtin_neon_vminvq_u16: {
7093     Int = Intrinsic::aarch64_neon_uminv;
7094     Ty = Int32Ty;
7095     VTy = llvm::VectorType::get(Int16Ty, 8);
7096     llvm::Type *Tys[2] = { Ty, VTy };
7097     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7098     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7099     return Builder.CreateTrunc(Ops[0], Int16Ty);
7100   }
7101   case NEON::BI__builtin_neon_vminv_s8: {
7102     Int = Intrinsic::aarch64_neon_sminv;
7103     Ty = Int32Ty;
7104     VTy = llvm::VectorType::get(Int8Ty, 8);
7105     llvm::Type *Tys[2] = { Ty, VTy };
7106     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7107     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7108     return Builder.CreateTrunc(Ops[0], Int8Ty);
7109   }
7110   case NEON::BI__builtin_neon_vminv_s16: {
7111     Int = Intrinsic::aarch64_neon_sminv;
7112     Ty = Int32Ty;
7113     VTy = llvm::VectorType::get(Int16Ty, 4);
7114     llvm::Type *Tys[2] = { Ty, VTy };
7115     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7116     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7117     return Builder.CreateTrunc(Ops[0], Int16Ty);
7118   }
7119   case NEON::BI__builtin_neon_vminvq_s8: {
7120     Int = Intrinsic::aarch64_neon_sminv;
7121     Ty = Int32Ty;
7122     VTy = llvm::VectorType::get(Int8Ty, 16);
7123     llvm::Type *Tys[2] = { Ty, VTy };
7124     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7125     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7126     return Builder.CreateTrunc(Ops[0], Int8Ty);
7127   }
7128   case NEON::BI__builtin_neon_vminvq_s16: {
7129     Int = Intrinsic::aarch64_neon_sminv;
7130     Ty = Int32Ty;
7131     VTy = llvm::VectorType::get(Int16Ty, 8);
7132     llvm::Type *Tys[2] = { Ty, VTy };
7133     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7134     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7135     return Builder.CreateTrunc(Ops[0], Int16Ty);
7136   }
7137   case NEON::BI__builtin_neon_vminv_f16: {
7138     Int = Intrinsic::aarch64_neon_fminv;
7139     Ty = HalfTy;
7140     VTy = llvm::VectorType::get(HalfTy, 4);
7141     llvm::Type *Tys[2] = { Ty, VTy };
7142     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7143     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7144     return Builder.CreateTrunc(Ops[0], HalfTy);
7145   }
7146   case NEON::BI__builtin_neon_vminvq_f16: {
7147     Int = Intrinsic::aarch64_neon_fminv;
7148     Ty = HalfTy;
7149     VTy = llvm::VectorType::get(HalfTy, 8);
7150     llvm::Type *Tys[2] = { Ty, VTy };
7151     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7152     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7153     return Builder.CreateTrunc(Ops[0], HalfTy);
7154   }
7155   case NEON::BI__builtin_neon_vmaxnmv_f16: {
7156     Int = Intrinsic::aarch64_neon_fmaxnmv;
7157     Ty = HalfTy;
7158     VTy = llvm::VectorType::get(HalfTy, 4);
7159     llvm::Type *Tys[2] = { Ty, VTy };
7160     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7161     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7162     return Builder.CreateTrunc(Ops[0], HalfTy);
7163   }
7164   case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7165     Int = Intrinsic::aarch64_neon_fmaxnmv;
7166     Ty = HalfTy;
7167     VTy = llvm::VectorType::get(HalfTy, 8);
7168     llvm::Type *Tys[2] = { Ty, VTy };
7169     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7170     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7171     return Builder.CreateTrunc(Ops[0], HalfTy);
7172   }
7173   case NEON::BI__builtin_neon_vminnmv_f16: {
7174     Int = Intrinsic::aarch64_neon_fminnmv;
7175     Ty = HalfTy;
7176     VTy = llvm::VectorType::get(HalfTy, 4);
7177     llvm::Type *Tys[2] = { Ty, VTy };
7178     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7179     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7180     return Builder.CreateTrunc(Ops[0], HalfTy);
7181   }
7182   case NEON::BI__builtin_neon_vminnmvq_f16: {
7183     Int = Intrinsic::aarch64_neon_fminnmv;
7184     Ty = HalfTy;
7185     VTy = llvm::VectorType::get(HalfTy, 8);
7186     llvm::Type *Tys[2] = { Ty, VTy };
7187     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7188     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7189     return Builder.CreateTrunc(Ops[0], HalfTy);
7190   }
7191   case NEON::BI__builtin_neon_vmul_n_f64: {
7192     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7193     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7194     return Builder.CreateFMul(Ops[0], RHS);
7195   }
7196   case NEON::BI__builtin_neon_vaddlv_u8: {
7197     Int = Intrinsic::aarch64_neon_uaddlv;
7198     Ty = Int32Ty;
7199     VTy = llvm::VectorType::get(Int8Ty, 8);
7200     llvm::Type *Tys[2] = { Ty, VTy };
7201     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7202     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7203     return Builder.CreateTrunc(Ops[0], Int16Ty);
7204   }
7205   case NEON::BI__builtin_neon_vaddlv_u16: {
7206     Int = Intrinsic::aarch64_neon_uaddlv;
7207     Ty = Int32Ty;
7208     VTy = llvm::VectorType::get(Int16Ty, 4);
7209     llvm::Type *Tys[2] = { Ty, VTy };
7210     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7211     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7212   }
7213   case NEON::BI__builtin_neon_vaddlvq_u8: {
7214     Int = Intrinsic::aarch64_neon_uaddlv;
7215     Ty = Int32Ty;
7216     VTy = llvm::VectorType::get(Int8Ty, 16);
7217     llvm::Type *Tys[2] = { Ty, VTy };
7218     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7219     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7220     return Builder.CreateTrunc(Ops[0], Int16Ty);
7221   }
7222   case NEON::BI__builtin_neon_vaddlvq_u16: {
7223     Int = Intrinsic::aarch64_neon_uaddlv;
7224     Ty = Int32Ty;
7225     VTy = llvm::VectorType::get(Int16Ty, 8);
7226     llvm::Type *Tys[2] = { Ty, VTy };
7227     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7228     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7229   }
7230   case NEON::BI__builtin_neon_vaddlv_s8: {
7231     Int = Intrinsic::aarch64_neon_saddlv;
7232     Ty = Int32Ty;
7233     VTy = llvm::VectorType::get(Int8Ty, 8);
7234     llvm::Type *Tys[2] = { Ty, VTy };
7235     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7236     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7237     return Builder.CreateTrunc(Ops[0], Int16Ty);
7238   }
7239   case NEON::BI__builtin_neon_vaddlv_s16: {
7240     Int = Intrinsic::aarch64_neon_saddlv;
7241     Ty = Int32Ty;
7242     VTy = llvm::VectorType::get(Int16Ty, 4);
7243     llvm::Type *Tys[2] = { Ty, VTy };
7244     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7245     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7246   }
7247   case NEON::BI__builtin_neon_vaddlvq_s8: {
7248     Int = Intrinsic::aarch64_neon_saddlv;
7249     Ty = Int32Ty;
7250     VTy = llvm::VectorType::get(Int8Ty, 16);
7251     llvm::Type *Tys[2] = { Ty, VTy };
7252     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7253     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7254     return Builder.CreateTrunc(Ops[0], Int16Ty);
7255   }
7256   case NEON::BI__builtin_neon_vaddlvq_s16: {
7257     Int = Intrinsic::aarch64_neon_saddlv;
7258     Ty = Int32Ty;
7259     VTy = llvm::VectorType::get(Int16Ty, 8);
7260     llvm::Type *Tys[2] = { Ty, VTy };
7261     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7262     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7263   }
7264   case NEON::BI__builtin_neon_vsri_n_v:
7265   case NEON::BI__builtin_neon_vsriq_n_v: {
7266     Int = Intrinsic::aarch64_neon_vsri;
7267     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7268     return EmitNeonCall(Intrin, Ops, "vsri_n");
7269   }
7270   case NEON::BI__builtin_neon_vsli_n_v:
7271   case NEON::BI__builtin_neon_vsliq_n_v: {
7272     Int = Intrinsic::aarch64_neon_vsli;
7273     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7274     return EmitNeonCall(Intrin, Ops, "vsli_n");
7275   }
7276   case NEON::BI__builtin_neon_vsra_n_v:
7277   case NEON::BI__builtin_neon_vsraq_n_v:
7278     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7279     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7280     return Builder.CreateAdd(Ops[0], Ops[1]);
7281   case NEON::BI__builtin_neon_vrsra_n_v:
7282   case NEON::BI__builtin_neon_vrsraq_n_v: {
7283     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7284     SmallVector<llvm::Value*,2> TmpOps;
7285     TmpOps.push_back(Ops[1]);
7286     TmpOps.push_back(Ops[2]);
7287     Function* F = CGM.getIntrinsic(Int, Ty);
7288     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7289     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7290     return Builder.CreateAdd(Ops[0], tmp);
7291   }
7292     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
7293     // of an Align parameter here.
7294   case NEON::BI__builtin_neon_vld1_x2_v:
7295   case NEON::BI__builtin_neon_vld1q_x2_v:
7296   case NEON::BI__builtin_neon_vld1_x3_v:
7297   case NEON::BI__builtin_neon_vld1q_x3_v:
7298   case NEON::BI__builtin_neon_vld1_x4_v:
7299   case NEON::BI__builtin_neon_vld1q_x4_v: {
7300     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
7301     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7302     llvm::Type *Tys[2] = { VTy, PTy };
7303     unsigned Int;
7304     switch (BuiltinID) {
7305     case NEON::BI__builtin_neon_vld1_x2_v:
7306     case NEON::BI__builtin_neon_vld1q_x2_v:
7307       Int = Intrinsic::aarch64_neon_ld1x2;
7308       break;
7309     case NEON::BI__builtin_neon_vld1_x3_v:
7310     case NEON::BI__builtin_neon_vld1q_x3_v:
7311       Int = Intrinsic::aarch64_neon_ld1x3;
7312       break;
7313     case NEON::BI__builtin_neon_vld1_x4_v:
7314     case NEON::BI__builtin_neon_vld1q_x4_v:
7315       Int = Intrinsic::aarch64_neon_ld1x4;
7316       break;
7317     }
7318     Function *F = CGM.getIntrinsic(Int, Tys);
7319     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7320     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7321     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7322     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7323   }
7324   case NEON::BI__builtin_neon_vst1_x2_v:
7325   case NEON::BI__builtin_neon_vst1q_x2_v:
7326   case NEON::BI__builtin_neon_vst1_x3_v:
7327   case NEON::BI__builtin_neon_vst1q_x3_v:
7328   case NEON::BI__builtin_neon_vst1_x4_v:
7329   case NEON::BI__builtin_neon_vst1q_x4_v: {
7330     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
7331     llvm::Type *Tys[2] = { VTy, PTy };
7332     unsigned Int;
7333     switch (BuiltinID) {
7334     case NEON::BI__builtin_neon_vst1_x2_v:
7335     case NEON::BI__builtin_neon_vst1q_x2_v:
7336       Int = Intrinsic::aarch64_neon_st1x2;
7337       break;
7338     case NEON::BI__builtin_neon_vst1_x3_v:
7339     case NEON::BI__builtin_neon_vst1q_x3_v:
7340       Int = Intrinsic::aarch64_neon_st1x3;
7341       break;
7342     case NEON::BI__builtin_neon_vst1_x4_v:
7343     case NEON::BI__builtin_neon_vst1q_x4_v:
7344       Int = Intrinsic::aarch64_neon_st1x4;
7345       break;
7346     }
7347     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7348     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7349   }
7350   case NEON::BI__builtin_neon_vld1_v:
7351   case NEON::BI__builtin_neon_vld1q_v: {
7352     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
7353     auto Alignment = CharUnits::fromQuantity(
7354         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
7355     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
7356   }
7357   case NEON::BI__builtin_neon_vst1_v:
7358   case NEON::BI__builtin_neon_vst1q_v:
7359     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
7360     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7361     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7362   case NEON::BI__builtin_neon_vld1_lane_v:
7363   case NEON::BI__builtin_neon_vld1q_lane_v: {
7364     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7365     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
7366     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7367     auto Alignment = CharUnits::fromQuantity(
7368         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
7369     Ops[0] =
7370         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7371     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7372   }
7373   case NEON::BI__builtin_neon_vld1_dup_v:
7374   case NEON::BI__builtin_neon_vld1q_dup_v: {
7375     Value *V = UndefValue::get(Ty);
7376     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
7377     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7378     auto Alignment = CharUnits::fromQuantity(
7379         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
7380     Ops[0] =
7381         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7382     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7383     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7384     return EmitNeonSplat(Ops[0], CI);
7385   }
7386   case NEON::BI__builtin_neon_vst1_lane_v:
7387   case NEON::BI__builtin_neon_vst1q_lane_v:
7388     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7389     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7390     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7391     return Builder.CreateDefaultAlignedStore(Ops[1],
7392                                              Builder.CreateBitCast(Ops[0], Ty));
7393   case NEON::BI__builtin_neon_vld2_v:
7394   case NEON::BI__builtin_neon_vld2q_v: {
7395     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7396     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7397     llvm::Type *Tys[2] = { VTy, PTy };
7398     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7399     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7400     Ops[0] = Builder.CreateBitCast(Ops[0],
7401                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7402     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7403   }
7404   case NEON::BI__builtin_neon_vld3_v:
7405   case NEON::BI__builtin_neon_vld3q_v: {
7406     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7407     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7408     llvm::Type *Tys[2] = { VTy, PTy };
7409     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7410     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7411     Ops[0] = Builder.CreateBitCast(Ops[0],
7412                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7413     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7414   }
7415   case NEON::BI__builtin_neon_vld4_v:
7416   case NEON::BI__builtin_neon_vld4q_v: {
7417     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7418     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7419     llvm::Type *Tys[2] = { VTy, PTy };
7420     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7421     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7422     Ops[0] = Builder.CreateBitCast(Ops[0],
7423                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7424     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7425   }
7426   case NEON::BI__builtin_neon_vld2_dup_v:
7427   case NEON::BI__builtin_neon_vld2q_dup_v: {
7428     llvm::Type *PTy =
7429       llvm::PointerType::getUnqual(VTy->getElementType());
7430     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7431     llvm::Type *Tys[2] = { VTy, PTy };
7432     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7433     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7434     Ops[0] = Builder.CreateBitCast(Ops[0],
7435                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7436     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7437   }
7438   case NEON::BI__builtin_neon_vld3_dup_v:
7439   case NEON::BI__builtin_neon_vld3q_dup_v: {
7440     llvm::Type *PTy =
7441       llvm::PointerType::getUnqual(VTy->getElementType());
7442     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7443     llvm::Type *Tys[2] = { VTy, PTy };
7444     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7445     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7446     Ops[0] = Builder.CreateBitCast(Ops[0],
7447                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7448     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7449   }
7450   case NEON::BI__builtin_neon_vld4_dup_v:
7451   case NEON::BI__builtin_neon_vld4q_dup_v: {
7452     llvm::Type *PTy =
7453       llvm::PointerType::getUnqual(VTy->getElementType());
7454     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7455     llvm::Type *Tys[2] = { VTy, PTy };
7456     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7457     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7458     Ops[0] = Builder.CreateBitCast(Ops[0],
7459                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7460     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7461   }
7462   case NEON::BI__builtin_neon_vld2_lane_v:
7463   case NEON::BI__builtin_neon_vld2q_lane_v: {
7464     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7465     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7466     Ops.push_back(Ops[1]);
7467     Ops.erase(Ops.begin()+1);
7468     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7469     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7470     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7471     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
7472     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7473     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7474     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7475   }
7476   case NEON::BI__builtin_neon_vld3_lane_v:
7477   case NEON::BI__builtin_neon_vld3q_lane_v: {
7478     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7479     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7480     Ops.push_back(Ops[1]);
7481     Ops.erase(Ops.begin()+1);
7482     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7483     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7484     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7485     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7486     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7487     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7488     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7489     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7490   }
7491   case NEON::BI__builtin_neon_vld4_lane_v:
7492   case NEON::BI__builtin_neon_vld4q_lane_v: {
7493     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7494     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7495     Ops.push_back(Ops[1]);
7496     Ops.erase(Ops.begin()+1);
7497     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7498     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7499     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7500     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7501     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7502     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7503     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7504     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7505     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7506   }
7507   case NEON::BI__builtin_neon_vst2_v:
7508   case NEON::BI__builtin_neon_vst2q_v: {
7509     Ops.push_back(Ops[0]);
7510     Ops.erase(Ops.begin());
7511     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7512     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7513                         Ops, "");
7514   }
7515   case NEON::BI__builtin_neon_vst2_lane_v:
7516   case NEON::BI__builtin_neon_vst2q_lane_v: {
7517     Ops.push_back(Ops[0]);
7518     Ops.erase(Ops.begin());
7519     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7520     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7521     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7522                         Ops, "");
7523   }
7524   case NEON::BI__builtin_neon_vst3_v:
7525   case NEON::BI__builtin_neon_vst3q_v: {
7526     Ops.push_back(Ops[0]);
7527     Ops.erase(Ops.begin());
7528     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7529     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7530                         Ops, "");
7531   }
7532   case NEON::BI__builtin_neon_vst3_lane_v:
7533   case NEON::BI__builtin_neon_vst3q_lane_v: {
7534     Ops.push_back(Ops[0]);
7535     Ops.erase(Ops.begin());
7536     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7537     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7538     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7539                         Ops, "");
7540   }
7541   case NEON::BI__builtin_neon_vst4_v:
7542   case NEON::BI__builtin_neon_vst4q_v: {
7543     Ops.push_back(Ops[0]);
7544     Ops.erase(Ops.begin());
7545     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7546     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7547                         Ops, "");
7548   }
7549   case NEON::BI__builtin_neon_vst4_lane_v:
7550   case NEON::BI__builtin_neon_vst4q_lane_v: {
7551     Ops.push_back(Ops[0]);
7552     Ops.erase(Ops.begin());
7553     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7554     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7555     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7556                         Ops, "");
7557   }
7558   case NEON::BI__builtin_neon_vtrn_v:
7559   case NEON::BI__builtin_neon_vtrnq_v: {
7560     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7561     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7562     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7563     Value *SV = nullptr;
7564
7565     for (unsigned vi = 0; vi != 2; ++vi) {
7566       SmallVector<uint32_t, 16> Indices;
7567       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7568         Indices.push_back(i+vi);
7569         Indices.push_back(i+e+vi);
7570       }
7571       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7572       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7573       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7574     }
7575     return SV;
7576   }
7577   case NEON::BI__builtin_neon_vuzp_v:
7578   case NEON::BI__builtin_neon_vuzpq_v: {
7579     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7580     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7581     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7582     Value *SV = nullptr;
7583
7584     for (unsigned vi = 0; vi != 2; ++vi) {
7585       SmallVector<uint32_t, 16> Indices;
7586       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7587         Indices.push_back(2*i+vi);
7588
7589       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7590       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7591       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7592     }
7593     return SV;
7594   }
7595   case NEON::BI__builtin_neon_vzip_v:
7596   case NEON::BI__builtin_neon_vzipq_v: {
7597     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7598     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7599     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7600     Value *SV = nullptr;
7601
7602     for (unsigned vi = 0; vi != 2; ++vi) {
7603       SmallVector<uint32_t, 16> Indices;
7604       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7605         Indices.push_back((i + vi*e) >> 1);
7606         Indices.push_back(((i + vi*e) >> 1)+e);
7607       }
7608       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7609       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7610       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7611     }
7612     return SV;
7613   }
7614   case NEON::BI__builtin_neon_vqtbl1q_v: {
7615     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7616                         Ops, "vtbl1");
7617   }
7618   case NEON::BI__builtin_neon_vqtbl2q_v: {
7619     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7620                         Ops, "vtbl2");
7621   }
7622   case NEON::BI__builtin_neon_vqtbl3q_v: {
7623     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7624                         Ops, "vtbl3");
7625   }
7626   case NEON::BI__builtin_neon_vqtbl4q_v: {
7627     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7628                         Ops, "vtbl4");
7629   }
7630   case NEON::BI__builtin_neon_vqtbx1q_v: {
7631     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7632                         Ops, "vtbx1");
7633   }
7634   case NEON::BI__builtin_neon_vqtbx2q_v: {
7635     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7636                         Ops, "vtbx2");
7637   }
7638   case NEON::BI__builtin_neon_vqtbx3q_v: {
7639     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7640                         Ops, "vtbx3");
7641   }
7642   case NEON::BI__builtin_neon_vqtbx4q_v: {
7643     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7644                         Ops, "vtbx4");
7645   }
7646   case NEON::BI__builtin_neon_vsqadd_v:
7647   case NEON::BI__builtin_neon_vsqaddq_v: {
7648     Int = Intrinsic::aarch64_neon_usqadd;
7649     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7650   }
7651   case NEON::BI__builtin_neon_vuqadd_v:
7652   case NEON::BI__builtin_neon_vuqaddq_v: {
7653     Int = Intrinsic::aarch64_neon_suqadd;
7654     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7655   }
7656   }
7657 }
7658
7659 llvm::Value *CodeGenFunction::
7660 BuildVector(ArrayRef<llvm::Value*> Ops) {
7661   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7662          "Not a power-of-two sized vector!");
7663   bool AllConstants = true;
7664   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7665     AllConstants &= isa<Constant>(Ops[i]);
7666
7667   // If this is a constant vector, create a ConstantVector.
7668   if (AllConstants) {
7669     SmallVector<llvm::Constant*, 16> CstOps;
7670     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7671       CstOps.push_back(cast<Constant>(Ops[i]));
7672     return llvm::ConstantVector::get(CstOps);
7673   }
7674
7675   // Otherwise, insertelement the values to build the vector.
7676   Value *Result =
7677     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7678
7679   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7680     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7681
7682   return Result;
7683 }
7684
7685 // Convert the mask from an integer type to a vector of i1.
7686 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7687                               unsigned NumElts) {
7688
7689   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7690                          cast<IntegerType>(Mask->getType())->getBitWidth());
7691   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7692
7693   // If we have less than 8 elements, then the starting mask was an i8 and
7694   // we need to extract down to the right number of elements.
7695   if (NumElts < 8) {
7696     uint32_t Indices[4];
7697     for (unsigned i = 0; i != NumElts; ++i)
7698       Indices[i] = i;
7699     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7700                                              makeArrayRef(Indices, NumElts),
7701                                              "extract");
7702   }
7703   return MaskVec;
7704 }
7705
7706 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7707                                  SmallVectorImpl<Value *> &Ops,
7708                                  unsigned Align) {
7709   // Cast the pointer to right type.
7710   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7711                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7712
7713   // If the mask is all ones just emit a regular store.
7714   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7715     if (C->isAllOnesValue())
7716       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7717
7718   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7719                                    Ops[1]->getType()->getVectorNumElements());
7720
7721   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7722 }
7723
7724 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7725                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7726   // Cast the pointer to right type.
7727   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7728                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7729
7730   // If the mask is all ones just emit a regular store.
7731   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7732     if (C->isAllOnesValue())
7733       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7734
7735   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7736                                    Ops[1]->getType()->getVectorNumElements());
7737
7738   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7739 }
7740
7741 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
7742                               unsigned NumElts, SmallVectorImpl<Value *> &Ops,
7743                               bool InvertLHS = false) {
7744   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
7745   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
7746
7747   if (InvertLHS)
7748     LHS = CGF.Builder.CreateNot(LHS);
7749
7750   return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
7751                                   CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
7752 }
7753
7754 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7755                                         SmallVectorImpl<Value *> &Ops,
7756                                         llvm::Type *DstTy,
7757                                         unsigned SrcSizeInBits,
7758                                         unsigned Align) {
7759   // Load the subvector.
7760   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7761
7762   // Create broadcast mask.
7763   unsigned NumDstElts = DstTy->getVectorNumElements();
7764   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7765
7766   SmallVector<uint32_t, 8> Mask;
7767   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7768     for (unsigned j = 0; j != NumSrcElts; ++j)
7769       Mask.push_back(j);
7770
7771   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7772 }
7773
7774 static Value *EmitX86Select(CodeGenFunction &CGF,
7775                             Value *Mask, Value *Op0, Value *Op1) {
7776
7777   // If the mask is all ones just return first argument.
7778   if (const auto *C = dyn_cast<Constant>(Mask))
7779     if (C->isAllOnesValue())
7780       return Op0;
7781
7782   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7783
7784   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7785 }
7786
7787 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7788                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7789   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7790   Value *Cmp;
7791
7792   if (CC == 3) {
7793     Cmp = Constant::getNullValue(
7794                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7795   } else if (CC == 7) {
7796     Cmp = Constant::getAllOnesValue(
7797                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7798   } else {
7799     ICmpInst::Predicate Pred;
7800     switch (CC) {
7801     default: llvm_unreachable("Unknown condition code");
7802     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7803     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7804     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7805     case 4: Pred = ICmpInst::ICMP_NE;  break;
7806     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7807     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7808     }
7809     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7810   }
7811
7812   const auto *C = dyn_cast<Constant>(Ops.back());
7813   if (!C || !C->isAllOnesValue())
7814     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7815
7816   if (NumElts < 8) {
7817     uint32_t Indices[8];
7818     for (unsigned i = 0; i != NumElts; ++i)
7819       Indices[i] = i;
7820     for (unsigned i = NumElts; i != 8; ++i)
7821       Indices[i] = i % NumElts + NumElts;
7822     Cmp = CGF.Builder.CreateShuffleVector(
7823         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7824   }
7825   return CGF.Builder.CreateBitCast(Cmp,
7826                                    IntegerType::get(CGF.getLLVMContext(),
7827                                                     std::max(NumElts, 8U)));
7828 }
7829
7830 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
7831
7832   llvm::Type *Ty = Ops[0]->getType();
7833   Value *Zero = llvm::Constant::getNullValue(Ty);
7834   Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
7835   Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
7836   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
7837   if (Ops.size() == 1)
7838     return Res;
7839   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
7840 }
7841
7842 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7843                             ArrayRef<Value *> Ops) {
7844   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7845   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7846
7847   if (Ops.size() == 2)
7848     return Res;
7849
7850   assert(Ops.size() == 4);
7851   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7852 }
7853
7854 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7855                               llvm::Type *DstTy) {
7856   unsigned NumberOfElements = DstTy->getVectorNumElements();
7857   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7858   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7859 }
7860
7861 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
7862   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
7863   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
7864   return EmitX86CpuIs(CPUStr);
7865 }
7866
7867 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
7868
7869   llvm::Type *Int32Ty = Builder.getInt32Ty();
7870
7871   // Matching the struct layout from the compiler-rt/libgcc structure that is
7872   // filled in:
7873   // unsigned int __cpu_vendor;
7874   // unsigned int __cpu_type;
7875   // unsigned int __cpu_subtype;
7876   // unsigned int __cpu_features[1];
7877   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7878                                           llvm::ArrayType::get(Int32Ty, 1));
7879
7880   // Grab the global __cpu_model.
7881   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7882
7883   // Calculate the index needed to access the correct field based on the
7884   // range. Also adjust the expected value.
7885   unsigned Index;
7886   unsigned Value;
7887   std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
7888 #define X86_VENDOR(ENUM, STRING)                                               \
7889   .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
7890 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
7891   .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
7892 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
7893   .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
7894 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
7895   .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
7896 #include "llvm/Support/X86TargetParser.def"
7897                                .Default({0, 0});
7898   assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
7899
7900   // Grab the appropriate field from __cpu_model.
7901   llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
7902                          ConstantInt::get(Int32Ty, Index)};
7903   llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
7904   CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
7905
7906   // Check the value of the field against the requested value.
7907   return Builder.CreateICmpEQ(CpuValue,
7908                                   llvm::ConstantInt::get(Int32Ty, Value));
7909 }
7910
7911 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
7912   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7913   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7914   return EmitX86CpuSupports(FeatureStr);
7915 }
7916
7917 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
7918   // Processor features and mapping to processor feature value.
7919
7920   uint32_t FeaturesMask = 0;
7921
7922   for (const StringRef &FeatureStr : FeatureStrs) {
7923     unsigned Feature =
7924         StringSwitch<unsigned>(FeatureStr)
7925 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
7926 #include "llvm/Support/X86TargetParser.def"
7927         ;
7928     FeaturesMask |= (1U << Feature);
7929   }
7930
7931   // Matching the struct layout from the compiler-rt/libgcc structure that is
7932   // filled in:
7933   // unsigned int __cpu_vendor;
7934   // unsigned int __cpu_type;
7935   // unsigned int __cpu_subtype;
7936   // unsigned int __cpu_features[1];
7937   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7938                                           llvm::ArrayType::get(Int32Ty, 1));
7939
7940   // Grab the global __cpu_model.
7941   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7942
7943   // Grab the first (0th) element from the field __cpu_features off of the
7944   // global in the struct STy.
7945   Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
7946                    ConstantInt::get(Int32Ty, 0)};
7947   Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7948   Value *Features =
7949       Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
7950
7951   // Check the value of the bit corresponding to the feature requested.
7952   Value *Bitset = Builder.CreateAnd(
7953       Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
7954   return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7955 }
7956
7957 Value *CodeGenFunction::EmitX86CpuInit() {
7958   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
7959                                                     /*Variadic*/ false);
7960   llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
7961   return Builder.CreateCall(Func);
7962 }
7963
7964 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7965                                            const CallExpr *E) {
7966   if (BuiltinID == X86::BI__builtin_cpu_is)
7967     return EmitX86CpuIs(E);
7968   if (BuiltinID == X86::BI__builtin_cpu_supports)
7969     return EmitX86CpuSupports(E);
7970   if (BuiltinID == X86::BI__builtin_cpu_init)
7971     return EmitX86CpuInit();
7972
7973   SmallVector<Value*, 4> Ops;
7974
7975   // Find out if any arguments are required to be integer constant expressions.
7976   unsigned ICEArguments = 0;
7977   ASTContext::GetBuiltinTypeError Error;
7978   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7979   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7980
7981   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7982     // If this is a normal argument, just emit it as a scalar.
7983     if ((ICEArguments & (1 << i)) == 0) {
7984       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7985       continue;
7986     }
7987
7988     // If this is required to be a constant, constant fold it so that we know
7989     // that the generated intrinsic gets a ConstantInt.
7990     llvm::APSInt Result;
7991     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7992     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7993     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7994   }
7995
7996   // These exist so that the builtin that takes an immediate can be bounds
7997   // checked by clang to avoid passing bad immediates to the backend. Since
7998   // AVX has a larger immediate than SSE we would need separate builtins to
7999   // do the different bounds checking. Rather than create a clang specific
8000   // SSE only builtin, this implements eight separate builtins to match gcc
8001   // implementation.
8002   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
8003     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
8004     llvm::Function *F = CGM.getIntrinsic(ID);
8005     return Builder.CreateCall(F, Ops);
8006   };
8007
8008   // For the vector forms of FP comparisons, translate the builtins directly to
8009   // IR.
8010   // TODO: The builtins could be removed if the SSE header files used vector
8011   // extension comparisons directly (vector ordered/unordered may need
8012   // additional support via __builtin_isnan()).
8013   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
8014     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
8015     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
8016     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
8017     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
8018     return Builder.CreateBitCast(Sext, FPVecTy);
8019   };
8020
8021   switch (BuiltinID) {
8022   default: return nullptr;
8023   case X86::BI_mm_prefetch: {
8024     Value *Address = Ops[0];
8025     ConstantInt *C = cast<ConstantInt>(Ops[1]);
8026     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
8027     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
8028     Value *Data = ConstantInt::get(Int32Ty, 1);
8029     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
8030     return Builder.CreateCall(F, {Address, RW, Locality, Data});
8031   }
8032   case X86::BI_mm_clflush: {
8033     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
8034                               Ops[0]);
8035   }
8036   case X86::BI_mm_lfence: {
8037     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
8038   }
8039   case X86::BI_mm_mfence: {
8040     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
8041   }
8042   case X86::BI_mm_sfence: {
8043     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
8044   }
8045   case X86::BI_mm_pause: {
8046     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
8047   }
8048   case X86::BI__rdtsc: {
8049     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
8050   }
8051   case X86::BI__builtin_ia32_undef128:
8052   case X86::BI__builtin_ia32_undef256:
8053   case X86::BI__builtin_ia32_undef512:
8054     // The x86 definition of "undef" is not the same as the LLVM definition
8055     // (PR32176). We leave optimizing away an unnecessary zero constant to the
8056     // IR optimizer and backend.
8057     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
8058     // value, we should use that here instead of a zero.
8059     return llvm::Constant::getNullValue(ConvertType(E->getType()));
8060   case X86::BI__builtin_ia32_vec_init_v8qi:
8061   case X86::BI__builtin_ia32_vec_init_v4hi:
8062   case X86::BI__builtin_ia32_vec_init_v2si:
8063     return Builder.CreateBitCast(BuildVector(Ops),
8064                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
8065   case X86::BI__builtin_ia32_vec_ext_v2si:
8066     return Builder.CreateExtractElement(Ops[0],
8067                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
8068   case X86::BI_mm_setcsr:
8069   case X86::BI__builtin_ia32_ldmxcsr: {
8070     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8071     Builder.CreateStore(Ops[0], Tmp);
8072     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
8073                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
8074   }
8075   case X86::BI_mm_getcsr:
8076   case X86::BI__builtin_ia32_stmxcsr: {
8077     Address Tmp = CreateMemTemp(E->getType());
8078     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
8079                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
8080     return Builder.CreateLoad(Tmp, "stmxcsr");
8081   }
8082   case X86::BI__builtin_ia32_xsave:
8083   case X86::BI__builtin_ia32_xsave64:
8084   case X86::BI__builtin_ia32_xrstor:
8085   case X86::BI__builtin_ia32_xrstor64:
8086   case X86::BI__builtin_ia32_xsaveopt:
8087   case X86::BI__builtin_ia32_xsaveopt64:
8088   case X86::BI__builtin_ia32_xrstors:
8089   case X86::BI__builtin_ia32_xrstors64:
8090   case X86::BI__builtin_ia32_xsavec:
8091   case X86::BI__builtin_ia32_xsavec64:
8092   case X86::BI__builtin_ia32_xsaves:
8093   case X86::BI__builtin_ia32_xsaves64: {
8094     Intrinsic::ID ID;
8095 #define INTRINSIC_X86_XSAVE_ID(NAME) \
8096     case X86::BI__builtin_ia32_##NAME: \
8097       ID = Intrinsic::x86_##NAME; \
8098       break
8099     switch (BuiltinID) {
8100     default: llvm_unreachable("Unsupported intrinsic!");
8101     INTRINSIC_X86_XSAVE_ID(xsave);
8102     INTRINSIC_X86_XSAVE_ID(xsave64);
8103     INTRINSIC_X86_XSAVE_ID(xrstor);
8104     INTRINSIC_X86_XSAVE_ID(xrstor64);
8105     INTRINSIC_X86_XSAVE_ID(xsaveopt);
8106     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
8107     INTRINSIC_X86_XSAVE_ID(xrstors);
8108     INTRINSIC_X86_XSAVE_ID(xrstors64);
8109     INTRINSIC_X86_XSAVE_ID(xsavec);
8110     INTRINSIC_X86_XSAVE_ID(xsavec64);
8111     INTRINSIC_X86_XSAVE_ID(xsaves);
8112     INTRINSIC_X86_XSAVE_ID(xsaves64);
8113     }
8114 #undef INTRINSIC_X86_XSAVE_ID
8115     Value *Mhi = Builder.CreateTrunc(
8116       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
8117     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
8118     Ops[1] = Mhi;
8119     Ops.push_back(Mlo);
8120     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8121   }
8122   case X86::BI__builtin_ia32_storedqudi128_mask:
8123   case X86::BI__builtin_ia32_storedqusi128_mask:
8124   case X86::BI__builtin_ia32_storedquhi128_mask:
8125   case X86::BI__builtin_ia32_storedquqi128_mask:
8126   case X86::BI__builtin_ia32_storeupd128_mask:
8127   case X86::BI__builtin_ia32_storeups128_mask:
8128   case X86::BI__builtin_ia32_storedqudi256_mask:
8129   case X86::BI__builtin_ia32_storedqusi256_mask:
8130   case X86::BI__builtin_ia32_storedquhi256_mask:
8131   case X86::BI__builtin_ia32_storedquqi256_mask:
8132   case X86::BI__builtin_ia32_storeupd256_mask:
8133   case X86::BI__builtin_ia32_storeups256_mask:
8134   case X86::BI__builtin_ia32_storedqudi512_mask:
8135   case X86::BI__builtin_ia32_storedqusi512_mask:
8136   case X86::BI__builtin_ia32_storedquhi512_mask:
8137   case X86::BI__builtin_ia32_storedquqi512_mask:
8138   case X86::BI__builtin_ia32_storeupd512_mask:
8139   case X86::BI__builtin_ia32_storeups512_mask:
8140     return EmitX86MaskedStore(*this, Ops, 1);
8141
8142   case X86::BI__builtin_ia32_storess128_mask:
8143   case X86::BI__builtin_ia32_storesd128_mask: {
8144     return EmitX86MaskedStore(*this, Ops, 16);
8145   }
8146   case X86::BI__builtin_ia32_vpopcntb_128:
8147   case X86::BI__builtin_ia32_vpopcntd_128:
8148   case X86::BI__builtin_ia32_vpopcntq_128:
8149   case X86::BI__builtin_ia32_vpopcntw_128:
8150   case X86::BI__builtin_ia32_vpopcntb_256:
8151   case X86::BI__builtin_ia32_vpopcntd_256:
8152   case X86::BI__builtin_ia32_vpopcntq_256:
8153   case X86::BI__builtin_ia32_vpopcntw_256:
8154   case X86::BI__builtin_ia32_vpopcntb_512:
8155   case X86::BI__builtin_ia32_vpopcntd_512:
8156   case X86::BI__builtin_ia32_vpopcntq_512:
8157   case X86::BI__builtin_ia32_vpopcntw_512: {
8158     llvm::Type *ResultType = ConvertType(E->getType());
8159     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8160     return Builder.CreateCall(F, Ops);
8161   }
8162   case X86::BI__builtin_ia32_cvtmask2b128:
8163   case X86::BI__builtin_ia32_cvtmask2b256:
8164   case X86::BI__builtin_ia32_cvtmask2b512:
8165   case X86::BI__builtin_ia32_cvtmask2w128:
8166   case X86::BI__builtin_ia32_cvtmask2w256:
8167   case X86::BI__builtin_ia32_cvtmask2w512:
8168   case X86::BI__builtin_ia32_cvtmask2d128:
8169   case X86::BI__builtin_ia32_cvtmask2d256:
8170   case X86::BI__builtin_ia32_cvtmask2d512:
8171   case X86::BI__builtin_ia32_cvtmask2q128:
8172   case X86::BI__builtin_ia32_cvtmask2q256:
8173   case X86::BI__builtin_ia32_cvtmask2q512:
8174     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
8175
8176   case X86::BI__builtin_ia32_movdqa32store128_mask:
8177   case X86::BI__builtin_ia32_movdqa64store128_mask:
8178   case X86::BI__builtin_ia32_storeaps128_mask:
8179   case X86::BI__builtin_ia32_storeapd128_mask:
8180   case X86::BI__builtin_ia32_movdqa32store256_mask:
8181   case X86::BI__builtin_ia32_movdqa64store256_mask:
8182   case X86::BI__builtin_ia32_storeaps256_mask:
8183   case X86::BI__builtin_ia32_storeapd256_mask:
8184   case X86::BI__builtin_ia32_movdqa32store512_mask:
8185   case X86::BI__builtin_ia32_movdqa64store512_mask:
8186   case X86::BI__builtin_ia32_storeaps512_mask:
8187   case X86::BI__builtin_ia32_storeapd512_mask: {
8188     unsigned Align =
8189       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
8190     return EmitX86MaskedStore(*this, Ops, Align);
8191   }
8192   case X86::BI__builtin_ia32_loadups128_mask:
8193   case X86::BI__builtin_ia32_loadups256_mask:
8194   case X86::BI__builtin_ia32_loadups512_mask:
8195   case X86::BI__builtin_ia32_loadupd128_mask:
8196   case X86::BI__builtin_ia32_loadupd256_mask:
8197   case X86::BI__builtin_ia32_loadupd512_mask:
8198   case X86::BI__builtin_ia32_loaddquqi128_mask:
8199   case X86::BI__builtin_ia32_loaddquqi256_mask:
8200   case X86::BI__builtin_ia32_loaddquqi512_mask:
8201   case X86::BI__builtin_ia32_loaddquhi128_mask:
8202   case X86::BI__builtin_ia32_loaddquhi256_mask:
8203   case X86::BI__builtin_ia32_loaddquhi512_mask:
8204   case X86::BI__builtin_ia32_loaddqusi128_mask:
8205   case X86::BI__builtin_ia32_loaddqusi256_mask:
8206   case X86::BI__builtin_ia32_loaddqusi512_mask:
8207   case X86::BI__builtin_ia32_loaddqudi128_mask:
8208   case X86::BI__builtin_ia32_loaddqudi256_mask:
8209   case X86::BI__builtin_ia32_loaddqudi512_mask:
8210     return EmitX86MaskedLoad(*this, Ops, 1);
8211
8212   case X86::BI__builtin_ia32_loadss128_mask:
8213   case X86::BI__builtin_ia32_loadsd128_mask:
8214     return EmitX86MaskedLoad(*this, Ops, 16);
8215
8216   case X86::BI__builtin_ia32_loadaps128_mask:
8217   case X86::BI__builtin_ia32_loadaps256_mask:
8218   case X86::BI__builtin_ia32_loadaps512_mask:
8219   case X86::BI__builtin_ia32_loadapd128_mask:
8220   case X86::BI__builtin_ia32_loadapd256_mask:
8221   case X86::BI__builtin_ia32_loadapd512_mask:
8222   case X86::BI__builtin_ia32_movdqa32load128_mask:
8223   case X86::BI__builtin_ia32_movdqa32load256_mask:
8224   case X86::BI__builtin_ia32_movdqa32load512_mask:
8225   case X86::BI__builtin_ia32_movdqa64load128_mask:
8226   case X86::BI__builtin_ia32_movdqa64load256_mask:
8227   case X86::BI__builtin_ia32_movdqa64load512_mask: {
8228     unsigned Align =
8229       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
8230     return EmitX86MaskedLoad(*this, Ops, Align);
8231   }
8232
8233   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
8234   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
8235     llvm::Type *DstTy = ConvertType(E->getType());
8236     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
8237   }
8238
8239   case X86::BI__builtin_ia32_storehps:
8240   case X86::BI__builtin_ia32_storelps: {
8241     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
8242     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
8243
8244     // cast val v2i64
8245     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
8246
8247     // extract (0, 1)
8248     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
8249     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
8250     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
8251
8252     // cast pointer to i64 & store
8253     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
8254     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8255   }
8256   case X86::BI__builtin_ia32_palignr128:
8257   case X86::BI__builtin_ia32_palignr256:
8258   case X86::BI__builtin_ia32_palignr512_mask: {
8259     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8260
8261     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8262     assert(NumElts % 16 == 0);
8263
8264     // If palignr is shifting the pair of vectors more than the size of two
8265     // lanes, emit zero.
8266     if (ShiftVal >= 32)
8267       return llvm::Constant::getNullValue(ConvertType(E->getType()));
8268
8269     // If palignr is shifting the pair of input vectors more than one lane,
8270     // but less than two lanes, convert to shifting in zeroes.
8271     if (ShiftVal > 16) {
8272       ShiftVal -= 16;
8273       Ops[1] = Ops[0];
8274       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
8275     }
8276
8277     uint32_t Indices[64];
8278     // 256-bit palignr operates on 128-bit lanes so we need to handle that
8279     for (unsigned l = 0; l != NumElts; l += 16) {
8280       for (unsigned i = 0; i != 16; ++i) {
8281         unsigned Idx = ShiftVal + i;
8282         if (Idx >= 16)
8283           Idx += NumElts - 16; // End of lane, switch operand.
8284         Indices[l + i] = Idx + l;
8285       }
8286     }
8287
8288     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
8289                                                makeArrayRef(Indices, NumElts),
8290                                                "palignr");
8291
8292     // If this isn't a masked builtin, just return the align operation.
8293     if (Ops.size() == 3)
8294       return Align;
8295
8296     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
8297   }
8298
8299   case X86::BI__builtin_ia32_vperm2f128_pd256:
8300   case X86::BI__builtin_ia32_vperm2f128_ps256:
8301   case X86::BI__builtin_ia32_vperm2f128_si256:
8302   case X86::BI__builtin_ia32_permti256: {
8303     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8304     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8305
8306     // This takes a very simple approach since there are two lanes and a
8307     // shuffle can have 2 inputs. So we reserve the first input for the first
8308     // lane and the second input for the second lane. This may result in
8309     // duplicate sources, but this can be dealt with in the backend.
8310
8311     Value *OutOps[2];
8312     uint32_t Indices[8];
8313     for (unsigned l = 0; l != 2; ++l) {
8314       // Determine the source for this lane.
8315       if (Imm & (1 << ((l * 4) + 3)))
8316         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
8317       else if (Imm & (1 << ((l * 4) + 1)))
8318         OutOps[l] = Ops[1];
8319       else
8320         OutOps[l] = Ops[0];
8321
8322       for (unsigned i = 0; i != NumElts/2; ++i) {
8323         // Start with ith element of the source for this lane.
8324         unsigned Idx = (l * NumElts) + i;
8325         // If bit 0 of the immediate half is set, switch to the high half of
8326         // the source.
8327         if (Imm & (1 << (l * 4)))
8328           Idx += NumElts/2;
8329         Indices[(l * (NumElts/2)) + i] = Idx;
8330       }
8331     }
8332
8333     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
8334                                        makeArrayRef(Indices, NumElts),
8335                                        "vperm");
8336   }
8337
8338   case X86::BI__builtin_ia32_movnti:
8339   case X86::BI__builtin_ia32_movnti64:
8340   case X86::BI__builtin_ia32_movntsd:
8341   case X86::BI__builtin_ia32_movntss: {
8342     llvm::MDNode *Node = llvm::MDNode::get(
8343         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
8344
8345     Value *Ptr = Ops[0];
8346     Value *Src = Ops[1];
8347
8348     // Extract the 0'th element of the source vector.
8349     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
8350         BuiltinID == X86::BI__builtin_ia32_movntss)
8351       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
8352
8353     // Convert the type of the pointer to a pointer to the stored type.
8354     Value *BC = Builder.CreateBitCast(
8355         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
8356
8357     // Unaligned nontemporal store of the scalar value.
8358     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
8359     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
8360     SI->setAlignment(1);
8361     return SI;
8362   }
8363
8364   case X86::BI__builtin_ia32_selectb_128:
8365   case X86::BI__builtin_ia32_selectb_256:
8366   case X86::BI__builtin_ia32_selectb_512:
8367   case X86::BI__builtin_ia32_selectw_128:
8368   case X86::BI__builtin_ia32_selectw_256:
8369   case X86::BI__builtin_ia32_selectw_512:
8370   case X86::BI__builtin_ia32_selectd_128:
8371   case X86::BI__builtin_ia32_selectd_256:
8372   case X86::BI__builtin_ia32_selectd_512:
8373   case X86::BI__builtin_ia32_selectq_128:
8374   case X86::BI__builtin_ia32_selectq_256:
8375   case X86::BI__builtin_ia32_selectq_512:
8376   case X86::BI__builtin_ia32_selectps_128:
8377   case X86::BI__builtin_ia32_selectps_256:
8378   case X86::BI__builtin_ia32_selectps_512:
8379   case X86::BI__builtin_ia32_selectpd_128:
8380   case X86::BI__builtin_ia32_selectpd_256:
8381   case X86::BI__builtin_ia32_selectpd_512:
8382     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
8383   case X86::BI__builtin_ia32_cmpb128_mask:
8384   case X86::BI__builtin_ia32_cmpb256_mask:
8385   case X86::BI__builtin_ia32_cmpb512_mask:
8386   case X86::BI__builtin_ia32_cmpw128_mask:
8387   case X86::BI__builtin_ia32_cmpw256_mask:
8388   case X86::BI__builtin_ia32_cmpw512_mask:
8389   case X86::BI__builtin_ia32_cmpd128_mask:
8390   case X86::BI__builtin_ia32_cmpd256_mask:
8391   case X86::BI__builtin_ia32_cmpd512_mask:
8392   case X86::BI__builtin_ia32_cmpq128_mask:
8393   case X86::BI__builtin_ia32_cmpq256_mask:
8394   case X86::BI__builtin_ia32_cmpq512_mask: {
8395     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8396     return EmitX86MaskedCompare(*this, CC, true, Ops);
8397   }
8398   case X86::BI__builtin_ia32_ucmpb128_mask:
8399   case X86::BI__builtin_ia32_ucmpb256_mask:
8400   case X86::BI__builtin_ia32_ucmpb512_mask:
8401   case X86::BI__builtin_ia32_ucmpw128_mask:
8402   case X86::BI__builtin_ia32_ucmpw256_mask:
8403   case X86::BI__builtin_ia32_ucmpw512_mask:
8404   case X86::BI__builtin_ia32_ucmpd128_mask:
8405   case X86::BI__builtin_ia32_ucmpd256_mask:
8406   case X86::BI__builtin_ia32_ucmpd512_mask:
8407   case X86::BI__builtin_ia32_ucmpq128_mask:
8408   case X86::BI__builtin_ia32_ucmpq256_mask:
8409   case X86::BI__builtin_ia32_ucmpq512_mask: {
8410     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8411     return EmitX86MaskedCompare(*this, CC, false, Ops);
8412   }
8413
8414   case X86::BI__builtin_ia32_kandhi:
8415     return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
8416   case X86::BI__builtin_ia32_kandnhi:
8417     return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
8418   case X86::BI__builtin_ia32_korhi:
8419     return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
8420   case X86::BI__builtin_ia32_kxnorhi:
8421     return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
8422   case X86::BI__builtin_ia32_kxorhi:
8423     return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
8424   case X86::BI__builtin_ia32_knothi: {
8425     Ops[0] = getMaskVecValue(*this, Ops[0], 16);
8426     return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
8427                                  Builder.getInt16Ty());
8428   }
8429
8430   case X86::BI__builtin_ia32_vplzcntd_128_mask:
8431   case X86::BI__builtin_ia32_vplzcntd_256_mask:
8432   case X86::BI__builtin_ia32_vplzcntd_512_mask:
8433   case X86::BI__builtin_ia32_vplzcntq_128_mask:
8434   case X86::BI__builtin_ia32_vplzcntq_256_mask:
8435   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
8436     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
8437     return EmitX86Select(*this, Ops[2],
8438                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
8439                          Ops[1]);
8440   }
8441
8442   case X86::BI__builtin_ia32_pabsb128:
8443   case X86::BI__builtin_ia32_pabsw128:
8444   case X86::BI__builtin_ia32_pabsd128:
8445   case X86::BI__builtin_ia32_pabsb256:
8446   case X86::BI__builtin_ia32_pabsw256:
8447   case X86::BI__builtin_ia32_pabsd256:
8448   case X86::BI__builtin_ia32_pabsq128_mask:
8449   case X86::BI__builtin_ia32_pabsq256_mask:
8450   case X86::BI__builtin_ia32_pabsb512_mask:
8451   case X86::BI__builtin_ia32_pabsw512_mask:
8452   case X86::BI__builtin_ia32_pabsd512_mask:
8453   case X86::BI__builtin_ia32_pabsq512_mask:
8454     return EmitX86Abs(*this, Ops);
8455
8456   case X86::BI__builtin_ia32_pmaxsb128:
8457   case X86::BI__builtin_ia32_pmaxsw128:
8458   case X86::BI__builtin_ia32_pmaxsd128:
8459   case X86::BI__builtin_ia32_pmaxsq128_mask:
8460   case X86::BI__builtin_ia32_pmaxsb256:
8461   case X86::BI__builtin_ia32_pmaxsw256:
8462   case X86::BI__builtin_ia32_pmaxsd256:
8463   case X86::BI__builtin_ia32_pmaxsq256_mask:
8464   case X86::BI__builtin_ia32_pmaxsb512_mask:
8465   case X86::BI__builtin_ia32_pmaxsw512_mask:
8466   case X86::BI__builtin_ia32_pmaxsd512_mask:
8467   case X86::BI__builtin_ia32_pmaxsq512_mask:
8468     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
8469   case X86::BI__builtin_ia32_pmaxub128:
8470   case X86::BI__builtin_ia32_pmaxuw128:
8471   case X86::BI__builtin_ia32_pmaxud128:
8472   case X86::BI__builtin_ia32_pmaxuq128_mask:
8473   case X86::BI__builtin_ia32_pmaxub256:
8474   case X86::BI__builtin_ia32_pmaxuw256:
8475   case X86::BI__builtin_ia32_pmaxud256:
8476   case X86::BI__builtin_ia32_pmaxuq256_mask:
8477   case X86::BI__builtin_ia32_pmaxub512_mask:
8478   case X86::BI__builtin_ia32_pmaxuw512_mask:
8479   case X86::BI__builtin_ia32_pmaxud512_mask:
8480   case X86::BI__builtin_ia32_pmaxuq512_mask:
8481     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
8482   case X86::BI__builtin_ia32_pminsb128:
8483   case X86::BI__builtin_ia32_pminsw128:
8484   case X86::BI__builtin_ia32_pminsd128:
8485   case X86::BI__builtin_ia32_pminsq128_mask:
8486   case X86::BI__builtin_ia32_pminsb256:
8487   case X86::BI__builtin_ia32_pminsw256:
8488   case X86::BI__builtin_ia32_pminsd256:
8489   case X86::BI__builtin_ia32_pminsq256_mask:
8490   case X86::BI__builtin_ia32_pminsb512_mask:
8491   case X86::BI__builtin_ia32_pminsw512_mask:
8492   case X86::BI__builtin_ia32_pminsd512_mask:
8493   case X86::BI__builtin_ia32_pminsq512_mask:
8494     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
8495   case X86::BI__builtin_ia32_pminub128:
8496   case X86::BI__builtin_ia32_pminuw128:
8497   case X86::BI__builtin_ia32_pminud128:
8498   case X86::BI__builtin_ia32_pminuq128_mask:
8499   case X86::BI__builtin_ia32_pminub256:
8500   case X86::BI__builtin_ia32_pminuw256:
8501   case X86::BI__builtin_ia32_pminud256:
8502   case X86::BI__builtin_ia32_pminuq256_mask:
8503   case X86::BI__builtin_ia32_pminub512_mask:
8504   case X86::BI__builtin_ia32_pminuw512_mask:
8505   case X86::BI__builtin_ia32_pminud512_mask:
8506   case X86::BI__builtin_ia32_pminuq512_mask:
8507     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8508
8509   // 3DNow!
8510   case X86::BI__builtin_ia32_pswapdsf:
8511   case X86::BI__builtin_ia32_pswapdsi: {
8512     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8513     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8514     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8515     return Builder.CreateCall(F, Ops, "pswapd");
8516   }
8517   case X86::BI__builtin_ia32_rdrand16_step:
8518   case X86::BI__builtin_ia32_rdrand32_step:
8519   case X86::BI__builtin_ia32_rdrand64_step:
8520   case X86::BI__builtin_ia32_rdseed16_step:
8521   case X86::BI__builtin_ia32_rdseed32_step:
8522   case X86::BI__builtin_ia32_rdseed64_step: {
8523     Intrinsic::ID ID;
8524     switch (BuiltinID) {
8525     default: llvm_unreachable("Unsupported intrinsic!");
8526     case X86::BI__builtin_ia32_rdrand16_step:
8527       ID = Intrinsic::x86_rdrand_16;
8528       break;
8529     case X86::BI__builtin_ia32_rdrand32_step:
8530       ID = Intrinsic::x86_rdrand_32;
8531       break;
8532     case X86::BI__builtin_ia32_rdrand64_step:
8533       ID = Intrinsic::x86_rdrand_64;
8534       break;
8535     case X86::BI__builtin_ia32_rdseed16_step:
8536       ID = Intrinsic::x86_rdseed_16;
8537       break;
8538     case X86::BI__builtin_ia32_rdseed32_step:
8539       ID = Intrinsic::x86_rdseed_32;
8540       break;
8541     case X86::BI__builtin_ia32_rdseed64_step:
8542       ID = Intrinsic::x86_rdseed_64;
8543       break;
8544     }
8545
8546     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8547     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8548                                       Ops[0]);
8549     return Builder.CreateExtractValue(Call, 1);
8550   }
8551
8552   // SSE packed comparison intrinsics
8553   case X86::BI__builtin_ia32_cmpeqps:
8554   case X86::BI__builtin_ia32_cmpeqpd:
8555     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8556   case X86::BI__builtin_ia32_cmpltps:
8557   case X86::BI__builtin_ia32_cmpltpd:
8558     return getVectorFCmpIR(CmpInst::FCMP_OLT);
8559   case X86::BI__builtin_ia32_cmpleps:
8560   case X86::BI__builtin_ia32_cmplepd:
8561     return getVectorFCmpIR(CmpInst::FCMP_OLE);
8562   case X86::BI__builtin_ia32_cmpunordps:
8563   case X86::BI__builtin_ia32_cmpunordpd:
8564     return getVectorFCmpIR(CmpInst::FCMP_UNO);
8565   case X86::BI__builtin_ia32_cmpneqps:
8566   case X86::BI__builtin_ia32_cmpneqpd:
8567     return getVectorFCmpIR(CmpInst::FCMP_UNE);
8568   case X86::BI__builtin_ia32_cmpnltps:
8569   case X86::BI__builtin_ia32_cmpnltpd:
8570     return getVectorFCmpIR(CmpInst::FCMP_UGE);
8571   case X86::BI__builtin_ia32_cmpnleps:
8572   case X86::BI__builtin_ia32_cmpnlepd:
8573     return getVectorFCmpIR(CmpInst::FCMP_UGT);
8574   case X86::BI__builtin_ia32_cmpordps:
8575   case X86::BI__builtin_ia32_cmpordpd:
8576     return getVectorFCmpIR(CmpInst::FCMP_ORD);
8577   case X86::BI__builtin_ia32_cmpps:
8578   case X86::BI__builtin_ia32_cmpps256:
8579   case X86::BI__builtin_ia32_cmppd:
8580   case X86::BI__builtin_ia32_cmppd256: {
8581     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8582     // If this is one of the SSE immediates, we can use native IR.
8583     if (CC < 8) {
8584       FCmpInst::Predicate Pred;
8585       switch (CC) {
8586       case 0: Pred = FCmpInst::FCMP_OEQ; break;
8587       case 1: Pred = FCmpInst::FCMP_OLT; break;
8588       case 2: Pred = FCmpInst::FCMP_OLE; break;
8589       case 3: Pred = FCmpInst::FCMP_UNO; break;
8590       case 4: Pred = FCmpInst::FCMP_UNE; break;
8591       case 5: Pred = FCmpInst::FCMP_UGE; break;
8592       case 6: Pred = FCmpInst::FCMP_UGT; break;
8593       case 7: Pred = FCmpInst::FCMP_ORD; break;
8594       }
8595       return getVectorFCmpIR(Pred);
8596     }
8597
8598     // We can't handle 8-31 immediates with native IR, use the intrinsic.
8599     // Except for predicates that create constants.
8600     Intrinsic::ID ID;
8601     switch (BuiltinID) {
8602     default: llvm_unreachable("Unsupported intrinsic!");
8603     case X86::BI__builtin_ia32_cmpps:
8604       ID = Intrinsic::x86_sse_cmp_ps;
8605       break;
8606     case X86::BI__builtin_ia32_cmpps256:
8607       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8608       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8609       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8610          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8611                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8612                 llvm::Constant::getNullValue(Builder.getInt32Ty());
8613          Value *Vec = Builder.CreateVectorSplat(
8614                         Ops[0]->getType()->getVectorNumElements(), Constant);
8615          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8616       }
8617       ID = Intrinsic::x86_avx_cmp_ps_256;
8618       break;
8619     case X86::BI__builtin_ia32_cmppd:
8620       ID = Intrinsic::x86_sse2_cmp_pd;
8621       break;
8622     case X86::BI__builtin_ia32_cmppd256:
8623       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8624       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8625       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8626          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8627                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8628                 llvm::Constant::getNullValue(Builder.getInt64Ty());
8629          Value *Vec = Builder.CreateVectorSplat(
8630                         Ops[0]->getType()->getVectorNumElements(), Constant);
8631          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8632       }
8633       ID = Intrinsic::x86_avx_cmp_pd_256;
8634       break;
8635     }
8636
8637     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8638   }
8639
8640   // SSE scalar comparison intrinsics
8641   case X86::BI__builtin_ia32_cmpeqss:
8642     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8643   case X86::BI__builtin_ia32_cmpltss:
8644     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8645   case X86::BI__builtin_ia32_cmpless:
8646     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8647   case X86::BI__builtin_ia32_cmpunordss:
8648     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8649   case X86::BI__builtin_ia32_cmpneqss:
8650     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8651   case X86::BI__builtin_ia32_cmpnltss:
8652     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8653   case X86::BI__builtin_ia32_cmpnless:
8654     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8655   case X86::BI__builtin_ia32_cmpordss:
8656     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8657   case X86::BI__builtin_ia32_cmpeqsd:
8658     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8659   case X86::BI__builtin_ia32_cmpltsd:
8660     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8661   case X86::BI__builtin_ia32_cmplesd:
8662     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8663   case X86::BI__builtin_ia32_cmpunordsd:
8664     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8665   case X86::BI__builtin_ia32_cmpneqsd:
8666     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8667   case X86::BI__builtin_ia32_cmpnltsd:
8668     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8669   case X86::BI__builtin_ia32_cmpnlesd:
8670     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8671   case X86::BI__builtin_ia32_cmpordsd:
8672     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8673
8674   case X86::BI__emul:
8675   case X86::BI__emulu: {
8676     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8677     bool isSigned = (BuiltinID == X86::BI__emul);
8678     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8679     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8680     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8681   }
8682   case X86::BI__mulh:
8683   case X86::BI__umulh:
8684   case X86::BI_mul128:
8685   case X86::BI_umul128: {
8686     llvm::Type *ResType = ConvertType(E->getType());
8687     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8688
8689     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8690     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8691     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8692
8693     Value *MulResult, *HigherBits;
8694     if (IsSigned) {
8695       MulResult = Builder.CreateNSWMul(LHS, RHS);
8696       HigherBits = Builder.CreateAShr(MulResult, 64);
8697     } else {
8698       MulResult = Builder.CreateNUWMul(LHS, RHS);
8699       HigherBits = Builder.CreateLShr(MulResult, 64);
8700     }
8701     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8702
8703     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8704       return HigherBits;
8705
8706     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8707     Builder.CreateStore(HigherBits, HighBitsAddress);
8708     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8709   }
8710
8711   case X86::BI__faststorefence: {
8712     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8713                                llvm::SyncScope::System);
8714   }
8715   case X86::BI_ReadWriteBarrier:
8716   case X86::BI_ReadBarrier:
8717   case X86::BI_WriteBarrier: {
8718     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8719                                llvm::SyncScope::SingleThread);
8720   }
8721   case X86::BI_BitScanForward:
8722   case X86::BI_BitScanForward64:
8723     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8724   case X86::BI_BitScanReverse:
8725   case X86::BI_BitScanReverse64:
8726     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8727
8728   case X86::BI_InterlockedAnd64:
8729     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8730   case X86::BI_InterlockedExchange64:
8731     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8732   case X86::BI_InterlockedExchangeAdd64:
8733     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8734   case X86::BI_InterlockedExchangeSub64:
8735     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8736   case X86::BI_InterlockedOr64:
8737     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8738   case X86::BI_InterlockedXor64:
8739     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8740   case X86::BI_InterlockedDecrement64:
8741     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8742   case X86::BI_InterlockedIncrement64:
8743     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8744   case X86::BI_InterlockedCompareExchange128: {
8745     // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
8746     // instead it takes pointers to 64bit ints for Destination and
8747     // ComparandResult, and exchange is taken as two 64bit ints (high & low).
8748     // The previous value is written to ComparandResult, and success is
8749     // returned.
8750
8751     llvm::Type *Int128Ty = Builder.getInt128Ty();
8752     llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
8753
8754     Value *Destination =
8755         Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
8756     Value *ExchangeHigh128 =
8757         Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
8758     Value *ExchangeLow128 =
8759         Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
8760     Address ComparandResult(
8761         Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
8762         getContext().toCharUnitsFromBits(128));
8763
8764     Value *Exchange = Builder.CreateOr(
8765         Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
8766         ExchangeLow128);
8767
8768     Value *Comparand = Builder.CreateLoad(ComparandResult);
8769
8770     AtomicCmpXchgInst *CXI =
8771         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
8772                                     AtomicOrdering::SequentiallyConsistent,
8773                                     AtomicOrdering::SequentiallyConsistent);
8774     CXI->setVolatile(true);
8775
8776     // Write the result back to the inout pointer.
8777     Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
8778
8779     // Get the success boolean and zero extend it to i8.
8780     Value *Success = Builder.CreateExtractValue(CXI, 1);
8781     return Builder.CreateZExt(Success, ConvertType(E->getType()));
8782   }
8783
8784   case X86::BI_AddressOfReturnAddress: {
8785     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8786     return Builder.CreateCall(F);
8787   }
8788   case X86::BI__stosb: {
8789     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
8790     // instruction, but it will create a memset that won't be optimized away.
8791     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8792   }
8793   case X86::BI__ud2:
8794     // llvm.trap makes a ud2a instruction on x86.
8795     return EmitTrapCall(Intrinsic::trap);
8796   case X86::BI__int2c: {
8797     // This syscall signals a driver assertion failure in x86 NT kernels.
8798     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8799     llvm::InlineAsm *IA =
8800         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8801     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8802         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8803         llvm::Attribute::NoReturn);
8804     CallSite CS = Builder.CreateCall(IA);
8805     CS.setAttributes(NoReturnAttr);
8806     return CS.getInstruction();
8807   }
8808   case X86::BI__readfsbyte:
8809   case X86::BI__readfsword:
8810   case X86::BI__readfsdword:
8811   case X86::BI__readfsqword: {
8812     llvm::Type *IntTy = ConvertType(E->getType());
8813     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8814                                         llvm::PointerType::get(IntTy, 257));
8815     LoadInst *Load = Builder.CreateAlignedLoad(
8816         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8817     Load->setVolatile(true);
8818     return Load;
8819   }
8820   case X86::BI__readgsbyte:
8821   case X86::BI__readgsword:
8822   case X86::BI__readgsdword:
8823   case X86::BI__readgsqword: {
8824     llvm::Type *IntTy = ConvertType(E->getType());
8825     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8826                                         llvm::PointerType::get(IntTy, 256));
8827     LoadInst *Load = Builder.CreateAlignedLoad(
8828         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8829     Load->setVolatile(true);
8830     return Load;
8831   }
8832   }
8833 }
8834
8835
8836 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8837                                            const CallExpr *E) {
8838   SmallVector<Value*, 4> Ops;
8839
8840   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8841     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8842
8843   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8844
8845   switch (BuiltinID) {
8846   default: return nullptr;
8847
8848   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8849   // call __builtin_readcyclecounter.
8850   case PPC::BI__builtin_ppc_get_timebase:
8851     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8852
8853   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8854   case PPC::BI__builtin_altivec_lvx:
8855   case PPC::BI__builtin_altivec_lvxl:
8856   case PPC::BI__builtin_altivec_lvebx:
8857   case PPC::BI__builtin_altivec_lvehx:
8858   case PPC::BI__builtin_altivec_lvewx:
8859   case PPC::BI__builtin_altivec_lvsl:
8860   case PPC::BI__builtin_altivec_lvsr:
8861   case PPC::BI__builtin_vsx_lxvd2x:
8862   case PPC::BI__builtin_vsx_lxvw4x:
8863   case PPC::BI__builtin_vsx_lxvd2x_be:
8864   case PPC::BI__builtin_vsx_lxvw4x_be:
8865   case PPC::BI__builtin_vsx_lxvl:
8866   case PPC::BI__builtin_vsx_lxvll:
8867   {
8868     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8869        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8870       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8871     }else {
8872       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8873       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8874       Ops.pop_back();
8875     }
8876
8877     switch (BuiltinID) {
8878     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8879     case PPC::BI__builtin_altivec_lvx:
8880       ID = Intrinsic::ppc_altivec_lvx;
8881       break;
8882     case PPC::BI__builtin_altivec_lvxl:
8883       ID = Intrinsic::ppc_altivec_lvxl;
8884       break;
8885     case PPC::BI__builtin_altivec_lvebx:
8886       ID = Intrinsic::ppc_altivec_lvebx;
8887       break;
8888     case PPC::BI__builtin_altivec_lvehx:
8889       ID = Intrinsic::ppc_altivec_lvehx;
8890       break;
8891     case PPC::BI__builtin_altivec_lvewx:
8892       ID = Intrinsic::ppc_altivec_lvewx;
8893       break;
8894     case PPC::BI__builtin_altivec_lvsl:
8895       ID = Intrinsic::ppc_altivec_lvsl;
8896       break;
8897     case PPC::BI__builtin_altivec_lvsr:
8898       ID = Intrinsic::ppc_altivec_lvsr;
8899       break;
8900     case PPC::BI__builtin_vsx_lxvd2x:
8901       ID = Intrinsic::ppc_vsx_lxvd2x;
8902       break;
8903     case PPC::BI__builtin_vsx_lxvw4x:
8904       ID = Intrinsic::ppc_vsx_lxvw4x;
8905       break;
8906     case PPC::BI__builtin_vsx_lxvd2x_be:
8907       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8908       break;
8909     case PPC::BI__builtin_vsx_lxvw4x_be:
8910       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8911       break;
8912     case PPC::BI__builtin_vsx_lxvl:
8913       ID = Intrinsic::ppc_vsx_lxvl;
8914       break;
8915     case PPC::BI__builtin_vsx_lxvll:
8916       ID = Intrinsic::ppc_vsx_lxvll;
8917       break;
8918     }
8919     llvm::Function *F = CGM.getIntrinsic(ID);
8920     return Builder.CreateCall(F, Ops, "");
8921   }
8922
8923   // vec_st, vec_xst_be
8924   case PPC::BI__builtin_altivec_stvx:
8925   case PPC::BI__builtin_altivec_stvxl:
8926   case PPC::BI__builtin_altivec_stvebx:
8927   case PPC::BI__builtin_altivec_stvehx:
8928   case PPC::BI__builtin_altivec_stvewx:
8929   case PPC::BI__builtin_vsx_stxvd2x:
8930   case PPC::BI__builtin_vsx_stxvw4x:
8931   case PPC::BI__builtin_vsx_stxvd2x_be:
8932   case PPC::BI__builtin_vsx_stxvw4x_be:
8933   case PPC::BI__builtin_vsx_stxvl:
8934   case PPC::BI__builtin_vsx_stxvll:
8935   {
8936     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8937       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8938       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8939     }else {
8940       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8941       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8942       Ops.pop_back();
8943     }
8944
8945     switch (BuiltinID) {
8946     default: llvm_unreachable("Unsupported st intrinsic!");
8947     case PPC::BI__builtin_altivec_stvx:
8948       ID = Intrinsic::ppc_altivec_stvx;
8949       break;
8950     case PPC::BI__builtin_altivec_stvxl:
8951       ID = Intrinsic::ppc_altivec_stvxl;
8952       break;
8953     case PPC::BI__builtin_altivec_stvebx:
8954       ID = Intrinsic::ppc_altivec_stvebx;
8955       break;
8956     case PPC::BI__builtin_altivec_stvehx:
8957       ID = Intrinsic::ppc_altivec_stvehx;
8958       break;
8959     case PPC::BI__builtin_altivec_stvewx:
8960       ID = Intrinsic::ppc_altivec_stvewx;
8961       break;
8962     case PPC::BI__builtin_vsx_stxvd2x:
8963       ID = Intrinsic::ppc_vsx_stxvd2x;
8964       break;
8965     case PPC::BI__builtin_vsx_stxvw4x:
8966       ID = Intrinsic::ppc_vsx_stxvw4x;
8967       break;
8968     case PPC::BI__builtin_vsx_stxvd2x_be:
8969       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8970       break;
8971     case PPC::BI__builtin_vsx_stxvw4x_be:
8972       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8973       break;
8974     case PPC::BI__builtin_vsx_stxvl:
8975       ID = Intrinsic::ppc_vsx_stxvl;
8976       break;
8977     case PPC::BI__builtin_vsx_stxvll:
8978       ID = Intrinsic::ppc_vsx_stxvll;
8979       break;
8980     }
8981     llvm::Function *F = CGM.getIntrinsic(ID);
8982     return Builder.CreateCall(F, Ops, "");
8983   }
8984   // Square root
8985   case PPC::BI__builtin_vsx_xvsqrtsp:
8986   case PPC::BI__builtin_vsx_xvsqrtdp: {
8987     llvm::Type *ResultType = ConvertType(E->getType());
8988     Value *X = EmitScalarExpr(E->getArg(0));
8989     ID = Intrinsic::sqrt;
8990     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8991     return Builder.CreateCall(F, X);
8992   }
8993   // Count leading zeros
8994   case PPC::BI__builtin_altivec_vclzb:
8995   case PPC::BI__builtin_altivec_vclzh:
8996   case PPC::BI__builtin_altivec_vclzw:
8997   case PPC::BI__builtin_altivec_vclzd: {
8998     llvm::Type *ResultType = ConvertType(E->getType());
8999     Value *X = EmitScalarExpr(E->getArg(0));
9000     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9001     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9002     return Builder.CreateCall(F, {X, Undef});
9003   }
9004   case PPC::BI__builtin_altivec_vctzb:
9005   case PPC::BI__builtin_altivec_vctzh:
9006   case PPC::BI__builtin_altivec_vctzw:
9007   case PPC::BI__builtin_altivec_vctzd: {
9008     llvm::Type *ResultType = ConvertType(E->getType());
9009     Value *X = EmitScalarExpr(E->getArg(0));
9010     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9011     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9012     return Builder.CreateCall(F, {X, Undef});
9013   }
9014   case PPC::BI__builtin_altivec_vpopcntb:
9015   case PPC::BI__builtin_altivec_vpopcnth:
9016   case PPC::BI__builtin_altivec_vpopcntw:
9017   case PPC::BI__builtin_altivec_vpopcntd: {
9018     llvm::Type *ResultType = ConvertType(E->getType());
9019     Value *X = EmitScalarExpr(E->getArg(0));
9020     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9021     return Builder.CreateCall(F, X);
9022   }
9023   // Copy sign
9024   case PPC::BI__builtin_vsx_xvcpsgnsp:
9025   case PPC::BI__builtin_vsx_xvcpsgndp: {
9026     llvm::Type *ResultType = ConvertType(E->getType());
9027     Value *X = EmitScalarExpr(E->getArg(0));
9028     Value *Y = EmitScalarExpr(E->getArg(1));
9029     ID = Intrinsic::copysign;
9030     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
9031     return Builder.CreateCall(F, {X, Y});
9032   }
9033   // Rounding/truncation
9034   case PPC::BI__builtin_vsx_xvrspip:
9035   case PPC::BI__builtin_vsx_xvrdpip:
9036   case PPC::BI__builtin_vsx_xvrdpim:
9037   case PPC::BI__builtin_vsx_xvrspim:
9038   case PPC::BI__builtin_vsx_xvrdpi:
9039   case PPC::BI__builtin_vsx_xvrspi:
9040   case PPC::BI__builtin_vsx_xvrdpic:
9041   case PPC::BI__builtin_vsx_xvrspic:
9042   case PPC::BI__builtin_vsx_xvrdpiz:
9043   case PPC::BI__builtin_vsx_xvrspiz: {
9044     llvm::Type *ResultType = ConvertType(E->getType());
9045     Value *X = EmitScalarExpr(E->getArg(0));
9046     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
9047         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
9048       ID = Intrinsic::floor;
9049     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
9050              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
9051       ID = Intrinsic::round;
9052     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
9053              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
9054       ID = Intrinsic::nearbyint;
9055     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
9056              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
9057       ID = Intrinsic::ceil;
9058     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
9059              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
9060       ID = Intrinsic::trunc;
9061     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
9062     return Builder.CreateCall(F, X);
9063   }
9064
9065   // Absolute value
9066   case PPC::BI__builtin_vsx_xvabsdp:
9067   case PPC::BI__builtin_vsx_xvabssp: {
9068     llvm::Type *ResultType = ConvertType(E->getType());
9069     Value *X = EmitScalarExpr(E->getArg(0));
9070     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9071     return Builder.CreateCall(F, X);
9072   }
9073
9074   // FMA variations
9075   case PPC::BI__builtin_vsx_xvmaddadp:
9076   case PPC::BI__builtin_vsx_xvmaddasp:
9077   case PPC::BI__builtin_vsx_xvnmaddadp:
9078   case PPC::BI__builtin_vsx_xvnmaddasp:
9079   case PPC::BI__builtin_vsx_xvmsubadp:
9080   case PPC::BI__builtin_vsx_xvmsubasp:
9081   case PPC::BI__builtin_vsx_xvnmsubadp:
9082   case PPC::BI__builtin_vsx_xvnmsubasp: {
9083     llvm::Type *ResultType = ConvertType(E->getType());
9084     Value *X = EmitScalarExpr(E->getArg(0));
9085     Value *Y = EmitScalarExpr(E->getArg(1));
9086     Value *Z = EmitScalarExpr(E->getArg(2));
9087     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9088     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9089     switch (BuiltinID) {
9090       case PPC::BI__builtin_vsx_xvmaddadp:
9091       case PPC::BI__builtin_vsx_xvmaddasp:
9092         return Builder.CreateCall(F, {X, Y, Z});
9093       case PPC::BI__builtin_vsx_xvnmaddadp:
9094       case PPC::BI__builtin_vsx_xvnmaddasp:
9095         return Builder.CreateFSub(Zero,
9096                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
9097       case PPC::BI__builtin_vsx_xvmsubadp:
9098       case PPC::BI__builtin_vsx_xvmsubasp:
9099         return Builder.CreateCall(F,
9100                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9101       case PPC::BI__builtin_vsx_xvnmsubadp:
9102       case PPC::BI__builtin_vsx_xvnmsubasp:
9103         Value *FsubRes =
9104           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9105         return Builder.CreateFSub(Zero, FsubRes, "sub");
9106     }
9107     llvm_unreachable("Unknown FMA operation");
9108     return nullptr; // Suppress no-return warning
9109   }
9110
9111   case PPC::BI__builtin_vsx_insertword: {
9112     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
9113
9114     // Third argument is a compile time constant int. It must be clamped to
9115     // to the range [0, 12].
9116     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9117     assert(ArgCI &&
9118            "Third arg to xxinsertw intrinsic must be constant integer");
9119     const int64_t MaxIndex = 12;
9120     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
9121
9122     // The builtin semantics don't exactly match the xxinsertw instructions
9123     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
9124     // word from the first argument, and inserts it in the second argument. The
9125     // instruction extracts the word from its second input register and inserts
9126     // it into its first input register, so swap the first and second arguments.
9127     std::swap(Ops[0], Ops[1]);
9128
9129     // Need to cast the second argument from a vector of unsigned int to a
9130     // vector of long long.
9131     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
9132
9133     if (getTarget().isLittleEndian()) {
9134       // Create a shuffle mask of (1, 0)
9135       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
9136                                    ConstantInt::get(Int32Ty, 0)
9137                                  };
9138       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9139
9140       // Reverse the double words in the vector we will extract from.
9141       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9142       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
9143
9144       // Reverse the index.
9145       Index = MaxIndex - Index;
9146     }
9147
9148     // Intrinsic expects the first arg to be a vector of int.
9149     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
9150     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
9151     return Builder.CreateCall(F, Ops);
9152   }
9153
9154   case PPC::BI__builtin_vsx_extractuword: {
9155     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
9156
9157     // Intrinsic expects the first argument to be a vector of doublewords.
9158     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9159
9160     // The second argument is a compile time constant int that needs to
9161     // be clamped to the range [0, 12].
9162     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
9163     assert(ArgCI &&
9164            "Second Arg to xxextractuw intrinsic must be a constant integer!");
9165     const int64_t MaxIndex = 12;
9166     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
9167
9168     if (getTarget().isLittleEndian()) {
9169       // Reverse the index.
9170       Index = MaxIndex - Index;
9171       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
9172
9173       // Emit the call, then reverse the double words of the results vector.
9174       Value *Call = Builder.CreateCall(F, Ops);
9175
9176       // Create a shuffle mask of (1, 0)
9177       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
9178                                    ConstantInt::get(Int32Ty, 0)
9179                                  };
9180       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9181
9182       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
9183       return ShuffleCall;
9184     } else {
9185       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
9186       return Builder.CreateCall(F, Ops);
9187     }
9188   }
9189
9190   case PPC::BI__builtin_vsx_xxpermdi: {
9191     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9192     assert(ArgCI && "Third arg must be constant integer!");
9193
9194     unsigned Index = ArgCI->getZExtValue();
9195     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9196     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
9197
9198     // Element zero comes from the first input vector and element one comes from
9199     // the second. The element indices within each vector are numbered in big
9200     // endian order so the shuffle mask must be adjusted for this on little
9201     // endian platforms (i.e. index is complemented and source vector reversed).
9202     unsigned ElemIdx0;
9203     unsigned ElemIdx1;
9204     if (getTarget().isLittleEndian()) {
9205       ElemIdx0 = (~Index & 1) + 2;
9206       ElemIdx1 = (~Index & 2) >> 1;
9207     } else { // BigEndian
9208       ElemIdx0 = (Index & 2) >> 1;
9209       ElemIdx1 = 2 + (Index & 1);
9210     }
9211
9212     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
9213                                 ConstantInt::get(Int32Ty, ElemIdx1)};
9214     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9215
9216     Value *ShuffleCall =
9217         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
9218     QualType BIRetType = E->getType();
9219     auto RetTy = ConvertType(BIRetType);
9220     return Builder.CreateBitCast(ShuffleCall, RetTy);
9221   }
9222
9223   case PPC::BI__builtin_vsx_xxsldwi: {
9224     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9225     assert(ArgCI && "Third argument must be a compile time constant");
9226     unsigned Index = ArgCI->getZExtValue() & 0x3;
9227     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
9228     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
9229
9230     // Create a shuffle mask
9231     unsigned ElemIdx0;
9232     unsigned ElemIdx1;
9233     unsigned ElemIdx2;
9234     unsigned ElemIdx3;
9235     if (getTarget().isLittleEndian()) {
9236       // Little endian element N comes from element 8+N-Index of the
9237       // concatenated wide vector (of course, using modulo arithmetic on
9238       // the total number of elements).
9239       ElemIdx0 = (8 - Index) % 8;
9240       ElemIdx1 = (9 - Index) % 8;
9241       ElemIdx2 = (10 - Index) % 8;
9242       ElemIdx3 = (11 - Index) % 8;
9243     } else {
9244       // Big endian ElemIdx<N> = Index + N
9245       ElemIdx0 = Index;
9246       ElemIdx1 = Index + 1;
9247       ElemIdx2 = Index + 2;
9248       ElemIdx3 = Index + 3;
9249     }
9250
9251     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
9252                                 ConstantInt::get(Int32Ty, ElemIdx1),
9253                                 ConstantInt::get(Int32Ty, ElemIdx2),
9254                                 ConstantInt::get(Int32Ty, ElemIdx3)};
9255
9256     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9257     Value *ShuffleCall =
9258         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
9259     QualType BIRetType = E->getType();
9260     auto RetTy = ConvertType(BIRetType);
9261     return Builder.CreateBitCast(ShuffleCall, RetTy);
9262   }
9263   }
9264 }
9265
9266 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
9267                                               const CallExpr *E) {
9268   switch (BuiltinID) {
9269   case AMDGPU::BI__builtin_amdgcn_div_scale:
9270   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
9271     // Translate from the intrinsics's struct return to the builtin's out
9272     // argument.
9273
9274     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
9275
9276     llvm::Value *X = EmitScalarExpr(E->getArg(0));
9277     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
9278     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
9279
9280     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
9281                                            X->getType());
9282
9283     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
9284
9285     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
9286     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
9287
9288     llvm::Type *RealFlagType
9289       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
9290
9291     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
9292     Builder.CreateStore(FlagExt, FlagOutPtr);
9293     return Result;
9294   }
9295   case AMDGPU::BI__builtin_amdgcn_div_fmas:
9296   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
9297     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
9298     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
9299     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
9300     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
9301
9302     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
9303                                       Src0->getType());
9304     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
9305     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
9306   }
9307
9308   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
9309     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
9310   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
9311     llvm::SmallVector<llvm::Value *, 5> Args;
9312     for (unsigned I = 0; I != 5; ++I)
9313       Args.push_back(EmitScalarExpr(E->getArg(I)));
9314     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
9315                                     Args[0]->getType());
9316     return Builder.CreateCall(F, Args);
9317   }
9318   case AMDGPU::BI__builtin_amdgcn_div_fixup:
9319   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
9320   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
9321     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
9322   case AMDGPU::BI__builtin_amdgcn_trig_preop:
9323   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
9324     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
9325   case AMDGPU::BI__builtin_amdgcn_rcp:
9326   case AMDGPU::BI__builtin_amdgcn_rcpf:
9327   case AMDGPU::BI__builtin_amdgcn_rcph:
9328     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
9329   case AMDGPU::BI__builtin_amdgcn_rsq:
9330   case AMDGPU::BI__builtin_amdgcn_rsqf:
9331   case AMDGPU::BI__builtin_amdgcn_rsqh:
9332     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
9333   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
9334   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
9335     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
9336   case AMDGPU::BI__builtin_amdgcn_sinf:
9337   case AMDGPU::BI__builtin_amdgcn_sinh:
9338     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
9339   case AMDGPU::BI__builtin_amdgcn_cosf:
9340   case AMDGPU::BI__builtin_amdgcn_cosh:
9341     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
9342   case AMDGPU::BI__builtin_amdgcn_log_clampf:
9343     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
9344   case AMDGPU::BI__builtin_amdgcn_ldexp:
9345   case AMDGPU::BI__builtin_amdgcn_ldexpf:
9346   case AMDGPU::BI__builtin_amdgcn_ldexph:
9347     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
9348   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
9349   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
9350   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
9351     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
9352   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
9353   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
9354     Value *Src0 = EmitScalarExpr(E->getArg(0));
9355     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9356                                 { Builder.getInt32Ty(), Src0->getType() });
9357     return Builder.CreateCall(F, Src0);
9358   }
9359   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
9360     Value *Src0 = EmitScalarExpr(E->getArg(0));
9361     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9362                                 { Builder.getInt16Ty(), Src0->getType() });
9363     return Builder.CreateCall(F, Src0);
9364   }
9365   case AMDGPU::BI__builtin_amdgcn_fract:
9366   case AMDGPU::BI__builtin_amdgcn_fractf:
9367   case AMDGPU::BI__builtin_amdgcn_fracth:
9368     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
9369   case AMDGPU::BI__builtin_amdgcn_lerp:
9370     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
9371   case AMDGPU::BI__builtin_amdgcn_uicmp:
9372   case AMDGPU::BI__builtin_amdgcn_uicmpl:
9373   case AMDGPU::BI__builtin_amdgcn_sicmp:
9374   case AMDGPU::BI__builtin_amdgcn_sicmpl:
9375     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
9376   case AMDGPU::BI__builtin_amdgcn_fcmp:
9377   case AMDGPU::BI__builtin_amdgcn_fcmpf:
9378     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
9379   case AMDGPU::BI__builtin_amdgcn_class:
9380   case AMDGPU::BI__builtin_amdgcn_classf:
9381   case AMDGPU::BI__builtin_amdgcn_classh:
9382     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
9383   case AMDGPU::BI__builtin_amdgcn_fmed3f:
9384   case AMDGPU::BI__builtin_amdgcn_fmed3h:
9385     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
9386   case AMDGPU::BI__builtin_amdgcn_read_exec: {
9387     CallInst *CI = cast<CallInst>(
9388       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
9389     CI->setConvergent();
9390     return CI;
9391   }
9392   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
9393   case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
9394     StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
9395       "exec_lo" : "exec_hi";
9396     CallInst *CI = cast<CallInst>(
9397       EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
9398     CI->setConvergent();
9399     return CI;
9400   }
9401
9402   // amdgcn workitem
9403   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
9404     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
9405   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
9406     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
9407   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
9408     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
9409
9410   // r600 intrinsics
9411   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
9412   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
9413     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
9414   case AMDGPU::BI__builtin_r600_read_tidig_x:
9415     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
9416   case AMDGPU::BI__builtin_r600_read_tidig_y:
9417     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
9418   case AMDGPU::BI__builtin_r600_read_tidig_z:
9419     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
9420   default:
9421     return nullptr;
9422   }
9423 }
9424
9425 /// Handle a SystemZ function in which the final argument is a pointer
9426 /// to an int that receives the post-instruction CC value.  At the LLVM level
9427 /// this is represented as a function that returns a {result, cc} pair.
9428 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
9429                                          unsigned IntrinsicID,
9430                                          const CallExpr *E) {
9431   unsigned NumArgs = E->getNumArgs() - 1;
9432   SmallVector<Value *, 8> Args(NumArgs);
9433   for (unsigned I = 0; I < NumArgs; ++I)
9434     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
9435   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
9436   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
9437   Value *Call = CGF.Builder.CreateCall(F, Args);
9438   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
9439   CGF.Builder.CreateStore(CC, CCPtr);
9440   return CGF.Builder.CreateExtractValue(Call, 0);
9441 }
9442
9443 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
9444                                                const CallExpr *E) {
       // Emits LLVM IR for the SystemZ builtin call E identified by BuiltinID and
       // returns the resulting value.  Builtins without a case below reach the
       // default label and yield nullptr (presumably so the caller can fall back
       // to another lowering -- confirm against the call site).
9445   switch (BuiltinID) {
       // tbegin / tbegin_nofloat / tbeginc / tabort / non_tx_store: thin wrappers
       // around the like-named s390 intrinsics.
9446   case SystemZ::BI__builtin_tbegin: {
         // Argument 0 is passed as TDB; the second operand is the fixed constant
         // 0xff0c.
9447     Value *TDB = EmitScalarExpr(E->getArg(0));
9448     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9449     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
9450     return Builder.CreateCall(F, {TDB, Control});
9451   }
9452   case SystemZ::BI__builtin_tbegin_nofloat: {
         // Same operands as __builtin_tbegin, but a different intrinsic.
9453     Value *TDB = EmitScalarExpr(E->getArg(0));
9454     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9455     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
9456     return Builder.CreateCall(F, {TDB, Control});
9457   }
9458   case SystemZ::BI__builtin_tbeginc: {
         // No builtin argument is read here: a null i8* is passed as TDB and the
         // control constant is 0xff08.
9459     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
9460     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
9461     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
9462     return Builder.CreateCall(F, {TDB, Control});
9463   }
9464   case SystemZ::BI__builtin_tabort: {
         // The intrinsic receives the argument sign-extended to i64.
9465     Value *Data = EmitScalarExpr(E->getArg(0));
9466     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
9467     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
9468   }
9469   case SystemZ::BI__builtin_non_tx_store: {
         // Note the operand order: the intrinsic is called with (Data, Address),
         // the reverse of the builtin's (Address, Data) argument order.
9470     Value *Address = EmitScalarExpr(E->getArg(0));
9471     Value *Data = EmitScalarExpr(E->getArg(1));
9472     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
9473     return Builder.CreateCall(F, {Data, Address});
9474   }
9475
9476   // Vector builtins.  Note that most vector builtins are mapped automatically
9477   // to target-specific LLVM intrinsics.  The ones handled specially here can
9478   // be represented via standard LLVM IR, which is preferable to enable common
9479   // LLVM optimizations.
9480
9481   case SystemZ::BI__builtin_s390_vpopctb:
9482   case SystemZ::BI__builtin_s390_vpopcth:
9483   case SystemZ::BI__builtin_s390_vpopctf:
9484   case SystemZ::BI__builtin_s390_vpopctg: {
         // All element widths share one lowering: llvm.ctpop overloaded on the
         // builtin's result type.
9485     llvm::Type *ResultType = ConvertType(E->getType());
9486     Value *X = EmitScalarExpr(E->getArg(0));
9487     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9488     return Builder.CreateCall(F, X);
9489   }
9490
9491   case SystemZ::BI__builtin_s390_vclzb:
9492   case SystemZ::BI__builtin_s390_vclzh:
9493   case SystemZ::BI__builtin_s390_vclzf:
9494   case SystemZ::BI__builtin_s390_vclzg: {
         // Lowered to llvm.ctlz.  Despite its name, 'Undef' is the constant
         // i1 false passed as the intrinsic's second operand (the is_zero_undef
         // flag in the LLVM LangRef).
9495     llvm::Type *ResultType = ConvertType(E->getType());
9496     Value *X = EmitScalarExpr(E->getArg(0));
9497     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9498     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9499     return Builder.CreateCall(F, {X, Undef});
9500   }
9501
9502   case SystemZ::BI__builtin_s390_vctzb:
9503   case SystemZ::BI__builtin_s390_vctzh:
9504   case SystemZ::BI__builtin_s390_vctzf:
9505   case SystemZ::BI__builtin_s390_vctzg: {
         // Lowered to llvm.cttz; as for vclz above, 'Undef' is the constant
         // i1 false second operand.
9506     llvm::Type *ResultType = ConvertType(E->getType());
9507     Value *X = EmitScalarExpr(E->getArg(0));
9508     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9509     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9510     return Builder.CreateCall(F, {X, Undef});
9511   }
9512
9513   case SystemZ::BI__builtin_s390_vfsqsb:
9514   case SystemZ::BI__builtin_s390_vfsqdb: {
         // Lowered to llvm.sqrt on the result type.
9515     llvm::Type *ResultType = ConvertType(E->getType());
9516     Value *X = EmitScalarExpr(E->getArg(0));
9517     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
9518     return Builder.CreateCall(F, X);
9519   }
9520   case SystemZ::BI__builtin_s390_vfmasb:
9521   case SystemZ::BI__builtin_s390_vfmadb: {
         // Emitted as fma(X, Y, Z).
9522     llvm::Type *ResultType = ConvertType(E->getType());
9523     Value *X = EmitScalarExpr(E->getArg(0));
9524     Value *Y = EmitScalarExpr(E->getArg(1));
9525     Value *Z = EmitScalarExpr(E->getArg(2));
9526     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9527     return Builder.CreateCall(F, {X, Y, Z});
9528   }
9529   case SystemZ::BI__builtin_s390_vfmssb:
9530   case SystemZ::BI__builtin_s390_vfmsdb: {
         // Emitted as fma(X, Y, Zero - Z), where Zero is the constant returned by
         // getZeroValueForNegation() for the result type.
9531     llvm::Type *ResultType = ConvertType(E->getType());
9532     Value *X = EmitScalarExpr(E->getArg(0));
9533     Value *Y = EmitScalarExpr(E->getArg(1));
9534     Value *Z = EmitScalarExpr(E->getArg(2));
9535     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9536     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9537     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9538   }
9539   case SystemZ::BI__builtin_s390_vfnmasb:
9540   case SystemZ::BI__builtin_s390_vfnmadb: {
         // Emitted as Zero - fma(X, Y, Z).
9541     llvm::Type *ResultType = ConvertType(E->getType());
9542     Value *X = EmitScalarExpr(E->getArg(0));
9543     Value *Y = EmitScalarExpr(E->getArg(1));
9544     Value *Z = EmitScalarExpr(E->getArg(2));
9545     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9546     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9547     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
9548   }
9549   case SystemZ::BI__builtin_s390_vfnmssb:
9550   case SystemZ::BI__builtin_s390_vfnmsdb: {
         // Emitted as Zero - fma(X, Y, Zero - Z).  Unlike the sibling cases, the
         // outer fsub is created without the "sub" value name.
9551     llvm::Type *ResultType = ConvertType(E->getType());
9552     Value *X = EmitScalarExpr(E->getArg(0));
9553     Value *Y = EmitScalarExpr(E->getArg(1));
9554     Value *Z = EmitScalarExpr(E->getArg(2));
9555     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9556     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9557     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
9558     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
9559   }
9560   case SystemZ::BI__builtin_s390_vflpsb:
9561   case SystemZ::BI__builtin_s390_vflpdb: {
         // Emitted as fabs(X).
9562     llvm::Type *ResultType = ConvertType(E->getType());
9563     Value *X = EmitScalarExpr(E->getArg(0));
9564     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9565     return Builder.CreateCall(F, X);
9566   }
9567   case SystemZ::BI__builtin_s390_vflnsb:
9568   case SystemZ::BI__builtin_s390_vflndb: {
         // Emitted as Zero - fabs(X).
9569     llvm::Type *ResultType = ConvertType(E->getType());
9570     Value *X = EmitScalarExpr(E->getArg(0));
9571     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9572     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9573     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
9574   }
9575   case SystemZ::BI__builtin_s390_vfisb:
9576   case SystemZ::BI__builtin_s390_vfidb: {
9577     llvm::Type *ResultType = ConvertType(E->getType());
9578     Value *X = EmitScalarExpr(E->getArg(0));
9579     // Constant-fold the M4 and M5 mask arguments.
9580     llvm::APSInt M4, M5;
9581     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9582     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9583     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
         // The flags are only read by the assert; the casts keep builds that
         // compile the assert away from warning about unused variables.
9584     (void)IsConstM4; (void)IsConstM5;
9585     // Check whether this instance can be represented via a LLVM standard
9586     // intrinsic.  We only support some combinations of M4 and M5.
9587     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9588     switch (M4.getZExtValue()) {
9589     default: break;
9590     case 0:  // IEEE-inexact exception allowed
9591       switch (M5.getZExtValue()) {
9592       default: break;
9593       case 0: ID = Intrinsic::rint; break;
9594       }
9595       break;
9596     case 4:  // IEEE-inexact exception suppressed
9597       switch (M5.getZExtValue()) {
9598       default: break;
9599       case 0: ID = Intrinsic::nearbyint; break;
9600       case 1: ID = Intrinsic::round; break;
9601       case 5: ID = Intrinsic::trunc; break;
9602       case 6: ID = Intrinsic::ceil; break;
9603       case 7: ID = Intrinsic::floor; break;
9604       }
9605       break;
9606     }
9607     if (ID != Intrinsic::not_intrinsic) {
9608       Function *F = CGM.getIntrinsic(ID, ResultType);
9609       return Builder.CreateCall(F, X);
9610     }
         // No standard intrinsic matched this M4/M5 pair: fall back to the
         // target-specific intrinsic and pass both masks through as constants.
9611     switch (BuiltinID) {
9612       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
9613       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
9614       default: llvm_unreachable("Unknown BuiltinID");
9615     }
9616     Function *F = CGM.getIntrinsic(ID);
9617     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9618     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9619     return Builder.CreateCall(F, {X, M4Value, M5Value});
9620   }
9621   case SystemZ::BI__builtin_s390_vfmaxsb:
9622   case SystemZ::BI__builtin_s390_vfmaxdb: {
9623     llvm::Type *ResultType = ConvertType(E->getType());
9624     Value *X = EmitScalarExpr(E->getArg(0));
9625     Value *Y = EmitScalarExpr(E->getArg(1));
9626     // Constant-fold the M4 mask argument.
9627     llvm::APSInt M4;
9628     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9629     assert(IsConstM4 && "Constant arg isn't actually constant?");
9630     (void)IsConstM4;
9631     // Check whether this instance can be represented via a LLVM standard
9632     // intrinsic.  We only support some values of M4.
9633     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9634     switch (M4.getZExtValue()) {
9635     default: break;
9636     case 4: ID = Intrinsic::maxnum; break;
9637     }
9638     if (ID != Intrinsic::not_intrinsic) {
9639       Function *F = CGM.getIntrinsic(ID, ResultType);
9640       return Builder.CreateCall(F, {X, Y});
9641     }
         // Any other M4 value: use the target-specific intrinsic and pass the
         // mask through as a constant operand.
9642     switch (BuiltinID) {
9643       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
9644       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
9645       default: llvm_unreachable("Unknown BuiltinID");
9646     }
9647     Function *F = CGM.getIntrinsic(ID);
9648     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9649     return Builder.CreateCall(F, {X, Y, M4Value});
9650   }
9651   case SystemZ::BI__builtin_s390_vfminsb:
9652   case SystemZ::BI__builtin_s390_vfmindb: {
         // Mirrors the vfmax case above, with minnum / s390_vfmin* instead.
9653     llvm::Type *ResultType = ConvertType(E->getType());
9654     Value *X = EmitScalarExpr(E->getArg(0));
9655     Value *Y = EmitScalarExpr(E->getArg(1));
9656     // Constant-fold the M4 mask argument.
9657     llvm::APSInt M4;
9658     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9659     assert(IsConstM4 && "Constant arg isn't actually constant?");
9660     (void)IsConstM4;
9661     // Check whether this instance can be represented via a LLVM standard
9662     // intrinsic.  We only support some values of M4.
9663     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9664     switch (M4.getZExtValue()) {
9665     default: break;
9666     case 4: ID = Intrinsic::minnum; break;
9667     }
9668     if (ID != Intrinsic::not_intrinsic) {
9669       Function *F = CGM.getIntrinsic(ID, ResultType);
9670       return Builder.CreateCall(F, {X, Y});
9671     }
9672     switch (BuiltinID) {
9673       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
9674       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
9675       default: llvm_unreachable("Unknown BuiltinID");
9676     }
9677     Function *F = CGM.getIntrinsic(ID);
9678     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9679     return Builder.CreateCall(F, {X, Y, M4Value});
9680   }
9681
9682   // Vector intrinsics that output the post-instruction CC value.
9683
       // INTRINSIC_WITH_CC(NAME) expands to a case label for
       // SystemZ::BI__builtin_NAME that returns EmitSystemZIntrinsicWithCC() for
       // Intrinsic::NAME; the ';' written after each use below terminates that
       // return statement.
9684 #define INTRINSIC_WITH_CC(NAME) \
9685     case SystemZ::BI__builtin_##NAME: \
9686       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
9687
9688   INTRINSIC_WITH_CC(s390_vpkshs);
9689   INTRINSIC_WITH_CC(s390_vpksfs);
9690   INTRINSIC_WITH_CC(s390_vpksgs);
9691
9692   INTRINSIC_WITH_CC(s390_vpklshs);
9693   INTRINSIC_WITH_CC(s390_vpklsfs);
9694   INTRINSIC_WITH_CC(s390_vpklsgs);
9695
9696   INTRINSIC_WITH_CC(s390_vceqbs);
9697   INTRINSIC_WITH_CC(s390_vceqhs);
9698   INTRINSIC_WITH_CC(s390_vceqfs);
9699   INTRINSIC_WITH_CC(s390_vceqgs);
9700
9701   INTRINSIC_WITH_CC(s390_vchbs);
9702   INTRINSIC_WITH_CC(s390_vchhs);
9703   INTRINSIC_WITH_CC(s390_vchfs);
9704   INTRINSIC_WITH_CC(s390_vchgs);
9705
9706   INTRINSIC_WITH_CC(s390_vchlbs);
9707   INTRINSIC_WITH_CC(s390_vchlhs);
9708   INTRINSIC_WITH_CC(s390_vchlfs);
9709   INTRINSIC_WITH_CC(s390_vchlgs);
9710
9711   INTRINSIC_WITH_CC(s390_vfaebs);
9712   INTRINSIC_WITH_CC(s390_vfaehs);
9713   INTRINSIC_WITH_CC(s390_vfaefs);
9714
9715   INTRINSIC_WITH_CC(s390_vfaezbs);
9716   INTRINSIC_WITH_CC(s390_vfaezhs);
9717   INTRINSIC_WITH_CC(s390_vfaezfs);
9718
9719   INTRINSIC_WITH_CC(s390_vfeebs);
9720   INTRINSIC_WITH_CC(s390_vfeehs);
9721   INTRINSIC_WITH_CC(s390_vfeefs);
9722
9723   INTRINSIC_WITH_CC(s390_vfeezbs);
9724   INTRINSIC_WITH_CC(s390_vfeezhs);
9725   INTRINSIC_WITH_CC(s390_vfeezfs);
9726
9727   INTRINSIC_WITH_CC(s390_vfenebs);
9728   INTRINSIC_WITH_CC(s390_vfenehs);
9729   INTRINSIC_WITH_CC(s390_vfenefs);
9730
9731   INTRINSIC_WITH_CC(s390_vfenezbs);
9732   INTRINSIC_WITH_CC(s390_vfenezhs);
9733   INTRINSIC_WITH_CC(s390_vfenezfs);
9734
9735   INTRINSIC_WITH_CC(s390_vistrbs);
9736   INTRINSIC_WITH_CC(s390_vistrhs);
9737   INTRINSIC_WITH_CC(s390_vistrfs);
9738
9739   INTRINSIC_WITH_CC(s390_vstrcbs);
9740   INTRINSIC_WITH_CC(s390_vstrchs);
9741   INTRINSIC_WITH_CC(s390_vstrcfs);
9742
9743   INTRINSIC_WITH_CC(s390_vstrczbs);
9744   INTRINSIC_WITH_CC(s390_vstrczhs);
9745   INTRINSIC_WITH_CC(s390_vstrczfs);
9746
9747   INTRINSIC_WITH_CC(s390_vfcesbs);
9748   INTRINSIC_WITH_CC(s390_vfcedbs);
9749   INTRINSIC_WITH_CC(s390_vfchsbs);
9750   INTRINSIC_WITH_CC(s390_vfchdbs);
9751   INTRINSIC_WITH_CC(s390_vfchesbs);
9752   INTRINSIC_WITH_CC(s390_vfchedbs);
9753
9754   INTRINSIC_WITH_CC(s390_vftcisb);
9755   INTRINSIC_WITH_CC(s390_vftcidb);
9756
9757 #undef INTRINSIC_WITH_CC
9758
       // Anything not listed above is not lowered by this function.
9759   default:
9760     return nullptr;
9761   }
9762 }
9763
9764 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9765                                              const CallExpr *E) {
9766   auto MakeLdg = [&](unsigned IntrinsicID) {
9767     Value *Ptr = EmitScalarExpr(E->getArg(0));
9768     clang::CharUnits Align =
9769         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9770     return Builder.CreateCall(
9771         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9772                                        Ptr->getType()}),
9773         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9774   };
9775   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9776     Value *Ptr = EmitScalarExpr(E->getArg(0));
9777     return Builder.CreateCall(
9778         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9779                                        Ptr->getType()}),
9780         {Ptr, EmitScalarExpr(E->getArg(1))});
9781   };
9782   switch (BuiltinID) {
9783   case NVPTX::BI__nvvm_atom_add_gen_i:
9784   case NVPTX::BI__nvvm_atom_add_gen_l:
9785   case NVPTX::BI__nvvm_atom_add_gen_ll:
9786     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9787
9788   case NVPTX::BI__nvvm_atom_sub_gen_i:
9789   case NVPTX::BI__nvvm_atom_sub_gen_l:
9790   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9791     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9792
9793   case NVPTX::BI__nvvm_atom_and_gen_i:
9794   case NVPTX::BI__nvvm_atom_and_gen_l:
9795   case NVPTX::BI__nvvm_atom_and_gen_ll:
9796     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9797
9798   case NVPTX::BI__nvvm_atom_or_gen_i:
9799   case NVPTX::BI__nvvm_atom_or_gen_l:
9800   case NVPTX::BI__nvvm_atom_or_gen_ll:
9801     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9802
9803   case NVPTX::BI__nvvm_atom_xor_gen_i:
9804   case NVPTX::BI__nvvm_atom_xor_gen_l:
9805   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9806     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9807
9808   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9809   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9810   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9811     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9812
9813   case NVPTX::BI__nvvm_atom_max_gen_i:
9814   case NVPTX::BI__nvvm_atom_max_gen_l:
9815   case NVPTX::BI__nvvm_atom_max_gen_ll:
9816     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9817
9818   case NVPTX::BI__nvvm_atom_max_gen_ui:
9819   case NVPTX::BI__nvvm_atom_max_gen_ul:
9820   case NVPTX::BI__nvvm_atom_max_gen_ull:
9821     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9822
9823   case NVPTX::BI__nvvm_atom_min_gen_i:
9824   case NVPTX::BI__nvvm_atom_min_gen_l:
9825   case NVPTX::BI__nvvm_atom_min_gen_ll:
9826     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9827
9828   case NVPTX::BI__nvvm_atom_min_gen_ui:
9829   case NVPTX::BI__nvvm_atom_min_gen_ul:
9830   case NVPTX::BI__nvvm_atom_min_gen_ull:
9831     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9832
9833   case NVPTX::BI__nvvm_atom_cas_gen_i:
9834   case NVPTX::BI__nvvm_atom_cas_gen_l:
9835   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9836     // __nvvm_atom_cas_gen_* should return the old value rather than the
9837     // success flag.
9838     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9839
9840   case NVPTX::BI__nvvm_atom_add_gen_f: {
9841     Value *Ptr = EmitScalarExpr(E->getArg(0));
9842     Value *Val = EmitScalarExpr(E->getArg(1));
9843     // atomicrmw only deals with integer arguments so we need to use
9844     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9845     Value *FnALAF32 =
9846         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9847     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9848   }
9849
9850   case NVPTX::BI__nvvm_atom_add_gen_d: {
9851     Value *Ptr = EmitScalarExpr(E->getArg(0));
9852     Value *Val = EmitScalarExpr(E->getArg(1));
9853     // atomicrmw only deals with integer arguments, so we need to use
9854     // LLVM's nvvm_atomic_load_add_f64 intrinsic.
9855     Value *FnALAF64 =
9856         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
9857     return Builder.CreateCall(FnALAF64, {Ptr, Val});
9858   }
9859
9860   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9861     Value *Ptr = EmitScalarExpr(E->getArg(0));
9862     Value *Val = EmitScalarExpr(E->getArg(1));
9863     Value *FnALI32 =
9864         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9865     return Builder.CreateCall(FnALI32, {Ptr, Val});
9866   }
9867
9868   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9869     Value *Ptr = EmitScalarExpr(E->getArg(0));
9870     Value *Val = EmitScalarExpr(E->getArg(1));
9871     Value *FnALD32 =
9872         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9873     return Builder.CreateCall(FnALD32, {Ptr, Val});
9874   }
9875
9876   case NVPTX::BI__nvvm_ldg_c:
9877   case NVPTX::BI__nvvm_ldg_c2:
9878   case NVPTX::BI__nvvm_ldg_c4:
9879   case NVPTX::BI__nvvm_ldg_s:
9880   case NVPTX::BI__nvvm_ldg_s2:
9881   case NVPTX::BI__nvvm_ldg_s4:
9882   case NVPTX::BI__nvvm_ldg_i:
9883   case NVPTX::BI__nvvm_ldg_i2:
9884   case NVPTX::BI__nvvm_ldg_i4:
9885   case NVPTX::BI__nvvm_ldg_l:
9886   case NVPTX::BI__nvvm_ldg_ll:
9887   case NVPTX::BI__nvvm_ldg_ll2:
9888   case NVPTX::BI__nvvm_ldg_uc:
9889   case NVPTX::BI__nvvm_ldg_uc2:
9890   case NVPTX::BI__nvvm_ldg_uc4:
9891   case NVPTX::BI__nvvm_ldg_us:
9892   case NVPTX::BI__nvvm_ldg_us2:
9893   case NVPTX::BI__nvvm_ldg_us4:
9894   case NVPTX::BI__nvvm_ldg_ui:
9895   case NVPTX::BI__nvvm_ldg_ui2:
9896   case NVPTX::BI__nvvm_ldg_ui4:
9897   case NVPTX::BI__nvvm_ldg_ul:
9898   case NVPTX::BI__nvvm_ldg_ull:
9899   case NVPTX::BI__nvvm_ldg_ull2:
9900     // PTX Interoperability section 2.2: "For a vector with an even number of
9901     // elements, its alignment is set to number of elements times the alignment
9902     // of its member: n*alignof(t)."
9903     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9904   case NVPTX::BI__nvvm_ldg_f:
9905   case NVPTX::BI__nvvm_ldg_f2:
9906   case NVPTX::BI__nvvm_ldg_f4:
9907   case NVPTX::BI__nvvm_ldg_d:
9908   case NVPTX::BI__nvvm_ldg_d2:
9909     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9910
9911   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9912   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9913   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9914     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9915   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9916   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9917   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9918     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9919   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9920   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9921     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9922   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9923   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9924     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9925   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9926   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9927   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9928     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9929   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9930   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9931   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9932     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9933   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9934   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9935   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9936   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9937   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9938   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9939     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9940   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9941   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9942   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9943   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9944   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9945   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9946     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9947   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9948   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9949   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9950   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9951   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9952   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9953     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9954   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9955   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9956   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9957   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9958   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9959   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9960     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9961   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9962     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9963   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9964     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9965   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9966     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9967   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9968     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9969   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9970   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9971   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9972     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9973   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9974   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9975   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9976     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9977   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9978   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9979   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9980     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9981   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9982   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9983   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9984     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9985   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9986   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9987   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9988     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9989   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9990   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9991   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9992     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9993   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9994   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9995   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9996     Value *Ptr = EmitScalarExpr(E->getArg(0));
9997     return Builder.CreateCall(
9998         CGM.getIntrinsic(
9999             Intrinsic::nvvm_atomic_cas_gen_i_cta,
10000             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
10001         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
10002   }
10003   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
10004   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
10005   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
10006     Value *Ptr = EmitScalarExpr(E->getArg(0));
10007     return Builder.CreateCall(
10008         CGM.getIntrinsic(
10009             Intrinsic::nvvm_atomic_cas_gen_i_sys,
10010             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
10011         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
10012   }
10013   case NVPTX::BI__nvvm_match_all_sync_i32p:
10014   case NVPTX::BI__nvvm_match_all_sync_i64p: {
10015     Value *Mask = EmitScalarExpr(E->getArg(0));
10016     Value *Val = EmitScalarExpr(E->getArg(1));
10017     Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
10018     Value *ResultPair = Builder.CreateCall(
10019         CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
10020                              ? Intrinsic::nvvm_match_all_sync_i32p
10021                              : Intrinsic::nvvm_match_all_sync_i64p),
10022         {Mask, Val});
10023     Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
10024                                      PredOutPtr.getElementType());
10025     Builder.CreateStore(Pred, PredOutPtr);
10026     return Builder.CreateExtractValue(ResultPair, 0);
10027   }
10028   case NVPTX::BI__hmma_m16n16k16_ld_a:
10029   case NVPTX::BI__hmma_m16n16k16_ld_b:
10030   case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
10031   case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
10032     Address Dst = EmitPointerWithAlignment(E->getArg(0));
10033     Value *Src = EmitScalarExpr(E->getArg(1));
10034     Value *Ldm = EmitScalarExpr(E->getArg(2));
10035     llvm::APSInt isColMajorArg;
10036     if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
10037       return nullptr;
10038     bool isColMajor = isColMajorArg.getSExtValue();
10039     unsigned IID;
10040     unsigned NumResults;
10041     switch (BuiltinID) {
10042     case NVPTX::BI__hmma_m16n16k16_ld_a:
10043       IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
10044                        : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
10045       NumResults = 8;
10046       break;
10047     case NVPTX::BI__hmma_m16n16k16_ld_b:
10048       IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
10049                        : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
10050       NumResults = 8;
10051       break;
10052     case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
10053       IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
10054                        : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
10055       NumResults = 4;
10056       break;
10057     case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
10058       IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
10059                        : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
10060       NumResults = 8;
10061       break;
10062     default:
10063       llvm_unreachable("Unexpected builtin ID.");
10064     }
10065     Value *Result =
10066         Builder.CreateCall(CGM.getIntrinsic(IID),
10067                            {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm});
10068
10069     // Save returned values.
10070     for (unsigned i = 0; i < NumResults; ++i) {
10071       Builder.CreateAlignedStore(
10072           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
10073                                 Dst.getElementType()),
10074           Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
10075           CharUnits::fromQuantity(4));
10076     }
10077     return Result;
10078   }
10079
10080   case NVPTX::BI__hmma_m16n16k16_st_c_f16:
10081   case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
10082     Value *Dst = EmitScalarExpr(E->getArg(0));
10083     Address Src = EmitPointerWithAlignment(E->getArg(1));
10084     Value *Ldm = EmitScalarExpr(E->getArg(2));
10085     llvm::APSInt isColMajorArg;
10086     if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
10087       return nullptr;
10088     bool isColMajor = isColMajorArg.getSExtValue();
10089     unsigned IID;
10090     unsigned NumResults = 8;
10091     // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
10092     // for some reason nvcc builtins use _c_.
10093     switch (BuiltinID) {
10094     case NVPTX::BI__hmma_m16n16k16_st_c_f16:
10095       IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
10096                        : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
10097       NumResults = 4;
10098       break;
10099     case NVPTX::BI__hmma_m16n16k16_st_c_f32:
10100       IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
10101                        : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
10102       break;
10103     default:
10104       llvm_unreachable("Unexpected builtin ID.");
10105     }
10106     Function *Intrinsic = CGM.getIntrinsic(IID);
10107     llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
10108     SmallVector<Value *, 10> Values;
10109     Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy));
10110     for (unsigned i = 0; i < NumResults; ++i) {
10111       Value *V = Builder.CreateAlignedLoad(
10112           Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
10113           CharUnits::fromQuantity(4));
10114       Values.push_back(Builder.CreateBitCast(V, ParamType));
10115     }
10116     Values.push_back(Ldm);
10117     Value *Result = Builder.CreateCall(Intrinsic, Values);
10118     return Result;
10119   }
10120
10121   // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
10122   //  --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
10123   case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
10124   case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
10125   case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
10126   case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
10127     Address Dst = EmitPointerWithAlignment(E->getArg(0));
10128     Address SrcA = EmitPointerWithAlignment(E->getArg(1));
10129     Address SrcB = EmitPointerWithAlignment(E->getArg(2));
10130     Address SrcC = EmitPointerWithAlignment(E->getArg(3));
10131     llvm::APSInt LayoutArg;
10132     if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
10133       return nullptr;
10134     int Layout = LayoutArg.getSExtValue();
10135     if (Layout < 0 || Layout > 3)
10136       return nullptr;
10137     llvm::APSInt SatfArg;
10138     if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
10139       return nullptr;
10140     bool Satf = SatfArg.getSExtValue();
10141
10142     // clang-format off
10143 #define MMA_VARIANTS(type) {{                                   \
10144       Intrinsic::nvvm_wmma_mma_sync_row_row_##type,             \
10145       Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
10146       Intrinsic::nvvm_wmma_mma_sync_row_col_##type,             \
10147       Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
10148       Intrinsic::nvvm_wmma_mma_sync_col_row_##type,             \
10149       Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
10150       Intrinsic::nvvm_wmma_mma_sync_col_col_##type,             \
10151       Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite  \
10152     }}
10153     // clang-format on
10154
10155     auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
10156       unsigned Index = Layout * 2 + Satf;
10157       assert(Index < 8);
10158       return Variants[Index];
10159     };
10160     unsigned IID;
10161     unsigned NumEltsC;
10162     unsigned NumEltsD;
10163     switch (BuiltinID) {
10164     case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
10165       IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
10166       NumEltsC = 4;
10167       NumEltsD = 4;
10168       break;
10169     case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
10170       IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
10171       NumEltsC = 4;
10172       NumEltsD = 8;
10173       break;
10174     case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
10175       IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
10176       NumEltsC = 8;
10177       NumEltsD = 4;
10178       break;
10179     case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
10180       IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
10181       NumEltsC = 8;
10182       NumEltsD = 8;
10183       break;
10184     default:
10185       llvm_unreachable("Unexpected builtin ID.");
10186     }
10187 #undef MMA_VARIANTS
10188
10189     SmallVector<Value *, 24> Values;
10190     Function *Intrinsic = CGM.getIntrinsic(IID);
10191     llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
10192     // Load A
10193     for (unsigned i = 0; i < 8; ++i) {
10194       Value *V = Builder.CreateAlignedLoad(
10195           Builder.CreateGEP(SrcA.getPointer(),
10196                             llvm::ConstantInt::get(IntTy, i)),
10197           CharUnits::fromQuantity(4));
10198       Values.push_back(Builder.CreateBitCast(V, ABType));
10199     }
10200     // Load B
10201     for (unsigned i = 0; i < 8; ++i) {
10202       Value *V = Builder.CreateAlignedLoad(
10203           Builder.CreateGEP(SrcB.getPointer(),
10204                             llvm::ConstantInt::get(IntTy, i)),
10205           CharUnits::fromQuantity(4));
10206       Values.push_back(Builder.CreateBitCast(V, ABType));
10207     }
10208     // Load C
10209     llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
10210     for (unsigned i = 0; i < NumEltsC; ++i) {
10211       Value *V = Builder.CreateAlignedLoad(
10212           Builder.CreateGEP(SrcC.getPointer(),
10213                             llvm::ConstantInt::get(IntTy, i)),
10214           CharUnits::fromQuantity(4));
10215       Values.push_back(Builder.CreateBitCast(V, CType));
10216     }
10217     Value *Result = Builder.CreateCall(Intrinsic, Values);
10218     llvm::Type *DType = Dst.getElementType();
10219     for (unsigned i = 0; i < NumEltsD; ++i)
10220       Builder.CreateAlignedStore(
10221           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
10222           Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
10223           CharUnits::fromQuantity(4));
10224     return Result;
10225   }
10226   default:
10227     return nullptr;
10228   }
10229 }
10230
10231 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
10232                                                    const CallExpr *E) {
10233   switch (BuiltinID) {
10234   case WebAssembly::BI__builtin_wasm_current_memory: {
10235     llvm::Type *ResultType = ConvertType(E->getType());
10236     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
10237     return Builder.CreateCall(Callee);
10238   }
10239   case WebAssembly::BI__builtin_wasm_grow_memory: {
10240     Value *X = EmitScalarExpr(E->getArg(0));
10241     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
10242     return Builder.CreateCall(Callee, X);
10243   }
10244   case WebAssembly::BI__builtin_wasm_throw: {
10245     Value *Tag = EmitScalarExpr(E->getArg(0));
10246     Value *Obj = EmitScalarExpr(E->getArg(1));
10247     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
10248     return Builder.CreateCall(Callee, {Tag, Obj});
10249   }
10250   case WebAssembly::BI__builtin_wasm_rethrow: {
10251     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
10252     return Builder.CreateCall(Callee);
10253   }
10254
10255   default:
10256     return nullptr;
10257   }
10258 }
10259
10260 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
10261                                                const CallExpr *E) {
10262   SmallVector<llvm::Value *, 4> Ops;
10263   Intrinsic::ID ID = Intrinsic::not_intrinsic;
10264
10265   switch (BuiltinID) {
10266   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
10267   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
10268     Address Dest = EmitPointerWithAlignment(E->getArg(2));
10269     unsigned Size;
10270     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
10271       Size = 512;
10272       ID = Intrinsic::hexagon_V6_vaddcarry;
10273     } else {
10274       Size = 1024;
10275       ID = Intrinsic::hexagon_V6_vaddcarry_128B;
10276     }
10277     Dest = Builder.CreateBitCast(Dest,
10278         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
10279     LoadInst *QLd = Builder.CreateLoad(Dest);
10280     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
10281     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10282     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
10283     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
10284                                               Vprd->getType()->getPointerTo(0));
10285     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
10286     return Builder.CreateExtractValue(Result, 0);
10287   }
10288   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
10289   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
10290     Address Dest = EmitPointerWithAlignment(E->getArg(2));
10291     unsigned Size;
10292     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
10293       Size = 512;
10294       ID = Intrinsic::hexagon_V6_vsubcarry;
10295     } else {
10296       Size = 1024;
10297       ID = Intrinsic::hexagon_V6_vsubcarry_128B;
10298     }
10299     Dest = Builder.CreateBitCast(Dest,
10300         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
10301     LoadInst *QLd = Builder.CreateLoad(Dest);
10302     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
10303     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10304     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
10305     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
10306                                               Vprd->getType()->getPointerTo(0));
10307     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
10308     return Builder.CreateExtractValue(Result, 0);
10309   }
10310   } // switch
10311
10312   return nullptr;
10313 }