1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
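// Illustrative note (not in the original source): for a library builtin
// declared without an asm label, the "+ 10" above skips the 10-character
// "__builtin_" prefix. For example, with BuiltinID == Builtin::BI__builtin_fabsf:
//
//   Context.BuiltinInfo.getName(BuiltinID)  -> "__builtin_fabsf"
//   Name                                    -> "fabsf"
//   GetOrCreateLLVMFunction("fabsf", Ty, D, /*ForVTable=*/false)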
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
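// Hedged sketch of the IR this helper produces (value names are illustrative,
// and which builtin reaches here depends on the caller). For a 32-bit operand
// and Kind == llvm::AtomicRMWInst::Add:
//
//   %old = atomicrmw add i32* %dst, i32 %val seq_cst
//
// EmitFromInt then converts %old back to the source-level type, so
// pointer-typed operands round-trip through ptrtoint/inttoptr.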
120
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
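// Hedged sketch: for Kind == Add and Op == llvm::Instruction::Add the emitted
// sequence is roughly
//
//   %old = atomicrmw add i32* %dst, i32 %val seq_cst
//   %new = add i32 %old, %val        ; the "post" value returned here
//
// and when Invert is set the result is additionally xor'ed with -1, which lets
// a nand-style builtin return ~(old & val).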
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
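// Hedged sketch of the two result shapes, assuming a 32-bit operand:
//
//   %pair = cmpxchg i32* %dst, i32 %cmp, i32 %new seq_cst seq_cst
//   ; ReturnBool == true  -> zext (extractvalue %pair, 1) to the call's type
//   ; ReturnBool == false -> extractvalue %pair, 0, converted via EmitFromInt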
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating value in order to extract the higher-order
304     // double, which we will be using to extract the sign from.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
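// Hedged example: for an IEEE double this amounts to
//
//   %bits = bitcast double %v to i64
//   %sign = icmp slt i64 %bits, 0
//
// The ppc_fp128 special case above first isolates the higher-order double
// (shifting the top half down on big-endian targets) before doing the same
// signed compare.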
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
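// Hedged usage sketch (the variable names are illustrative): checking an
// unsigned 32-bit addition for overflow might look like
//
//   llvm::Value *Carry = nullptr;
//   llvm::Value *Sum = EmitOverflowIntrinsic(
//       CGF, llvm::Intrinsic::uadd_with_overflow, X, Y, Carry);
//   // Sum is the i32 result; Carry is the i1 flag extracted from the
//   // {i32, i1} pair returned by @llvm.uadd.with.overflow.i32.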
341
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
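// Hedged sketch: the !range metadata constrains the call's i32 result to the
// half-open interval [low, high), e.g. emitRangedBuiltin(CGF, ID, 0, 1024)
// produces roughly
//
//   %r = call i32 @llvm.<name>(), !range !{i32 0, i32 1024}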
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
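// Worked example (illustrative): encompassing {32, unsigned} and {32, signed}
// needs a signed type wide enough for every u32 value, so the result is
// {Width = 33, Signed = true}; encompassing {16, unsigned} and {32, unsigned}
// is simply {Width = 32, Signed = false}.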
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct.
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474
475   // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477   // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
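// Hedged sketch of the non-constant path, assuming Type == 0 on a 64-bit
// target (so Min is false and ResType is i64):
//
//   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 true)
//
// where the trailing 'i1 true' is NullIsUnknown, matching GCC's treatment of
// null pointers noted above.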
481
482 // Many MSVC builtins are available on both x64 and ARM; to avoid repeating code, we
483 // handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485   _BitScanForward,
486   _BitScanReverse,
487   _InterlockedAnd,
488   _InterlockedDecrement,
489   _InterlockedExchange,
490   _InterlockedExchangeAdd,
491   _InterlockedExchangeSub,
492   _InterlockedIncrement,
493   _InterlockedOr,
494   _InterlockedXor,
495   _interlockedbittestandset,
496   __fastfail,
497 };
498
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
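  // Hedged behavioral sketch of the blocks built above: the bit-scan returns 0
  // and leaves *Index untouched when the input is zero, and otherwise stores
  // the bit position and returns 1, e.g.
  //
  //   unsigned long Index;
  //   unsigned char Found = _BitScanForward(&Index, 0x10); // 1, Index == 4
  //   Found = _BitScanForward(&Index, 0);                  // 0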
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
578
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
597
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676
677     llvm::Type *Type = Int8PtrTy;
678
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
881   case Builtin::BI_rotr8:
882   case Builtin::BI_rotr16:
883   case Builtin::BI_rotr:
884   case Builtin::BI_lrotr:
885   case Builtin::BI_rotr64: {
886     Value *Val = EmitScalarExpr(E->getArg(0));
887     Value *Shift = EmitScalarExpr(E->getArg(1));
888
889     llvm::Type *ArgType = Val->getType();
890     Shift = Builder.CreateIntCast(Shift, ArgType, false);
891     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894
895     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896     Shift = Builder.CreateAnd(Shift, Mask);
897     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898
899     Value *RightShifted = Builder.CreateLShr(Val, Shift);
900     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902
903     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905     return RValue::get(Result);
906   }
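  // Hedged sketch of the rotate-right lowering above, for a 32-bit value:
  //
  //   n &= 31;
  //   result = (n == 0) ? x : ((x >> n) | (x << (32 - n)));
  //
  // The explicit n == 0 select avoids the shift-by-bit-width that the
  // (32 - n) left shift would otherwise perform, which is undefined.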
907   case Builtin::BI_rotl8:
908   case Builtin::BI_rotl16:
909   case Builtin::BI_rotl:
910   case Builtin::BI_lrotl:
911   case Builtin::BI_rotl64: {
912     Value *Val = EmitScalarExpr(E->getArg(0));
913     Value *Shift = EmitScalarExpr(E->getArg(1));
914
915     llvm::Type *ArgType = Val->getType();
916     Shift = Builder.CreateIntCast(Shift, ArgType, false);
917     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920
921     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922     Shift = Builder.CreateAnd(Shift, Mask);
923     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924
925     Value *LeftShifted = Builder.CreateShl(Val, Shift);
926     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928
929     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931     return RValue::get(Result);
932   }
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
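  // Hedged example: above -O0, `if (__builtin_expect(err, 0))` becomes roughly
  //
  //   %expval = call i32 @llvm.expect.i32(i32 %err, i32 0)
  //
  // and later passes turn that hint into branch-weight metadata.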
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should be of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034
1035     return RValue::get(nullptr);
1036   }
1037
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
1109
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
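  // Hedged example: the second argument is a bit count, so
  // __builtin_alloca_with_align(n, 256) yields an i8 alloca whose alignment is
  // 256 / 8 == 32 bytes after the toCharUnitsFromBits conversion above.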
1229
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251
1252   case Builtin::BI__builtin_char_memchr:
1253     BuiltinID = Builtin::BI__builtin_memchr;
1254     break;
1255
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
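  // Hedged example of the fold above: __builtin___memcpy_chk(d, s, 16, 32)
  // becomes a plain 16-byte memcpy because the copy size (16) is known not to
  // exceed the destination size (32); otherwise the call breaks out of the
  // switch and is emitted through the generic library-call path later in this
  // function.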
1270
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
1333   case Builtin::BI__builtin_dwarf_cfa: {
1334     // The offset in bytes from the first argument to the CFA.
1335     //
1336     // Why on earth is this in the frontend?  Is there any reason at
1337     // all that the backend can't reasonably determine this while
1338     // lowering llvm.eh.dwarf.cfa()?
1339     //
1340     // TODO: If there's a satisfactory reason, add a target hook for
1341     // this instead of hard-coding 0, which is correct for most targets.
1342     int32_t Offset = 0;
1343
1344     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345     return RValue::get(Builder.CreateCall(F,
1346                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1347   }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
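    // Illustrative sketch for a hypothetical 32-bit target: the pointer is
    // converted to i32 by the ptrtoint below, then widened to i64 with sext
    // if the target requests sign extension, or zext otherwise.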
1422
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430
1431     // Otherwise, ask the codegen data what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
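    // Slot usage in this lowering: buf[0] receives the frame address and
    // buf[2] the saved stack pointer (both stored below); the remaining slot
    // is assumed to be filled in by the llvm.eh.sjlj.setjmp lowering itself.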
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1528
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
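    // Lowered to an atomic store of zero with release ordering; illustrative
    // IR for a 4-byte object:
    //   store atomic i32 0, i32* %ptr release, align 4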
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
1627
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
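    // Illustrative IR emitted for __sync_synchronize():
    //   fence seq_cst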
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
1639
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
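    // Illustrative lowering (assuming a 64-bit size_t):
    //   __c11_atomic_is_lock_free(4)  ->  call @__atomic_is_lock_free(i64 4, i8* null)
    //   __atomic_is_lock_free(4, p)   ->  call @__atomic_is_lock_free(i64 4, i8* %p)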
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
1666
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the argument type to determine whether this is a volatile
1669     // operation. The parameter type is always volatile.
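    // Whether or not the ordering is a compile-time constant, the operation
    // itself is an exchange that stores the i8 value 1, e.g. (illustrative):
    //   %old = atomicrmw xchg i8* %ptr, i8 1 acquire
    // and the builtin's result is %old compared against zero.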
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1698
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
1751
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
1808
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SynchronizationScope Scope;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       Scope = llvm::SingleThread;
1817     else
1818       Scope = llvm::CrossThread;
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1838                             Scope);
1839         break;
1840       }
1841       return RValue::get(nullptr);
1842     }
1843
1844     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1845     AcquireBB = createBasicBlock("acquire", CurFn);
1846     ReleaseBB = createBasicBlock("release", CurFn);
1847     AcqRelBB = createBasicBlock("acqrel", CurFn);
1848     SeqCstBB = createBasicBlock("seqcst", CurFn);
1849     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1850
1851     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1852     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1853
1854     Builder.SetInsertPoint(AcquireBB);
1855     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1856     Builder.CreateBr(ContBB);
1857     SI->addCase(Builder.getInt32(1), AcquireBB);
1858     SI->addCase(Builder.getInt32(2), AcquireBB);
1859
1860     Builder.SetInsertPoint(ReleaseBB);
1861     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1862     Builder.CreateBr(ContBB);
1863     SI->addCase(Builder.getInt32(3), ReleaseBB);
1864
1865     Builder.SetInsertPoint(AcqRelBB);
1866     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1867     Builder.CreateBr(ContBB);
1868     SI->addCase(Builder.getInt32(4), AcqRelBB);
1869
1870     Builder.SetInsertPoint(SeqCstBB);
1871     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1872     Builder.CreateBr(ContBB);
1873     SI->addCase(Builder.getInt32(5), SeqCstBB);
1874
1875     Builder.SetInsertPoint(ContBB);
1876     return RValue::get(nullptr);
1877   }
1878
1879     // Library functions with special handling.
1880   case Builtin::BIsqrt:
1881   case Builtin::BIsqrtf:
1882   case Builtin::BIsqrtl: {
1883     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1884     // in finite- or unsafe-math mode (the intrinsic has different semantics
1885     // for handling negative numbers compared to the library function, so
1886     // -fmath-errno=0 is not enough).
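    // Illustrative: with -ffast-math, a call to sqrtf(x) is emitted as
    //   %r = call float @llvm.sqrt.f32(float %x)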
1887     if (!FD->hasAttr<ConstAttr>())
1888       break;
1889     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1890           CGM.getCodeGenOpts().NoNaNsFPMath))
1891       break;
1892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1893     llvm::Type *ArgType = Arg0->getType();
1894     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1895     return RValue::get(Builder.CreateCall(F, Arg0));
1896   }
1897
1898   case Builtin::BI__builtin_pow:
1899   case Builtin::BI__builtin_powf:
1900   case Builtin::BI__builtin_powl:
1901   case Builtin::BIpow:
1902   case Builtin::BIpowf:
1903   case Builtin::BIpowl: {
1904     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1905     if (!FD->hasAttr<ConstAttr>())
1906       break;
1907     Value *Base = EmitScalarExpr(E->getArg(0));
1908     Value *Exponent = EmitScalarExpr(E->getArg(1));
1909     llvm::Type *ArgType = Base->getType();
1910     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1911     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1912   }
1913
1914   case Builtin::BIfma:
1915   case Builtin::BIfmaf:
1916   case Builtin::BIfmal:
1917   case Builtin::BI__builtin_fma:
1918   case Builtin::BI__builtin_fmaf:
1919   case Builtin::BI__builtin_fmal: {
1920     // Rewrite fma to intrinsic.
1921     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1922     llvm::Type *ArgType = FirstArg->getType();
1923     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1924     return RValue::get(
1925         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1926                                EmitScalarExpr(E->getArg(2))}));
1927   }
1928
1929   case Builtin::BI__builtin_signbit:
1930   case Builtin::BI__builtin_signbitf:
1931   case Builtin::BI__builtin_signbitl: {
1932     return RValue::get(
1933         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1934                            ConvertType(E->getType())));
1935   }
1936   case Builtin::BI__builtin_annotation: {
1937     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1938     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1939                                       AnnVal->getType());
1940
1941     // Get the annotation string, go through casts. Sema requires this to be a
1942     // non-wide string literal, potentially cast, so the cast<> is safe.
1943     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1944     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1945     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1946   }
1947   case Builtin::BI__builtin_addcb:
1948   case Builtin::BI__builtin_addcs:
1949   case Builtin::BI__builtin_addc:
1950   case Builtin::BI__builtin_addcl:
1951   case Builtin::BI__builtin_addcll:
1952   case Builtin::BI__builtin_subcb:
1953   case Builtin::BI__builtin_subcs:
1954   case Builtin::BI__builtin_subc:
1955   case Builtin::BI__builtin_subcl:
1956   case Builtin::BI__builtin_subcll: {
1957
1958     // We translate all of these builtins from expressions of the form:
1959     //   int x = ..., y = ..., carryin = ..., carryout, result;
1960     //   result = __builtin_addc(x, y, carryin, &carryout);
1961     //
1962     // to LLVM IR of the form:
1963     //
1964     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1965     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1966     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1967     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1968     //                                                       i32 %carryin)
1969     //   %result = extractvalue {i32, i1} %tmp2, 0
1970     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1971     //   %tmp3 = or i1 %carry1, %carry2
1972     //   %tmp4 = zext i1 %tmp3 to i32
1973     //   store i32 %tmp4, i32* %carryout
1974
1975     // Scalarize our inputs.
1976     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1977     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1978     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1979     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1980
1981     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1982     llvm::Intrinsic::ID IntrinsicId;
1983     switch (BuiltinID) {
1984     default: llvm_unreachable("Unknown multiprecision builtin id.");
1985     case Builtin::BI__builtin_addcb:
1986     case Builtin::BI__builtin_addcs:
1987     case Builtin::BI__builtin_addc:
1988     case Builtin::BI__builtin_addcl:
1989     case Builtin::BI__builtin_addcll:
1990       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_subcb:
1993     case Builtin::BI__builtin_subcs:
1994     case Builtin::BI__builtin_subc:
1995     case Builtin::BI__builtin_subcl:
1996     case Builtin::BI__builtin_subcll:
1997       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1998       break;
1999     }
2000
2001     // Construct our resulting LLVM IR expression.
2002     llvm::Value *Carry1;
2003     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2004                                               X, Y, Carry1);
2005     llvm::Value *Carry2;
2006     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2007                                               Sum1, Carryin, Carry2);
2008     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2009                                                X->getType());
2010     Builder.CreateStore(CarryOut, CarryOutPtr);
2011     return RValue::get(Sum2);
2012   }
2013
2014   case Builtin::BI__builtin_add_overflow:
2015   case Builtin::BI__builtin_sub_overflow:
2016   case Builtin::BI__builtin_mul_overflow: {
2017     const clang::Expr *LeftArg = E->getArg(0);
2018     const clang::Expr *RightArg = E->getArg(1);
2019     const clang::Expr *ResultArg = E->getArg(2);
2020
2021     clang::QualType ResultQTy =
2022         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2023
2024     WidthAndSignedness LeftInfo =
2025         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2026     WidthAndSignedness RightInfo =
2027         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2028     WidthAndSignedness ResultInfo =
2029         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2030     WidthAndSignedness EncompassingInfo =
2031         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
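    // Worked example (assuming 32-bit int/unsigned and that the encompassing
    // type uses the minimal width): for __builtin_add_overflow with an int and
    // an unsigned operand and an int* result, the encompassing type is a
    // 33-bit signed integer, so llvm.sadd.with.overflow.i33 is used and the
    // result is truncated back to i32 with the round-trip check below.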
2032
2033     llvm::Type *EncompassingLLVMTy =
2034         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2035
2036     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2037
2038     llvm::Intrinsic::ID IntrinsicId;
2039     switch (BuiltinID) {
2040     default:
2041       llvm_unreachable("Unknown overflow builtin id.");
2042     case Builtin::BI__builtin_add_overflow:
2043       IntrinsicId = EncompassingInfo.Signed
2044                         ? llvm::Intrinsic::sadd_with_overflow
2045                         : llvm::Intrinsic::uadd_with_overflow;
2046       break;
2047     case Builtin::BI__builtin_sub_overflow:
2048       IntrinsicId = EncompassingInfo.Signed
2049                         ? llvm::Intrinsic::ssub_with_overflow
2050                         : llvm::Intrinsic::usub_with_overflow;
2051       break;
2052     case Builtin::BI__builtin_mul_overflow:
2053       IntrinsicId = EncompassingInfo.Signed
2054                         ? llvm::Intrinsic::smul_with_overflow
2055                         : llvm::Intrinsic::umul_with_overflow;
2056       break;
2057     }
2058
2059     llvm::Value *Left = EmitScalarExpr(LeftArg);
2060     llvm::Value *Right = EmitScalarExpr(RightArg);
2061     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2062
2063     // Extend each operand to the encompassing type.
2064     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2065     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2066
2067     // Perform the operation on the extended values.
2068     llvm::Value *Overflow, *Result;
2069     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2070
2071     if (EncompassingInfo.Width > ResultInfo.Width) {
2072       // The encompassing type is wider than the result type, so we need to
2073       // truncate it.
2074       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2075
2076       // To see if the truncation caused an overflow, we will extend
2077       // the result and then compare it to the original result.
2078       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2079           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2080       llvm::Value *TruncationOverflow =
2081           Builder.CreateICmpNE(Result, ResultTruncExt);
2082
2083       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2084       Result = ResultTrunc;
2085     }
2086
2087     // Finally, store the result using the pointer.
2088     bool isVolatile =
2089       ResultArg->getType()->getPointeeType().isVolatileQualified();
2090     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2091
2092     return RValue::get(Overflow);
2093   }
2094
2095   case Builtin::BI__builtin_uadd_overflow:
2096   case Builtin::BI__builtin_uaddl_overflow:
2097   case Builtin::BI__builtin_uaddll_overflow:
2098   case Builtin::BI__builtin_usub_overflow:
2099   case Builtin::BI__builtin_usubl_overflow:
2100   case Builtin::BI__builtin_usubll_overflow:
2101   case Builtin::BI__builtin_umul_overflow:
2102   case Builtin::BI__builtin_umull_overflow:
2103   case Builtin::BI__builtin_umulll_overflow:
2104   case Builtin::BI__builtin_sadd_overflow:
2105   case Builtin::BI__builtin_saddl_overflow:
2106   case Builtin::BI__builtin_saddll_overflow:
2107   case Builtin::BI__builtin_ssub_overflow:
2108   case Builtin::BI__builtin_ssubl_overflow:
2109   case Builtin::BI__builtin_ssubll_overflow:
2110   case Builtin::BI__builtin_smul_overflow:
2111   case Builtin::BI__builtin_smull_overflow:
2112   case Builtin::BI__builtin_smulll_overflow: {
2113
2114     // We translate all of these builtins directly to the relevant llvm IR node.
2115
2116     // Scalarize our inputs.
2117     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2118     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2119     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2120
2121     // Decide which of the overflow intrinsics we are lowering to:
2122     llvm::Intrinsic::ID IntrinsicId;
2123     switch (BuiltinID) {
2124     default: llvm_unreachable("Unknown overflow builtin id.");
2125     case Builtin::BI__builtin_uadd_overflow:
2126     case Builtin::BI__builtin_uaddl_overflow:
2127     case Builtin::BI__builtin_uaddll_overflow:
2128       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2129       break;
2130     case Builtin::BI__builtin_usub_overflow:
2131     case Builtin::BI__builtin_usubl_overflow:
2132     case Builtin::BI__builtin_usubll_overflow:
2133       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2134       break;
2135     case Builtin::BI__builtin_umul_overflow:
2136     case Builtin::BI__builtin_umull_overflow:
2137     case Builtin::BI__builtin_umulll_overflow:
2138       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2139       break;
2140     case Builtin::BI__builtin_sadd_overflow:
2141     case Builtin::BI__builtin_saddl_overflow:
2142     case Builtin::BI__builtin_saddll_overflow:
2143       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2144       break;
2145     case Builtin::BI__builtin_ssub_overflow:
2146     case Builtin::BI__builtin_ssubl_overflow:
2147     case Builtin::BI__builtin_ssubll_overflow:
2148       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_smul_overflow:
2151     case Builtin::BI__builtin_smull_overflow:
2152     case Builtin::BI__builtin_smulll_overflow:
2153       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2154       break;
2155     }
2156
2157
2158     llvm::Value *Carry;
2159     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2160     Builder.CreateStore(Sum, SumOutPtr);
2161
2162     return RValue::get(Carry);
2163   }
2164   case Builtin::BI__builtin_addressof:
2165     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2166   case Builtin::BI__builtin_operator_new:
2167     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2168                                     E->getArg(0), false);
2169   case Builtin::BI__builtin_operator_delete:
2170     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2171                                     E->getArg(0), true);
2172   case Builtin::BI__noop:
2173     // __noop always evaluates to an integer literal zero.
2174     return RValue::get(ConstantInt::get(IntTy, 0));
2175   case Builtin::BI__builtin_call_with_static_chain: {
2176     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2177     const Expr *Chain = E->getArg(1);
2178     return EmitCall(Call->getCallee()->getType(),
2179                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2180                     EmitScalarExpr(Chain));
2181   }
2182   case Builtin::BI_InterlockedExchange8:
2183   case Builtin::BI_InterlockedExchange16:
2184   case Builtin::BI_InterlockedExchange:
2185   case Builtin::BI_InterlockedExchangePointer:
2186     return RValue::get(
2187         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2188   case Builtin::BI_InterlockedCompareExchangePointer: {
2189     llvm::Type *RTy;
2190     llvm::IntegerType *IntType =
2191       IntegerType::get(getLLVMContext(),
2192                        getContext().getTypeSize(E->getType()));
2193     llvm::Type *IntPtrType = IntType->getPointerTo();
2194
2195     llvm::Value *Destination =
2196       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2197
2198     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2199     RTy = Exchange->getType();
2200     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2201
2202     llvm::Value *Comparand =
2203       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2204
2205     auto Result =
2206         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2207                                     AtomicOrdering::SequentiallyConsistent,
2208                                     AtomicOrdering::SequentiallyConsistent);
2209     Result->setVolatile(true);
2210
2211     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2212                                                                          0),
2213                                               RTy));
2214   }
2215   case Builtin::BI_InterlockedCompareExchange8:
2216   case Builtin::BI_InterlockedCompareExchange16:
2217   case Builtin::BI_InterlockedCompareExchange:
2218   case Builtin::BI_InterlockedCompareExchange64: {
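    // MSVC's argument order is (Destination, Exchange, Comparand), so below
    // getArg(2) supplies the expected value and getArg(1) the replacement
    // value for the cmpxchg.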
2219     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2220         EmitScalarExpr(E->getArg(0)),
2221         EmitScalarExpr(E->getArg(2)),
2222         EmitScalarExpr(E->getArg(1)),
2223         AtomicOrdering::SequentiallyConsistent,
2224         AtomicOrdering::SequentiallyConsistent);
2225     CXI->setVolatile(true);
2226     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2227   }
2228   case Builtin::BI_InterlockedIncrement16:
2229   case Builtin::BI_InterlockedIncrement:
2230     return RValue::get(
2231         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2232   case Builtin::BI_InterlockedDecrement16:
2233   case Builtin::BI_InterlockedDecrement:
2234     return RValue::get(
2235         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2236   case Builtin::BI_InterlockedAnd8:
2237   case Builtin::BI_InterlockedAnd16:
2238   case Builtin::BI_InterlockedAnd:
2239     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2240   case Builtin::BI_InterlockedExchangeAdd8:
2241   case Builtin::BI_InterlockedExchangeAdd16:
2242   case Builtin::BI_InterlockedExchangeAdd:
2243     return RValue::get(
2244         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2245   case Builtin::BI_InterlockedExchangeSub8:
2246   case Builtin::BI_InterlockedExchangeSub16:
2247   case Builtin::BI_InterlockedExchangeSub:
2248     return RValue::get(
2249         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2250   case Builtin::BI_InterlockedOr8:
2251   case Builtin::BI_InterlockedOr16:
2252   case Builtin::BI_InterlockedOr:
2253     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2254   case Builtin::BI_InterlockedXor8:
2255   case Builtin::BI_InterlockedXor16:
2256   case Builtin::BI_InterlockedXor:
2257     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2258   case Builtin::BI_interlockedbittestandset:
2259     return RValue::get(
2260         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2261
2262   case Builtin::BI__exception_code:
2263   case Builtin::BI_exception_code:
2264     return RValue::get(EmitSEHExceptionCode());
2265   case Builtin::BI__exception_info:
2266   case Builtin::BI_exception_info:
2267     return RValue::get(EmitSEHExceptionInfo());
2268   case Builtin::BI__abnormal_termination:
2269   case Builtin::BI_abnormal_termination:
2270     return RValue::get(EmitSEHAbnormalTermination());
2271   case Builtin::BI_setjmpex: {
2272     if (getTarget().getTriple().isOSMSVCRT()) {
2273       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2274       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2275           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2276           llvm::Attribute::ReturnsTwice);
2277       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2278           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2279           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2280       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2281           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2282       llvm::Value *FrameAddr =
2283           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2284                              ConstantInt::get(Int32Ty, 0));
2285       llvm::Value *Args[] = {Buf, FrameAddr};
2286       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2287       CS.setAttributes(ReturnsTwiceAttr);
2288       return RValue::get(CS.getInstruction());
2289     }
2290     break;
2291   }
2292   case Builtin::BI_setjmp: {
2293     if (getTarget().getTriple().isOSMSVCRT()) {
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2298           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2299       llvm::CallSite CS;
2300       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2301         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2302         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2303             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2304             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2305         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2306         llvm::Value *Args[] = {Buf, Count};
2307         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2308       } else {
2309         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2310         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2311             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2312             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2313         llvm::Value *FrameAddr =
2314             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2315                                ConstantInt::get(Int32Ty, 0));
2316         llvm::Value *Args[] = {Buf, FrameAddr};
2317         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2318       }
2319       CS.setAttributes(ReturnsTwiceAttr);
2320       return RValue::get(CS.getInstruction());
2321     }
2322     break;
2323   }
2324
2325   case Builtin::BI__GetExceptionInfo: {
2326     if (llvm::GlobalVariable *GV =
2327             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2328       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2329     break;
2330   }
2331
2332   case Builtin::BI__fastfail:
2333     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2334
2335   case Builtin::BI__builtin_coro_size: {
2336     auto &Context = getContext();
2337     auto SizeTy = Context.getSizeType();
2338     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2339     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2340     return RValue::get(Builder.CreateCall(F));
2341   }
2342
2343   case Builtin::BI__builtin_coro_id:
2344     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2345   case Builtin::BI__builtin_coro_promise:
2346     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2347   case Builtin::BI__builtin_coro_resume:
2348     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2349   case Builtin::BI__builtin_coro_frame:
2350     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2351   case Builtin::BI__builtin_coro_free:
2352     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2353   case Builtin::BI__builtin_coro_destroy:
2354     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2355   case Builtin::BI__builtin_coro_done:
2356     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2357   case Builtin::BI__builtin_coro_alloc:
2358     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2359   case Builtin::BI__builtin_coro_begin:
2360     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2361   case Builtin::BI__builtin_coro_end:
2362     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2363   case Builtin::BI__builtin_coro_suspend:
2364     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2365   case Builtin::BI__builtin_coro_param:
2366     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2367
2368   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2369   case Builtin::BIread_pipe:
2370   case Builtin::BIwrite_pipe: {
2371     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2372           *Arg1 = EmitScalarExpr(E->getArg(1));
2373     CGOpenCLRuntime OpenCLRT(CGM);
2374     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2375     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2376
2377     // Type of the generic packet parameter.
2378     unsigned GenericAS =
2379         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2380     llvm::Type *I8PTy = llvm::PointerType::get(
2381         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2382
2383     // Determine which overloaded version we should generate the call for.
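    // Illustrative: the two-argument form read_pipe(p, ptr) becomes a call to
    // __read_pipe_2(pipe, ptr, size, align), while the four-argument form
    // read_pipe(p, rid, index, ptr) becomes __read_pipe_4(...); write_pipe is
    // handled the same way via the __write_pipe_* helpers.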
2384     if (2U == E->getNumArgs()) {
2385       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2386                                                              : "__write_pipe_2";
2387       // Create a generic function type so the call works with any built-in or
2388       // user-defined type.
2389       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2390       llvm::FunctionType *FTy = llvm::FunctionType::get(
2391           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2393       return RValue::get(
2394           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                              {Arg0, BCast, PacketSize, PacketAlign}));
2396     } else {
2397       assert(4 == E->getNumArgs() &&
2398              "Illegal number of parameters to pipe function");
2399       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2400                                                              : "__write_pipe_4";
2401
2402       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2403                               Int32Ty, Int32Ty};
2404       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2405             *Arg3 = EmitScalarExpr(E->getArg(3));
2406       llvm::FunctionType *FTy = llvm::FunctionType::get(
2407           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2408       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2409       // We know the third argument is an integer type, but we may need to cast
2410       // it to i32.
2411       if (Arg2->getType() != Int32Ty)
2412         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2413       return RValue::get(Builder.CreateCall(
2414           CGM.CreateRuntimeFunction(FTy, Name),
2415           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2416     }
2417   }
2418   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2419   // functions
2420   case Builtin::BIreserve_read_pipe:
2421   case Builtin::BIreserve_write_pipe:
2422   case Builtin::BIwork_group_reserve_read_pipe:
2423   case Builtin::BIwork_group_reserve_write_pipe:
2424   case Builtin::BIsub_group_reserve_read_pipe:
2425   case Builtin::BIsub_group_reserve_write_pipe: {
2426     // Compose the name of the runtime library function to call.
2427     const char *Name;
2428     if (BuiltinID == Builtin::BIreserve_read_pipe)
2429       Name = "__reserve_read_pipe";
2430     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2431       Name = "__reserve_write_pipe";
2432     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2433       Name = "__work_group_reserve_read_pipe";
2434     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2435       Name = "__work_group_reserve_write_pipe";
2436     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2437       Name = "__sub_group_reserve_read_pipe";
2438     else
2439       Name = "__sub_group_reserve_write_pipe";
2440
2441     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2442           *Arg1 = EmitScalarExpr(E->getArg(1));
2443     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2444     CGOpenCLRuntime OpenCLRT(CGM);
2445     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2446     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2447
2448     // Building the generic function prototype.
2449     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2450     llvm::FunctionType *FTy = llvm::FunctionType::get(
2451         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2452     // We know the second argument is an integer type, but we may need to cast
2453     // it to i32.
2454     if (Arg1->getType() != Int32Ty)
2455       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2456     return RValue::get(
2457         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2458                            {Arg0, Arg1, PacketSize, PacketAlign}));
2459   }
2460   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2461   // functions
2462   case Builtin::BIcommit_read_pipe:
2463   case Builtin::BIcommit_write_pipe:
2464   case Builtin::BIwork_group_commit_read_pipe:
2465   case Builtin::BIwork_group_commit_write_pipe:
2466   case Builtin::BIsub_group_commit_read_pipe:
2467   case Builtin::BIsub_group_commit_write_pipe: {
2468     const char *Name;
2469     if (BuiltinID == Builtin::BIcommit_read_pipe)
2470       Name = "__commit_read_pipe";
2471     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2472       Name = "__commit_write_pipe";
2473     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2474       Name = "__work_group_commit_read_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2476       Name = "__work_group_commit_write_pipe";
2477     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2478       Name = "__sub_group_commit_read_pipe";
2479     else
2480       Name = "__sub_group_commit_write_pipe";
2481
2482     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2483           *Arg1 = EmitScalarExpr(E->getArg(1));
2484     CGOpenCLRuntime OpenCLRT(CGM);
2485     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2486     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2487
2488     // Building the generic function prototype.
2489     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2490     llvm::FunctionType *FTy =
2491         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2492                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2493
2494     return RValue::get(
2495         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2496                            {Arg0, Arg1, PacketSize, PacketAlign}));
2497   }
2498   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2499   case Builtin::BIget_pipe_num_packets:
2500   case Builtin::BIget_pipe_max_packets: {
2501     const char *Name;
2502     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2503       Name = "__get_pipe_num_packets";
2504     else
2505       Name = "__get_pipe_max_packets";
2506
2507     // Building the generic function prototype.
2508     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2509     CGOpenCLRuntime OpenCLRT(CGM);
2510     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2511     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2512     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2513     llvm::FunctionType *FTy = llvm::FunctionType::get(
2514         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2515
2516     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2517                                           {Arg0, PacketSize, PacketAlign}));
2518   }
2519
2520   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2521   case Builtin::BIto_global:
2522   case Builtin::BIto_local:
2523   case Builtin::BIto_private: {
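    // Lower these to calls to the __to_global/__to_local/__to_private runtime
    // helpers: cast the operand to a generic i8*, call the helper, then cast
    // the result back to the pointer type the expression expects.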
2524     auto Arg0 = EmitScalarExpr(E->getArg(0));
2525     auto NewArgT = llvm::PointerType::get(Int8Ty,
2526       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2527     auto NewRetT = llvm::PointerType::get(Int8Ty,
2528       CGM.getContext().getTargetAddressSpace(
2529         E->getType()->getPointeeType().getAddressSpace()));
2530     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2531     llvm::Value *NewArg;
2532     if (Arg0->getType()->getPointerAddressSpace() !=
2533         NewArgT->getPointerAddressSpace())
2534       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2535     else
2536       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2537     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2538     auto NewCall =
2539         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2540     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2541       ConvertType(E->getType())));
2542   }
2543
2544   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2545   // It contains four different overload formats specified in Table 6.13.17.1.
2546   case Builtin::BIenqueue_kernel: {
2547     StringRef Name; // Generated function call name
2548     unsigned NumArgs = E->getNumArgs();
2549
2550     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2551     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2552         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2553
2554     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2555     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2556     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2557     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2558     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2559
2560     if (NumArgs == 4) {
2561       // The most basic form of the call with parameters:
2562       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2563       Name = "__enqueue_kernel_basic";
2564       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2565       llvm::FunctionType *FTy = llvm::FunctionType::get(
2566           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2567
2568       llvm::Value *Block = Builder.CreatePointerCast(
2569           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2570
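      // Mark the ndrange argument as 'byval'. Parameter attribute indices are
      // 1-based here, so index 3 refers to the third parameter, i.e. the
      // ndrange_t 'Range' argument of the __enqueue_kernel_basic prototype.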
2571       AttrBuilder B;
2572       B.addAttribute(Attribute::ByVal);
2573       llvm::AttributeList ByValAttrSet =
2574           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2575
2576       auto RTCall =
2577           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2578                              {Queue, Flags, Range, Block});
2579       RTCall->setAttributes(ByValAttrSet);
2580       return RValue::get(RTCall);
2581     }
2582     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2583
2584     // Could have events and/or vaargs.
2585     if (E->getArg(3)->getType()->isBlockPointerType()) {
2586       // No events passed, but has variadic arguments.
2587       Name = "__enqueue_kernel_vaargs";
2588       llvm::Value *Block = Builder.CreatePointerCast(
2589           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2590       // Create a vector of the arguments, as well as a constant value to
2591       // express to the runtime the number of variadic arguments.
2592       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2593                                          ConstantInt::get(IntTy, NumArgs - 4)};
2594       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2595                                           GenericVoidPtrTy, IntTy};
2596
2597       // Each of the following arguments specifies the size of the corresponding
2598       // argument passed to the enqueued block.
2599       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2600         Args.push_back(
2601             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2602
2603       llvm::FunctionType *FTy = llvm::FunctionType::get(
2604           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2605       return RValue::get(
2606           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2607                              llvm::ArrayRef<llvm::Value *>(Args)));
2608     }
2609     // Any remaining overloads pass event arguments.
2610     if (NumArgs >= 7) {
2611       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2612       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2613           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2614
2615       llvm::Value *NumEvents =
2616           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2617       llvm::Value *EventList =
2618           E->getArg(4)->getType()->isArrayType()
2619               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2620               : EmitScalarExpr(E->getArg(4));
2621       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2622       // Convert to generic address space.
2623       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2624       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2625       llvm::Value *Block = Builder.CreatePointerCast(
2626           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2627
2628       std::vector<llvm::Type *> ArgTys = {
2629           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2630           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2631
2632       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2633                                          EventList, ClkEvent, Block};
2634
2635       if (NumArgs == 7) {
2636         // Has events but no variadics.
2637         Name = "__enqueue_kernel_basic_events";
2638         llvm::FunctionType *FTy = llvm::FunctionType::get(
2639             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2640         return RValue::get(
2641             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2642                                llvm::ArrayRef<llvm::Value *>(Args)));
2643       }
2644       // Has event info and variadics.
2645       // Pass the number of variadics to the runtime function too.
2646       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2647       ArgTys.push_back(Int32Ty);
2648       Name = "__enqueue_kernel_events_vaargs";
2649
2650       // Each of the following arguments specifies the size of the corresponding
2651       // argument passed to the enqueued block.
2652       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2653         Args.push_back(
2654             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2655
2656       llvm::FunctionType *FTy = llvm::FunctionType::get(
2657           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2658       return RValue::get(
2659           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2660                              llvm::ArrayRef<llvm::Value *>(Args)));
2661     }
2662     LLVM_FALLTHROUGH;
2663   }
2664   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2665   // parameter.
2666   case Builtin::BIget_kernel_work_group_size: {
2667     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2668         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2669     Value *Arg = EmitScalarExpr(E->getArg(0));
2670     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2671     return RValue::get(Builder.CreateCall(
2672         CGM.CreateRuntimeFunction(
2673             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2674             "__get_kernel_work_group_size_impl"),
2675         Arg));
2676   }
2677   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2678     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2679         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2680     Value *Arg = EmitScalarExpr(E->getArg(0));
2681     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2682     return RValue::get(Builder.CreateCall(
2683         CGM.CreateRuntimeFunction(
2684             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2685             "__get_kernel_preferred_work_group_multiple_impl"),
2686         Arg));
2687   }
2688   case Builtin::BIprintf:
2689     if (getTarget().getTriple().isNVPTX())
2690       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2691     break;
2692   case Builtin::BI__builtin_canonicalize:
2693   case Builtin::BI__builtin_canonicalizef:
2694   case Builtin::BI__builtin_canonicalizel:
2695     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2696
2697   case Builtin::BI__builtin_thread_pointer: {
2698     if (!getContext().getTargetInfo().isTLSSupported())
2699       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2700     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2701     break;
2702   }
2703   case Builtin::BI__builtin_os_log_format: {
2704     assert(E->getNumArgs() >= 2 &&
2705            "__builtin_os_log_format takes at least 2 arguments");
2706     analyze_os_log::OSLogBufferLayout Layout;
2707     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
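    // The emitted buffer starts with a summary byte and a byte holding the
    // number of arguments, followed by a descriptor byte, a size byte and the
    // payload for each item of the layout.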
2708     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2709     // Ignore argument 1, the format string. It is not currently used.
2710     CharUnits Offset;
2711     Builder.CreateStore(
2712         Builder.getInt8(Layout.getSummaryByte()),
2713         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2714     Builder.CreateStore(
2715         Builder.getInt8(Layout.getNumArgsByte()),
2716         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2717
2718     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2719     for (const auto &Item : Layout.Items) {
2720       Builder.CreateStore(
2721           Builder.getInt8(Item.getDescriptorByte()),
2722           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2723       Builder.CreateStore(
2724           Builder.getInt8(Item.getSizeByte()),
2725           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2726       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2727       if (const Expr *TheExpr = Item.getExpr()) {
2728         Addr = Builder.CreateElementBitCast(
2729             Addr, ConvertTypeForMem(TheExpr->getType()));
2730         // Check if this is a retainable type.
2731         if (TheExpr->getType()->isObjCRetainableType()) {
2732           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2733                  "Only scalar can be a ObjC retainable type");
2734           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2735           RValue RV = RValue::get(SV);
2736           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2737           EmitStoreThroughLValue(RV, LV);
2738           // Check if the object is constant; if not, save it in
2739           // RetainableOperands.
2740           if (!isa<Constant>(SV))
2741             RetainableOperands.push_back(SV);
2742         } else {
2743           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2744         }
2745       } else {
2746         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2747         Builder.CreateStore(
2748             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2749       }
2750       Offset += Item.size();
2751     }
2752
2753     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2754     // cleanup will cause the use to appear after the final log call, keeping
2755     // the object valid while it's held in the log buffer.  Note that if there's
2756     // a release cleanup on the object, it will already be active; since
2757     // cleanups are emitted in reverse order, the use will occur before the
2758     // object is released.
2759     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2760         CGM.getCodeGenOpts().OptimizationLevel != 0)
2761       for (llvm::Value *object : RetainableOperands)
2762         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2763
2764     return RValue::get(BufAddr.getPointer());
2765   }
2766
2767   case Builtin::BI__builtin_os_log_format_buffer_size: {
2768     analyze_os_log::OSLogBufferLayout Layout;
2769     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2770     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2771                                         Layout.size().getQuantity()));
2772   }
2773
2774   case Builtin::BI__xray_customevent: {
2775     if (!ShouldXRayInstrumentFunction())
2776       return RValue::getIgnored();
2777     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2778       if (XRayAttr->neverXRayInstrument())
2779         return RValue::getIgnored();
2780     }
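    // llvm.xray.customevent takes a pointer to the event data and its length;
    // coerce both operands to the intrinsic's declared parameter types below.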
2781     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2782     auto FTy = F->getFunctionType();
2783     auto Arg0 = E->getArg(0);
2784     auto Arg0Val = EmitScalarExpr(Arg0);
2785     auto Arg0Ty = Arg0->getType();
2786     auto PTy0 = FTy->getParamType(0);
2787     if (PTy0 != Arg0Val->getType()) {
2788       if (Arg0Ty->isArrayType())
2789         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2790       else
2791         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2792     }
2793     auto Arg1 = EmitScalarExpr(E->getArg(1));
2794     auto PTy1 = FTy->getParamType(1);
2795     if (PTy1 != Arg1->getType())
2796       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2797     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2798   }
2799   }
2800
2801   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2802   // the call using the normal call path, but using the unmangled
2803   // version of the function name.
2804   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2805     return emitLibraryCall(*this, FD, E,
2806                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2807
2808   // If this is a predefined lib function (e.g. malloc), emit the call
2809   // using exactly the normal call path.
2810   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2811     return emitLibraryCall(*this, FD, E,
2812                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2813
2814   // Check that a call to a target specific builtin has the correct target
2815   // features.
2816   // This check is done down here to avoid it for non-target-specific builtins;
2817   // however, if generic builtins start to require generic target features then
2818   // we can move this up to the beginning of the function.
2819   checkTargetFeatures(E, FD);
2820
2821   // See if we have a target specific intrinsic.
2822   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2823   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2824   StringRef Prefix =
2825       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2826   if (!Prefix.empty()) {
2827     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2828     // NOTE: we don't need to perform a compatibility flag check here since the
2829     // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
2830     // MS builtins via ALL_MS_LANGUAGES, so they have already been filtered out.
2831     if (IntrinsicID == Intrinsic::not_intrinsic)
2832       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2833   }
2834
2835   if (IntrinsicID != Intrinsic::not_intrinsic) {
2836     SmallVector<Value*, 16> Args;
2837
2838     // Find out if any arguments are required to be integer constant
2839     // expressions.
2840     unsigned ICEArguments = 0;
2841     ASTContext::GetBuiltinTypeError Error;
2842     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2843     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2844
2845     Function *F = CGM.getIntrinsic(IntrinsicID);
2846     llvm::FunctionType *FTy = F->getFunctionType();
2847
2848     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2849       Value *ArgValue;
2850       // If this is a normal argument, just emit it as a scalar.
2851       if ((ICEArguments & (1 << i)) == 0) {
2852         ArgValue = EmitScalarExpr(E->getArg(i));
2853       } else {
2854         // If this is required to be a constant, constant fold it so that we
2855         // know that the generated intrinsic gets a ConstantInt.
2856         llvm::APSInt Result;
2857         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2858         assert(IsConst && "Constant arg isn't actually constant?");
2859         (void)IsConst;
2860         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2861       }
2862
2863       // If the intrinsic arg type is different from the builtin arg type,
2864       // we need to do a bit cast.
2865       llvm::Type *PTy = FTy->getParamType(i);
2866       if (PTy != ArgValue->getType()) {
2867         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2868                "Must be able to losslessly bit cast to param");
2869         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2870       }
2871
2872       Args.push_back(ArgValue);
2873     }
2874
2875     Value *V = Builder.CreateCall(F, Args);
2876     QualType BuiltinRetType = E->getType();
2877
2878     llvm::Type *RetTy = VoidTy;
2879     if (!BuiltinRetType->isVoidType())
2880       RetTy = ConvertType(BuiltinRetType);
2881
2882     if (RetTy != V->getType()) {
2883       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2884              "Must be able to losslessly bit cast result type");
2885       V = Builder.CreateBitCast(V, RetTy);
2886     }
2887
2888     return RValue::get(V);
2889   }
2890
2891   // See if we have a target specific builtin that needs to be lowered.
2892   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2893     return RValue::get(V);
2894
2895   ErrorUnsupported(E, "builtin function");
2896
2897   // Unknown builtin; for now just dump it out and return undef.
2898   return GetUndefRValue(E->getType());
2899 }
2900
2901 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2902                                         unsigned BuiltinID, const CallExpr *E,
2903                                         llvm::Triple::ArchType Arch) {
2904   switch (Arch) {
2905   case llvm::Triple::arm:
2906   case llvm::Triple::armeb:
2907   case llvm::Triple::thumb:
2908   case llvm::Triple::thumbeb:
2909     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2910   case llvm::Triple::aarch64:
2911   case llvm::Triple::aarch64_be:
2912     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2913   case llvm::Triple::x86:
2914   case llvm::Triple::x86_64:
2915     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2916   case llvm::Triple::ppc:
2917   case llvm::Triple::ppc64:
2918   case llvm::Triple::ppc64le:
2919     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2920   case llvm::Triple::r600:
2921   case llvm::Triple::amdgcn:
2922     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2923   case llvm::Triple::systemz:
2924     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2925   case llvm::Triple::nvptx:
2926   case llvm::Triple::nvptx64:
2927     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2928   case llvm::Triple::wasm32:
2929   case llvm::Triple::wasm64:
2930     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2931   default:
2932     return nullptr;
2933   }
2934 }
2935
2936 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2937                                               const CallExpr *E) {
2938   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
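    // This builtin ID belongs to the auxiliary target (e.g. the host target in
    // a device compilation); map it back to the aux target's own builtin ID and
    // lower it against the aux triple.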
2939     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2940     return EmitTargetArchBuiltinExpr(
2941         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2942         getContext().getAuxTargetInfo()->getTriple().getArch());
2943   }
2944
2945   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2946                                    getTarget().getTriple().getArch());
2947 }
2948
2949 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2950                                      NeonTypeFlags TypeFlags,
2951                                      bool V1Ty=false) {
2952   int IsQuad = TypeFlags.isQuad();
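  // Quad ('q') variants are 128-bit vectors and hold twice as many elements as
  // the 64-bit forms; V1Ty requests a single-element vector, used when
  // emitting scalar intrinsics.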
2953   switch (TypeFlags.getEltType()) {
2954   case NeonTypeFlags::Int8:
2955   case NeonTypeFlags::Poly8:
2956     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2957   case NeonTypeFlags::Int16:
2958   case NeonTypeFlags::Poly16:
2959     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2960   case NeonTypeFlags::Float16:
2961     return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
2962   case NeonTypeFlags::Int32:
2963     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2964   case NeonTypeFlags::Int64:
2965   case NeonTypeFlags::Poly64:
2966     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2967   case NeonTypeFlags::Poly128:
2968     // FIXME: i128 and f128 don't get full support in Clang and LLVM;
2969     // a lot of the i128 and f128 API is missing.
2970     // So we use v16i8 to represent poly128 and let it get pattern matched.
2971     return llvm::VectorType::get(CGF->Int8Ty, 16);
2972   case NeonTypeFlags::Float32:
2973     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2974   case NeonTypeFlags::Float64:
2975     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2976   }
2977   llvm_unreachable("Unknown vector element type!");
2978 }
2979
2980 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2981                                           NeonTypeFlags IntTypeFlags) {
2982   int IsQuad = IntTypeFlags.isQuad();
2983   switch (IntTypeFlags.getEltType()) {
2984   case NeonTypeFlags::Int16:
2985     return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
2986   case NeonTypeFlags::Int32:
2987     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2988   case NeonTypeFlags::Int64:
2989     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2990   default:
2991     llvm_unreachable("Type can't be converted to floating-point!");
2992   }
2993 }
2994
2995 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2996   unsigned nElts = V->getType()->getVectorNumElements();
2997   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2998   return Builder.CreateShuffleVector(V, V, SV, "lane");
2999 }
3000
3001 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3002                                      const char *name,
3003                                      unsigned shift, bool rightshift) {
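  // Bitcast every operand to the type the intrinsic expects; the operand at
  // position 'shift' (when non-zero) is instead emitted as a constant splat
  // shift amount, negated if this is a right shift.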
3004   unsigned j = 0;
3005   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3006        ai != ae; ++ai, ++j)
3007     if (shift > 0 && shift == j)
3008       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3009     else
3010       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3011
3012   return Builder.CreateCall(F, Ops, name);
3013 }
3014
3015 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3016                                             bool neg) {
3017   int SV = cast<ConstantInt>(V)->getSExtValue();
3018   return ConstantInt::get(Ty, neg ? -SV : SV);
3019 }
3020
3021 // \brief Right-shift a vector by a constant.
3022 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3023                                           llvm::Type *Ty, bool usgn,
3024                                           const char *name) {
3025   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3026
3027   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3028   int EltSize = VTy->getScalarSizeInBits();
3029
3030   Vec = Builder.CreateBitCast(Vec, Ty);
3031
3032   // lshr/ashr are undefined when the shift amount is equal to the vector
3033   // element size.
3034   if (ShiftAmt == EltSize) {
3035     if (usgn) {
3036       // Right-shifting an unsigned value by its size yields 0.
3037       return llvm::ConstantAggregateZero::get(VTy);
3038     } else {
3039       // Right-shifting a signed value by its size is equivalent
3040       // to a shift of size-1.
3041       --ShiftAmt;
3042       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3043     }
3044   }
3045
3046   Shift = EmitNeonShiftVector(Shift, Ty, false);
3047   if (usgn)
3048     return Builder.CreateLShr(Vec, Shift, name);
3049   else
3050     return Builder.CreateAShr(Vec, Shift, name);
3051 }
3052
3053 enum {
3054   AddRetType = (1 << 0),
3055   Add1ArgType = (1 << 1),
3056   Add2ArgTypes = (1 << 2),
3057
3058   VectorizeRetType = (1 << 3),
3059   VectorizeArgTypes = (1 << 4),
3060
3061   InventFloatType = (1 << 5),
3062   UnsignedAlts = (1 << 6),
3063
3064   Use64BitVectors = (1 << 7),
3065   Use128BitVectors = (1 << 8),
3066
3067   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3068   VectorRet = AddRetType | VectorizeRetType,
3069   VectorRetGetArgs01 =
3070       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3071   FpCmpzModifiers =
3072       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3073 };
3074
3075 namespace {
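// One row of the NEON intrinsic mapping tables below: it ties a
// __builtin_neon_* builtin to an LLVM intrinsic, an optional alternate
// intrinsic, and a set of type-modifier flags. The operator< overloads let the
// tables be kept sorted and binary-searched by BuiltinID.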
3076 struct NeonIntrinsicInfo {
3077   const char *NameHint;
3078   unsigned BuiltinID;
3079   unsigned LLVMIntrinsic;
3080   unsigned AltLLVMIntrinsic;
3081   unsigned TypeModifier;
3082
3083   bool operator<(unsigned RHSBuiltinID) const {
3084     return BuiltinID < RHSBuiltinID;
3085   }
3086   bool operator<(const NeonIntrinsicInfo &TE) const {
3087     return BuiltinID < TE.BuiltinID;
3088   }
3089 };
3090 } // end anonymous namespace
3091
3092 #define NEONMAP0(NameBase) \
3093   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3094
3095 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3096   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3097       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3098
3099 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3100   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3101       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3102       TypeModifier }
3103
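// NEONMAP0 marks a builtin that is lowered entirely with custom code, NEONMAP1
// maps a builtin to a single LLVM intrinsic, and NEONMAP2 also records an
// alternate intrinsic (for example the signed counterpart of an unsigned
// operation, selected via the UnsignedAlts modifier).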
3104 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
3105   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3106   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3107   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3108   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3109   NEONMAP0(vaddhn_v),
3110   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3111   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3112   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3113   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3114   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3115   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3116   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3117   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3118   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3119   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3120   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3121   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3122   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3123   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3124   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3125   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3126   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3127   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3128   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3129   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3130   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3131   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3132   NEONMAP0(vcvt_f32_v),
3133   NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3134   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3135   NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3136   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3137   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3138   NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3139   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3140   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3141   NEONMAP0(vcvt_s16_v),
3142   NEONMAP0(vcvt_s32_v),
3143   NEONMAP0(vcvt_s64_v),
3144   NEONMAP0(vcvt_u16_v),
3145   NEONMAP0(vcvt_u32_v),
3146   NEONMAP0(vcvt_u64_v),
3147   NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
3148   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3149   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3150   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3151   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3152   NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
3153   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3154   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3155   NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
3156   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3157   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3158   NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
3159   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3160   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3161   NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
3162   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3163   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3164   NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
3165   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3166   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3167   NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
3168   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3169   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3170   NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
3171   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3172   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3173   NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
3174   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3175   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3176   NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
3177   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3178   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3179   NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
3180   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3181   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3182   NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
3183   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3184   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3185   NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
3186   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3187   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3188   NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
3189   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3190   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3191   NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
3192   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3193   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3194   NEONMAP0(vcvtq_f32_v),
3195   NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3196   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3197   NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
3198   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3199   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3200   NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
3201   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3202   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3203   NEONMAP0(vcvtq_s16_v),
3204   NEONMAP0(vcvtq_s32_v),
3205   NEONMAP0(vcvtq_s64_v),
3206   NEONMAP0(vcvtq_u16_v),
3207   NEONMAP0(vcvtq_u32_v),
3208   NEONMAP0(vcvtq_u64_v),
3209   NEONMAP0(vext_v),
3210   NEONMAP0(vextq_v),
3211   NEONMAP0(vfma_v),
3212   NEONMAP0(vfmaq_v),
3213   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3214   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3215   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3216   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3217   NEONMAP0(vld1_dup_v),
3218   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3219   NEONMAP0(vld1q_dup_v),
3220   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3221   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3222   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3223   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3224   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3225   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3226   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3227   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3228   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3229   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3230   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3231   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3232   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3233   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3234   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3235   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3236   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3237   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3238   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3239   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3240   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3241   NEONMAP0(vmovl_v),
3242   NEONMAP0(vmovn_v),
3243   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3244   NEONMAP0(vmull_v),
3245   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3246   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3247   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3248   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3249   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3250   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3251   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3252   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3253   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3254   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3255   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3256   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3257   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3258   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3259   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3260   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3261   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3262   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3263   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3264   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3265   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3266   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3267   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3268   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3269   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3270   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3271   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3272   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3273   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3274   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3275   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3276   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3277   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3278   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3279   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3280   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3281   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3282   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3283   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3284   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3285   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3286   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3287   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3288   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3289   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3290   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3291   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3292   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3293   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3294   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3295   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3296   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3297   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3298   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3299   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3300   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3301   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3302   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3303   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3304   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3305   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3306   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3307   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3308   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3309   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3310   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3311   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3312   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3313   NEONMAP0(vshl_n_v),
3314   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3315   NEONMAP0(vshll_n_v),
3316   NEONMAP0(vshlq_n_v),
3317   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3318   NEONMAP0(vshr_n_v),
3319   NEONMAP0(vshrn_n_v),
3320   NEONMAP0(vshrq_n_v),
3321   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3322   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3323   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3324   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3325   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3326   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3327   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3328   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3329   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3330   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3331   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3332   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3333   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3334   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3335   NEONMAP0(vsubhn_v),
3336   NEONMAP0(vtrn_v),
3337   NEONMAP0(vtrnq_v),
3338   NEONMAP0(vtst_v),
3339   NEONMAP0(vtstq_v),
3340   NEONMAP0(vuzp_v),
3341   NEONMAP0(vuzpq_v),
3342   NEONMAP0(vzip_v),
3343   NEONMAP0(vzipq_v)
3344 };
3345
3346 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3347   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3348   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3349   NEONMAP0(vaddhn_v),
3350   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3351   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3352   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3353   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3354   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3355   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3356   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3357   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3358   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3359   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3360   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3361   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3362   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3363   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3364   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3365   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3366   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3367   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3368   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3369   NEONMAP0(vcvt_f16_v),
3370   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3371   NEONMAP0(vcvt_f32_v),
3372   NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3373   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3374   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3375   NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3376   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3377   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3378   NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3379   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3380   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3381   NEONMAP0(vcvtq_f16_v),
3382   NEONMAP0(vcvtq_f32_v),
3383   NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3384   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3385   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3386   NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
3387   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3388   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3389   NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
3390   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3391   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3392   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3393   NEONMAP0(vext_v),
3394   NEONMAP0(vextq_v),
3395   NEONMAP0(vfma_v),
3396   NEONMAP0(vfmaq_v),
3397   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3398   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3399   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3400   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3401   NEONMAP0(vmovl_v),
3402   NEONMAP0(vmovn_v),
3403   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3404   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3405   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3406   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3407   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3408   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3409   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3410   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3411   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3412   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3413   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3414   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3415   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3416   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3417   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3418   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3419   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3420   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3421   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3422   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3423   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3424   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3425   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3426   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3427   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3428   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3429   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3430   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3431   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3432   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3433   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3434   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3435   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3436   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3437   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3438   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3439   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3440   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3441   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3442   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3443   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3444   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3445   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3446   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3447   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3448   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3449   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3450   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3451   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3452   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3453   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3454   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3455   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3456   NEONMAP0(vshl_n_v),
3457   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3458   NEONMAP0(vshll_n_v),
3459   NEONMAP0(vshlq_n_v),
3460   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3461   NEONMAP0(vshr_n_v),
3462   NEONMAP0(vshrn_n_v),
3463   NEONMAP0(vshrq_n_v),
3464   NEONMAP0(vsubhn_v),
3465   NEONMAP0(vtst_v),
3466   NEONMAP0(vtstq_v),
3467 };
3468
3469 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3470   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3471   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3472   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3473   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3474   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3475   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3476   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3477   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3478   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3479   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3480   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3481   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3482   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3483   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3484   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3485   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3486   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3487   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3488   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3489   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3490   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3491   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3492   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3493   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3494   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3495   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3496   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3497   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3498   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3499   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3500   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3501   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3502   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3503   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3504   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3505   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3506   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3507   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3508   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3509   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3510   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3511   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3512   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3513   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3514   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3515   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3516   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3517   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3518   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3519   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3520   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3521   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3522   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3523   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3524   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3525   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3526   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3527   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3528   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3529   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3530   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3531   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3532   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3533   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3534   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3535   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3536   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3537   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3538   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3539   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3540   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3541   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3542   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3543   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3544   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3545   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3546   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3547   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3548   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3549   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3550   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3551   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3552   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3553   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3554   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3555   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3556   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3557   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3558   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3559   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3560   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3561   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3562   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3563   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3564   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3565   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3566   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3567   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3568   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3569   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3570   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3571   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3572   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3573   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3574   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3575   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3576   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3577   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3578   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3579   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3580   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3581   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3582   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3583   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3584   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3585   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3586   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3587   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3588   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3589   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3590   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3591   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3592   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3593   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3594   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3595   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3596   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3597   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3598   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3599   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3600   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3601   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3602   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3603   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3604   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3605   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3606   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3607   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3608   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3609   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3610   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3611   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3612   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3613   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3614   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3615   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3616   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3617   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3618   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3619   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3620   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3621   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3622   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3623   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3624   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3625   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3626   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3627   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3628   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3629   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3630   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3631   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3632   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3633   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3634   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3635   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3636   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3637   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3638   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3639   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3640   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3641   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3642   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3643   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3644   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3645   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3646   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3647   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3648   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3649   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3650   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3651   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3652   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3653   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3654   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3655   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3656   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3657   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3658   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3659   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3660   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3661   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3662 };
3663
3664 #undef NEONMAP0
3665 #undef NEONMAP1
3666 #undef NEONMAP2
3667
3668 static bool NEONSIMDIntrinsicsProvenSorted = false;
3669
3670 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3671 static bool AArch64SISDIntrinsicsProvenSorted = false;
3672
3673
3674 static const NeonIntrinsicInfo *
3675 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3676                        unsigned BuiltinID, bool &MapProvenSorted) {
3677
3678 #ifndef NDEBUG
3679   if (!MapProvenSorted) {
3680     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3681     MapProvenSorted = true;
3682   }
3683 #endif
3684
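       // The tables are kept sorted by BuiltinID (checked above in debug builds),
       // so a single binary search is enough to find the matching entry, if any.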
3685   const NeonIntrinsicInfo *Builtin =
3686       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3687
3688   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3689     return Builtin;
3690
3691   return nullptr;
3692 }
3693
3694 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3695                                                    unsigned Modifier,
3696                                                    llvm::Type *ArgType,
3697                                                    const CallExpr *E) {
3698   int VectorSize = 0;
3699   if (Modifier & Use64BitVectors)
3700     VectorSize = 64;
3701   else if (Modifier & Use128BitVectors)
3702     VectorSize = 128;
3703
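       // Tys collects the overloaded types that select the concrete LLVM
       // intrinsic: optionally the call's return type, then one or two copies of
       // ArgType, each widened to a 64- or 128-bit vector when a Vectorize*
       // modifier is present (e.g. a 16-bit element with Use64BitVectors becomes
       // <4 x i16>).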
3704   // Return type.
3705   SmallVector<llvm::Type *, 3> Tys;
3706   if (Modifier & AddRetType) {
3707     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3708     if (Modifier & VectorizeRetType)
3709       Ty = llvm::VectorType::get(
3710           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3711
3712     Tys.push_back(Ty);
3713   }
3714
3715   // Arguments.
3716   if (Modifier & VectorizeArgTypes) {
3717     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3718     ArgType = llvm::VectorType::get(ArgType, Elts);
3719   }
3720
3721   if (Modifier & (Add1ArgType | Add2ArgTypes))
3722     Tys.push_back(ArgType);
3723
3724   if (Modifier & Add2ArgTypes)
3725     Tys.push_back(ArgType);
3726
3727   if (Modifier & InventFloatType)
3728     Tys.push_back(FloatTy);
3729
3730   return CGM.getIntrinsic(IntrinsicID, Tys);
3731 }
3732
3733 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3734                                             const NeonIntrinsicInfo &SISDInfo,
3735                                             SmallVectorImpl<Value *> &Ops,
3736                                             const CallExpr *E) {
3737   unsigned BuiltinID = SISDInfo.BuiltinID;
3738   unsigned int Int = SISDInfo.LLVMIntrinsic;
3739   unsigned Modifier = SISDInfo.TypeModifier;
3740   const char *s = SISDInfo.NameHint;
3741
3742   switch (BuiltinID) {
3743   case NEON::BI__builtin_neon_vcled_s64:
3744   case NEON::BI__builtin_neon_vcled_u64:
3745   case NEON::BI__builtin_neon_vcles_f32:
3746   case NEON::BI__builtin_neon_vcled_f64:
3747   case NEON::BI__builtin_neon_vcltd_s64:
3748   case NEON::BI__builtin_neon_vcltd_u64:
3749   case NEON::BI__builtin_neon_vclts_f32:
3750   case NEON::BI__builtin_neon_vcltd_f64:
3751   case NEON::BI__builtin_neon_vcales_f32:
3752   case NEON::BI__builtin_neon_vcaled_f64:
3753   case NEON::BI__builtin_neon_vcalts_f32:
3754   case NEON::BI__builtin_neon_vcaltd_f64:
3755     // Only one direction of comparisons actually exists: cmle is really a cmge
3756     // with swapped operands. The table gives us the right intrinsic, but we
3757     // still need to do the swap.
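         // e.g. vcled_s64(a, b) is emitted as if it were vcged_s64(b, a).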
3758     std::swap(Ops[0], Ops[1]);
3759     break;
3760   }
3761
3762   assert(Int && "Generic code assumes a valid intrinsic");
3763
3764   // Determine the type(s) of this overloaded AArch64 intrinsic.
3765   const Expr *Arg = E->getArg(0);
3766   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3767   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3768
3769   int j = 0;
3770   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3771   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3772        ai != ae; ++ai, ++j) {
3773     llvm::Type *ArgTy = ai->getType();
3774     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3775              ArgTy->getPrimitiveSizeInBits())
3776       continue;
3777
3778     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3779     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3780     // it before inserting.
3781     Ops[j] =
3782         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3783     Ops[j] =
3784         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3785   }
3786
3787   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3788   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3789   if (ResultType->getPrimitiveSizeInBits() <
3790       Result->getType()->getPrimitiveSizeInBits())
3791     return CGF.Builder.CreateExtractElement(Result, C0);
3792
3793   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3794 }
3795
3796 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3797     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3798     const char *NameHint, unsigned Modifier, const CallExpr *E,
3799     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3800   // Get the last argument, which specifies the vector type.
3801   llvm::APSInt NeonTypeConst;
3802   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3803   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3804     return nullptr;
3805
3806   // Determine the type of this overloaded NEON intrinsic.
3807   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3808   bool Usgn = Type.isUnsigned();
3809   bool Quad = Type.isQuad();
3810
3811   llvm::VectorType *VTy = GetNeonType(this, Type);
3812   llvm::Type *Ty = VTy;
3813   if (!Ty)
3814     return nullptr;
3815
3816   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3817     return Builder.getInt32(addr.getAlignment().getQuantity());
3818   };
3819
3820   unsigned Int = LLVMIntrinsic;
3821   if ((Modifier & UnsignedAlts) && !Usgn)
3822     Int = AltLLVMIntrinsic;
3823
3824   switch (BuiltinID) {
3825   default: break;
3826   case NEON::BI__builtin_neon_vabs_v:
3827   case NEON::BI__builtin_neon_vabsq_v:
3828     if (VTy->getElementType()->isFloatingPointTy())
3829       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3830     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3831   case NEON::BI__builtin_neon_vaddhn_v: {
3832     llvm::VectorType *SrcTy =
3833         llvm::VectorType::getExtendedElementVectorType(VTy);
3834
3835     // %sum = add <4 x i32> %lhs, %rhs
3836     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3837     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3838     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3839
3840     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3841     Constant *ShiftAmt =
3842         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3843     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3844
3845     // %res = trunc <4 x i32> %high to <4 x i16>
3846     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3847   }
3848   case NEON::BI__builtin_neon_vcale_v:
3849   case NEON::BI__builtin_neon_vcaleq_v:
3850   case NEON::BI__builtin_neon_vcalt_v:
3851   case NEON::BI__builtin_neon_vcaltq_v:
3852     std::swap(Ops[0], Ops[1]);
3853     LLVM_FALLTHROUGH;
3854   case NEON::BI__builtin_neon_vcage_v:
3855   case NEON::BI__builtin_neon_vcageq_v:
3856   case NEON::BI__builtin_neon_vcagt_v:
3857   case NEON::BI__builtin_neon_vcagtq_v: {
3858     llvm::Type *Ty;
3859     switch (VTy->getScalarSizeInBits()) {
3860     default: llvm_unreachable("unexpected type");
3861     case 32:
3862       Ty = FloatTy;
3863       break;
3864     case 64:
3865       Ty = DoubleTy;
3866       break;
3867     case 16:
3868       Ty = HalfTy;
3869       break;
3870     }
3871     llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
3872     llvm::Type *Tys[] = { VTy, VecFlt };
3873     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3874     return EmitNeonCall(F, Ops, NameHint);
3875   }
3876   case NEON::BI__builtin_neon_vclz_v:
3877   case NEON::BI__builtin_neon_vclzq_v:
3878     // We generate a target-independent intrinsic, which needs a second argument
3879     // indicating whether or not clz of zero is undefined; on ARM it isn't.
3880     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3881     break;
3882   case NEON::BI__builtin_neon_vcvt_f32_v:
3883   case NEON::BI__builtin_neon_vcvtq_f32_v:
3884     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3885     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3886     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3887                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3888   case NEON::BI__builtin_neon_vcvt_f16_v:
3889   case NEON::BI__builtin_neon_vcvtq_f16_v:
3890     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3891     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad));
3892     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3893                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3894   case NEON::BI__builtin_neon_vcvt_n_f16_v:
3895   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3896   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3897   case NEON::BI__builtin_neon_vcvtq_n_f16_v:
3898   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3899   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3900     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3901     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3902     Function *F = CGM.getIntrinsic(Int, Tys);
3903     return EmitNeonCall(F, Ops, "vcvt_n");
3904   }
3905   case NEON::BI__builtin_neon_vcvt_n_s16_v:
3906   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3907   case NEON::BI__builtin_neon_vcvt_n_u16_v:
3908   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3909   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3910   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3911   case NEON::BI__builtin_neon_vcvtq_n_s16_v:
3912   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3913   case NEON::BI__builtin_neon_vcvtq_n_u16_v:
3914   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3915   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3916   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3917     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3918     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3919     return EmitNeonCall(F, Ops, "vcvt_n");
3920   }
3921   case NEON::BI__builtin_neon_vcvt_s32_v:
3922   case NEON::BI__builtin_neon_vcvt_u32_v:
3923   case NEON::BI__builtin_neon_vcvt_s64_v:
3924   case NEON::BI__builtin_neon_vcvt_u64_v:
3925   case NEON::BI__builtin_neon_vcvt_s16_v:
3926   case NEON::BI__builtin_neon_vcvt_u16_v:
3927   case NEON::BI__builtin_neon_vcvtq_s32_v:
3928   case NEON::BI__builtin_neon_vcvtq_u32_v:
3929   case NEON::BI__builtin_neon_vcvtq_s64_v:
3930   case NEON::BI__builtin_neon_vcvtq_u64_v:
3931   case NEON::BI__builtin_neon_vcvtq_s16_v:
3932   case NEON::BI__builtin_neon_vcvtq_u16_v: {
3933     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3934     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3935                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3936   }
3937   case NEON::BI__builtin_neon_vcvta_s16_v:
3938   case NEON::BI__builtin_neon_vcvta_s32_v:
3939   case NEON::BI__builtin_neon_vcvta_s64_v:
3940   case NEON::BI__builtin_neon_vcvta_u32_v:
3941   case NEON::BI__builtin_neon_vcvta_u64_v:
3942   case NEON::BI__builtin_neon_vcvtaq_s16_v:
3943   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3944   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3945   case NEON::BI__builtin_neon_vcvtaq_u16_v:
3946   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3947   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3948   case NEON::BI__builtin_neon_vcvtn_s16_v:
3949   case NEON::BI__builtin_neon_vcvtn_s32_v:
3950   case NEON::BI__builtin_neon_vcvtn_s64_v:
3951   case NEON::BI__builtin_neon_vcvtn_u16_v:
3952   case NEON::BI__builtin_neon_vcvtn_u32_v:
3953   case NEON::BI__builtin_neon_vcvtn_u64_v:
3954   case NEON::BI__builtin_neon_vcvtnq_s16_v:
3955   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3956   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3957   case NEON::BI__builtin_neon_vcvtnq_u16_v:
3958   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3959   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3960   case NEON::BI__builtin_neon_vcvtp_s16_v:
3961   case NEON::BI__builtin_neon_vcvtp_s32_v:
3962   case NEON::BI__builtin_neon_vcvtp_s64_v:
3963   case NEON::BI__builtin_neon_vcvtp_u16_v:
3964   case NEON::BI__builtin_neon_vcvtp_u32_v:
3965   case NEON::BI__builtin_neon_vcvtp_u64_v:
3966   case NEON::BI__builtin_neon_vcvtpq_s16_v:
3967   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3968   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3969   case NEON::BI__builtin_neon_vcvtpq_u16_v:
3970   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3971   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3972   case NEON::BI__builtin_neon_vcvtm_s16_v:
3973   case NEON::BI__builtin_neon_vcvtm_s32_v:
3974   case NEON::BI__builtin_neon_vcvtm_s64_v:
3975   case NEON::BI__builtin_neon_vcvtm_u16_v:
3976   case NEON::BI__builtin_neon_vcvtm_u32_v:
3977   case NEON::BI__builtin_neon_vcvtm_u64_v:
3978   case NEON::BI__builtin_neon_vcvtmq_s16_v:
3979   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3980   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3981   case NEON::BI__builtin_neon_vcvtmq_u16_v:
3982   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3983   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3984     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3985     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3986   }
3987   case NEON::BI__builtin_neon_vext_v:
3988   case NEON::BI__builtin_neon_vextq_v: {
3989     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3990     SmallVector<uint32_t, 16> Indices;
3991     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3992       Indices.push_back(i+CV);
3993
3994     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3995     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3996     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3997   }
3998   case NEON::BI__builtin_neon_vfma_v:
3999   case NEON::BI__builtin_neon_vfmaq_v: {
4000     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4001     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4002     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4003     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4004
4005     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
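         // e.g. vfma_f32(acc, a, b) becomes llvm.fma(a, b, acc).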
4006     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4007   }
4008   case NEON::BI__builtin_neon_vld1_v:
4009   case NEON::BI__builtin_neon_vld1q_v: {
4010     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4011     Ops.push_back(getAlignmentValue32(PtrOp0));
4012     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4013   }
4014   case NEON::BI__builtin_neon_vld2_v:
4015   case NEON::BI__builtin_neon_vld2q_v:
4016   case NEON::BI__builtin_neon_vld3_v:
4017   case NEON::BI__builtin_neon_vld3q_v:
4018   case NEON::BI__builtin_neon_vld4_v:
4019   case NEON::BI__builtin_neon_vld4q_v: {
4020     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4021     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4022     Value *Align = getAlignmentValue32(PtrOp1);
4023     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4024     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4025     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4026     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4027   }
4028   case NEON::BI__builtin_neon_vld1_dup_v:
4029   case NEON::BI__builtin_neon_vld1q_dup_v: {
4030     Value *V = UndefValue::get(Ty);
4031     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4032     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4033     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4034     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4035     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4036     return EmitNeonSplat(Ops[0], CI);
4037   }
4038   case NEON::BI__builtin_neon_vld2_lane_v:
4039   case NEON::BI__builtin_neon_vld2q_lane_v:
4040   case NEON::BI__builtin_neon_vld3_lane_v:
4041   case NEON::BI__builtin_neon_vld3q_lane_v:
4042   case NEON::BI__builtin_neon_vld4_lane_v:
4043   case NEON::BI__builtin_neon_vld4q_lane_v: {
4044     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4045     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4046     for (unsigned I = 2; I < Ops.size() - 1; ++I)
4047       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4048     Ops.push_back(getAlignmentValue32(PtrOp1));
4049     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4050     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4051     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4052     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4053   }
4054   case NEON::BI__builtin_neon_vmovl_v: {
4055     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4056     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4057     if (Usgn)
4058       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4059     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4060   }
4061   case NEON::BI__builtin_neon_vmovn_v: {
4062     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4063     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4064     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4065   }
4066   case NEON::BI__builtin_neon_vmull_v:
4067     // FIXME: the integer vmull operations could be emitted in terms of pure
4068     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4069     // hoisting the exts outside loops. Until global ISel comes along that can
4070     // see through such movement, this leads to bad CodeGen. So we need an
4071     // intrinsic for now.
4072     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4073     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4074     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4075   case NEON::BI__builtin_neon_vpadal_v:
4076   case NEON::BI__builtin_neon_vpadalq_v: {
4077     // The source operand type has twice as many elements of half the size.
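         // e.g. for a <4 x i16> result, the narrow source type is <8 x i8>.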
4078     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4079     llvm::Type *EltTy =
4080       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4081     llvm::Type *NarrowTy =
4082       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4083     llvm::Type *Tys[2] = { Ty, NarrowTy };
4084     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4085   }
4086   case NEON::BI__builtin_neon_vpaddl_v:
4087   case NEON::BI__builtin_neon_vpaddlq_v: {
4088     // The source operand type has twice as many elements of half the size.
4089     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4090     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4091     llvm::Type *NarrowTy =
4092       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4093     llvm::Type *Tys[2] = { Ty, NarrowTy };
4094     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4095   }
4096   case NEON::BI__builtin_neon_vqdmlal_v:
4097   case NEON::BI__builtin_neon_vqdmlsl_v: {
4098     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4099     Ops[1] =
4100         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4101     Ops.resize(2);
4102     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4103   }
4104   case NEON::BI__builtin_neon_vqshl_n_v:
4105   case NEON::BI__builtin_neon_vqshlq_n_v:
4106     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4107                         1, false);
4108   case NEON::BI__builtin_neon_vqshlu_n_v:
4109   case NEON::BI__builtin_neon_vqshluq_n_v:
4110     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4111                         1, false);
4112   case NEON::BI__builtin_neon_vrecpe_v:
4113   case NEON::BI__builtin_neon_vrecpeq_v:
4114   case NEON::BI__builtin_neon_vrsqrte_v:
4115   case NEON::BI__builtin_neon_vrsqrteq_v:
4116     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4117     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4118
4119   case NEON::BI__builtin_neon_vrshr_n_v:
4120   case NEON::BI__builtin_neon_vrshrq_n_v:
4121     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4122                         1, true);
4123   case NEON::BI__builtin_neon_vshl_n_v:
4124   case NEON::BI__builtin_neon_vshlq_n_v:
4125     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4126     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4127                              "vshl_n");
4128   case NEON::BI__builtin_neon_vshll_n_v: {
4129     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4130     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4131     if (Usgn)
4132       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4133     else
4134       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4135     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4136     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4137   }
4138   case NEON::BI__builtin_neon_vshrn_n_v: {
4139     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4140     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4141     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4142     if (Usgn)
4143       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4144     else
4145       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4146     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4147   }
4148   case NEON::BI__builtin_neon_vshr_n_v:
4149   case NEON::BI__builtin_neon_vshrq_n_v:
4150     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4151   case NEON::BI__builtin_neon_vst1_v:
4152   case NEON::BI__builtin_neon_vst1q_v:
4153   case NEON::BI__builtin_neon_vst2_v:
4154   case NEON::BI__builtin_neon_vst2q_v:
4155   case NEON::BI__builtin_neon_vst3_v:
4156   case NEON::BI__builtin_neon_vst3q_v:
4157   case NEON::BI__builtin_neon_vst4_v:
4158   case NEON::BI__builtin_neon_vst4q_v:
4159   case NEON::BI__builtin_neon_vst2_lane_v:
4160   case NEON::BI__builtin_neon_vst2q_lane_v:
4161   case NEON::BI__builtin_neon_vst3_lane_v:
4162   case NEON::BI__builtin_neon_vst3q_lane_v:
4163   case NEON::BI__builtin_neon_vst4_lane_v:
4164   case NEON::BI__builtin_neon_vst4q_lane_v: {
4165     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4166     Ops.push_back(getAlignmentValue32(PtrOp0));
4167     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4168   }
4169   case NEON::BI__builtin_neon_vsubhn_v: {
4170     llvm::VectorType *SrcTy =
4171         llvm::VectorType::getExtendedElementVectorType(VTy);
4172
4173     // %diff = sub <4 x i32> %lhs, %rhs
4174     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4175     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4176     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4177
4178     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4179     Constant *ShiftAmt =
4180         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4181     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4182
4183     // %res = trunc <4 x i32> %high to <4 x i16>
4184     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4185   }
4186   case NEON::BI__builtin_neon_vtrn_v:
4187   case NEON::BI__builtin_neon_vtrnq_v: {
4188     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4189     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4190     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4191     Value *SV = nullptr;
4192
4193     for (unsigned vi = 0; vi != 2; ++vi) {
4194       SmallVector<uint32_t, 16> Indices;
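           // e.g. with 4 elements: vi==0 selects lanes {0,4,2,6}, vi==1 selects {1,5,3,7}.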
4195       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4196         Indices.push_back(i+vi);
4197         Indices.push_back(i+e+vi);
4198       }
4199       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4200       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4201       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4202     }
4203     return SV;
4204   }
4205   case NEON::BI__builtin_neon_vtst_v:
4206   case NEON::BI__builtin_neon_vtstq_v: {
4207     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4208     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4209     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4210     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4211                                 ConstantAggregateZero::get(Ty));
4212     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4213   }
4214   case NEON::BI__builtin_neon_vuzp_v:
4215   case NEON::BI__builtin_neon_vuzpq_v: {
4216     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4217     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4218     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4219     Value *SV = nullptr;
4220
4221     for (unsigned vi = 0; vi != 2; ++vi) {
4222       SmallVector<uint32_t, 16> Indices;
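           // e.g. with 4 elements: vi==0 takes the even lanes {0,2,4,6}, vi==1 the odd lanes {1,3,5,7}.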
4223       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4224         Indices.push_back(2*i+vi);
4225
4226       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4227       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4228       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4229     }
4230     return SV;
4231   }
4232   case NEON::BI__builtin_neon_vzip_v:
4233   case NEON::BI__builtin_neon_vzipq_v: {
4234     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4235     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4236     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4237     Value *SV = nullptr;
4238
4239     for (unsigned vi = 0; vi != 2; ++vi) {
4240       SmallVector<uint32_t, 16> Indices;
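           // e.g. with 4 elements: vi==0 interleaves the low halves as {0,4,1,5}, vi==1 the high halves as {2,6,3,7}.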
4241       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4242         Indices.push_back((i + vi*e) >> 1);
4243         Indices.push_back(((i + vi*e) >> 1)+e);
4244       }
4245       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4246       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4247       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4248     }
4249     return SV;
4250   }
4251   }
4252
4253   assert(Int && "Expected valid intrinsic number");
4254
4255   // Determine the type(s) of this overloaded AArch64 intrinsic.
4256   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4257
4258   Value *Result = EmitNeonCall(F, Ops, NameHint);
4259   llvm::Type *ResultType = ConvertType(E->getType());
4260   // Cast the one-element vector result of the AArch64 intrinsic to the
4261   // scalar type expected by the builtin.
4262   return Builder.CreateBitCast(Result, ResultType, NameHint);
4263 }
4264
4265 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4266     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4267     const CmpInst::Predicate Ip, const Twine &Name) {
4268   llvm::Type *OTy = Op->getType();
4269
4270   // FIXME: this is utterly horrific. We should not be looking at previous
4271   // codegen context to find out what needs doing. Unfortunately TableGen
4272   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4273   // (etc).
4274   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4275     OTy = BI->getOperand(0)->getType();
4276
4277   Op = Builder.CreateBitCast(Op, OTy);
4278   if (OTy->getScalarType()->isFloatingPointTy()) {
4279     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4280   } else {
4281     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4282   }
4283   return Builder.CreateSExt(Op, Ty, Name);
4284 }
4285
4286 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4287                                  Value *ExtOp, Value *IndexOp,
4288                                  llvm::Type *ResTy, unsigned IntID,
4289                                  const char *Name) {
4290   SmallVector<Value *, 2> TblOps;
4291   if (ExtOp)
4292     TblOps.push_back(ExtOp);
4293
4294   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4295   SmallVector<uint32_t, 16> Indices;
4296   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4297   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4298     Indices.push_back(2*i);
4299     Indices.push_back(2*i+1);
4300   }
4301
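       // The identity shuffle above concatenates each pair of 64-bit table
       // registers into a single 128-bit lookup vector.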
4302   int PairPos = 0, End = Ops.size() - 1;
4303   while (PairPos < End) {
4304     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4305                                                      Ops[PairPos+1], Indices,
4306                                                      Name));
4307     PairPos += 2;
4308   }
4309
4310   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4311   // of the 128-bit lookup table with zero.
4312   if (PairPos == End) {
4313     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4314     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4315                                                      ZeroTbl, Indices, Name));
4316   }
4317
4318   Function *TblF;
4319   TblOps.push_back(IndexOp);
4320   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4321
4322   return CGF.EmitNeonCall(TblF, TblOps, Name);
4323 }
4324
4325 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4326   unsigned Value;
4327   switch (BuiltinID) {
4328   default:
4329     return nullptr;
4330   case ARM::BI__builtin_arm_nop:
4331     Value = 0;
4332     break;
4333   case ARM::BI__builtin_arm_yield:
4334   case ARM::BI__yield:
4335     Value = 1;
4336     break;
4337   case ARM::BI__builtin_arm_wfe:
4338   case ARM::BI__wfe:
4339     Value = 2;
4340     break;
4341   case ARM::BI__builtin_arm_wfi:
4342   case ARM::BI__wfi:
4343     Value = 3;
4344     break;
4345   case ARM::BI__builtin_arm_sev:
4346   case ARM::BI__sev:
4347     Value = 4;
4348     break;
4349   case ARM::BI__builtin_arm_sevl:
4350   case ARM::BI__sevl:
4351     Value = 5;
4352     break;
4353   }
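       // e.g. __builtin_arm_wfi lowers to a call to llvm.arm.hint with immediate 3.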
4354
4355   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4356                             llvm::ConstantInt::get(Int32Ty, Value));
4357 }
4358
4359 // Generates the IR for the read/write special register builtins.
4360 // ValueType is the type of the value that is to be written or read;
4361 // RegisterType is the type of the register being written to or read from.
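     // For example, a 32-bit register read lowers to a call to
     // llvm.read_register.i32 with the register name attached as an MDString
     // metadata operand.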
4362 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4363                                          const CallExpr *E,
4364                                          llvm::Type *RegisterType,
4365                                          llvm::Type *ValueType,
4366                                          bool IsRead,
4367                                          StringRef SysReg = "") {
4368   // The read and write register intrinsics only support 32- and 64-bit operations.
4369   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4370           && "Unsupported size for register.");
4371
4372   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4373   CodeGen::CodeGenModule &CGM = CGF.CGM;
4374   LLVMContext &Context = CGM.getLLVMContext();
4375
4376   if (SysReg.empty()) {
4377     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4378     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4379   }
4380
4381   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4382   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4383   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4384
4385   llvm::Type *Types[] = { RegisterType };
4386
4387   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4388   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4389             && "Can't fit 64-bit value in 32-bit register");
4390
4391   if (IsRead) {
4392     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4393     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4394
4395     if (MixedTypes)
4396       // Read into 64 bit register and then truncate result to 32 bit.
4397       return Builder.CreateTrunc(Call, ValueType);
4398
4399     if (ValueType->isPointerTy())
4400       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4401       return Builder.CreateIntToPtr(Call, ValueType);
4402
4403     return Call;
4404   }
4405
4406   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4407   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4408   if (MixedTypes) {
4409     // Extend 32 bit write value to 64 bit to pass to write.
4410     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4411     return Builder.CreateCall(F, { Metadata, ArgValue });
4412   }
4413
4414   if (ValueType->isPointerTy()) {
4415     // Have a VoidPtrTy ArgValue but the write intrinsic expects an i32/i64.
4416     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4417     return Builder.CreateCall(F, { Metadata, ArgValue });
4418   }
4419
4420   return Builder.CreateCall(F, { Metadata, ArgValue });
4421 }
4422
4423 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4424 /// argument that specifies the vector type.
4425 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4426   switch (BuiltinID) {
4427   default: break;
4428   case NEON::BI__builtin_neon_vget_lane_i8:
4429   case NEON::BI__builtin_neon_vget_lane_i16:
4430   case NEON::BI__builtin_neon_vget_lane_i32:
4431   case NEON::BI__builtin_neon_vget_lane_i64:
4432   case NEON::BI__builtin_neon_vget_lane_f32:
4433   case NEON::BI__builtin_neon_vgetq_lane_i8:
4434   case NEON::BI__builtin_neon_vgetq_lane_i16:
4435   case NEON::BI__builtin_neon_vgetq_lane_i32:
4436   case NEON::BI__builtin_neon_vgetq_lane_i64:
4437   case NEON::BI__builtin_neon_vgetq_lane_f32:
4438   case NEON::BI__builtin_neon_vset_lane_i8:
4439   case NEON::BI__builtin_neon_vset_lane_i16:
4440   case NEON::BI__builtin_neon_vset_lane_i32:
4441   case NEON::BI__builtin_neon_vset_lane_i64:
4442   case NEON::BI__builtin_neon_vset_lane_f32:
4443   case NEON::BI__builtin_neon_vsetq_lane_i8:
4444   case NEON::BI__builtin_neon_vsetq_lane_i16:
4445   case NEON::BI__builtin_neon_vsetq_lane_i32:
4446   case NEON::BI__builtin_neon_vsetq_lane_i64:
4447   case NEON::BI__builtin_neon_vsetq_lane_f32:
4448   case NEON::BI__builtin_neon_vsha1h_u32:
4449   case NEON::BI__builtin_neon_vsha1cq_u32:
4450   case NEON::BI__builtin_neon_vsha1pq_u32:
4451   case NEON::BI__builtin_neon_vsha1mq_u32:
4452   case ARM::BI_MoveToCoprocessor:
4453   case ARM::BI_MoveToCoprocessor2:
4454     return false;
4455   }
4456   return true;
4457 }
4458
4459 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4460                                            const CallExpr *E) {
4461   if (auto Hint = GetValueForARMHint(BuiltinID))
4462     return Hint;
4463
4464   if (BuiltinID == ARM::BI__emit) {
4465     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4466     llvm::FunctionType *FTy =
4467         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4468
4469     APSInt Value;
4470     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4471       llvm_unreachable("Sema will ensure that the parameter is constant");
4472
4473     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4474
4475     llvm::InlineAsm *Emit =
4476         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4477                                  /*SideEffects=*/true)
4478                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4479                                  /*SideEffects=*/true);
4480
4481     return Builder.CreateCall(Emit);
4482   }
4483
4484   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4485     Value *Option = EmitScalarExpr(E->getArg(0));
4486     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4487   }
4488
4489   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4490     Value *Address = EmitScalarExpr(E->getArg(0));
4491     Value *RW      = EmitScalarExpr(E->getArg(1));
4492     Value *IsData  = EmitScalarExpr(E->getArg(2));
4493
4494     // Locality is not supported on the ARM target.
4495     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4496
4497     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4498     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4499   }
4500
4501   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4502     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4503     return Builder.CreateCall(
4504         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4505   }
4506
4507   if (BuiltinID == ARM::BI__clear_cache) {
4508     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4509     const FunctionDecl *FD = E->getDirectCallee();
4510     Value *Ops[2];
4511     for (unsigned i = 0; i < 2; i++)
4512       Ops[i] = EmitScalarExpr(E->getArg(i));
4513     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4514     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4515     StringRef Name = FD->getName();
4516     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4517   }
4518
4519   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4520       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4521     Function *F;
4522
4523     switch (BuiltinID) {
4524     default: llvm_unreachable("unexpected builtin");
4525     case ARM::BI__builtin_arm_mcrr:
4526       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4527       break;
4528     case ARM::BI__builtin_arm_mcrr2:
4529       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4530       break;
4531     }
4532
4533     // The MCRR{2} instruction has 5 operands, but
4534     // the builtin takes only 4 because Rt and Rt2
4535     // are packed into a single unsigned 64-bit
4536     // integer; the LLVM intrinsic expects them as
4537     // two separate 32-bit values, so the 64-bit
4538     // argument is split below.
4539
4540     Value *Coproc = EmitScalarExpr(E->getArg(0));
4541     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4542     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4543     Value *CRm = EmitScalarExpr(E->getArg(3));
4544
4545     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4546     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4547     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4548     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4549
4550     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4551   }
4552
4553   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4554       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4555     Function *F;
4556
4557     switch (BuiltinID) {
4558     default: llvm_unreachable("unexpected builtin");
4559     case ARM::BI__builtin_arm_mrrc:
4560       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4561       break;
4562     case ARM::BI__builtin_arm_mrrc2:
4563       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4564       break;
4565     }
4566
4567     Value *Coproc = EmitScalarExpr(E->getArg(0));
4568     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4569     Value *CRm  = EmitScalarExpr(E->getArg(2));
4570     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4571
4572     // The intrinsic returns the result as two 32-bit integers,
4573     // which are recombined here into an unsigned 64-bit value.
4574
4575     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4576     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4577     Rt = Builder.CreateZExt(Rt, Int64Ty);
4578     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4579
4580     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4581     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4582     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4583
4584     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4585   }
4586
4587   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4588       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4589         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4590        getContext().getTypeSize(E->getType()) == 64) ||
4591       BuiltinID == ARM::BI__ldrexd) {
4592     Function *F;
4593
4594     switch (BuiltinID) {
4595     default: llvm_unreachable("unexpected builtin");
4596     case ARM::BI__builtin_arm_ldaex:
4597       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4598       break;
4599     case ARM::BI__builtin_arm_ldrexd:
4600     case ARM::BI__builtin_arm_ldrex:
4601     case ARM::BI__ldrexd:
4602       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4603       break;
4604     }
4605
4606     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4607     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4608                                     "ldrexd");
4609
4610     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4611     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4612     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4613     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4614
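         // Recombine the two 32-bit halves into the 64-bit result: (hi << 32) | lo.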
4615     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4616     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4617     Val = Builder.CreateOr(Val, Val1);
4618     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4619   }
4620
4621   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4622       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4623     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4624
4625     QualType Ty = E->getType();
4626     llvm::Type *RealResTy = ConvertType(Ty);
4627     llvm::Type *PtrTy = llvm::IntegerType::get(
4628         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4629     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4630
4631     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4632                                        ? Intrinsic::arm_ldaex
4633                                        : Intrinsic::arm_ldrex,
4634                                    PtrTy);
4635     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4636
4637     if (RealResTy->isPointerTy())
4638       return Builder.CreateIntToPtr(Val, RealResTy);
4639     else {
4640       llvm::Type *IntResTy = llvm::IntegerType::get(
4641           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4642       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4643       return Builder.CreateBitCast(Val, RealResTy);
4644     }
4645   }
4646
4647   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4648       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4649         BuiltinID == ARM::BI__builtin_arm_strex) &&
4650        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4651     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4652                                        ? Intrinsic::arm_stlexd
4653                                        : Intrinsic::arm_strexd);
4654     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4655
4656     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4657     Value *Val = EmitScalarExpr(E->getArg(0));
4658     Builder.CreateStore(Val, Tmp);
4659
4660     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4661     Val = Builder.CreateLoad(LdPtr);
4662
4663     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4664     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4665     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4666     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4667   }
4668
4669   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4670       BuiltinID == ARM::BI__builtin_arm_stlex) {
4671     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4672     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4673
4674     QualType Ty = E->getArg(0)->getType();
4675     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4676                                                  getContext().getTypeSize(Ty));
4677     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4678
4679     if (StoreVal->getType()->isPointerTy())
4680       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4681     else {
4682       llvm::Type *IntTy = llvm::IntegerType::get(
4683           getLLVMContext(),
4684           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4685       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4686       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4687     }
4688
4689     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4690                                        ? Intrinsic::arm_stlex
4691                                        : Intrinsic::arm_strex,
4692                                    StoreAddr->getType());
4693     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4694   }
4695
4696   switch (BuiltinID) {
4697   case ARM::BI__iso_volatile_load8:
4698   case ARM::BI__iso_volatile_load16:
4699   case ARM::BI__iso_volatile_load32:
4700   case ARM::BI__iso_volatile_load64: {
4701     Value *Ptr = EmitScalarExpr(E->getArg(0));
4702     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4703     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4704     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4705                                              LoadSize.getQuantity() * 8);
4706     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4707     llvm::LoadInst *Load =
4708       Builder.CreateAlignedLoad(Ptr, LoadSize);
4709     Load->setVolatile(true);
4710     return Load;
4711   }
4712   case ARM::BI__iso_volatile_store8:
4713   case ARM::BI__iso_volatile_store16:
4714   case ARM::BI__iso_volatile_store32:
4715   case ARM::BI__iso_volatile_store64: {
4716     Value *Ptr = EmitScalarExpr(E->getArg(0));
4717     Value *Value = EmitScalarExpr(E->getArg(1));
4718     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4719     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4720     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4721                                              StoreSize.getQuantity() * 8);
4722     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4723     llvm::StoreInst *Store =
4724       Builder.CreateAlignedStore(Value, Ptr,
4725                                  StoreSize);
4726     Store->setVolatile(true);
4727     return Store;
4728   }
4729   }
4730
4731   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4732     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4733     return Builder.CreateCall(F);
4734   }
4735
4736   // CRC32
4737   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4738   switch (BuiltinID) {
4739   case ARM::BI__builtin_arm_crc32b:
4740     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4741   case ARM::BI__builtin_arm_crc32cb:
4742     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4743   case ARM::BI__builtin_arm_crc32h:
4744     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4745   case ARM::BI__builtin_arm_crc32ch:
4746     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4747   case ARM::BI__builtin_arm_crc32w:
4748   case ARM::BI__builtin_arm_crc32d:
4749     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4750   case ARM::BI__builtin_arm_crc32cw:
4751   case ARM::BI__builtin_arm_crc32cd:
4752     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4753   }
4754
4755   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4756     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4757     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4758
4759     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4760     // intrinsics, hence we need different codegen for these cases.
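         // e.g. __builtin_arm_crc32d(a, b) becomes crc32w(crc32w(a, low32(b)), high32(b)).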
4761     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4762         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4763       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4764       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4765       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4766       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4767
4768       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4769       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4770       return Builder.CreateCall(F, {Res, Arg1b});
4771     } else {
4772       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4773
4774       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4775       return Builder.CreateCall(F, {Arg0, Arg1});
4776     }
4777   }
4778
4779   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4780       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4781       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4782       BuiltinID == ARM::BI__builtin_arm_wsr ||
4783       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4784       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4785
4786     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4787                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4788                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4789
4790     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4791                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4792
4793     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4794                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4795
4796     llvm::Type *ValueType;
4797     llvm::Type *RegisterType;
4798     if (IsPointerBuiltin) {
4799       ValueType = VoidPtrTy;
4800       RegisterType = Int32Ty;
4801     } else if (Is64Bit) {
4802       ValueType = RegisterType = Int64Ty;
4803     } else {
4804       ValueType = RegisterType = Int32Ty;
4805     }
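    // Hedged usage sketch: __builtin_arm_rsr("cp15:0:c13:c0:3") reads a
    // 32-bit system register, __builtin_arm_rsr64 a 64-bit one, and the
    // *_rsrp/*_wsrp forms move a pointer through a 32-bit register. The
    // register name string above is an assumption for illustration only.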
4806
4807     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4808   }
4809
4810   // Find out if any arguments are required to be integer constant
4811   // expressions.
4812   unsigned ICEArguments = 0;
4813   ASTContext::GetBuiltinTypeError Error;
4814   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4815   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4816
4817   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4818     return Builder.getInt32(addr.getAlignment().getQuantity());
4819   };
4820
4821   Address PtrOp0 = Address::invalid();
4822   Address PtrOp1 = Address::invalid();
4823   SmallVector<Value*, 4> Ops;
4824   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4825   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4826   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4827     if (i == 0) {
4828       switch (BuiltinID) {
4829       case NEON::BI__builtin_neon_vld1_v:
4830       case NEON::BI__builtin_neon_vld1q_v:
4831       case NEON::BI__builtin_neon_vld1q_lane_v:
4832       case NEON::BI__builtin_neon_vld1_lane_v:
4833       case NEON::BI__builtin_neon_vld1_dup_v:
4834       case NEON::BI__builtin_neon_vld1q_dup_v:
4835       case NEON::BI__builtin_neon_vst1_v:
4836       case NEON::BI__builtin_neon_vst1q_v:
4837       case NEON::BI__builtin_neon_vst1q_lane_v:
4838       case NEON::BI__builtin_neon_vst1_lane_v:
4839       case NEON::BI__builtin_neon_vst2_v:
4840       case NEON::BI__builtin_neon_vst2q_v:
4841       case NEON::BI__builtin_neon_vst2_lane_v:
4842       case NEON::BI__builtin_neon_vst2q_lane_v:
4843       case NEON::BI__builtin_neon_vst3_v:
4844       case NEON::BI__builtin_neon_vst3q_v:
4845       case NEON::BI__builtin_neon_vst3_lane_v:
4846       case NEON::BI__builtin_neon_vst3q_lane_v:
4847       case NEON::BI__builtin_neon_vst4_v:
4848       case NEON::BI__builtin_neon_vst4q_v:
4849       case NEON::BI__builtin_neon_vst4_lane_v:
4850       case NEON::BI__builtin_neon_vst4q_lane_v:
4851         // Get the alignment for the argument in addition to the value;
4852         // we'll use it later.
4853         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4854         Ops.push_back(PtrOp0.getPointer());
4855         continue;
4856       }
4857     }
4858     if (i == 1) {
4859       switch (BuiltinID) {
4860       case NEON::BI__builtin_neon_vld2_v:
4861       case NEON::BI__builtin_neon_vld2q_v:
4862       case NEON::BI__builtin_neon_vld3_v:
4863       case NEON::BI__builtin_neon_vld3q_v:
4864       case NEON::BI__builtin_neon_vld4_v:
4865       case NEON::BI__builtin_neon_vld4q_v:
4866       case NEON::BI__builtin_neon_vld2_lane_v:
4867       case NEON::BI__builtin_neon_vld2q_lane_v:
4868       case NEON::BI__builtin_neon_vld3_lane_v:
4869       case NEON::BI__builtin_neon_vld3q_lane_v:
4870       case NEON::BI__builtin_neon_vld4_lane_v:
4871       case NEON::BI__builtin_neon_vld4q_lane_v:
4872       case NEON::BI__builtin_neon_vld2_dup_v:
4873       case NEON::BI__builtin_neon_vld3_dup_v:
4874       case NEON::BI__builtin_neon_vld4_dup_v:
4875         // Get the alignment for the argument in addition to the value;
4876         // we'll use it later.
4877         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4878         Ops.push_back(PtrOp1.getPointer());
4879         continue;
4880       }
4881     }
4882
4883     if ((ICEArguments & (1 << i)) == 0) {
4884       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4885     } else {
4886       // If this is required to be a constant, constant fold it so that we know
4887       // that the generated intrinsic gets a ConstantInt.
4888       llvm::APSInt Result;
4889       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4890       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4891       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4892     }
4893   }
4894
4895   switch (BuiltinID) {
4896   default: break;
4897
4898   case NEON::BI__builtin_neon_vget_lane_i8:
4899   case NEON::BI__builtin_neon_vget_lane_i16:
4900   case NEON::BI__builtin_neon_vget_lane_i32:
4901   case NEON::BI__builtin_neon_vget_lane_i64:
4902   case NEON::BI__builtin_neon_vget_lane_f32:
4903   case NEON::BI__builtin_neon_vgetq_lane_i8:
4904   case NEON::BI__builtin_neon_vgetq_lane_i16:
4905   case NEON::BI__builtin_neon_vgetq_lane_i32:
4906   case NEON::BI__builtin_neon_vgetq_lane_i64:
4907   case NEON::BI__builtin_neon_vgetq_lane_f32:
4908     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4909
4910   case NEON::BI__builtin_neon_vset_lane_i8:
4911   case NEON::BI__builtin_neon_vset_lane_i16:
4912   case NEON::BI__builtin_neon_vset_lane_i32:
4913   case NEON::BI__builtin_neon_vset_lane_i64:
4914   case NEON::BI__builtin_neon_vset_lane_f32:
4915   case NEON::BI__builtin_neon_vsetq_lane_i8:
4916   case NEON::BI__builtin_neon_vsetq_lane_i16:
4917   case NEON::BI__builtin_neon_vsetq_lane_i32:
4918   case NEON::BI__builtin_neon_vsetq_lane_i64:
4919   case NEON::BI__builtin_neon_vsetq_lane_f32:
4920     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4921
4922   case NEON::BI__builtin_neon_vsha1h_u32:
4923     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4924                         "vsha1h");
4925   case NEON::BI__builtin_neon_vsha1cq_u32:
4926     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4927                         "vsha1c");
4928   case NEON::BI__builtin_neon_vsha1pq_u32:
4929     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4930                         "vsha1p");
4931   case NEON::BI__builtin_neon_vsha1mq_u32:
4932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4933                         "vsha1m");
4934
4935   // The ARM _MoveToCoprocessor builtins put the input register value as
4936   // the first argument, but the LLVM intrinsic expects it as the third one.
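  // Sketch of the reordering (parameter names are illustrative only):
  //   _MoveToCoprocessor(value, coproc, opc1, crn, crm, opc2)
  // becomes llvm.arm.mcr(coproc, opc1, value, crn, crm, opc2).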
4937   case ARM::BI_MoveToCoprocessor:
4938   case ARM::BI_MoveToCoprocessor2: {
4939     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4940                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4941     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4942                                   Ops[3], Ops[4], Ops[5]});
4943   }
4944   case ARM::BI_BitScanForward:
4945   case ARM::BI_BitScanForward64:
4946     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4947   case ARM::BI_BitScanReverse:
4948   case ARM::BI_BitScanReverse64:
4949     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4950
4951   case ARM::BI_InterlockedAnd64:
4952     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4953   case ARM::BI_InterlockedExchange64:
4954     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4955   case ARM::BI_InterlockedExchangeAdd64:
4956     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4957   case ARM::BI_InterlockedExchangeSub64:
4958     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4959   case ARM::BI_InterlockedOr64:
4960     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4961   case ARM::BI_InterlockedXor64:
4962     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4963   case ARM::BI_InterlockedDecrement64:
4964     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4965   case ARM::BI_InterlockedIncrement64:
4966     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4967   }
4968
4969   // Get the last argument, which specifies the vector type.
4970   assert(HasExtraArg);
4971   llvm::APSInt Result;
4972   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4973   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4974     return nullptr;
4975
4976   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4977       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4978     // Determine the overloaded type of this builtin.
4979     llvm::Type *Ty;
4980     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4981       Ty = FloatTy;
4982     else
4983       Ty = DoubleTy;
4984
4985     // Determine whether this is an unsigned conversion or not.
4986     bool usgn = Result.getZExtValue() == 1;
4987     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4988
4989     // Call the appropriate intrinsic.
4990     Function *F = CGM.getIntrinsic(Int, Ty);
4991     return Builder.CreateCall(F, Ops, "vcvtr");
4992   }
4993
4994   // Determine the type of this overloaded NEON intrinsic.
4995   NeonTypeFlags Type(Result.getZExtValue());
4996   bool usgn = Type.isUnsigned();
4997   bool rightShift = false;
4998
4999   llvm::VectorType *VTy = GetNeonType(this, Type);
5000   llvm::Type *Ty = VTy;
5001   if (!Ty)
5002     return nullptr;
5003
5004   // Many NEON builtins have identical semantics and uses in ARM and
5005   // AArch64. Emit these in a single function.
5006   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5007   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5008       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5009   if (Builtin)
5010     return EmitCommonNeonBuiltinExpr(
5011         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5012         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5013
5014   unsigned Int;
5015   switch (BuiltinID) {
5016   default: return nullptr;
5017   case NEON::BI__builtin_neon_vld1q_lane_v:
5018     // Handle 64-bit integer elements as a special case.  Use shuffles of
5019     // one-element vectors to avoid poor code for i64 in the backend.
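    // Rough example: for vld1q_lane_s64(ptr, vec, 1) the code below keeps
    // lane 0 of 'vec' as a <1 x i64>, loads *ptr as another <1 x i64> via
    // arm.neon.vld1, and shuffles the two back into a <2 x i64>.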
5020     if (VTy->getElementType()->isIntegerTy(64)) {
5021       // Extract the other lane.
5022       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5023       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5024       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5025       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5026       // Load the value as a one-element vector.
5027       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5028       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5029       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5030       Value *Align = getAlignmentValue32(PtrOp0);
5031       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5032       // Combine them.
5033       uint32_t Indices[] = {1 - Lane, Lane};
5034       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5035       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5036     }
5037     LLVM_FALLTHROUGH;
5038   case NEON::BI__builtin_neon_vld1_lane_v: {
5039     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5040     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5041     Value *Ld = Builder.CreateLoad(PtrOp0);
5042     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5043   }
5044   case NEON::BI__builtin_neon_vld2_dup_v:
5045   case NEON::BI__builtin_neon_vld3_dup_v:
5046   case NEON::BI__builtin_neon_vld4_dup_v: {
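    // Sketch of the general path below: for element sizes smaller than 64
    // bits, a vldN_dup is emitted as a vldNlane load into lane 0 of undef
    // vectors, and lane 0 is then splatted across every element of each
    // result vector.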
5047     // Handle 64-bit elements as a special case; there is no "dup" needed.
5048     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5049       switch (BuiltinID) {
5050       case NEON::BI__builtin_neon_vld2_dup_v:
5051         Int = Intrinsic::arm_neon_vld2;
5052         break;
5053       case NEON::BI__builtin_neon_vld3_dup_v:
5054         Int = Intrinsic::arm_neon_vld3;
5055         break;
5056       case NEON::BI__builtin_neon_vld4_dup_v:
5057         Int = Intrinsic::arm_neon_vld4;
5058         break;
5059       default: llvm_unreachable("unknown vld_dup intrinsic?");
5060       }
5061       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5062       Function *F = CGM.getIntrinsic(Int, Tys);
5063       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5064       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5065       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5066       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5067       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5068     }
5069     switch (BuiltinID) {
5070     case NEON::BI__builtin_neon_vld2_dup_v:
5071       Int = Intrinsic::arm_neon_vld2lane;
5072       break;
5073     case NEON::BI__builtin_neon_vld3_dup_v:
5074       Int = Intrinsic::arm_neon_vld3lane;
5075       break;
5076     case NEON::BI__builtin_neon_vld4_dup_v:
5077       Int = Intrinsic::arm_neon_vld4lane;
5078       break;
5079     default: llvm_unreachable("unknown vld_dup intrinsic?");
5080     }
5081     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5082     Function *F = CGM.getIntrinsic(Int, Tys);
5083     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5084
5085     SmallVector<Value*, 6> Args;
5086     Args.push_back(Ops[1]);
5087     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5088
5089     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5090     Args.push_back(CI);
5091     Args.push_back(getAlignmentValue32(PtrOp1));
5092
5093     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5094     // Splat lane 0 to all elements in each vector of the result.
5095     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5096       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5097       Value *Elt = Builder.CreateBitCast(Val, Ty);
5098       Elt = EmitNeonSplat(Elt, CI);
5099       Elt = Builder.CreateBitCast(Elt, Val->getType());
5100       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5101     }
5102     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5103     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5104     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5105   }
5106   case NEON::BI__builtin_neon_vqrshrn_n_v:
5107     Int =
5108       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5109     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5110                         1, true);
5111   case NEON::BI__builtin_neon_vqrshrun_n_v:
5112     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5113                         Ops, "vqrshrun_n", 1, true);
5114   case NEON::BI__builtin_neon_vqshrn_n_v:
5115     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5116     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5117                         1, true);
5118   case NEON::BI__builtin_neon_vqshrun_n_v:
5119     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5120                         Ops, "vqshrun_n", 1, true);
5121   case NEON::BI__builtin_neon_vrecpe_v:
5122   case NEON::BI__builtin_neon_vrecpeq_v:
5123     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5124                         Ops, "vrecpe");
5125   case NEON::BI__builtin_neon_vrshrn_n_v:
5126     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5127                         Ops, "vrshrn_n", 1, true);
5128   case NEON::BI__builtin_neon_vrsra_n_v:
5129   case NEON::BI__builtin_neon_vrsraq_n_v:
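    // In effect (illustrative): vrsra_n(a, b, #n) == a + rounding_shift_right(b, #n),
    // emitted via the vrshift intrinsics with a negated shift amount.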
5130     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5131     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5132     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5133     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5134     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5135     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5136   case NEON::BI__builtin_neon_vsri_n_v:
5137   case NEON::BI__builtin_neon_vsriq_n_v:
5138     rightShift = true;
5139     LLVM_FALLTHROUGH;
5140   case NEON::BI__builtin_neon_vsli_n_v:
5141   case NEON::BI__builtin_neon_vsliq_n_v:
5142     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5143     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5144                         Ops, "vsli_n");
5145   case NEON::BI__builtin_neon_vsra_n_v:
5146   case NEON::BI__builtin_neon_vsraq_n_v:
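    // In effect (illustrative): vsra_n(a, b, #n) == a + (b >> #n), where the
    // shift is arithmetic or logical depending on the element signedness.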
5147     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5148     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5149     return Builder.CreateAdd(Ops[0], Ops[1]);
5150   case NEON::BI__builtin_neon_vst1q_lane_v:
5151     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5152     // a one-element vector and avoid poor code for i64 in the backend.
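    // Rough example: vst1q_lane_s64(ptr, vec, lane) extracts the requested
    // lane as a <1 x i64> via a shuffle and stores it with arm.neon.vst1.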
5153     if (VTy->getElementType()->isIntegerTy(64)) {
5154       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5155       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5156       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5157       Ops[2] = getAlignmentValue32(PtrOp0);
5158       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5159       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5160                                                  Tys), Ops);
5161     }
5162     LLVM_FALLTHROUGH;
5163   case NEON::BI__builtin_neon_vst1_lane_v: {
5164     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5165     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5166     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5167     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5168     return St;
5169   }
5170   case NEON::BI__builtin_neon_vtbl1_v:
5171     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5172                         Ops, "vtbl1");
5173   case NEON::BI__builtin_neon_vtbl2_v:
5174     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5175                         Ops, "vtbl2");
5176   case NEON::BI__builtin_neon_vtbl3_v:
5177     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5178                         Ops, "vtbl3");
5179   case NEON::BI__builtin_neon_vtbl4_v:
5180     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5181                         Ops, "vtbl4");
5182   case NEON::BI__builtin_neon_vtbx1_v:
5183     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5184                         Ops, "vtbx1");
5185   case NEON::BI__builtin_neon_vtbx2_v:
5186     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5187                         Ops, "vtbx2");
5188   case NEON::BI__builtin_neon_vtbx3_v:
5189     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5190                         Ops, "vtbx3");
5191   case NEON::BI__builtin_neon_vtbx4_v:
5192     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5193                         Ops, "vtbx4");
5194   }
5195 }
5196
5197 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5198                                       const CallExpr *E,
5199                                       SmallVectorImpl<Value *> &Ops) {
5200   unsigned int Int = 0;
5201   const char *s = nullptr;
5202
5203   switch (BuiltinID) {
5204   default:
5205     return nullptr;
5206   case NEON::BI__builtin_neon_vtbl1_v:
5207   case NEON::BI__builtin_neon_vqtbl1_v:
5208   case NEON::BI__builtin_neon_vqtbl1q_v:
5209   case NEON::BI__builtin_neon_vtbl2_v:
5210   case NEON::BI__builtin_neon_vqtbl2_v:
5211   case NEON::BI__builtin_neon_vqtbl2q_v:
5212   case NEON::BI__builtin_neon_vtbl3_v:
5213   case NEON::BI__builtin_neon_vqtbl3_v:
5214   case NEON::BI__builtin_neon_vqtbl3q_v:
5215   case NEON::BI__builtin_neon_vtbl4_v:
5216   case NEON::BI__builtin_neon_vqtbl4_v:
5217   case NEON::BI__builtin_neon_vqtbl4q_v:
5218     break;
5219   case NEON::BI__builtin_neon_vtbx1_v:
5220   case NEON::BI__builtin_neon_vqtbx1_v:
5221   case NEON::BI__builtin_neon_vqtbx1q_v:
5222   case NEON::BI__builtin_neon_vtbx2_v:
5223   case NEON::BI__builtin_neon_vqtbx2_v:
5224   case NEON::BI__builtin_neon_vqtbx2q_v:
5225   case NEON::BI__builtin_neon_vtbx3_v:
5226   case NEON::BI__builtin_neon_vqtbx3_v:
5227   case NEON::BI__builtin_neon_vqtbx3q_v:
5228   case NEON::BI__builtin_neon_vtbx4_v:
5229   case NEON::BI__builtin_neon_vqtbx4_v:
5230   case NEON::BI__builtin_neon_vqtbx4q_v:
5231     break;
5232   }
5233
5234   assert(E->getNumArgs() >= 3);
5235
5236   // Get the last argument, which specifies the vector type.
5237   llvm::APSInt Result;
5238   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5239   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5240     return nullptr;
5241
5242   // Determine the type of this overloaded NEON intrinsic.
5243   NeonTypeFlags Type(Result.getZExtValue());
5244   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5245   if (!Ty)
5246     return nullptr;
5247
5248   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5249
5250   // AArch64 scalar builtins are not overloaded; they do not have an extra
5251   // argument that specifies the vector type, so handle each case separately.
5252   switch (BuiltinID) {
5253   case NEON::BI__builtin_neon_vtbl1_v: {
5254     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5255                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5256                               "vtbl1");
5257   }
5258   case NEON::BI__builtin_neon_vtbl2_v: {
5259     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5260                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5261                               "vtbl1");
5262   }
5263   case NEON::BI__builtin_neon_vtbl3_v: {
5264     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5265                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5266                               "vtbl2");
5267   }
5268   case NEON::BI__builtin_neon_vtbl4_v: {
5269     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5270                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5271                               "vtbl2");
5272   }
5273   case NEON::BI__builtin_neon_vtbx1_v: {
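    // vtbx1 on a single 64-bit table is emulated below: do a tbl1 lookup,
    // then for index bytes >= 8 (out of range for one 8-byte table) keep the
    // corresponding byte of the destination operand instead of the table
    // result.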
5274     Value *TblRes =
5275         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5276                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5277
5278     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5279     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5280     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5281
5282     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5283     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5284     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5285   }
5286   case NEON::BI__builtin_neon_vtbx2_v: {
5287     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5288                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5289                               "vtbx1");
5290   }
5291   case NEON::BI__builtin_neon_vtbx3_v: {
5292     Value *TblRes =
5293         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5294                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5295
5296     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5297     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5298                                            TwentyFourV);
5299     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5300
5301     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5302     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5303     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5304   }
5305   case NEON::BI__builtin_neon_vtbx4_v: {
5306     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5307                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5308                               "vtbx2");
5309   }
5310   case NEON::BI__builtin_neon_vqtbl1_v:
5311   case NEON::BI__builtin_neon_vqtbl1q_v:
5312     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5313   case NEON::BI__builtin_neon_vqtbl2_v:
5314   case NEON::BI__builtin_neon_vqtbl2q_v:
5315     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5316   case NEON::BI__builtin_neon_vqtbl3_v:
5317   case NEON::BI__builtin_neon_vqtbl3q_v:
5318     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5319   case NEON::BI__builtin_neon_vqtbl4_v:
5320   case NEON::BI__builtin_neon_vqtbl4q_v:
5321     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5322   case NEON::BI__builtin_neon_vqtbx1_v:
5323   case NEON::BI__builtin_neon_vqtbx1q_v:
5324     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5325   case NEON::BI__builtin_neon_vqtbx2_v:
5326   case NEON::BI__builtin_neon_vqtbx2q_v:
5327     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5328   case NEON::BI__builtin_neon_vqtbx3_v:
5329   case NEON::BI__builtin_neon_vqtbx3q_v:
5330     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5331   case NEON::BI__builtin_neon_vqtbx4_v:
5332   case NEON::BI__builtin_neon_vqtbx4q_v:
5333     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5334   }
5336
5337   if (!Int)
5338     return nullptr;
5339
5340   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5341   return CGF.EmitNeonCall(F, Ops, s);
5342 }
5343
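// Widen a scalar i16 into lane 0 of a <4 x i16> vector (the remaining lanes
// are undef) so it can be fed to NEON intrinsics that only accept vector
// operands; used by the scalar saturating-multiply builtins below.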
5344 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5345   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5346   Op = Builder.CreateBitCast(Op, Int16Ty);
5347   Value *V = UndefValue::get(VTy);
5348   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5349   Op = Builder.CreateInsertElement(V, Op, CI);
5350   return Op;
5351 }
5352
5353 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5354                                                const CallExpr *E) {
5355   unsigned HintID = static_cast<unsigned>(-1);
5356   switch (BuiltinID) {
5357   default: break;
5358   case AArch64::BI__builtin_arm_nop:
5359     HintID = 0;
5360     break;
5361   case AArch64::BI__builtin_arm_yield:
5362     HintID = 1;
5363     break;
5364   case AArch64::BI__builtin_arm_wfe:
5365     HintID = 2;
5366     break;
5367   case AArch64::BI__builtin_arm_wfi:
5368     HintID = 3;
5369     break;
5370   case AArch64::BI__builtin_arm_sev:
5371     HintID = 4;
5372     break;
5373   case AArch64::BI__builtin_arm_sevl:
5374     HintID = 5;
5375     break;
5376   }
5377
5378   if (HintID != static_cast<unsigned>(-1)) {
5379     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5380     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5381   }
5382
5383   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5384     Value *Address         = EmitScalarExpr(E->getArg(0));
5385     Value *RW              = EmitScalarExpr(E->getArg(1));
5386     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5387     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5388     Value *IsData          = EmitScalarExpr(E->getArg(4));
5389
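    // Worked example (illustrative): __builtin_arm_prefetch(p, /*rw=*/0,
    // /*level=*/1, /*retention=*/0 /*temporal*/, /*data=*/1) should become
    // llvm.prefetch(p, 0, /*locality=*/2, 1), since locality = 3 - level.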
5390     Value *Locality = nullptr;
5391     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5392       // Temporal fetch: convert the target cache level to an LLVM locality (3 - level).
5393       Locality = llvm::ConstantInt::get(Int32Ty,
5394         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5395     } else {
5396       // Streaming fetch.
5397       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5398     }
5399
5400     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5401     // PLDL3STRM or PLDL2STRM.
5402     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5403     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5404   }
5405
5406   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5407     assert((getContext().getTypeSize(E->getType()) == 32) &&
5408            "rbit of unusual size!");
5409     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5410     return Builder.CreateCall(
5411         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5412   }
5413   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5414     assert((getContext().getTypeSize(E->getType()) == 64) &&
5415            "rbit of unusual size!");
5416     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5417     return Builder.CreateCall(
5418         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5419   }
5420
5421   if (BuiltinID == AArch64::BI__clear_cache) {
5422     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5423     const FunctionDecl *FD = E->getDirectCallee();
5424     Value *Ops[2];
5425     for (unsigned i = 0; i < 2; i++)
5426       Ops[i] = EmitScalarExpr(E->getArg(i));
5427     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5428     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5429     StringRef Name = FD->getName();
5430     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5431   }
5432
5433   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5434       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5435       getContext().getTypeSize(E->getType()) == 128) {
5436     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5437                                        ? Intrinsic::aarch64_ldaxp
5438                                        : Intrinsic::aarch64_ldxp);
5439
5440     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5441     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5442                                     "ldxp");
5443
5444     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5445     Value *Val1 = Builder.CreateExtractValue(Val, 0);
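    // The two i64 halves of the {i64, i64} result are recombined into an
    // i128 below as (Val0 << 64) | Val1, i.e. element 1 of the pair forms
    // the high 64 bits of the reconstructed 128-bit value.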
5446     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5447     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5448     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5449
5450     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5451     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5452     Val = Builder.CreateOr(Val, Val1);
5453     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5454   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5455              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5456     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5457
5458     QualType Ty = E->getType();
5459     llvm::Type *RealResTy = ConvertType(Ty);
5460     llvm::Type *PtrTy = llvm::IntegerType::get(
5461         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5462     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5463
5464     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5465                                        ? Intrinsic::aarch64_ldaxr
5466                                        : Intrinsic::aarch64_ldxr,
5467                                    PtrTy);
5468     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5469
5470     if (RealResTy->isPointerTy())
5471       return Builder.CreateIntToPtr(Val, RealResTy);
5472
5473     llvm::Type *IntResTy = llvm::IntegerType::get(
5474         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5475     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5476     return Builder.CreateBitCast(Val, RealResTy);
5477   }
5478
5479   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5480        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5481       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5482     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5483                                        ? Intrinsic::aarch64_stlxp
5484                                        : Intrinsic::aarch64_stxp);
5485     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5486
5487     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5488     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5489
5490     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5491     llvm::Value *Val = Builder.CreateLoad(Tmp);
5492
5493     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5494     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5495     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5496                                          Int8PtrTy);
5497     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5498   }
5499
5500   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5501       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5502     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5503     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5504
5505     QualType Ty = E->getArg(0)->getType();
5506     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5507                                                  getContext().getTypeSize(Ty));
5508     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5509
5510     if (StoreVal->getType()->isPointerTy())
5511       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5512     else {
5513       llvm::Type *IntTy = llvm::IntegerType::get(
5514           getLLVMContext(),
5515           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5516       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5517       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5518     }
5519
5520     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5521                                        ? Intrinsic::aarch64_stlxr
5522                                        : Intrinsic::aarch64_stxr,
5523                                    StoreAddr->getType());
5524     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5525   }
5526
5527   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5528     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5529     return Builder.CreateCall(F);
5530   }
5531
5532   // CRC32
5533   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5534   switch (BuiltinID) {
5535   case AArch64::BI__builtin_arm_crc32b:
5536     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5537   case AArch64::BI__builtin_arm_crc32cb:
5538     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5539   case AArch64::BI__builtin_arm_crc32h:
5540     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5541   case AArch64::BI__builtin_arm_crc32ch:
5542     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5543   case AArch64::BI__builtin_arm_crc32w:
5544     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5545   case AArch64::BI__builtin_arm_crc32cw:
5546     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5547   case AArch64::BI__builtin_arm_crc32d:
5548     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5549   case AArch64::BI__builtin_arm_crc32cd:
5550     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5551   }
5552
5553   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5554     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5555     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5556     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5557
5558     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5559     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5560
5561     return Builder.CreateCall(F, {Arg0, Arg1});
5562   }
5563
5564   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5565       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5566       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5567       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5568       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5569       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5570
5571     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5572                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5573                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5574
5575     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5576                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5577
5578     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5579                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5580
5581     llvm::Type *ValueType;
5582     llvm::Type *RegisterType = Int64Ty;
5583     if (IsPointerBuiltin) {
5584       ValueType = VoidPtrTy;
5585     } else if (Is64Bit) {
5586       ValueType = Int64Ty;
5587     } else {
5588       ValueType = Int32Ty;
5589     }
5590
5591     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5592   }
5593
5594   // Find out if any arguments are required to be integer constant
5595   // expressions.
5596   unsigned ICEArguments = 0;
5597   ASTContext::GetBuiltinTypeError Error;
5598   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5599   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5600
5601   llvm::SmallVector<Value*, 4> Ops;
5602   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5603     if ((ICEArguments & (1 << i)) == 0) {
5604       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5605     } else {
5606       // If this is required to be a constant, constant fold it so that we know
5607       // that the generated intrinsic gets a ConstantInt.
5608       llvm::APSInt Result;
5609       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5610       assert(IsConst && "Constant arg isn't actually constant?");
5611       (void)IsConst;
5612       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5613     }
5614   }
5615
5616   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5617   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5618       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5619
5620   if (Builtin) {
5621     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5622     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5623     assert(Result && "SISD intrinsic should have been handled");
5624     return Result;
5625   }
5626
5627   llvm::APSInt Result;
5628   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5629   NeonTypeFlags Type(0);
5630   if (Arg->isIntegerConstantExpr(Result, getContext()))
5631     // Determine the type of this overloaded NEON intrinsic.
5632     Type = NeonTypeFlags(Result.getZExtValue());
5633
5634   bool usgn = Type.isUnsigned();
5635   bool quad = Type.isQuad();
5636
5637   // Handle non-overloaded intrinsics first.
5638   switch (BuiltinID) {
5639   default: break;
5640   case NEON::BI__builtin_neon_vldrq_p128: {
5641     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5642     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5643     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5644     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5645                                      CharUnits::fromQuantity(16));
5646   }
5647   case NEON::BI__builtin_neon_vstrq_p128: {
5648     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5649     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5650     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5651   }
5652   case NEON::BI__builtin_neon_vcvts_u32_f32:
5653   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5654     usgn = true;
5655     LLVM_FALLTHROUGH;
5656   case NEON::BI__builtin_neon_vcvts_s32_f32:
5657   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
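    // For example (illustrative): vcvts_s32_f32(x) lowers to
    // "fptosi float %x to i32", and the *_u* variants above use fptoui.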
5658     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5659     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5660     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5661     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5662     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5663     if (usgn)
5664       return Builder.CreateFPToUI(Ops[0], InTy);
5665     return Builder.CreateFPToSI(Ops[0], InTy);
5666   }
5667   case NEON::BI__builtin_neon_vcvts_f32_u32:
5668   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5669     usgn = true;
5670     LLVM_FALLTHROUGH;
5671   case NEON::BI__builtin_neon_vcvts_f32_s32:
5672   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5673     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5674     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5675     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5676     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5677     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5678     if (usgn)
5679       return Builder.CreateUIToFP(Ops[0], FTy);
5680     return Builder.CreateSIToFP(Ops[0], FTy);
5681   }
5682   case NEON::BI__builtin_neon_vpaddd_s64: {
5683     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5684     Value *Vec = EmitScalarExpr(E->getArg(0));
5685     // The vector is v2i64, so make sure it's bitcast to that.
5686     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5687     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5688     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5689     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5690     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5691     // Pairwise addition of a v2i64 into a scalar i64.
5692     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5693   }
5694   case NEON::BI__builtin_neon_vpaddd_f64: {
5695     llvm::Type *Ty =
5696       llvm::VectorType::get(DoubleTy, 2);
5697     Value *Vec = EmitScalarExpr(E->getArg(0));
5698     // The vector is v2f64, so make sure it's bitcast to that.
5699     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5700     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5701     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5702     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5703     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5704     // Pairwise addition of a v2f64 into a scalar f64.
5705     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5706   }
5707   case NEON::BI__builtin_neon_vpadds_f32: {
5708     llvm::Type *Ty =
5709       llvm::VectorType::get(FloatTy, 2);
5710     Value *Vec = EmitScalarExpr(E->getArg(0));
5711     // The vector is v2f32, so make sure it's bitcast to that.
5712     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5713     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5714     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5715     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5716     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5717     // Pairwise addition of a v2f32 into a scalar f32.
5718     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5719   }
5720   case NEON::BI__builtin_neon_vceqzd_s64:
5721   case NEON::BI__builtin_neon_vceqzd_f64:
5722   case NEON::BI__builtin_neon_vceqzs_f32:
5723     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5724     return EmitAArch64CompareBuiltinExpr(
5725         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5726         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5727   case NEON::BI__builtin_neon_vcgezd_s64:
5728   case NEON::BI__builtin_neon_vcgezd_f64:
5729   case NEON::BI__builtin_neon_vcgezs_f32:
5730     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5731     return EmitAArch64CompareBuiltinExpr(
5732         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5733         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5734   case NEON::BI__builtin_neon_vclezd_s64:
5735   case NEON::BI__builtin_neon_vclezd_f64:
5736   case NEON::BI__builtin_neon_vclezs_f32:
5737     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5738     return EmitAArch64CompareBuiltinExpr(
5739         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5740         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5741   case NEON::BI__builtin_neon_vcgtzd_s64:
5742   case NEON::BI__builtin_neon_vcgtzd_f64:
5743   case NEON::BI__builtin_neon_vcgtzs_f32:
5744     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5745     return EmitAArch64CompareBuiltinExpr(
5746         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5747         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5748   case NEON::BI__builtin_neon_vcltzd_s64:
5749   case NEON::BI__builtin_neon_vcltzd_f64:
5750   case NEON::BI__builtin_neon_vcltzs_f32:
5751     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5752     return EmitAArch64CompareBuiltinExpr(
5753         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5754         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5755
5756   case NEON::BI__builtin_neon_vceqzd_u64: {
5757     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5758     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5759     Ops[0] =
5760         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5761     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5762   }
5763   case NEON::BI__builtin_neon_vceqd_f64:
5764   case NEON::BI__builtin_neon_vcled_f64:
5765   case NEON::BI__builtin_neon_vcltd_f64:
5766   case NEON::BI__builtin_neon_vcged_f64:
5767   case NEON::BI__builtin_neon_vcgtd_f64: {
5768     llvm::CmpInst::Predicate P;
5769     switch (BuiltinID) {
5770     default: llvm_unreachable("missing builtin ID in switch!");
5771     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5772     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5773     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5774     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5775     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5776     }
5777     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5778     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5779     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5780     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5781     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5782   }
5783   case NEON::BI__builtin_neon_vceqs_f32:
5784   case NEON::BI__builtin_neon_vcles_f32:
5785   case NEON::BI__builtin_neon_vclts_f32:
5786   case NEON::BI__builtin_neon_vcges_f32:
5787   case NEON::BI__builtin_neon_vcgts_f32: {
5788     llvm::CmpInst::Predicate P;
5789     switch (BuiltinID) {
5790     default: llvm_unreachable("missing builtin ID in switch!");
5791     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5792     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5793     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5794     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5795     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5796     }
5797     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5798     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5799     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5800     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5801     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5802   }
5803   case NEON::BI__builtin_neon_vceqd_s64:
5804   case NEON::BI__builtin_neon_vceqd_u64:
5805   case NEON::BI__builtin_neon_vcgtd_s64:
5806   case NEON::BI__builtin_neon_vcgtd_u64:
5807   case NEON::BI__builtin_neon_vcltd_s64:
5808   case NEON::BI__builtin_neon_vcltd_u64:
5809   case NEON::BI__builtin_neon_vcged_u64:
5810   case NEON::BI__builtin_neon_vcged_s64:
5811   case NEON::BI__builtin_neon_vcled_u64:
5812   case NEON::BI__builtin_neon_vcled_s64: {
5813     llvm::CmpInst::Predicate P;
5814     switch (BuiltinID) {
5815     default: llvm_unreachable("missing builtin ID in switch!");
5816     case NEON::BI__builtin_neon_vceqd_s64:
5817     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5818     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5819     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5820     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5821     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5822     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5823     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5824     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5825     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5826     }
5827     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5828     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5829     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5830     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5831     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5832   }
5833   case NEON::BI__builtin_neon_vtstd_s64:
5834   case NEON::BI__builtin_neon_vtstd_u64: {
5835     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5836     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5837     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5838     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5839     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5840                                 llvm::Constant::getNullValue(Int64Ty));
5841     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5842   }
5843   case NEON::BI__builtin_neon_vset_lane_i8:
5844   case NEON::BI__builtin_neon_vset_lane_i16:
5845   case NEON::BI__builtin_neon_vset_lane_i32:
5846   case NEON::BI__builtin_neon_vset_lane_i64:
5847   case NEON::BI__builtin_neon_vset_lane_f32:
5848   case NEON::BI__builtin_neon_vsetq_lane_i8:
5849   case NEON::BI__builtin_neon_vsetq_lane_i16:
5850   case NEON::BI__builtin_neon_vsetq_lane_i32:
5851   case NEON::BI__builtin_neon_vsetq_lane_i64:
5852   case NEON::BI__builtin_neon_vsetq_lane_f32:
5853     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5854     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5855   case NEON::BI__builtin_neon_vset_lane_f64:
5856     // The vector type needs a cast for the v1f64 variant.
5857     Ops[1] = Builder.CreateBitCast(Ops[1],
5858                                    llvm::VectorType::get(DoubleTy, 1));
5859     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5860     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5861   case NEON::BI__builtin_neon_vsetq_lane_f64:
5862     // The vector type needs a cast for the v2f64 variant.
5863     Ops[1] = Builder.CreateBitCast(Ops[1],
5864         llvm::VectorType::get(DoubleTy, 2));
5865     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5866     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5867
5868   case NEON::BI__builtin_neon_vget_lane_i8:
5869   case NEON::BI__builtin_neon_vdupb_lane_i8:
5870     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5871     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5872                                         "vget_lane");
5873   case NEON::BI__builtin_neon_vgetq_lane_i8:
5874   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5875     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5876     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5877                                         "vgetq_lane");
5878   case NEON::BI__builtin_neon_vget_lane_i16:
5879   case NEON::BI__builtin_neon_vduph_lane_i16:
5880     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5881     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5882                                         "vget_lane");
5883   case NEON::BI__builtin_neon_vgetq_lane_i16:
5884   case NEON::BI__builtin_neon_vduph_laneq_i16:
5885     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5886     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5887                                         "vgetq_lane");
5888   case NEON::BI__builtin_neon_vget_lane_i32:
5889   case NEON::BI__builtin_neon_vdups_lane_i32:
5890     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5891     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5892                                         "vget_lane");
5893   case NEON::BI__builtin_neon_vdups_lane_f32:
5894     Ops[0] = Builder.CreateBitCast(Ops[0],
5895         llvm::VectorType::get(FloatTy, 2));
5896     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5897                                         "vdups_lane");
5898   case NEON::BI__builtin_neon_vgetq_lane_i32:
5899   case NEON::BI__builtin_neon_vdups_laneq_i32:
5900     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5901     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5902                                         "vgetq_lane");
5903   case NEON::BI__builtin_neon_vget_lane_i64:
5904   case NEON::BI__builtin_neon_vdupd_lane_i64:
5905     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5906     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5907                                         "vget_lane");
5908   case NEON::BI__builtin_neon_vdupd_lane_f64:
5909     Ops[0] = Builder.CreateBitCast(Ops[0],
5910         llvm::VectorType::get(DoubleTy, 1));
5911     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5912                                         "vdupd_lane");
5913   case NEON::BI__builtin_neon_vgetq_lane_i64:
5914   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5915     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5916     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5917                                         "vgetq_lane");
5918   case NEON::BI__builtin_neon_vget_lane_f32:
5919     Ops[0] = Builder.CreateBitCast(Ops[0],
5920         llvm::VectorType::get(FloatTy, 2));
5921     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5922                                         "vget_lane");
5923   case NEON::BI__builtin_neon_vget_lane_f64:
5924     Ops[0] = Builder.CreateBitCast(Ops[0],
5925         llvm::VectorType::get(DoubleTy, 1));
5926     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5927                                         "vget_lane");
5928   case NEON::BI__builtin_neon_vgetq_lane_f32:
5929   case NEON::BI__builtin_neon_vdups_laneq_f32:
5930     Ops[0] = Builder.CreateBitCast(Ops[0],
5931         llvm::VectorType::get(FloatTy, 4));
5932     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5933                                         "vgetq_lane");
5934   case NEON::BI__builtin_neon_vgetq_lane_f64:
5935   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5936     Ops[0] = Builder.CreateBitCast(Ops[0],
5937         llvm::VectorType::get(DoubleTy, 2));
5938     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5939                                         "vgetq_lane");
5940   case NEON::BI__builtin_neon_vaddd_s64:
5941   case NEON::BI__builtin_neon_vaddd_u64:
5942     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5943   case NEON::BI__builtin_neon_vsubd_s64:
5944   case NEON::BI__builtin_neon_vsubd_u64:
5945     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5946   case NEON::BI__builtin_neon_vqdmlalh_s16:
5947   case NEON::BI__builtin_neon_vqdmlslh_s16: {
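    // Sketch of the emulation below: wrap the two scalar i16 operands into
    // <4 x i16> vectors, run sqdmull to get a <4 x i32>, extract lane 0, and
    // combine it with the accumulator (first argument) via a scalar
    // saturating add (vqdmlalh) or subtract (vqdmlslh).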
5948     SmallVector<Value *, 2> ProductOps;
5949     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5950     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5951     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5952     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5953                           ProductOps, "vqdmlXl");
5954     Constant *CI = ConstantInt::get(SizeTy, 0);
5955     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5956
5957     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5958                                         ? Intrinsic::aarch64_neon_sqadd
5959                                         : Intrinsic::aarch64_neon_sqsub;
5960     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5961   }
5962   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5963     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5964     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5965     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5966                         Ops, "vqshlu_n");
5967   }
5968   case NEON::BI__builtin_neon_vqshld_n_u64:
5969   case NEON::BI__builtin_neon_vqshld_n_s64: {
5970     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5971                                    ? Intrinsic::aarch64_neon_uqshl
5972                                    : Intrinsic::aarch64_neon_sqshl;
5973     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5974     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5975     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5976   }
5977   case NEON::BI__builtin_neon_vrshrd_n_u64:
5978   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5979     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5980                                    ? Intrinsic::aarch64_neon_urshl
5981                                    : Intrinsic::aarch64_neon_srshl;
5982     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5983     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5984     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5985     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5986   }
5987   case NEON::BI__builtin_neon_vrsrad_n_u64:
5988   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5989     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5990                                    ? Intrinsic::aarch64_neon_urshl
5991                                    : Intrinsic::aarch64_neon_srshl;
5992     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5993     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5994     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5995                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5996     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5997   }
5998   case NEON::BI__builtin_neon_vshld_n_s64:
5999   case NEON::BI__builtin_neon_vshld_n_u64: {
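           // A scalar shift-left by an immediate needs no intrinsic; e.g.
           // vshld_n_u64(x, 3) is emitted as a plain 'shl i64 %x, 3'.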
6000     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6001     return Builder.CreateShl(
6002         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6003   }
6004   case NEON::BI__builtin_neon_vshrd_n_s64: {
6005     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6006     return Builder.CreateAShr(
6007         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6008                                                    Amt->getZExtValue())),
6009         "shrd_n");
6010   }
6011   case NEON::BI__builtin_neon_vshrd_n_u64: {
6012     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6013     uint64_t ShiftAmt = Amt->getZExtValue();
6014     // Right-shifting an unsigned value by its size yields 0.
6015     if (ShiftAmt == 64)
6016       return ConstantInt::get(Int64Ty, 0);
6017     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6018                               "shrd_n");
6019   }
6020   case NEON::BI__builtin_neon_vsrad_n_s64: {
6021     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6022     Ops[1] = Builder.CreateAShr(
6023         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6024                                                    Amt->getZExtValue())),
6025         "shrd_n");
6026     return Builder.CreateAdd(Ops[0], Ops[1]);
6027   }
6028   case NEON::BI__builtin_neon_vsrad_n_u64: {
6029     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6030     uint64_t ShiftAmt = Amt->getZExtValue();
6031     // Right-shifting an unsigned value by its size yields 0.
6032     // As Op + 0 = Op, return Ops[0] directly.
6033     if (ShiftAmt == 64)
6034       return Ops[0];
6035     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6036                                 "shrd_n");
6037     return Builder.CreateAdd(Ops[0], Ops[1]);
6038   }
6039   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6040   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6041   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6042   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6043     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6044                                           "lane");
6045     SmallVector<Value *, 2> ProductOps;
6046     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6047     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6048     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6049     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6050                           ProductOps, "vqdmlXl");
6051     Constant *CI = ConstantInt::get(SizeTy, 0);
6052     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6053     Ops.pop_back();
6054
6055     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6056                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6057                           ? Intrinsic::aarch64_neon_sqadd
6058                           : Intrinsic::aarch64_neon_sqsub;
6059     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6060   }
6061   case NEON::BI__builtin_neon_vqdmlals_s32:
6062   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6063     SmallVector<Value *, 2> ProductOps;
6064     ProductOps.push_back(Ops[1]);
6065     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6066     Ops[1] =
6067         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6068                      ProductOps, "vqdmlXl");
6069
6070     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6071                                         ? Intrinsic::aarch64_neon_sqadd
6072                                         : Intrinsic::aarch64_neon_sqsub;
6073     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6074   }
6075   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6076   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6077   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6078   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6079     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6080                                           "lane");
6081     SmallVector<Value *, 2> ProductOps;
6082     ProductOps.push_back(Ops[1]);
6083     ProductOps.push_back(Ops[2]);
6084     Ops[1] =
6085         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6086                      ProductOps, "vqdmlXl");
6087     Ops.pop_back();
6088
6089     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6090                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6091                           ? Intrinsic::aarch64_neon_sqadd
6092                           : Intrinsic::aarch64_neon_sqsub;
6093     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6094   }
6095   }
6096
6097   llvm::VectorType *VTy = GetNeonType(this, Type);
6098   llvm::Type *Ty = VTy;
6099   if (!Ty)
6100     return nullptr;
6101
6102   // Not all intrinsics handled by the common code work for AArch64 yet, so only
6103   // defer to the common code for builtins that have been added to our special map.
6104   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6105                                    AArch64SIMDIntrinsicsProvenSorted);
6106
6107   if (Builtin)
6108     return EmitCommonNeonBuiltinExpr(
6109         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6110         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6111         /*never use addresses*/ Address::invalid(), Address::invalid());
6112
6113   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6114     return V;
6115
6116   unsigned Int;
6117   switch (BuiltinID) {
6118   default: return nullptr;
6119   case NEON::BI__builtin_neon_vbsl_v:
6120   case NEON::BI__builtin_neon_vbslq_v: {
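           // Bitwise select is open-coded on the integer vector type rather than
           // using a target intrinsic: roughly (mask & a) | (~mask & b), with the
           // result bitcast back to the original element type.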
6121     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6122     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6123     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6124     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6125
6126     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6127     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6128     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6129     return Builder.CreateBitCast(Ops[0], Ty);
6130   }
6131   case NEON::BI__builtin_neon_vfma_lane_v:
6132   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6133     // The ARM builtins (and instructions) have the addend as the first
6134     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6135     Value *Addend = Ops[0];
6136     Value *Multiplicand = Ops[1];
6137     Value *LaneSource = Ops[2];
6138     Ops[0] = Multiplicand;
6139     Ops[1] = LaneSource;
6140     Ops[2] = Addend;
6141
6142     // Now adjust things to handle the lane access.
6143     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6144       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6145       VTy;
6146     llvm::Constant *cst = cast<Constant>(Ops[3]);
6147     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6148     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6149     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6150
6151     Ops.pop_back();
6152     Int = Intrinsic::fma;
6153     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6154   }
6155   case NEON::BI__builtin_neon_vfma_laneq_v: {
6156     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6157     // v1f64 fma should be mapped to Neon scalar f64 fma
6158     if (VTy && VTy->getElementType() == DoubleTy) {
6159       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6160       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6161       llvm::Type *VTy = GetNeonType(this,
6162         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6163       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6164       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6165       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6166       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6167       return Builder.CreateBitCast(Result, Ty);
6168     }
6169     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6170     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6171     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6172
6173     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6174                                             VTy->getNumElements() * 2);
6175     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6176     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6177                                                cast<ConstantInt>(Ops[3]));
6178     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6179
6180     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6181   }
6182   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6183     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6184     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6185     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6186
6187     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6188     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6189     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6190   }
6191   case NEON::BI__builtin_neon_vfmah_lane_f16:
6192   case NEON::BI__builtin_neon_vfmas_lane_f32:
6193   case NEON::BI__builtin_neon_vfmah_laneq_f16:
6194   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6195   case NEON::BI__builtin_neon_vfmad_lane_f64:
6196   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
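           // Scalar FMA by lane: the lane value is extracted from the vector
           // operand and a scalar llvm.fma call is emitted, with the addend
           // moved to the last position as llvm.fma expects.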
6197     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6198     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6199     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6200     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6201     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6202   }
6203   case NEON::BI__builtin_neon_vmull_v:
6204     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6205     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6206     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6207     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6208   case NEON::BI__builtin_neon_vmax_v:
6209   case NEON::BI__builtin_neon_vmaxq_v:
6210     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6211     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6212     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6213     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6214   case NEON::BI__builtin_neon_vmin_v:
6215   case NEON::BI__builtin_neon_vminq_v:
6216     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6217     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6218     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6219     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6220   case NEON::BI__builtin_neon_vabd_v:
6221   case NEON::BI__builtin_neon_vabdq_v:
6222     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6223     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6224     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6225     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6226   case NEON::BI__builtin_neon_vpadal_v:
6227   case NEON::BI__builtin_neon_vpadalq_v: {
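           // Pairwise add-long and accumulate: no accumulating intrinsic is used
           // here; instead this emits [us]addlp on the narrow source and then a
           // plain vector add with the accumulator operand.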
6228     unsigned ArgElts = VTy->getNumElements();
6229     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6230     unsigned BitWidth = EltTy->getBitWidth();
6231     llvm::Type *ArgTy = llvm::VectorType::get(
6232         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6233     llvm::Type* Tys[2] = { VTy, ArgTy };
6234     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6235     SmallVector<llvm::Value*, 1> TmpOps;
6236     TmpOps.push_back(Ops[1]);
6237     Function *F = CGM.getIntrinsic(Int, Tys);
6238     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6239     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6240     return Builder.CreateAdd(tmp, addend);
6241   }
6242   case NEON::BI__builtin_neon_vpmin_v:
6243   case NEON::BI__builtin_neon_vpminq_v:
6244     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6245     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6246     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6247     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6248   case NEON::BI__builtin_neon_vpmax_v:
6249   case NEON::BI__builtin_neon_vpmaxq_v:
6250     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6251     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6252     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6253     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6254   case NEON::BI__builtin_neon_vminnm_v:
6255   case NEON::BI__builtin_neon_vminnmq_v:
6256     Int = Intrinsic::aarch64_neon_fminnm;
6257     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6258   case NEON::BI__builtin_neon_vmaxnm_v:
6259   case NEON::BI__builtin_neon_vmaxnmq_v:
6260     Int = Intrinsic::aarch64_neon_fmaxnm;
6261     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6262   case NEON::BI__builtin_neon_vrecpss_f32: {
6263     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6264     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6265                         Ops, "vrecps");
6266   }
6267   case NEON::BI__builtin_neon_vrecpsd_f64: {
6268     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6269     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6270                         Ops, "vrecps");
6271   }
6272   case NEON::BI__builtin_neon_vqshrun_n_v:
6273     Int = Intrinsic::aarch64_neon_sqshrun;
6274     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6275   case NEON::BI__builtin_neon_vqrshrun_n_v:
6276     Int = Intrinsic::aarch64_neon_sqrshrun;
6277     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6278   case NEON::BI__builtin_neon_vqshrn_n_v:
6279     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6280     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6281   case NEON::BI__builtin_neon_vrshrn_n_v:
6282     Int = Intrinsic::aarch64_neon_rshrn;
6283     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6284   case NEON::BI__builtin_neon_vqrshrn_n_v:
6285     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6286     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6287   case NEON::BI__builtin_neon_vrnda_v:
6288   case NEON::BI__builtin_neon_vrndaq_v: {
6289     Int = Intrinsic::round;
6290     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6291   }
6292   case NEON::BI__builtin_neon_vrndi_v:
6293   case NEON::BI__builtin_neon_vrndiq_v: {
6294     Int = Intrinsic::nearbyint;
6295     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6296   }
6297   case NEON::BI__builtin_neon_vrndm_v:
6298   case NEON::BI__builtin_neon_vrndmq_v: {
6299     Int = Intrinsic::floor;
6300     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6301   }
6302   case NEON::BI__builtin_neon_vrndn_v:
6303   case NEON::BI__builtin_neon_vrndnq_v: {
6304     Int = Intrinsic::aarch64_neon_frintn;
6305     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6306   }
6307   case NEON::BI__builtin_neon_vrndp_v:
6308   case NEON::BI__builtin_neon_vrndpq_v: {
6309     Int = Intrinsic::ceil;
6310     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6311   }
6312   case NEON::BI__builtin_neon_vrndx_v:
6313   case NEON::BI__builtin_neon_vrndxq_v: {
6314     Int = Intrinsic::rint;
6315     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6316   }
6317   case NEON::BI__builtin_neon_vrnd_v:
6318   case NEON::BI__builtin_neon_vrndq_v: {
6319     Int = Intrinsic::trunc;
6320     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6321   }
6322   case NEON::BI__builtin_neon_vceqz_v:
6323   case NEON::BI__builtin_neon_vceqzq_v:
6324     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6325                                          ICmpInst::ICMP_EQ, "vceqz");
6326   case NEON::BI__builtin_neon_vcgez_v:
6327   case NEON::BI__builtin_neon_vcgezq_v:
6328     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6329                                          ICmpInst::ICMP_SGE, "vcgez");
6330   case NEON::BI__builtin_neon_vclez_v:
6331   case NEON::BI__builtin_neon_vclezq_v:
6332     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6333                                          ICmpInst::ICMP_SLE, "vclez");
6334   case NEON::BI__builtin_neon_vcgtz_v:
6335   case NEON::BI__builtin_neon_vcgtzq_v:
6336     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6337                                          ICmpInst::ICMP_SGT, "vcgtz");
6338   case NEON::BI__builtin_neon_vcltz_v:
6339   case NEON::BI__builtin_neon_vcltzq_v:
6340     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6341                                          ICmpInst::ICMP_SLT, "vcltz");
6342   case NEON::BI__builtin_neon_vcvt_f64_v:
6343   case NEON::BI__builtin_neon_vcvtq_f64_v:
6344     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6345     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6346     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6347                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6348   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6349     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6350            "unexpected vcvt_f64_f32 builtin");
6351     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6352     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6353
6354     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6355   }
6356   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6357     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6358            "unexpected vcvt_f32_f64 builtin");
6359     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6360     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6361
6362     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6363   }
6364   case NEON::BI__builtin_neon_vcvt_s32_v:
6365   case NEON::BI__builtin_neon_vcvt_u32_v:
6366   case NEON::BI__builtin_neon_vcvt_s64_v:
6367   case NEON::BI__builtin_neon_vcvt_u64_v:
6368   case NEON::BI__builtin_neon_vcvt_s16_v:
6369   case NEON::BI__builtin_neon_vcvt_u16_v:
6370   case NEON::BI__builtin_neon_vcvtq_s32_v:
6371   case NEON::BI__builtin_neon_vcvtq_u32_v:
6372   case NEON::BI__builtin_neon_vcvtq_s64_v:
6373   case NEON::BI__builtin_neon_vcvtq_u64_v:
6374   case NEON::BI__builtin_neon_vcvtq_s16_v:
6375   case NEON::BI__builtin_neon_vcvtq_u16_v: {
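           // FP-to-integer conversions with the default round-toward-zero
           // behaviour are ordinary fptoui/fptosi instructions; only the
           // rounding-mode variants below (vcvta/vcvtm/vcvtn/vcvtp) need the
           // aarch64.neon.fcvt* intrinsics.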
6376     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6377     if (usgn)
6378       return Builder.CreateFPToUI(Ops[0], Ty);
6379     return Builder.CreateFPToSI(Ops[0], Ty);
6380   }
6381   case NEON::BI__builtin_neon_vcvta_s16_v:
6382   case NEON::BI__builtin_neon_vcvta_s32_v:
6383   case NEON::BI__builtin_neon_vcvtaq_s16_v:
6384   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6385   case NEON::BI__builtin_neon_vcvta_u32_v:
6386   case NEON::BI__builtin_neon_vcvtaq_u16_v:
6387   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6388   case NEON::BI__builtin_neon_vcvta_s64_v:
6389   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6390   case NEON::BI__builtin_neon_vcvta_u64_v:
6391   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6392     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6393     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6394     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6395   }
6396   case NEON::BI__builtin_neon_vcvtm_s16_v:
6397   case NEON::BI__builtin_neon_vcvtm_s32_v:
6398   case NEON::BI__builtin_neon_vcvtmq_s16_v:
6399   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6400   case NEON::BI__builtin_neon_vcvtm_u16_v:
6401   case NEON::BI__builtin_neon_vcvtm_u32_v:
6402   case NEON::BI__builtin_neon_vcvtmq_u16_v:
6403   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6404   case NEON::BI__builtin_neon_vcvtm_s64_v:
6405   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6406   case NEON::BI__builtin_neon_vcvtm_u64_v:
6407   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6408     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6409     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6410     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6411   }
6412   case NEON::BI__builtin_neon_vcvtn_s16_v:
6413   case NEON::BI__builtin_neon_vcvtn_s32_v:
6414   case NEON::BI__builtin_neon_vcvtnq_s16_v:
6415   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6416   case NEON::BI__builtin_neon_vcvtn_u16_v:
6417   case NEON::BI__builtin_neon_vcvtn_u32_v:
6418   case NEON::BI__builtin_neon_vcvtnq_u16_v:
6419   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6420   case NEON::BI__builtin_neon_vcvtn_s64_v:
6421   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6422   case NEON::BI__builtin_neon_vcvtn_u64_v:
6423   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6424     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6425     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6426     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6427   }
6428   case NEON::BI__builtin_neon_vcvtp_s16_v:
6429   case NEON::BI__builtin_neon_vcvtp_s32_v:
6430   case NEON::BI__builtin_neon_vcvtpq_s16_v:
6431   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6432   case NEON::BI__builtin_neon_vcvtp_u16_v:
6433   case NEON::BI__builtin_neon_vcvtp_u32_v:
6434   case NEON::BI__builtin_neon_vcvtpq_u16_v:
6435   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6436   case NEON::BI__builtin_neon_vcvtp_s64_v:
6437   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6438   case NEON::BI__builtin_neon_vcvtp_u64_v:
6439   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6440     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6441     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6442     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6443   }
6444   case NEON::BI__builtin_neon_vmulx_v:
6445   case NEON::BI__builtin_neon_vmulxq_v: {
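           // vmulx maps to fmulx, which differs from a plain fmul in its
           // handling of 0 * infinity (it yields +/-2.0 rather than NaN).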
6446     Int = Intrinsic::aarch64_neon_fmulx;
6447     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6448   }
6449   case NEON::BI__builtin_neon_vmul_lane_v:
6450   case NEON::BI__builtin_neon_vmul_laneq_v: {
6451     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6452     bool Quad = false;
6453     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6454       Quad = true;
6455     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6456     llvm::Type *VTy = GetNeonType(this,
6457       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6458     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6459     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6460     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6461     return Builder.CreateBitCast(Result, Ty);
6462   }
6463   case NEON::BI__builtin_neon_vnegd_s64:
6464     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6465   case NEON::BI__builtin_neon_vpmaxnm_v:
6466   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6467     Int = Intrinsic::aarch64_neon_fmaxnmp;
6468     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6469   }
6470   case NEON::BI__builtin_neon_vpminnm_v:
6471   case NEON::BI__builtin_neon_vpminnmq_v: {
6472     Int = Intrinsic::aarch64_neon_fminnmp;
6473     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6474   }
6475   case NEON::BI__builtin_neon_vsqrt_v:
6476   case NEON::BI__builtin_neon_vsqrtq_v: {
6477     Int = Intrinsic::sqrt;
6478     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6479     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6480   }
6481   case NEON::BI__builtin_neon_vrbit_v:
6482   case NEON::BI__builtin_neon_vrbitq_v: {
6483     Int = Intrinsic::aarch64_neon_rbit;
6484     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6485   }
6486   case NEON::BI__builtin_neon_vaddv_u8:
6487     // FIXME: These are handled by the AArch64 scalar code.
6488     usgn = true;
6489     // FALLTHROUGH
6490   case NEON::BI__builtin_neon_vaddv_s8: {
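           // The across-vector reduction intrinsics are declared to return i32,
           // so the result is truncated back to the element width; e.g.
           // vaddv_u8(v) calls the uaddv intrinsic at i32 and then truncates
           // to i8. The same pattern repeats for the cases below.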
6491     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6492     Ty = Int32Ty;
6493     VTy = llvm::VectorType::get(Int8Ty, 8);
6494     llvm::Type *Tys[2] = { Ty, VTy };
6495     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6496     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6497     return Builder.CreateTrunc(Ops[0], Int8Ty);
6498   }
6499   case NEON::BI__builtin_neon_vaddv_u16:
6500     usgn = true;
6501     // FALLTHROUGH
6502   case NEON::BI__builtin_neon_vaddv_s16: {
6503     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6504     Ty = Int32Ty;
6505     VTy = llvm::VectorType::get(Int16Ty, 4);
6506     llvm::Type *Tys[2] = { Ty, VTy };
6507     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6508     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6509     return Builder.CreateTrunc(Ops[0], Int16Ty);
6510   }
6511   case NEON::BI__builtin_neon_vaddvq_u8:
6512     usgn = true;
6513     // FALLTHROUGH
6514   case NEON::BI__builtin_neon_vaddvq_s8: {
6515     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6516     Ty = Int32Ty;
6517     VTy = llvm::VectorType::get(Int8Ty, 16);
6518     llvm::Type *Tys[2] = { Ty, VTy };
6519     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6520     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6521     return Builder.CreateTrunc(Ops[0], Int8Ty);
6522   }
6523   case NEON::BI__builtin_neon_vaddvq_u16:
6524     usgn = true;
6525     // FALLTHROUGH
6526   case NEON::BI__builtin_neon_vaddvq_s16: {
6527     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6528     Ty = Int32Ty;
6529     VTy = llvm::VectorType::get(Int16Ty, 8);
6530     llvm::Type *Tys[2] = { Ty, VTy };
6531     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6532     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6533     return Builder.CreateTrunc(Ops[0], Int16Ty);
6534   }
6535   case NEON::BI__builtin_neon_vmaxv_u8: {
6536     Int = Intrinsic::aarch64_neon_umaxv;
6537     Ty = Int32Ty;
6538     VTy = llvm::VectorType::get(Int8Ty, 8);
6539     llvm::Type *Tys[2] = { Ty, VTy };
6540     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6541     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6542     return Builder.CreateTrunc(Ops[0], Int8Ty);
6543   }
6544   case NEON::BI__builtin_neon_vmaxv_u16: {
6545     Int = Intrinsic::aarch64_neon_umaxv;
6546     Ty = Int32Ty;
6547     VTy = llvm::VectorType::get(Int16Ty, 4);
6548     llvm::Type *Tys[2] = { Ty, VTy };
6549     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6550     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6551     return Builder.CreateTrunc(Ops[0], Int16Ty);
6552   }
6553   case NEON::BI__builtin_neon_vmaxvq_u8: {
6554     Int = Intrinsic::aarch64_neon_umaxv;
6555     Ty = Int32Ty;
6556     VTy = llvm::VectorType::get(Int8Ty, 16);
6557     llvm::Type *Tys[2] = { Ty, VTy };
6558     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6559     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6560     return Builder.CreateTrunc(Ops[0], Int8Ty);
6561   }
6562   case NEON::BI__builtin_neon_vmaxvq_u16: {
6563     Int = Intrinsic::aarch64_neon_umaxv;
6564     Ty = Int32Ty;
6565     VTy = llvm::VectorType::get(Int16Ty, 8);
6566     llvm::Type *Tys[2] = { Ty, VTy };
6567     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6568     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6569     return Builder.CreateTrunc(Ops[0], Int16Ty);
6570   }
6571   case NEON::BI__builtin_neon_vmaxv_s8: {
6572     Int = Intrinsic::aarch64_neon_smaxv;
6573     Ty = Int32Ty;
6574     VTy = llvm::VectorType::get(Int8Ty, 8);
6575     llvm::Type *Tys[2] = { Ty, VTy };
6576     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6577     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6578     return Builder.CreateTrunc(Ops[0], Int8Ty);
6579   }
6580   case NEON::BI__builtin_neon_vmaxv_s16: {
6581     Int = Intrinsic::aarch64_neon_smaxv;
6582     Ty = Int32Ty;
6583     VTy = llvm::VectorType::get(Int16Ty, 4);
6584     llvm::Type *Tys[2] = { Ty, VTy };
6585     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6586     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6587     return Builder.CreateTrunc(Ops[0], Int16Ty);
6588   }
6589   case NEON::BI__builtin_neon_vmaxvq_s8: {
6590     Int = Intrinsic::aarch64_neon_smaxv;
6591     Ty = Int32Ty;
6592     VTy = llvm::VectorType::get(Int8Ty, 16);
6593     llvm::Type *Tys[2] = { Ty, VTy };
6594     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6595     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6596     return Builder.CreateTrunc(Ops[0], Int8Ty);
6597   }
6598   case NEON::BI__builtin_neon_vmaxvq_s16: {
6599     Int = Intrinsic::aarch64_neon_smaxv;
6600     Ty = Int32Ty;
6601     VTy = llvm::VectorType::get(Int16Ty, 8);
6602     llvm::Type *Tys[2] = { Ty, VTy };
6603     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6604     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6605     return Builder.CreateTrunc(Ops[0], Int16Ty);
6606   }
6607   case NEON::BI__builtin_neon_vmaxv_f16: {
6608     Int = Intrinsic::aarch64_neon_fmaxv;
6609     Ty = HalfTy;
6610     VTy = llvm::VectorType::get(HalfTy, 4);
6611     llvm::Type *Tys[2] = { Ty, VTy };
6612     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6613     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6614     return Builder.CreateTrunc(Ops[0], HalfTy);
6615   }
6616   case NEON::BI__builtin_neon_vmaxvq_f16: {
6617     Int = Intrinsic::aarch64_neon_fmaxv;
6618     Ty = HalfTy;
6619     VTy = llvm::VectorType::get(HalfTy, 8);
6620     llvm::Type *Tys[2] = { Ty, VTy };
6621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6622     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6623     return Builder.CreateTrunc(Ops[0], HalfTy);
6624   }
6625   case NEON::BI__builtin_neon_vminv_u8: {
6626     Int = Intrinsic::aarch64_neon_uminv;
6627     Ty = Int32Ty;
6628     VTy = llvm::VectorType::get(Int8Ty, 8);
6629     llvm::Type *Tys[2] = { Ty, VTy };
6630     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6631     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6632     return Builder.CreateTrunc(Ops[0], Int8Ty);
6633   }
6634   case NEON::BI__builtin_neon_vminv_u16: {
6635     Int = Intrinsic::aarch64_neon_uminv;
6636     Ty = Int32Ty;
6637     VTy = llvm::VectorType::get(Int16Ty, 4);
6638     llvm::Type *Tys[2] = { Ty, VTy };
6639     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6640     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6641     return Builder.CreateTrunc(Ops[0], Int16Ty);
6642   }
6643   case NEON::BI__builtin_neon_vminvq_u8: {
6644     Int = Intrinsic::aarch64_neon_uminv;
6645     Ty = Int32Ty;
6646     VTy = llvm::VectorType::get(Int8Ty, 16);
6647     llvm::Type *Tys[2] = { Ty, VTy };
6648     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6649     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6650     return Builder.CreateTrunc(Ops[0], Int8Ty);
6651   }
6652   case NEON::BI__builtin_neon_vminvq_u16: {
6653     Int = Intrinsic::aarch64_neon_uminv;
6654     Ty = Int32Ty;
6655     VTy = llvm::VectorType::get(Int16Ty, 8);
6656     llvm::Type *Tys[2] = { Ty, VTy };
6657     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6658     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6659     return Builder.CreateTrunc(Ops[0], Int16Ty);
6660   }
6661   case NEON::BI__builtin_neon_vminv_s8: {
6662     Int = Intrinsic::aarch64_neon_sminv;
6663     Ty = Int32Ty;
6664     VTy = llvm::VectorType::get(Int8Ty, 8);
6665     llvm::Type *Tys[2] = { Ty, VTy };
6666     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6667     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6668     return Builder.CreateTrunc(Ops[0], Int8Ty);
6669   }
6670   case NEON::BI__builtin_neon_vminv_s16: {
6671     Int = Intrinsic::aarch64_neon_sminv;
6672     Ty = Int32Ty;
6673     VTy = llvm::VectorType::get(Int16Ty, 4);
6674     llvm::Type *Tys[2] = { Ty, VTy };
6675     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6676     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6677     return Builder.CreateTrunc(Ops[0], Int16Ty);
6678   }
6679   case NEON::BI__builtin_neon_vminvq_s8: {
6680     Int = Intrinsic::aarch64_neon_sminv;
6681     Ty = Int32Ty;
6682     VTy = llvm::VectorType::get(Int8Ty, 16);
6683     llvm::Type *Tys[2] = { Ty, VTy };
6684     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6685     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6686     return Builder.CreateTrunc(Ops[0], Int8Ty);
6687   }
6688   case NEON::BI__builtin_neon_vminvq_s16: {
6689     Int = Intrinsic::aarch64_neon_sminv;
6690     Ty = Int32Ty;
6691     VTy = llvm::VectorType::get(Int16Ty, 8);
6692     llvm::Type *Tys[2] = { Ty, VTy };
6693     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6694     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6695     return Builder.CreateTrunc(Ops[0], Int16Ty);
6696   }
6697   case NEON::BI__builtin_neon_vminv_f16: {
6698     Int = Intrinsic::aarch64_neon_fminv;
6699     Ty = HalfTy;
6700     VTy = llvm::VectorType::get(HalfTy, 4);
6701     llvm::Type *Tys[2] = { Ty, VTy };
6702     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6703     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6704     return Builder.CreateTrunc(Ops[0], HalfTy);
6705   }
6706   case NEON::BI__builtin_neon_vminvq_f16: {
6707     Int = Intrinsic::aarch64_neon_fminv;
6708     Ty = HalfTy;
6709     VTy = llvm::VectorType::get(HalfTy, 8);
6710     llvm::Type *Tys[2] = { Ty, VTy };
6711     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6712     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6713     return Builder.CreateTrunc(Ops[0], HalfTy);
6714   }
6715   case NEON::BI__builtin_neon_vmaxnmv_f16: {
6716     Int = Intrinsic::aarch64_neon_fmaxnmv;
6717     Ty = HalfTy;
6718     VTy = llvm::VectorType::get(HalfTy, 4);
6719     llvm::Type *Tys[2] = { Ty, VTy };
6720     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6721     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6722     return Builder.CreateTrunc(Ops[0], HalfTy);
6723   }
6724   case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6725     Int = Intrinsic::aarch64_neon_fmaxnmv;
6726     Ty = HalfTy;
6727     VTy = llvm::VectorType::get(HalfTy, 8);
6728     llvm::Type *Tys[2] = { Ty, VTy };
6729     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6730     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
6731     return Builder.CreateTrunc(Ops[0], HalfTy);
6732   }
6733   case NEON::BI__builtin_neon_vminnmv_f16: {
6734     Int = Intrinsic::aarch64_neon_fminnmv;
6735     Ty = HalfTy;
6736     VTy = llvm::VectorType::get(HalfTy, 4);
6737     llvm::Type *Tys[2] = { Ty, VTy };
6738     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6739     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6740     return Builder.CreateTrunc(Ops[0], HalfTy);
6741   }
6742   case NEON::BI__builtin_neon_vminnmvq_f16: {
6743     Int = Intrinsic::aarch64_neon_fminnmv;
6744     Ty = HalfTy;
6745     VTy = llvm::VectorType::get(HalfTy, 8);
6746     llvm::Type *Tys[2] = { Ty, VTy };
6747     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6748     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
6749     return Builder.CreateTrunc(Ops[0], HalfTy);
6750   }
6751   case NEON::BI__builtin_neon_vmul_n_f64: {
6752     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6753     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6754     return Builder.CreateFMul(Ops[0], RHS);
6755   }
6756   case NEON::BI__builtin_neon_vaddlv_u8: {
6757     Int = Intrinsic::aarch64_neon_uaddlv;
6758     Ty = Int32Ty;
6759     VTy = llvm::VectorType::get(Int8Ty, 8);
6760     llvm::Type *Tys[2] = { Ty, VTy };
6761     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6762     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6763     return Builder.CreateTrunc(Ops[0], Int16Ty);
6764   }
6765   case NEON::BI__builtin_neon_vaddlv_u16: {
6766     Int = Intrinsic::aarch64_neon_uaddlv;
6767     Ty = Int32Ty;
6768     VTy = llvm::VectorType::get(Int16Ty, 4);
6769     llvm::Type *Tys[2] = { Ty, VTy };
6770     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6771     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6772   }
6773   case NEON::BI__builtin_neon_vaddlvq_u8: {
6774     Int = Intrinsic::aarch64_neon_uaddlv;
6775     Ty = Int32Ty;
6776     VTy = llvm::VectorType::get(Int8Ty, 16);
6777     llvm::Type *Tys[2] = { Ty, VTy };
6778     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6779     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6780     return Builder.CreateTrunc(Ops[0], Int16Ty);
6781   }
6782   case NEON::BI__builtin_neon_vaddlvq_u16: {
6783     Int = Intrinsic::aarch64_neon_uaddlv;
6784     Ty = Int32Ty;
6785     VTy = llvm::VectorType::get(Int16Ty, 8);
6786     llvm::Type *Tys[2] = { Ty, VTy };
6787     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6788     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6789   }
6790   case NEON::BI__builtin_neon_vaddlv_s8: {
6791     Int = Intrinsic::aarch64_neon_saddlv;
6792     Ty = Int32Ty;
6793     VTy = llvm::VectorType::get(Int8Ty, 8);
6794     llvm::Type *Tys[2] = { Ty, VTy };
6795     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6796     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6797     return Builder.CreateTrunc(Ops[0], Int16Ty);
6798   }
6799   case NEON::BI__builtin_neon_vaddlv_s16: {
6800     Int = Intrinsic::aarch64_neon_saddlv;
6801     Ty = Int32Ty;
6802     VTy = llvm::VectorType::get(Int16Ty, 4);
6803     llvm::Type *Tys[2] = { Ty, VTy };
6804     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6805     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6806   }
6807   case NEON::BI__builtin_neon_vaddlvq_s8: {
6808     Int = Intrinsic::aarch64_neon_saddlv;
6809     Ty = Int32Ty;
6810     VTy = llvm::VectorType::get(Int8Ty, 16);
6811     llvm::Type *Tys[2] = { Ty, VTy };
6812     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6813     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6814     return Builder.CreateTrunc(Ops[0], Int16Ty);
6815   }
6816   case NEON::BI__builtin_neon_vaddlvq_s16: {
6817     Int = Intrinsic::aarch64_neon_saddlv;
6818     Ty = Int32Ty;
6819     VTy = llvm::VectorType::get(Int16Ty, 8);
6820     llvm::Type *Tys[2] = { Ty, VTy };
6821     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6822     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6823   }
6824   case NEON::BI__builtin_neon_vsri_n_v:
6825   case NEON::BI__builtin_neon_vsriq_n_v: {
6826     Int = Intrinsic::aarch64_neon_vsri;
6827     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6828     return EmitNeonCall(Intrin, Ops, "vsri_n");
6829   }
6830   case NEON::BI__builtin_neon_vsli_n_v:
6831   case NEON::BI__builtin_neon_vsliq_n_v: {
6832     Int = Intrinsic::aarch64_neon_vsli;
6833     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6834     return EmitNeonCall(Intrin, Ops, "vsli_n");
6835   }
6836   case NEON::BI__builtin_neon_vsra_n_v:
6837   case NEON::BI__builtin_neon_vsraq_n_v:
6838     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6839     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6840     return Builder.CreateAdd(Ops[0], Ops[1]);
6841   case NEON::BI__builtin_neon_vrsra_n_v:
6842   case NEON::BI__builtin_neon_vrsraq_n_v: {
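           // Rounding shift-right and accumulate: the shift is expressed as a
           // rounding shift-left ([us]rshl) with a negated amount (the trailing
           // 'true' argument makes EmitNeonCall negate the constant), and the
           // result is then added to the accumulator.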
6843     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6844     SmallVector<llvm::Value*,2> TmpOps;
6845     TmpOps.push_back(Ops[1]);
6846     TmpOps.push_back(Ops[2]);
6847     Function* F = CGM.getIntrinsic(Int, Ty);
6848     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6849     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6850     return Builder.CreateAdd(Ops[0], tmp);
6851   }
6852     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6853     // of an Align parameter here.
6854   case NEON::BI__builtin_neon_vld1_x2_v:
6855   case NEON::BI__builtin_neon_vld1q_x2_v:
6856   case NEON::BI__builtin_neon_vld1_x3_v:
6857   case NEON::BI__builtin_neon_vld1q_x3_v:
6858   case NEON::BI__builtin_neon_vld1_x4_v:
6859   case NEON::BI__builtin_neon_vld1q_x4_v: {
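           // The ld1x{2,3,4} intrinsics return a struct of N vectors; the result
           // is stored through Ops[0], which points at the temporary that
           // receives the returned vector tuple.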
6860     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6861     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6862     llvm::Type *Tys[2] = { VTy, PTy };
6863     unsigned Int;
6864     switch (BuiltinID) {
6865     case NEON::BI__builtin_neon_vld1_x2_v:
6866     case NEON::BI__builtin_neon_vld1q_x2_v:
6867       Int = Intrinsic::aarch64_neon_ld1x2;
6868       break;
6869     case NEON::BI__builtin_neon_vld1_x3_v:
6870     case NEON::BI__builtin_neon_vld1q_x3_v:
6871       Int = Intrinsic::aarch64_neon_ld1x3;
6872       break;
6873     case NEON::BI__builtin_neon_vld1_x4_v:
6874     case NEON::BI__builtin_neon_vld1q_x4_v:
6875       Int = Intrinsic::aarch64_neon_ld1x4;
6876       break;
6877     }
6878     Function *F = CGM.getIntrinsic(Int, Tys);
6879     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6880     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6881     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6882     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6883   }
6884   case NEON::BI__builtin_neon_vst1_x2_v:
6885   case NEON::BI__builtin_neon_vst1q_x2_v:
6886   case NEON::BI__builtin_neon_vst1_x3_v:
6887   case NEON::BI__builtin_neon_vst1q_x3_v:
6888   case NEON::BI__builtin_neon_vst1_x4_v:
6889   case NEON::BI__builtin_neon_vst1q_x4_v: {
6890     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6891     llvm::Type *Tys[2] = { VTy, PTy };
6892     unsigned Int;
6893     switch (BuiltinID) {
6894     case NEON::BI__builtin_neon_vst1_x2_v:
6895     case NEON::BI__builtin_neon_vst1q_x2_v:
6896       Int = Intrinsic::aarch64_neon_st1x2;
6897       break;
6898     case NEON::BI__builtin_neon_vst1_x3_v:
6899     case NEON::BI__builtin_neon_vst1q_x3_v:
6900       Int = Intrinsic::aarch64_neon_st1x3;
6901       break;
6902     case NEON::BI__builtin_neon_vst1_x4_v:
6903     case NEON::BI__builtin_neon_vst1q_x4_v:
6904       Int = Intrinsic::aarch64_neon_st1x4;
6905       break;
6906     }
6907     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6908     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6909   }
6910   case NEON::BI__builtin_neon_vld1_v:
6911   case NEON::BI__builtin_neon_vld1q_v: {
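           // vld1 is just an aligned vector load; the 64-bit (D-register) form
           // assumes 8-byte alignment and the 128-bit (Q-register) form 16-byte.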
6912     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6913     auto Alignment = CharUnits::fromQuantity(
6914         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6915     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6916   }
6917   case NEON::BI__builtin_neon_vst1_v:
6918   case NEON::BI__builtin_neon_vst1q_v:
6919     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6920     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6921     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6922   case NEON::BI__builtin_neon_vld1_lane_v:
6923   case NEON::BI__builtin_neon_vld1q_lane_v: {
6924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6925     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6926     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6927     auto Alignment = CharUnits::fromQuantity(
6928         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6929     Ops[0] =
6930         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6931     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6932   }
6933   case NEON::BI__builtin_neon_vld1_dup_v:
6934   case NEON::BI__builtin_neon_vld1q_dup_v: {
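           // Load-and-duplicate: load a single element, insert it into lane 0 of
           // an undef vector, and splat it across all lanes with EmitNeonSplat.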
6935     Value *V = UndefValue::get(Ty);
6936     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6937     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6938     auto Alignment = CharUnits::fromQuantity(
6939         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6940     Ops[0] =
6941         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6942     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6943     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6944     return EmitNeonSplat(Ops[0], CI);
6945   }
6946   case NEON::BI__builtin_neon_vst1_lane_v:
6947   case NEON::BI__builtin_neon_vst1q_lane_v:
6948     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6949     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6950     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6951     return Builder.CreateDefaultAlignedStore(Ops[1],
6952                                              Builder.CreateBitCast(Ops[0], Ty));
6953   case NEON::BI__builtin_neon_vld2_v:
6954   case NEON::BI__builtin_neon_vld2q_v: {
6955     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6956     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6957     llvm::Type *Tys[2] = { VTy, PTy };
6958     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6959     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6960     Ops[0] = Builder.CreateBitCast(Ops[0],
6961                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6962     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6963   }
6964   case NEON::BI__builtin_neon_vld3_v:
6965   case NEON::BI__builtin_neon_vld3q_v: {
6966     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6967     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6968     llvm::Type *Tys[2] = { VTy, PTy };
6969     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6970     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6971     Ops[0] = Builder.CreateBitCast(Ops[0],
6972                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6973     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6974   }
6975   case NEON::BI__builtin_neon_vld4_v:
6976   case NEON::BI__builtin_neon_vld4q_v: {
6977     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6978     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6979     llvm::Type *Tys[2] = { VTy, PTy };
6980     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6981     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6982     Ops[0] = Builder.CreateBitCast(Ops[0],
6983                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6984     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6985   }
6986   case NEON::BI__builtin_neon_vld2_dup_v:
6987   case NEON::BI__builtin_neon_vld2q_dup_v: {
6988     llvm::Type *PTy =
6989       llvm::PointerType::getUnqual(VTy->getElementType());
6990     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6991     llvm::Type *Tys[2] = { VTy, PTy };
6992     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6993     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6994     Ops[0] = Builder.CreateBitCast(Ops[0],
6995                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6996     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6997   }
6998   case NEON::BI__builtin_neon_vld3_dup_v:
6999   case NEON::BI__builtin_neon_vld3q_dup_v: {
7000     llvm::Type *PTy =
7001       llvm::PointerType::getUnqual(VTy->getElementType());
7002     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7003     llvm::Type *Tys[2] = { VTy, PTy };
7004     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7005     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7006     Ops[0] = Builder.CreateBitCast(Ops[0],
7007                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7008     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7009   }
7010   case NEON::BI__builtin_neon_vld4_dup_v:
7011   case NEON::BI__builtin_neon_vld4q_dup_v: {
7012     llvm::Type *PTy =
7013       llvm::PointerType::getUnqual(VTy->getElementType());
7014     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7015     llvm::Type *Tys[2] = { VTy, PTy };
7016     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7017     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7018     Ops[0] = Builder.CreateBitCast(Ops[0],
7019                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7020     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7021   }
7022   case NEON::BI__builtin_neon_vld2_lane_v:
7023   case NEON::BI__builtin_neon_vld2q_lane_v: {
7024     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7025     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7026     Ops.push_back(Ops[1]);
7027     Ops.erase(Ops.begin()+1);
7028     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7029     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7030     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7031     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
7032     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7033     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7034     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7035   }
7036   case NEON::BI__builtin_neon_vld3_lane_v:
7037   case NEON::BI__builtin_neon_vld3q_lane_v: {
7038     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7039     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7040     Ops.push_back(Ops[1]);
7041     Ops.erase(Ops.begin()+1);
7042     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7043     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7044     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7045     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7046     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7047     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7048     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7049     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7050   }
7051   case NEON::BI__builtin_neon_vld4_lane_v:
7052   case NEON::BI__builtin_neon_vld4q_lane_v: {
7053     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7054     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7055     Ops.push_back(Ops[1]);
7056     Ops.erase(Ops.begin()+1);
7057     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7058     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7059     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7060     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7061     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7062     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7063     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7064     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7065     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7066   }
7067   case NEON::BI__builtin_neon_vst2_v:
7068   case NEON::BI__builtin_neon_vst2q_v: {
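           // For the interleaving stores the builtin's pointer argument comes
           // first, but the aarch64.neon.stN intrinsics take the data vectors
           // first, so the pointer operand is rotated to the end of Ops.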
7069     Ops.push_back(Ops[0]);
7070     Ops.erase(Ops.begin());
7071     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7072     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7073                         Ops, "");
7074   }
7075   case NEON::BI__builtin_neon_vst2_lane_v:
7076   case NEON::BI__builtin_neon_vst2q_lane_v: {
7077     Ops.push_back(Ops[0]);
7078     Ops.erase(Ops.begin());
7079     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7080     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7081     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7082                         Ops, "");
7083   }
7084   case NEON::BI__builtin_neon_vst3_v:
7085   case NEON::BI__builtin_neon_vst3q_v: {
7086     Ops.push_back(Ops[0]);
7087     Ops.erase(Ops.begin());
7088     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7089     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7090                         Ops, "");
7091   }
7092   case NEON::BI__builtin_neon_vst3_lane_v:
7093   case NEON::BI__builtin_neon_vst3q_lane_v: {
7094     Ops.push_back(Ops[0]);
7095     Ops.erase(Ops.begin());
7096     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7097     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7098     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7099                         Ops, "");
7100   }
7101   case NEON::BI__builtin_neon_vst4_v:
7102   case NEON::BI__builtin_neon_vst4q_v: {
7103     Ops.push_back(Ops[0]);
7104     Ops.erase(Ops.begin());
7105     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7106     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7107                         Ops, "");
7108   }
7109   case NEON::BI__builtin_neon_vst4_lane_v:
7110   case NEON::BI__builtin_neon_vst4q_lane_v: {
7111     Ops.push_back(Ops[0]);
7112     Ops.erase(Ops.begin());
7113     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7114     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7115     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7116                         Ops, "");
7117   }
7118   case NEON::BI__builtin_neon_vtrn_v:
7119   case NEON::BI__builtin_neon_vtrnq_v: {
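           // vtrn/vuzp/vzip return two vectors; each half is built with a
           // shufflevector over the two inputs and stored into consecutive
           // slots of the result temporary, with no target intrinsic involved.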
7120     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7121     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7122     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7123     Value *SV = nullptr;
7124
7125     for (unsigned vi = 0; vi != 2; ++vi) {
7126       SmallVector<uint32_t, 16> Indices;
7127       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7128         Indices.push_back(i+vi);
7129         Indices.push_back(i+e+vi);
7130       }
7131       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7132       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7133       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7134     }
7135     return SV;
7136   }
7137   case NEON::BI__builtin_neon_vuzp_v:
7138   case NEON::BI__builtin_neon_vuzpq_v: {
7139     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7140     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7141     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7142     Value *SV = nullptr;
7143
7144     for (unsigned vi = 0; vi != 2; ++vi) {
7145       SmallVector<uint32_t, 16> Indices;
7146       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7147         Indices.push_back(2*i+vi);
7148
7149       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7150       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7151       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7152     }
7153     return SV;
7154   }
7155   case NEON::BI__builtin_neon_vzip_v:
7156   case NEON::BI__builtin_neon_vzipq_v: {
7157     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7158     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7159     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7160     Value *SV = nullptr;
7161
7162     for (unsigned vi = 0; vi != 2; ++vi) {
7163       SmallVector<uint32_t, 16> Indices;
7164       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7165         Indices.push_back((i + vi*e) >> 1);
7166         Indices.push_back(((i + vi*e) >> 1)+e);
7167       }
7168       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7169       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7170       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7171     }
7172     return SV;
7173   }
7174   case NEON::BI__builtin_neon_vqtbl1q_v: {
7175     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7176                         Ops, "vtbl1");
7177   }
7178   case NEON::BI__builtin_neon_vqtbl2q_v: {
7179     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7180                         Ops, "vtbl2");
7181   }
7182   case NEON::BI__builtin_neon_vqtbl3q_v: {
7183     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7184                         Ops, "vtbl3");
7185   }
7186   case NEON::BI__builtin_neon_vqtbl4q_v: {
7187     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7188                         Ops, "vtbl4");
7189   }
7190   case NEON::BI__builtin_neon_vqtbx1q_v: {
7191     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7192                         Ops, "vtbx1");
7193   }
7194   case NEON::BI__builtin_neon_vqtbx2q_v: {
7195     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7196                         Ops, "vtbx2");
7197   }
7198   case NEON::BI__builtin_neon_vqtbx3q_v: {
7199     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7200                         Ops, "vtbx3");
7201   }
7202   case NEON::BI__builtin_neon_vqtbx4q_v: {
7203     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7204                         Ops, "vtbx4");
7205   }
7206   case NEON::BI__builtin_neon_vsqadd_v:
7207   case NEON::BI__builtin_neon_vsqaddq_v: {
7208     Int = Intrinsic::aarch64_neon_usqadd;
7209     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7210   }
7211   case NEON::BI__builtin_neon_vuqadd_v:
7212   case NEON::BI__builtin_neon_vuqaddq_v: {
7213     Int = Intrinsic::aarch64_neon_suqadd;
7214     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7215   }
7216   }
7217 }
7218
7219 llvm::Value *CodeGenFunction::
7220 BuildVector(ArrayRef<llvm::Value*> Ops) {
7221   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7222          "Not a power-of-two sized vector!");
7223   bool AllConstants = true;
7224   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7225     AllConstants &= isa<Constant>(Ops[i]);
7226
7227   // If this is a constant vector, create a ConstantVector.
7228   if (AllConstants) {
7229     SmallVector<llvm::Constant*, 16> CstOps;
7230     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7231       CstOps.push_back(cast<Constant>(Ops[i]));
7232     return llvm::ConstantVector::get(CstOps);
7233   }
7234
7235   // Otherwise, insertelement the values to build the vector.
7236   Value *Result =
7237     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7238
7239   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7240     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7241
7242   return Result;
7243 }
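// Illustrative sketch (not part of the original source): for four
// non-constant i32 operands, BuildVector emits an insertelement chain
// roughly like
//   %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
//   %v1 = insertelement <4 x i32> %v0,   i32 %b, i32 1
//   %v2 = insertelement <4 x i32> %v1,   i32 %c, i32 2
//   %v3 = insertelement <4 x i32> %v2,   i32 %d, i32 3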
7244
7245 // Convert the mask from an integer type to a vector of i1.
7246 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7247                               unsigned NumElts) {
7248
7249   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7250                          cast<IntegerType>(Mask->getType())->getBitWidth());
7251   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7252
7253   // If we have fewer than 8 elements, then the starting mask was an i8 and
7254   // we need to extract down to the right number of elements.
7255   if (NumElts < 8) {
7256     uint32_t Indices[4];
7257     for (unsigned i = 0; i != NumElts; ++i)
7258       Indices[i] = i;
7259     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7260                                              makeArrayRef(Indices, NumElts),
7261                                              "extract");
7262   }
7263   return MaskVec;
7264 }
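// Illustrative sketch (assumed element count, not from the original source):
// an i8 mask narrowed to 4 elements becomes roughly
//   %m  = bitcast i8 %mask to <8 x i1>
//   %m4 = shufflevector <8 x i1> %m, <8 x i1> %m,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>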
7265
7266 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7267                                  SmallVectorImpl<Value *> &Ops,
7268                                  unsigned Align) {
7270   // Cast the pointer to the right type.
7270   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7271                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7272
7273   // If the mask is all ones, just emit a regular store.
7274   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7275     if (C->isAllOnesValue())
7276       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7277
7278   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7279                                    Ops[1]->getType()->getVectorNumElements());
7280
7281   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7282 }
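// Illustrative sketch (assumed element types, not from the original source):
// for an <8 x float> store with a non-constant mask this produces roughly
//   call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %val,
//                                              <8 x float>* %ptr, i32 1,
//                                              <8 x i1> %mask)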
7283
7284 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7285                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7287   // Cast the pointer to the right type.
7287   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7288                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7289
7290   // If the mask is all ones, just emit a regular (unmasked) load.
7291   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7292     if (C->isAllOnesValue())
7293       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7294
7295   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7296                                    Ops[1]->getType()->getVectorNumElements());
7297
7298   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7299 }
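// Illustrative sketch (assumed element types, not from the original source):
// with a non-constant mask the load becomes roughly
//   %r = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %ptr,
//                             i32 1, <8 x i1> %mask, <8 x float> %passthru)
// where %passthru is Ops[1], i.e. the merge value for the masked-off lanes.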
7300
7301 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7302                                         SmallVectorImpl<Value *> &Ops,
7303                                         llvm::Type *DstTy,
7304                                         unsigned SrcSizeInBits,
7305                                         unsigned Align) {
7306   // Load the subvector.
7307   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7308
7309   // Create broadcast mask.
7310   unsigned NumDstElts = DstTy->getVectorNumElements();
7311   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7312
7313   SmallVector<uint32_t, 8> Mask;
7314   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7315     for (unsigned j = 0; j != NumSrcElts; ++j)
7316       Mask.push_back(j);
7317
7318   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7319 }
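// Illustrative sketch (assumed types, not from the original source): for a
// 128-bit float subvector broadcast into a 256-bit destination this is roughly
//   %v = load <4 x float>, <4 x float>* %p, align 1
//   %b = shufflevector <4 x float> %v, <4 x float> %v,
//          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>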
7320
7321 static Value *EmitX86Select(CodeGenFunction &CGF,
7322                             Value *Mask, Value *Op0, Value *Op1) {
7323
7324   // If the mask is all ones, just return the first argument.
7325   if (const auto *C = dyn_cast<Constant>(Mask))
7326     if (C->isAllOnesValue())
7327       return Op0;
7328
7329   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7330
7331   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7332 }
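// Illustrative sketch (assumed types, not from the original source): with a
// non-constant mask this is simply
//   %r = select <8 x i1> %mask, <8 x i32> %op0, <8 x i32> %op1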
7333
7334 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7335                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7336   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7337   Value *Cmp;
7338
7339   if (CC == 3) {
7340     Cmp = Constant::getNullValue(
7341                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7342   } else if (CC == 7) {
7343     Cmp = Constant::getAllOnesValue(
7344                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7345   } else {
7346     ICmpInst::Predicate Pred;
7347     switch (CC) {
7348     default: llvm_unreachable("Unknown condition code");
7349     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7350     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7351     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7352     case 4: Pred = ICmpInst::ICMP_NE;  break;
7353     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7354     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7355     }
7356     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7357   }
7358
7359   const auto *C = dyn_cast<Constant>(Ops.back());
7360   if (!C || !C->isAllOnesValue())
7361     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7362
7363   if (NumElts < 8) {
7364     uint32_t Indices[8];
7365     for (unsigned i = 0; i != NumElts; ++i)
7366       Indices[i] = i;
7367     for (unsigned i = NumElts; i != 8; ++i)
7368       Indices[i] = i % NumElts + NumElts;
7369     Cmp = CGF.Builder.CreateShuffleVector(
7370         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7371   }
7372   return CGF.Builder.CreateBitCast(Cmp,
7373                                    IntegerType::get(CGF.getLLVMContext(),
7374                                                     std::max(NumElts, 8U)));
7375 }
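// Illustrative sketch (assumed types, not from the original source): for a
// signed greater-than compare of <4 x i32> with a write mask, this produces
// roughly
//   %c  = icmp sgt <4 x i32> %a, %b
//   %cm = and <4 x i1> %c, %mask4          ; skipped when the mask is all-ones
//   %w  = shufflevector <4 x i1> %cm, <4 x i1> zeroinitializer, ... ; pad to 8
//   %r  = bitcast <8 x i1> %w to i8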
7376
7377 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7378                             ArrayRef<Value *> Ops) {
7379   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7380   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7381
7382   if (Ops.size() == 2)
7383     return Res;
7384
7385   assert(Ops.size() == 4);
7386   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7387 }
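// Illustrative sketch (assumed types, not from the original source): pmaxsd on
// <4 x i32> lowers to
//   %c = icmp sgt <4 x i32> %a, %b
//   %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
// plus an EmitX86Select against the passthru operand for the masked forms.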
7388
7389 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7390                               llvm::Type *DstTy) {
7391   unsigned NumberOfElements = DstTy->getVectorNumElements();
7392   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7393   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7394 }
7395
7396 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7397                                            const CallExpr *E) {
7398   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7399       BuiltinID == X86::BI__builtin_ms_va_end)
7400     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7401                           BuiltinID == X86::BI__builtin_ms_va_start);
7402   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7403     // Lower this manually. We can't reliably determine whether or not any
7404     // given va_copy() is for a Win64 va_list from the calling convention
7405     // alone, because it's legal to do this from a System V ABI function.
7406     // With opaque pointer types, we won't have enough information in LLVM
7407     // IR to determine this from the argument types, either. Best to do it
7408     // now, while we have enough information.
7409     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7410     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7411
7412     llvm::Type *BPP = Int8PtrPtrTy;
7413
7414     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7415                        DestAddr.getAlignment());
7416     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7417                       SrcAddr.getAlignment());
7418
7419     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7420     return Builder.CreateStore(ArgPtr, DestAddr);
7421   }
7422
7423   SmallVector<Value*, 4> Ops;
7424
7425   // Find out if any arguments are required to be integer constant expressions.
7426   unsigned ICEArguments = 0;
7427   ASTContext::GetBuiltinTypeError Error;
7428   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7429   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7430
7431   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7432     // If this is a normal argument, just emit it as a scalar.
7433     if ((ICEArguments & (1 << i)) == 0) {
7434       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7435       continue;
7436     }
7437
7438     // If this is required to be a constant, constant fold it so that we know
7439     // that the generated intrinsic gets a ConstantInt.
7440     llvm::APSInt Result;
7441     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7442     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7443     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7444   }
7445
7446   // These exist so that the builtin that takes an immediate can be bounds
7447   // checked by clang to avoid passing bad immediates to the backend. Since
7448   // AVX has a larger immediate range than SSE, we would need separate
7449   // builtins to do the different bounds checking. Rather than create a
7450   // clang-specific SSE-only builtin, this implements eight separate builtins
7451   // to match the gcc implementation.
7452   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7453     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7454     llvm::Function *F = CGM.getIntrinsic(ID);
7455     return Builder.CreateCall(F, Ops);
7456   };
7457
7458   // For the vector forms of FP comparisons, translate the builtins directly to
7459   // IR.
7460   // TODO: The builtins could be removed if the SSE header files used vector
7461   // extension comparisons directly (vector ordered/unordered may need
7462   // additional support via __builtin_isnan()).
7463   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7464     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7465     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7466     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7467     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7468     return Builder.CreateBitCast(Sext, FPVecTy);
7469   };
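  // Illustrative sketch (not from the original source): __builtin_ia32_cmpeqps
  // goes through getVectorFCmpIR(FCMP_OEQ) and becomes roughly
  //   %c = fcmp oeq <4 x float> %a, %b
  //   %s = sext <4 x i1> %c to <4 x i32>
  //   %r = bitcast <4 x i32> %s to <4 x float>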
7470
7471   switch (BuiltinID) {
7472   default: return nullptr;
7473   case X86::BI__builtin_cpu_supports: {
7474     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7475     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7476
7477     // TODO: When/if this becomes more than x86-specific, use a
7478     // TargetInfo-based mapping.
7479     // Processor features and mapping to processor feature value.
7480     enum X86Features {
7481       CMOV = 0,
7482       MMX,
7483       POPCNT,
7484       SSE,
7485       SSE2,
7486       SSE3,
7487       SSSE3,
7488       SSE4_1,
7489       SSE4_2,
7490       AVX,
7491       AVX2,
7492       SSE4_A,
7493       FMA4,
7494       XOP,
7495       FMA,
7496       AVX512F,
7497       BMI,
7498       BMI2,
7499       AES,
7500       PCLMUL,
7501       AVX512VL,
7502       AVX512BW,
7503       AVX512DQ,
7504       AVX512CD,
7505       AVX512ER,
7506       AVX512PF,
7507       AVX512VBMI,
7508       AVX512IFMA,
7509       AVX512VPOPCNTDQ,
7510       MAX
7511     };
7512
7513     X86Features Feature =
7514         StringSwitch<X86Features>(FeatureStr)
7515             .Case("cmov", X86Features::CMOV)
7516             .Case("mmx", X86Features::MMX)
7517             .Case("popcnt", X86Features::POPCNT)
7518             .Case("sse", X86Features::SSE)
7519             .Case("sse2", X86Features::SSE2)
7520             .Case("sse3", X86Features::SSE3)
7521             .Case("ssse3", X86Features::SSSE3)
7522             .Case("sse4.1", X86Features::SSE4_1)
7523             .Case("sse4.2", X86Features::SSE4_2)
7524             .Case("avx", X86Features::AVX)
7525             .Case("avx2", X86Features::AVX2)
7526             .Case("sse4a", X86Features::SSE4_A)
7527             .Case("fma4", X86Features::FMA4)
7528             .Case("xop", X86Features::XOP)
7529             .Case("fma", X86Features::FMA)
7530             .Case("avx512f", X86Features::AVX512F)
7531             .Case("bmi", X86Features::BMI)
7532             .Case("bmi2", X86Features::BMI2)
7533             .Case("aes", X86Features::AES)
7534             .Case("pclmul", X86Features::PCLMUL)
7535             .Case("avx512vl", X86Features::AVX512VL)
7536             .Case("avx512bw", X86Features::AVX512BW)
7537             .Case("avx512dq", X86Features::AVX512DQ)
7538             .Case("avx512cd", X86Features::AVX512CD)
7539             .Case("avx512er", X86Features::AVX512ER)
7540             .Case("avx512pf", X86Features::AVX512PF)
7541             .Case("avx512vbmi", X86Features::AVX512VBMI)
7542             .Case("avx512ifma", X86Features::AVX512IFMA)
7543             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7544             .Default(X86Features::MAX);
7545     assert(Feature != X86Features::MAX && "Invalid feature!");
7546
7547     // Matching the struct layout from the compiler-rt/libgcc structure that is
7548     // filled in:
7549     // unsigned int __cpu_vendor;
7550     // unsigned int __cpu_type;
7551     // unsigned int __cpu_subtype;
7552     // unsigned int __cpu_features[1];
7553     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7554                                             llvm::ArrayType::get(Int32Ty, 1));
7555
7556     // Grab the global __cpu_model.
7557     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7558
7559     // Grab the first (0th) element of the __cpu_features field of the
7560     // global, using the struct type STy.
7561     Value *Idxs[] = {
7562       ConstantInt::get(Int32Ty, 0),
7563       ConstantInt::get(Int32Ty, 3),
7564       ConstantInt::get(Int32Ty, 0)
7565     };
7566     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7567     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7568                                                 CharUnits::fromQuantity(4));
7569
7570     // Check the value of the bit corresponding to the feature requested.
7571     Value *Bitset = Builder.CreateAnd(
7572         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7573     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7574   }
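  // Conceptually (an illustrative reading, not from the original source), the
  // case above lowers __builtin_cpu_supports("feature") to roughly
  //   (__cpu_model.__cpu_features[0] & (1u << Feature)) != 0
  // with __cpu_model supplied at runtime by compiler-rt/libgcc.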
7575   case X86::BI_mm_prefetch: {
7576     Value *Address = Ops[0];
7577     Value *RW = ConstantInt::get(Int32Ty, 0);
7578     Value *Locality = Ops[1];
7579     Value *Data = ConstantInt::get(Int32Ty, 1);
7580     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7581     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7582   }
7583   case X86::BI_mm_clflush: {
7584     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7585                               Ops[0]);
7586   }
7587   case X86::BI_mm_lfence: {
7588     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7589   }
7590   case X86::BI_mm_mfence: {
7591     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7592   }
7593   case X86::BI_mm_sfence: {
7594     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7595   }
7596   case X86::BI_mm_pause: {
7597     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7598   }
7599   case X86::BI__rdtsc: {
7600     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7601   }
7602   case X86::BI__builtin_ia32_undef128:
7603   case X86::BI__builtin_ia32_undef256:
7604   case X86::BI__builtin_ia32_undef512:
7605     // The x86 definition of "undef" is not the same as the LLVM definition
7606     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7607     // IR optimizer and backend.
7608     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7609     // value, we should use that here instead of a zero.
7610     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7611   case X86::BI__builtin_ia32_vec_init_v8qi:
7612   case X86::BI__builtin_ia32_vec_init_v4hi:
7613   case X86::BI__builtin_ia32_vec_init_v2si:
7614     return Builder.CreateBitCast(BuildVector(Ops),
7615                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7616   case X86::BI__builtin_ia32_vec_ext_v2si:
7617     return Builder.CreateExtractElement(Ops[0],
7618                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7619   case X86::BI_mm_setcsr:
7620   case X86::BI__builtin_ia32_ldmxcsr: {
7621     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7622     Builder.CreateStore(Ops[0], Tmp);
7623     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7624                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7625   }
7626   case X86::BI_mm_getcsr:
7627   case X86::BI__builtin_ia32_stmxcsr: {
7628     Address Tmp = CreateMemTemp(E->getType());
7629     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7630                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7631     return Builder.CreateLoad(Tmp, "stmxcsr");
7632   }
7633   case X86::BI__builtin_ia32_xsave:
7634   case X86::BI__builtin_ia32_xsave64:
7635   case X86::BI__builtin_ia32_xrstor:
7636   case X86::BI__builtin_ia32_xrstor64:
7637   case X86::BI__builtin_ia32_xsaveopt:
7638   case X86::BI__builtin_ia32_xsaveopt64:
7639   case X86::BI__builtin_ia32_xrstors:
7640   case X86::BI__builtin_ia32_xrstors64:
7641   case X86::BI__builtin_ia32_xsavec:
7642   case X86::BI__builtin_ia32_xsavec64:
7643   case X86::BI__builtin_ia32_xsaves:
7644   case X86::BI__builtin_ia32_xsaves64: {
7645     Intrinsic::ID ID;
7646 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7647     case X86::BI__builtin_ia32_##NAME: \
7648       ID = Intrinsic::x86_##NAME; \
7649       break
7650     switch (BuiltinID) {
7651     default: llvm_unreachable("Unsupported intrinsic!");
7652     INTRINSIC_X86_XSAVE_ID(xsave);
7653     INTRINSIC_X86_XSAVE_ID(xsave64);
7654     INTRINSIC_X86_XSAVE_ID(xrstor);
7655     INTRINSIC_X86_XSAVE_ID(xrstor64);
7656     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7657     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7658     INTRINSIC_X86_XSAVE_ID(xrstors);
7659     INTRINSIC_X86_XSAVE_ID(xrstors64);
7660     INTRINSIC_X86_XSAVE_ID(xsavec);
7661     INTRINSIC_X86_XSAVE_ID(xsavec64);
7662     INTRINSIC_X86_XSAVE_ID(xsaves);
7663     INTRINSIC_X86_XSAVE_ID(xsaves64);
7664     }
7665 #undef INTRINSIC_X86_XSAVE_ID
7666     Value *Mhi = Builder.CreateTrunc(
7667       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7668     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7669     Ops[1] = Mhi;
7670     Ops.push_back(Mlo);
7671     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7672   }
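  // Illustrative sketch (not from the original source): for
  // __builtin_ia32_xsave(p, mask) the 64-bit mask is split into its high and
  // low halves (EDX:EAX), roughly
  //   %sh = lshr i64 %mask, 32
  //   %hi = trunc i64 %sh to i32           ; high half (EDX)
  //   %lo = trunc i64 %mask to i32         ; low half (EAX)
  //   call void @llvm.x86.xsave(i8* %p, i32 %hi, i32 %lo)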
7673   case X86::BI__builtin_ia32_storedqudi128_mask:
7674   case X86::BI__builtin_ia32_storedqusi128_mask:
7675   case X86::BI__builtin_ia32_storedquhi128_mask:
7676   case X86::BI__builtin_ia32_storedquqi128_mask:
7677   case X86::BI__builtin_ia32_storeupd128_mask:
7678   case X86::BI__builtin_ia32_storeups128_mask:
7679   case X86::BI__builtin_ia32_storedqudi256_mask:
7680   case X86::BI__builtin_ia32_storedqusi256_mask:
7681   case X86::BI__builtin_ia32_storedquhi256_mask:
7682   case X86::BI__builtin_ia32_storedquqi256_mask:
7683   case X86::BI__builtin_ia32_storeupd256_mask:
7684   case X86::BI__builtin_ia32_storeups256_mask:
7685   case X86::BI__builtin_ia32_storedqudi512_mask:
7686   case X86::BI__builtin_ia32_storedqusi512_mask:
7687   case X86::BI__builtin_ia32_storedquhi512_mask:
7688   case X86::BI__builtin_ia32_storedquqi512_mask:
7689   case X86::BI__builtin_ia32_storeupd512_mask:
7690   case X86::BI__builtin_ia32_storeups512_mask:
7691     return EmitX86MaskedStore(*this, Ops, 1);
7692
7693   case X86::BI__builtin_ia32_storess128_mask:
7694   case X86::BI__builtin_ia32_storesd128_mask: {
7695     return EmitX86MaskedStore(*this, Ops, 16);
7696   }
7697   case X86::BI__builtin_ia32_vpopcntd_512:
7698   case X86::BI__builtin_ia32_vpopcntq_512: {
7699     llvm::Type *ResultType = ConvertType(E->getType());
7700     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7701     return Builder.CreateCall(F, Ops);
7702   }
7703   case X86::BI__builtin_ia32_cvtmask2b128:
7704   case X86::BI__builtin_ia32_cvtmask2b256:
7705   case X86::BI__builtin_ia32_cvtmask2b512:
7706   case X86::BI__builtin_ia32_cvtmask2w128:
7707   case X86::BI__builtin_ia32_cvtmask2w256:
7708   case X86::BI__builtin_ia32_cvtmask2w512:
7709   case X86::BI__builtin_ia32_cvtmask2d128:
7710   case X86::BI__builtin_ia32_cvtmask2d256:
7711   case X86::BI__builtin_ia32_cvtmask2d512:
7712   case X86::BI__builtin_ia32_cvtmask2q128:
7713   case X86::BI__builtin_ia32_cvtmask2q256:
7714   case X86::BI__builtin_ia32_cvtmask2q512:
7715     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7716
7717   case X86::BI__builtin_ia32_movdqa32store128_mask:
7718   case X86::BI__builtin_ia32_movdqa64store128_mask:
7719   case X86::BI__builtin_ia32_storeaps128_mask:
7720   case X86::BI__builtin_ia32_storeapd128_mask:
7721   case X86::BI__builtin_ia32_movdqa32store256_mask:
7722   case X86::BI__builtin_ia32_movdqa64store256_mask:
7723   case X86::BI__builtin_ia32_storeaps256_mask:
7724   case X86::BI__builtin_ia32_storeapd256_mask:
7725   case X86::BI__builtin_ia32_movdqa32store512_mask:
7726   case X86::BI__builtin_ia32_movdqa64store512_mask:
7727   case X86::BI__builtin_ia32_storeaps512_mask:
7728   case X86::BI__builtin_ia32_storeapd512_mask: {
7729     unsigned Align =
7730       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7731     return EmitX86MaskedStore(*this, Ops, Align);
7732   }
7733   case X86::BI__builtin_ia32_loadups128_mask:
7734   case X86::BI__builtin_ia32_loadups256_mask:
7735   case X86::BI__builtin_ia32_loadups512_mask:
7736   case X86::BI__builtin_ia32_loadupd128_mask:
7737   case X86::BI__builtin_ia32_loadupd256_mask:
7738   case X86::BI__builtin_ia32_loadupd512_mask:
7739   case X86::BI__builtin_ia32_loaddquqi128_mask:
7740   case X86::BI__builtin_ia32_loaddquqi256_mask:
7741   case X86::BI__builtin_ia32_loaddquqi512_mask:
7742   case X86::BI__builtin_ia32_loaddquhi128_mask:
7743   case X86::BI__builtin_ia32_loaddquhi256_mask:
7744   case X86::BI__builtin_ia32_loaddquhi512_mask:
7745   case X86::BI__builtin_ia32_loaddqusi128_mask:
7746   case X86::BI__builtin_ia32_loaddqusi256_mask:
7747   case X86::BI__builtin_ia32_loaddqusi512_mask:
7748   case X86::BI__builtin_ia32_loaddqudi128_mask:
7749   case X86::BI__builtin_ia32_loaddqudi256_mask:
7750   case X86::BI__builtin_ia32_loaddqudi512_mask:
7751     return EmitX86MaskedLoad(*this, Ops, 1);
7752
7753   case X86::BI__builtin_ia32_loadss128_mask:
7754   case X86::BI__builtin_ia32_loadsd128_mask:
7755     return EmitX86MaskedLoad(*this, Ops, 16);
7756
7757   case X86::BI__builtin_ia32_loadaps128_mask:
7758   case X86::BI__builtin_ia32_loadaps256_mask:
7759   case X86::BI__builtin_ia32_loadaps512_mask:
7760   case X86::BI__builtin_ia32_loadapd128_mask:
7761   case X86::BI__builtin_ia32_loadapd256_mask:
7762   case X86::BI__builtin_ia32_loadapd512_mask:
7763   case X86::BI__builtin_ia32_movdqa32load128_mask:
7764   case X86::BI__builtin_ia32_movdqa32load256_mask:
7765   case X86::BI__builtin_ia32_movdqa32load512_mask:
7766   case X86::BI__builtin_ia32_movdqa64load128_mask:
7767   case X86::BI__builtin_ia32_movdqa64load256_mask:
7768   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7769     unsigned Align =
7770       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7771     return EmitX86MaskedLoad(*this, Ops, Align);
7772   }
7773
7774   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7775   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7776     llvm::Type *DstTy = ConvertType(E->getType());
7777     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7778   }
7779
7780   case X86::BI__builtin_ia32_storehps:
7781   case X86::BI__builtin_ia32_storelps: {
7782     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7783     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7784
7785     // Cast the value to v2i64.
7786     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7787
7788     // Extract element 0 or 1.
7789     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7790     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7791     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7792
7793     // Cast the pointer to i64* and store.
7794     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7795     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7796   }
7797   case X86::BI__builtin_ia32_palignr128:
7798   case X86::BI__builtin_ia32_palignr256:
7799   case X86::BI__builtin_ia32_palignr512_mask: {
7800     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7801
7802     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7803     assert(NumElts % 16 == 0);
7804
7805     // If palignr is shifting the pair of vectors more than the size of two
7806     // lanes, emit zero.
7807     if (ShiftVal >= 32)
7808       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7809
7810     // If palignr is shifting the pair of input vectors more than one lane,
7811     // but less than two lanes, convert to shifting in zeroes.
7812     if (ShiftVal > 16) {
7813       ShiftVal -= 16;
7814       Ops[1] = Ops[0];
7815       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7816     }
7817
7818     uint32_t Indices[64];
7819     // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
7820     for (unsigned l = 0; l != NumElts; l += 16) {
7821       for (unsigned i = 0; i != 16; ++i) {
7822         unsigned Idx = ShiftVal + i;
7823         if (Idx >= 16)
7824           Idx += NumElts - 16; // End of lane, switch operand.
7825         Indices[l + i] = Idx + l;
7826       }
7827     }
7828
7829     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7830                                                makeArrayRef(Indices, NumElts),
7831                                                "palignr");
7832
7833     // If this isn't a masked builtin, just return the align operation.
7834     if (Ops.size() == 3)
7835       return Align;
7836
7837     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7838   }
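  // Illustrative example (not from the original source): 128-bit palignr with
  // ShiftVal == 4 selects bytes 4..19 of the concatenation (Ops[1], Ops[0]),
  // i.e. the shuffle mask is {4, 5, ..., 19}.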
7839
7840   case X86::BI__builtin_ia32_movnti:
7841   case X86::BI__builtin_ia32_movnti64:
7842   case X86::BI__builtin_ia32_movntsd:
7843   case X86::BI__builtin_ia32_movntss: {
7844     llvm::MDNode *Node = llvm::MDNode::get(
7845         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7846
7847     Value *Ptr = Ops[0];
7848     Value *Src = Ops[1];
7849
7850     // Extract the 0'th element of the source vector.
7851     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7852         BuiltinID == X86::BI__builtin_ia32_movntss)
7853       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7854
7855     // Convert the type of the pointer to a pointer to the stored type.
7856     Value *BC = Builder.CreateBitCast(
7857         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7858
7859     // Unaligned nontemporal store of the scalar value.
7860     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7861     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7862     SI->setAlignment(1);
7863     return SI;
7864   }
7865
7866   case X86::BI__builtin_ia32_selectb_128:
7867   case X86::BI__builtin_ia32_selectb_256:
7868   case X86::BI__builtin_ia32_selectb_512:
7869   case X86::BI__builtin_ia32_selectw_128:
7870   case X86::BI__builtin_ia32_selectw_256:
7871   case X86::BI__builtin_ia32_selectw_512:
7872   case X86::BI__builtin_ia32_selectd_128:
7873   case X86::BI__builtin_ia32_selectd_256:
7874   case X86::BI__builtin_ia32_selectd_512:
7875   case X86::BI__builtin_ia32_selectq_128:
7876   case X86::BI__builtin_ia32_selectq_256:
7877   case X86::BI__builtin_ia32_selectq_512:
7878   case X86::BI__builtin_ia32_selectps_128:
7879   case X86::BI__builtin_ia32_selectps_256:
7880   case X86::BI__builtin_ia32_selectps_512:
7881   case X86::BI__builtin_ia32_selectpd_128:
7882   case X86::BI__builtin_ia32_selectpd_256:
7883   case X86::BI__builtin_ia32_selectpd_512:
7884     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7885   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7886   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7887   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7888   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7889   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7890   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7891   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7892   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7893   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7894   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7895   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7896   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7897     return EmitX86MaskedCompare(*this, 0, false, Ops);
7898   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7899   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7900   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7901   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7902   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7903   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7904   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7905   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7906   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7907   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7908   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7909   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7910     return EmitX86MaskedCompare(*this, 6, true, Ops);
7911   case X86::BI__builtin_ia32_cmpb128_mask:
7912   case X86::BI__builtin_ia32_cmpb256_mask:
7913   case X86::BI__builtin_ia32_cmpb512_mask:
7914   case X86::BI__builtin_ia32_cmpw128_mask:
7915   case X86::BI__builtin_ia32_cmpw256_mask:
7916   case X86::BI__builtin_ia32_cmpw512_mask:
7917   case X86::BI__builtin_ia32_cmpd128_mask:
7918   case X86::BI__builtin_ia32_cmpd256_mask:
7919   case X86::BI__builtin_ia32_cmpd512_mask:
7920   case X86::BI__builtin_ia32_cmpq128_mask:
7921   case X86::BI__builtin_ia32_cmpq256_mask:
7922   case X86::BI__builtin_ia32_cmpq512_mask: {
7923     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7924     return EmitX86MaskedCompare(*this, CC, true, Ops);
7925   }
7926   case X86::BI__builtin_ia32_ucmpb128_mask:
7927   case X86::BI__builtin_ia32_ucmpb256_mask:
7928   case X86::BI__builtin_ia32_ucmpb512_mask:
7929   case X86::BI__builtin_ia32_ucmpw128_mask:
7930   case X86::BI__builtin_ia32_ucmpw256_mask:
7931   case X86::BI__builtin_ia32_ucmpw512_mask:
7932   case X86::BI__builtin_ia32_ucmpd128_mask:
7933   case X86::BI__builtin_ia32_ucmpd256_mask:
7934   case X86::BI__builtin_ia32_ucmpd512_mask:
7935   case X86::BI__builtin_ia32_ucmpq128_mask:
7936   case X86::BI__builtin_ia32_ucmpq256_mask:
7937   case X86::BI__builtin_ia32_ucmpq512_mask: {
7938     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7939     return EmitX86MaskedCompare(*this, CC, false, Ops);
7940   }
7941
7942   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7943   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7944   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7945   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7946   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7947   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7948     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7949     return EmitX86Select(*this, Ops[2],
7950                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7951                          Ops[1]);
7952   }
7953
7954   case X86::BI__builtin_ia32_pmaxsb128:
7955   case X86::BI__builtin_ia32_pmaxsw128:
7956   case X86::BI__builtin_ia32_pmaxsd128:
7957   case X86::BI__builtin_ia32_pmaxsq128_mask:
7958   case X86::BI__builtin_ia32_pmaxsb256:
7959   case X86::BI__builtin_ia32_pmaxsw256:
7960   case X86::BI__builtin_ia32_pmaxsd256:
7961   case X86::BI__builtin_ia32_pmaxsq256_mask:
7962   case X86::BI__builtin_ia32_pmaxsb512_mask:
7963   case X86::BI__builtin_ia32_pmaxsw512_mask:
7964   case X86::BI__builtin_ia32_pmaxsd512_mask:
7965   case X86::BI__builtin_ia32_pmaxsq512_mask:
7966     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7967   case X86::BI__builtin_ia32_pmaxub128:
7968   case X86::BI__builtin_ia32_pmaxuw128:
7969   case X86::BI__builtin_ia32_pmaxud128:
7970   case X86::BI__builtin_ia32_pmaxuq128_mask:
7971   case X86::BI__builtin_ia32_pmaxub256:
7972   case X86::BI__builtin_ia32_pmaxuw256:
7973   case X86::BI__builtin_ia32_pmaxud256:
7974   case X86::BI__builtin_ia32_pmaxuq256_mask:
7975   case X86::BI__builtin_ia32_pmaxub512_mask:
7976   case X86::BI__builtin_ia32_pmaxuw512_mask:
7977   case X86::BI__builtin_ia32_pmaxud512_mask:
7978   case X86::BI__builtin_ia32_pmaxuq512_mask:
7979     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7980   case X86::BI__builtin_ia32_pminsb128:
7981   case X86::BI__builtin_ia32_pminsw128:
7982   case X86::BI__builtin_ia32_pminsd128:
7983   case X86::BI__builtin_ia32_pminsq128_mask:
7984   case X86::BI__builtin_ia32_pminsb256:
7985   case X86::BI__builtin_ia32_pminsw256:
7986   case X86::BI__builtin_ia32_pminsd256:
7987   case X86::BI__builtin_ia32_pminsq256_mask:
7988   case X86::BI__builtin_ia32_pminsb512_mask:
7989   case X86::BI__builtin_ia32_pminsw512_mask:
7990   case X86::BI__builtin_ia32_pminsd512_mask:
7991   case X86::BI__builtin_ia32_pminsq512_mask:
7992     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7993   case X86::BI__builtin_ia32_pminub128:
7994   case X86::BI__builtin_ia32_pminuw128:
7995   case X86::BI__builtin_ia32_pminud128:
7996   case X86::BI__builtin_ia32_pminuq128_mask:
7997   case X86::BI__builtin_ia32_pminub256:
7998   case X86::BI__builtin_ia32_pminuw256:
7999   case X86::BI__builtin_ia32_pminud256:
8000   case X86::BI__builtin_ia32_pminuq256_mask:
8001   case X86::BI__builtin_ia32_pminub512_mask:
8002   case X86::BI__builtin_ia32_pminuw512_mask:
8003   case X86::BI__builtin_ia32_pminud512_mask:
8004   case X86::BI__builtin_ia32_pminuq512_mask:
8005     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8006
8007   // 3DNow!
8008   case X86::BI__builtin_ia32_pswapdsf:
8009   case X86::BI__builtin_ia32_pswapdsi: {
8010     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8011     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8012     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8013     return Builder.CreateCall(F, Ops, "pswapd");
8014   }
8015   case X86::BI__builtin_ia32_rdrand16_step:
8016   case X86::BI__builtin_ia32_rdrand32_step:
8017   case X86::BI__builtin_ia32_rdrand64_step:
8018   case X86::BI__builtin_ia32_rdseed16_step:
8019   case X86::BI__builtin_ia32_rdseed32_step:
8020   case X86::BI__builtin_ia32_rdseed64_step: {
8021     Intrinsic::ID ID;
8022     switch (BuiltinID) {
8023     default: llvm_unreachable("Unsupported intrinsic!");
8024     case X86::BI__builtin_ia32_rdrand16_step:
8025       ID = Intrinsic::x86_rdrand_16;
8026       break;
8027     case X86::BI__builtin_ia32_rdrand32_step:
8028       ID = Intrinsic::x86_rdrand_32;
8029       break;
8030     case X86::BI__builtin_ia32_rdrand64_step:
8031       ID = Intrinsic::x86_rdrand_64;
8032       break;
8033     case X86::BI__builtin_ia32_rdseed16_step:
8034       ID = Intrinsic::x86_rdseed_16;
8035       break;
8036     case X86::BI__builtin_ia32_rdseed32_step:
8037       ID = Intrinsic::x86_rdseed_32;
8038       break;
8039     case X86::BI__builtin_ia32_rdseed64_step:
8040       ID = Intrinsic::x86_rdseed_64;
8041       break;
8042     }
8043
8044     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8045     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8046                                       Ops[0]);
8047     return Builder.CreateExtractValue(Call, 1);
8048   }
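  // Illustrative sketch (not from the original source): the 32-bit rdrand
  // variant expands to roughly
  //   %pair = call { i32, i32 } @llvm.x86.rdrand.32()
  //   %val  = extractvalue { i32, i32 } %pair, 0   ; stored through Ops[0]
  //   %ok   = extractvalue { i32, i32 } %pair, 1   ; returned success flag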
8049
8050   // SSE packed comparison intrinsics
8051   case X86::BI__builtin_ia32_cmpeqps:
8052   case X86::BI__builtin_ia32_cmpeqpd:
8053     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8054   case X86::BI__builtin_ia32_cmpltps:
8055   case X86::BI__builtin_ia32_cmpltpd:
8056     return getVectorFCmpIR(CmpInst::FCMP_OLT);
8057   case X86::BI__builtin_ia32_cmpleps:
8058   case X86::BI__builtin_ia32_cmplepd:
8059     return getVectorFCmpIR(CmpInst::FCMP_OLE);
8060   case X86::BI__builtin_ia32_cmpunordps:
8061   case X86::BI__builtin_ia32_cmpunordpd:
8062     return getVectorFCmpIR(CmpInst::FCMP_UNO);
8063   case X86::BI__builtin_ia32_cmpneqps:
8064   case X86::BI__builtin_ia32_cmpneqpd:
8065     return getVectorFCmpIR(CmpInst::FCMP_UNE);
8066   case X86::BI__builtin_ia32_cmpnltps:
8067   case X86::BI__builtin_ia32_cmpnltpd:
8068     return getVectorFCmpIR(CmpInst::FCMP_UGE);
8069   case X86::BI__builtin_ia32_cmpnleps:
8070   case X86::BI__builtin_ia32_cmpnlepd:
8071     return getVectorFCmpIR(CmpInst::FCMP_UGT);
8072   case X86::BI__builtin_ia32_cmpordps:
8073   case X86::BI__builtin_ia32_cmpordpd:
8074     return getVectorFCmpIR(CmpInst::FCMP_ORD);
8075   case X86::BI__builtin_ia32_cmpps:
8076   case X86::BI__builtin_ia32_cmpps256:
8077   case X86::BI__builtin_ia32_cmppd:
8078   case X86::BI__builtin_ia32_cmppd256: {
8079     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8080     // If this is one of the SSE immediates, we can use native IR.
8081     if (CC < 8) {
8082       FCmpInst::Predicate Pred;
8083       switch (CC) {
8084       case 0: Pred = FCmpInst::FCMP_OEQ; break;
8085       case 1: Pred = FCmpInst::FCMP_OLT; break;
8086       case 2: Pred = FCmpInst::FCMP_OLE; break;
8087       case 3: Pred = FCmpInst::FCMP_UNO; break;
8088       case 4: Pred = FCmpInst::FCMP_UNE; break;
8089       case 5: Pred = FCmpInst::FCMP_UGE; break;
8090       case 6: Pred = FCmpInst::FCMP_UGT; break;
8091       case 7: Pred = FCmpInst::FCMP_ORD; break;
8092       }
8093       return getVectorFCmpIR(Pred);
8094     }
8095
8096     // We can't handle immediates 8-31 with native IR; use the intrinsic,
8097     // except for predicates that produce constants.
8098     Intrinsic::ID ID;
8099     switch (BuiltinID) {
8100     default: llvm_unreachable("Unsupported intrinsic!");
8101     case X86::BI__builtin_ia32_cmpps:
8102       ID = Intrinsic::x86_sse_cmp_ps;
8103       break;
8104     case X86::BI__builtin_ia32_cmpps256:
8105       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8106       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8107       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8108          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8109                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8110                 llvm::Constant::getNullValue(Builder.getInt32Ty());
8111          Value *Vec = Builder.CreateVectorSplat(
8112                         Ops[0]->getType()->getVectorNumElements(), Constant);
8113          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8114       }
8115       ID = Intrinsic::x86_avx_cmp_ps_256;
8116       break;
8117     case X86::BI__builtin_ia32_cmppd:
8118       ID = Intrinsic::x86_sse2_cmp_pd;
8119       break;
8120     case X86::BI__builtin_ia32_cmppd256:
8121       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8122       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8123       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8124          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8125                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8126                 llvm::Constant::getNullValue(Builder.getInt64Ty());
8127          Value *Vec = Builder.CreateVectorSplat(
8128                         Ops[0]->getType()->getVectorNumElements(), Constant);
8129          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8130       }
8131       ID = Intrinsic::x86_avx_cmp_pd_256;
8132       break;
8133     }
8134
8135     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8136   }
8137
8138   // SSE scalar comparison intrinsics
8139   case X86::BI__builtin_ia32_cmpeqss:
8140     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8141   case X86::BI__builtin_ia32_cmpltss:
8142     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8143   case X86::BI__builtin_ia32_cmpless:
8144     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8145   case X86::BI__builtin_ia32_cmpunordss:
8146     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8147   case X86::BI__builtin_ia32_cmpneqss:
8148     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8149   case X86::BI__builtin_ia32_cmpnltss:
8150     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8151   case X86::BI__builtin_ia32_cmpnless:
8152     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8153   case X86::BI__builtin_ia32_cmpordss:
8154     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8155   case X86::BI__builtin_ia32_cmpeqsd:
8156     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8157   case X86::BI__builtin_ia32_cmpltsd:
8158     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8159   case X86::BI__builtin_ia32_cmplesd:
8160     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8161   case X86::BI__builtin_ia32_cmpunordsd:
8162     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8163   case X86::BI__builtin_ia32_cmpneqsd:
8164     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8165   case X86::BI__builtin_ia32_cmpnltsd:
8166     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8167   case X86::BI__builtin_ia32_cmpnlesd:
8168     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8169   case X86::BI__builtin_ia32_cmpordsd:
8170     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8171
8172   case X86::BI__emul:
8173   case X86::BI__emulu: {
8174     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8175     bool isSigned = (BuiltinID == X86::BI__emul);
8176     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8177     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8178     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8179   }
8180   case X86::BI__mulh:
8181   case X86::BI__umulh:
8182   case X86::BI_mul128:
8183   case X86::BI_umul128: {
8184     llvm::Type *ResType = ConvertType(E->getType());
8185     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8186
8187     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8188     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8189     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8190
8191     Value *MulResult, *HigherBits;
8192     if (IsSigned) {
8193       MulResult = Builder.CreateNSWMul(LHS, RHS);
8194       HigherBits = Builder.CreateAShr(MulResult, 64);
8195     } else {
8196       MulResult = Builder.CreateNUWMul(LHS, RHS);
8197       HigherBits = Builder.CreateLShr(MulResult, 64);
8198     }
8199     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8200
8201     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8202       return HigherBits;
8203
8204     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8205     Builder.CreateStore(HigherBits, HighBitsAddress);
8206     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8207   }
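  // Illustrative sketch (not from the original source): the signed __mulh path
  // above is essentially
  //   %a128 = sext i64 %a to i128
  //   %b128 = sext i64 %b to i128
  //   %p    = mul nsw i128 %a128, %b128
  //   %hi   = ashr i128 %p, 64
  //   %r    = trunc i128 %hi to i64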
8208
8209   case X86::BI__faststorefence: {
8210     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8211                                llvm::CrossThread);
8212   }
8213   case X86::BI_ReadWriteBarrier:
8214   case X86::BI_ReadBarrier:
8215   case X86::BI_WriteBarrier: {
8216     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8217                                llvm::SingleThread);
8218   }
8219   case X86::BI_BitScanForward:
8220   case X86::BI_BitScanForward64:
8221     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8222   case X86::BI_BitScanReverse:
8223   case X86::BI_BitScanReverse64:
8224     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8225
8226   case X86::BI_InterlockedAnd64:
8227     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8228   case X86::BI_InterlockedExchange64:
8229     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8230   case X86::BI_InterlockedExchangeAdd64:
8231     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8232   case X86::BI_InterlockedExchangeSub64:
8233     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8234   case X86::BI_InterlockedOr64:
8235     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8236   case X86::BI_InterlockedXor64:
8237     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8238   case X86::BI_InterlockedDecrement64:
8239     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8240   case X86::BI_InterlockedIncrement64:
8241     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8242
8243   case X86::BI_AddressOfReturnAddress: {
8244     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8245     return Builder.CreateCall(F);
8246   }
8247   case X86::BI__stosb: {
8248     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8249     // instruction, but it will create a memset that won't be optimized away.
8250     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8251   }
8252   case X86::BI__ud2:
8253     // llvm.trap lowers to a ud2a instruction on x86.
8254     return EmitTrapCall(Intrinsic::trap);
8255   case X86::BI__int2c: {
8256     // This syscall signals a driver assertion failure in x86 NT kernels.
8257     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8258     llvm::InlineAsm *IA =
8259         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8260     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8261         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8262         llvm::Attribute::NoReturn);
8263     CallSite CS = Builder.CreateCall(IA);
8264     CS.setAttributes(NoReturnAttr);
8265     return CS.getInstruction();
8266   }
8267   case X86::BI__readfsbyte:
8268   case X86::BI__readfsword:
8269   case X86::BI__readfsdword:
8270   case X86::BI__readfsqword: {
8271     llvm::Type *IntTy = ConvertType(E->getType());
8272     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8273                                         llvm::PointerType::get(IntTy, 257));
8274     LoadInst *Load = Builder.CreateAlignedLoad(
8275         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8276     Load->setVolatile(true);
8277     return Load;
8278   }
8279   case X86::BI__readgsbyte:
8280   case X86::BI__readgsword:
8281   case X86::BI__readgsdword:
8282   case X86::BI__readgsqword: {
8283     llvm::Type *IntTy = ConvertType(E->getType());
8284     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8285                                         llvm::PointerType::get(IntTy, 256));
8286     LoadInst *Load = Builder.CreateAlignedLoad(
8287         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8288     Load->setVolatile(true);
8289     return Load;
8290   }
8291   }
8292 }
8293
8294
8295 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8296                                            const CallExpr *E) {
8297   SmallVector<Value*, 4> Ops;
8298
8299   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8300     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8301
8302   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8303
8304   switch (BuiltinID) {
8305   default: return nullptr;
8306
8307   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8308   // call __builtin_readcyclecounter.
8309   case PPC::BI__builtin_ppc_get_timebase:
8310     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8311
8312   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8313   case PPC::BI__builtin_altivec_lvx:
8314   case PPC::BI__builtin_altivec_lvxl:
8315   case PPC::BI__builtin_altivec_lvebx:
8316   case PPC::BI__builtin_altivec_lvehx:
8317   case PPC::BI__builtin_altivec_lvewx:
8318   case PPC::BI__builtin_altivec_lvsl:
8319   case PPC::BI__builtin_altivec_lvsr:
8320   case PPC::BI__builtin_vsx_lxvd2x:
8321   case PPC::BI__builtin_vsx_lxvw4x:
8322   case PPC::BI__builtin_vsx_lxvd2x_be:
8323   case PPC::BI__builtin_vsx_lxvw4x_be:
8324   case PPC::BI__builtin_vsx_lxvl:
8325   case PPC::BI__builtin_vsx_lxvll:
8326   {
8327     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8328         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
8329       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8330     } else {
8331       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8332       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8333       Ops.pop_back();
8334     }
8335
8336     switch (BuiltinID) {
8337     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8338     case PPC::BI__builtin_altivec_lvx:
8339       ID = Intrinsic::ppc_altivec_lvx;
8340       break;
8341     case PPC::BI__builtin_altivec_lvxl:
8342       ID = Intrinsic::ppc_altivec_lvxl;
8343       break;
8344     case PPC::BI__builtin_altivec_lvebx:
8345       ID = Intrinsic::ppc_altivec_lvebx;
8346       break;
8347     case PPC::BI__builtin_altivec_lvehx:
8348       ID = Intrinsic::ppc_altivec_lvehx;
8349       break;
8350     case PPC::BI__builtin_altivec_lvewx:
8351       ID = Intrinsic::ppc_altivec_lvewx;
8352       break;
8353     case PPC::BI__builtin_altivec_lvsl:
8354       ID = Intrinsic::ppc_altivec_lvsl;
8355       break;
8356     case PPC::BI__builtin_altivec_lvsr:
8357       ID = Intrinsic::ppc_altivec_lvsr;
8358       break;
8359     case PPC::BI__builtin_vsx_lxvd2x:
8360       ID = Intrinsic::ppc_vsx_lxvd2x;
8361       break;
8362     case PPC::BI__builtin_vsx_lxvw4x:
8363       ID = Intrinsic::ppc_vsx_lxvw4x;
8364       break;
8365     case PPC::BI__builtin_vsx_lxvd2x_be:
8366       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8367       break;
8368     case PPC::BI__builtin_vsx_lxvw4x_be:
8369       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8370       break;
8371     case PPC::BI__builtin_vsx_lxvl:
8372       ID = Intrinsic::ppc_vsx_lxvl;
8373       break;
8374     case PPC::BI__builtin_vsx_lxvll:
8375       ID = Intrinsic::ppc_vsx_lxvll;
8376       break;
8377     }
8378     llvm::Function *F = CGM.getIntrinsic(ID);
8379     return Builder.CreateCall(F, Ops, "");
8380   }
8381
8382   // vec_st, vec_xst_be
8383   case PPC::BI__builtin_altivec_stvx:
8384   case PPC::BI__builtin_altivec_stvxl:
8385   case PPC::BI__builtin_altivec_stvebx:
8386   case PPC::BI__builtin_altivec_stvehx:
8387   case PPC::BI__builtin_altivec_stvewx:
8388   case PPC::BI__builtin_vsx_stxvd2x:
8389   case PPC::BI__builtin_vsx_stxvw4x:
8390   case PPC::BI__builtin_vsx_stxvd2x_be:
8391   case PPC::BI__builtin_vsx_stxvw4x_be:
8392   case PPC::BI__builtin_vsx_stxvl:
8393   case PPC::BI__builtin_vsx_stxvll:
8394   {
8395     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8396         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8397       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8398     } else {
8399       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8400       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8401       Ops.pop_back();
8402     }
8403
8404     switch (BuiltinID) {
8405     default: llvm_unreachable("Unsupported st intrinsic!");
8406     case PPC::BI__builtin_altivec_stvx:
8407       ID = Intrinsic::ppc_altivec_stvx;
8408       break;
8409     case PPC::BI__builtin_altivec_stvxl:
8410       ID = Intrinsic::ppc_altivec_stvxl;
8411       break;
8412     case PPC::BI__builtin_altivec_stvebx:
8413       ID = Intrinsic::ppc_altivec_stvebx;
8414       break;
8415     case PPC::BI__builtin_altivec_stvehx:
8416       ID = Intrinsic::ppc_altivec_stvehx;
8417       break;
8418     case PPC::BI__builtin_altivec_stvewx:
8419       ID = Intrinsic::ppc_altivec_stvewx;
8420       break;
8421     case PPC::BI__builtin_vsx_stxvd2x:
8422       ID = Intrinsic::ppc_vsx_stxvd2x;
8423       break;
8424     case PPC::BI__builtin_vsx_stxvw4x:
8425       ID = Intrinsic::ppc_vsx_stxvw4x;
8426       break;
8427     case PPC::BI__builtin_vsx_stxvd2x_be:
8428       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8429       break;
8430     case PPC::BI__builtin_vsx_stxvw4x_be:
8431       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8432       break;
8433     case PPC::BI__builtin_vsx_stxvl:
8434       ID = Intrinsic::ppc_vsx_stxvl;
8435       break;
8436     case PPC::BI__builtin_vsx_stxvll:
8437       ID = Intrinsic::ppc_vsx_stxvll;
8438       break;
8439     }
8440     llvm::Function *F = CGM.getIntrinsic(ID);
8441     return Builder.CreateCall(F, Ops, "");
8442   }
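  // Illustrative sketch (editorial): the store forms mirror the loads above,
  // e.g. vec_st(v, off, p) becomes roughly
  //   %addr = getelementptr i8, i8* %p, i64 %off
  //   call void @llvm.ppc.altivec.stvx(<4 x i32> %v, i8* %addr)
  // with the trailing pointer operand dropped once it is folded into %addr.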
8443   // Square root
8444   case PPC::BI__builtin_vsx_xvsqrtsp:
8445   case PPC::BI__builtin_vsx_xvsqrtdp: {
8446     llvm::Type *ResultType = ConvertType(E->getType());
8447     Value *X = EmitScalarExpr(E->getArg(0));
8448     ID = Intrinsic::sqrt;
8449     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8450     return Builder.CreateCall(F, X);
8451   }
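  // Illustrative sketch (editorial): xvsqrtdp on a vector double operand
  // lowers to the generic intrinsic
  //   %r = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  // and xvsqrtsp likewise to @llvm.sqrt.v4f32, so later passes can reason
  // about it as an ordinary square root.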
8452   // Count leading zeros
8453   case PPC::BI__builtin_altivec_vclzb:
8454   case PPC::BI__builtin_altivec_vclzh:
8455   case PPC::BI__builtin_altivec_vclzw:
8456   case PPC::BI__builtin_altivec_vclzd: {
8457     llvm::Type *ResultType = ConvertType(E->getType());
8458     Value *X = EmitScalarExpr(E->getArg(0));
8459     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8460     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8461     return Builder.CreateCall(F, {X, Undef});
8462   }
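  // Illustrative sketch (editorial): vclzw, for example, lowers to
  //   %r = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
  // where the i1 false flag states that a zero element is well defined
  // (yielding the element width), matching the PowerPC semantics.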
8463   case PPC::BI__builtin_altivec_vctzb:
8464   case PPC::BI__builtin_altivec_vctzh:
8465   case PPC::BI__builtin_altivec_vctzw:
8466   case PPC::BI__builtin_altivec_vctzd: {
8467     llvm::Type *ResultType = ConvertType(E->getType());
8468     Value *X = EmitScalarExpr(E->getArg(0));
8469     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8470     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8471     return Builder.CreateCall(F, {X, Undef});
8472   }
8473   case PPC::BI__builtin_altivec_vpopcntb:
8474   case PPC::BI__builtin_altivec_vpopcnth:
8475   case PPC::BI__builtin_altivec_vpopcntw:
8476   case PPC::BI__builtin_altivec_vpopcntd: {
8477     llvm::Type *ResultType = ConvertType(E->getType());
8478     Value *X = EmitScalarExpr(E->getArg(0));
8479     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8480     return Builder.CreateCall(F, X);
8481   }
8482   // Copy sign
8483   case PPC::BI__builtin_vsx_xvcpsgnsp:
8484   case PPC::BI__builtin_vsx_xvcpsgndp: {
8485     llvm::Type *ResultType = ConvertType(E->getType());
8486     Value *X = EmitScalarExpr(E->getArg(0));
8487     Value *Y = EmitScalarExpr(E->getArg(1));
8488     ID = Intrinsic::copysign;
8489     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8490     return Builder.CreateCall(F, {X, Y});
8491   }
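  // Illustrative sketch (editorial): xvcpsgndp maps onto the generic
  //   %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %y)
  // where the emitted @llvm.copysign takes its magnitude from the first
  // operand and its sign from the second, per element.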
8492   // Rounding/truncation
8493   case PPC::BI__builtin_vsx_xvrspip:
8494   case PPC::BI__builtin_vsx_xvrdpip:
8495   case PPC::BI__builtin_vsx_xvrdpim:
8496   case PPC::BI__builtin_vsx_xvrspim:
8497   case PPC::BI__builtin_vsx_xvrdpi:
8498   case PPC::BI__builtin_vsx_xvrspi:
8499   case PPC::BI__builtin_vsx_xvrdpic:
8500   case PPC::BI__builtin_vsx_xvrspic:
8501   case PPC::BI__builtin_vsx_xvrdpiz:
8502   case PPC::BI__builtin_vsx_xvrspiz: {
8503     llvm::Type *ResultType = ConvertType(E->getType());
8504     Value *X = EmitScalarExpr(E->getArg(0));
8505     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8506         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8507       ID = Intrinsic::floor;
8508     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8509              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8510       ID = Intrinsic::round;
8511     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8512              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8513       ID = Intrinsic::nearbyint;
8514     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8515              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8516       ID = Intrinsic::ceil;
8517     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8518              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8519       ID = Intrinsic::trunc;
8520     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8521     return Builder.CreateCall(F, X);
8522   }
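  // Illustrative mapping (editorial summary of the cases above):
  //   xvr*im -> @llvm.floor.*      (round toward -inf)
  //   xvr*i  -> @llvm.round.*      (round to nearest, ties away from zero)
  //   xvr*ic -> @llvm.nearbyint.*  (round using the current rounding mode)
  //   xvr*ip -> @llvm.ceil.*       (round toward +inf)
  //   xvr*iz -> @llvm.trunc.*      (round toward zero)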
8523
8524   // Absolute value
8525   case PPC::BI__builtin_vsx_xvabsdp:
8526   case PPC::BI__builtin_vsx_xvabssp: {
8527     llvm::Type *ResultType = ConvertType(E->getType());
8528     Value *X = EmitScalarExpr(E->getArg(0));
8529     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8530     return Builder.CreateCall(F, X);
8531   }
8532
8533   // FMA variations
8534   case PPC::BI__builtin_vsx_xvmaddadp:
8535   case PPC::BI__builtin_vsx_xvmaddasp:
8536   case PPC::BI__builtin_vsx_xvnmaddadp:
8537   case PPC::BI__builtin_vsx_xvnmaddasp:
8538   case PPC::BI__builtin_vsx_xvmsubadp:
8539   case PPC::BI__builtin_vsx_xvmsubasp:
8540   case PPC::BI__builtin_vsx_xvnmsubadp:
8541   case PPC::BI__builtin_vsx_xvnmsubasp: {
8542     llvm::Type *ResultType = ConvertType(E->getType());
8543     Value *X = EmitScalarExpr(E->getArg(0));
8544     Value *Y = EmitScalarExpr(E->getArg(1));
8545     Value *Z = EmitScalarExpr(E->getArg(2));
8546     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8547     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8548     switch (BuiltinID) {
8549       case PPC::BI__builtin_vsx_xvmaddadp:
8550       case PPC::BI__builtin_vsx_xvmaddasp:
8551         return Builder.CreateCall(F, {X, Y, Z});
8552       case PPC::BI__builtin_vsx_xvnmaddadp:
8553       case PPC::BI__builtin_vsx_xvnmaddasp:
8554         return Builder.CreateFSub(Zero,
8555                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8556       case PPC::BI__builtin_vsx_xvmsubadp:
8557       case PPC::BI__builtin_vsx_xvmsubasp:
8558         return Builder.CreateCall(F,
8559                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8560       case PPC::BI__builtin_vsx_xvnmsubadp:
8561       case PPC::BI__builtin_vsx_xvnmsubasp:
8562         Value *FsubRes =
8563           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8564         return Builder.CreateFSub(Zero, FsubRes, "sub");
8565     }
8566     llvm_unreachable("Unknown FMA operation");
8567     return nullptr; // Suppress no-return warning
8568   }
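  // Illustrative summary (editorial) of the variants above, per element:
  //   xvmadda*  ->  fma(x, y, z)
  //   xvnmadda* -> -fma(x, y, z)      (an fsub of the fma result from zero)
  //   xvmsuba*  ->  fma(x, y, -z)     (z negated via fsub from zero)
  //   xvnmsuba* -> -fma(x, y, -z)
  // all built on the generic @llvm.fma intrinsic.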
8569
8570   case PPC::BI__builtin_vsx_insertword: {
8571     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8572
8573     // Third argument is a compile-time constant int. It must be clamped
8574     // to the range [0, 12].
8575     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8576     assert(ArgCI &&
8577            "Third arg to xxinsertw intrinsic must be constant integer");
8578     const int64_t MaxIndex = 12;
8579     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8580
8581     // The builtin semantics don't exactly match the xxinsertw instruction's
8582     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8583     // word from the first argument, and inserts it in the second argument. The
8584     // instruction extracts the word from its second input register and inserts
8585     // it into its first input register, so swap the first and second arguments.
8586     std::swap(Ops[0], Ops[1]);
8587
8588     // Need to cast the second argument from a vector of unsigned int to a
8589     // vector of long long.
8590     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8591
8592     if (getTarget().isLittleEndian()) {
8593       // Create a shuffle mask of (1, 0)
8594       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8595                                    ConstantInt::get(Int32Ty, 0)
8596                                  };
8597       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8598
8599       // Reverse the double words in the vector we will extract from.
8600       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8601       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8602
8603       // Reverse the index.
8604       Index = MaxIndex - Index;
8605     }
8606
8607     // Intrinsic expects the first arg to be a vector of int.
8608     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8609     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8610     return Builder.CreateCall(F, Ops);
8611   }
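  // Illustrative sketch (editorial): on a little-endian target with, say, a
  // constant byte index of 4, the code above swaps the two vector operands,
  // reverses the doublewords of the insertion target with a <1, 0> shuffle,
  // and rewrites the index to 12 - 4 = 8 before calling
  // @llvm.ppc.vsx.xxinsertw, so the element lands where the big-endian
  // instruction semantics expect it.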
8612
8613   case PPC::BI__builtin_vsx_extractuword: {
8614     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8615
8616     // Intrinsic expects the first argument to be a vector of doublewords.
8617     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8618
8619     // The second argument is a compile time constant int that needs to
8620     // be clamped to the range [0, 12].
8621     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8622     assert(ArgCI &&
8623            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8624     const int64_t MaxIndex = 12;
8625     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8626
8627     if (getTarget().isLittleEndian()) {
8628       // Reverse the index.
8629       Index = MaxIndex - Index;
8630       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8631
8632       // Emit the call, then reverse the double words of the results vector.
8633       Value *Call = Builder.CreateCall(F, Ops);
8634
8635       // Create a shuffle mask of (1, 0)
8636       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8637                                    ConstantInt::get(Int32Ty, 0)
8638                                  };
8639       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8640
8641       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8642       return ShuffleCall;
8643     } else {
8644       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8645       return Builder.CreateCall(F, Ops);
8646     }
8647   }
8648
8649   case PPC::BI__builtin_vsx_xxpermdi: {
8650     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8651     assert(ArgCI && "Third arg must be constant integer!");
8652
8653     unsigned Index = ArgCI->getZExtValue();
8654     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8655     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8656
8657     // Element zero comes from the first input vector and element one comes from
8658     // the second. The element indices within each vector are numbered in big
8659     // endian order, so the shuffle mask must be adjusted for this on little
8660     // endian platforms (i.e. index is complemented and source vector reversed).
8661     unsigned ElemIdx0;
8662     unsigned ElemIdx1;
8663     if (getTarget().isLittleEndian()) {
8664       ElemIdx0 = (~Index & 1) + 2;
8665       ElemIdx1 = (~Index & 2) >> 1;
8666     } else { // BigEndian
8667       ElemIdx0 = (Index & 2) >> 1;
8668       ElemIdx1 = 2 + (Index & 1);
8669     }
8670
8671     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8672                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8673     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8674
8675     Value *ShuffleCall =
8676         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8677     QualType BIRetType = E->getType();
8678     auto RetTy = ConvertType(BIRetType);
8679     return Builder.CreateBitCast(ShuffleCall, RetTy);
8680   }
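  // Illustrative sketch (editorial): for a constant selector of 1 on a
  // big-endian target, ElemIdx0 = (1 & 2) >> 1 = 0 and
  // ElemIdx1 = 2 + (1 & 1) = 3, so the builtin becomes a shufflevector with
  // mask <0, 3> over the two operands bitcast to <2 x i64>, followed by a
  // bitcast back to the builtin's return type.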
8681
8682   case PPC::BI__builtin_vsx_xxsldwi: {
8683     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8684     assert(ArgCI && "Third argument must be a compile time constant");
8685     unsigned Index = ArgCI->getZExtValue() & 0x3;
8686     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8687     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8688
8689     // Create a shuffle mask
8690     unsigned ElemIdx0;
8691     unsigned ElemIdx1;
8692     unsigned ElemIdx2;
8693     unsigned ElemIdx3;
8694     if (getTarget().isLittleEndian()) {
8695       // Little endian element N comes from element 8+N-Index of the
8696       // concatenated wide vector (of course, using modulo arithmetic on
8697       // the total number of elements).
8698       ElemIdx0 = (8 - Index) % 8;
8699       ElemIdx1 = (9 - Index) % 8;
8700       ElemIdx2 = (10 - Index) % 8;
8701       ElemIdx3 = (11 - Index) % 8;
8702     } else {
8703       // Big endian ElemIdx<N> = Index + N
8704       ElemIdx0 = Index;
8705       ElemIdx1 = Index + 1;
8706       ElemIdx2 = Index + 2;
8707       ElemIdx3 = Index + 3;
8708     }
8709
8710     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8711                                 ConstantInt::get(Int32Ty, ElemIdx1),
8712                                 ConstantInt::get(Int32Ty, ElemIdx2),
8713                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8714
8715     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8716     Value *ShuffleCall =
8717         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8718     QualType BIRetType = E->getType();
8719     auto RetTy = ConvertType(BIRetType);
8720     return Builder.CreateBitCast(ShuffleCall, RetTy);
8721   }
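  // Illustrative sketch (editorial): with Index = 1 the mask works out to
  // <1, 2, 3, 4> on big-endian and <7, 0, 1, 2> on little-endian targets,
  // i.e. the word-granular shift across the concatenated <4 x i32> operands
  // is expressed as a plain shufflevector rather than a target intrinsic.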
8722   }
8723 }
8724
8725 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8726                                               const CallExpr *E) {
8727   switch (BuiltinID) {
8728   case AMDGPU::BI__builtin_amdgcn_div_scale:
8729   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8730     // Translate from the intrinsic's struct return to the builtin's out
8731     // argument.
8732
8733     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8734
8735     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8736     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8737     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8738
8739     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8740                                            X->getType());
8741
8742     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8743
8744     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8745     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8746
8747     llvm::Type *RealFlagType
8748       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8749
8750     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8751     Builder.CreateStore(FlagExt, FlagOutPtr);
8752     return Result;
8753   }
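  // Illustrative sketch (editorial): @llvm.amdgcn.div.scale returns a
  // { fp, i1 } pair, so the lowering looks roughly like
  //   %pair = call { double, i1 } @llvm.amdgcn.div.scale.f64(...)
  //   %res  = extractvalue { double, i1 } %pair, 0
  //   %flag = extractvalue { double, i1 } %pair, 1
  // with %flag zero-extended and stored through the fourth builtin argument.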
8754   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8755   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8756     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8757     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8758     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8759     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8760
8761     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8762                                       Src0->getType());
8763     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8764     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8765   }
8766
8767   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8768     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8769   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8770     llvm::SmallVector<llvm::Value *, 5> Args;
8771     for (unsigned I = 0; I != 5; ++I)
8772       Args.push_back(EmitScalarExpr(E->getArg(I)));
8773     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8774                                     Args[0]->getType());
8775     return Builder.CreateCall(F, Args);
8776   }
8777   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8778   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8779   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8780     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8781   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8782   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8783     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8784   case AMDGPU::BI__builtin_amdgcn_rcp:
8785   case AMDGPU::BI__builtin_amdgcn_rcpf:
8786   case AMDGPU::BI__builtin_amdgcn_rcph:
8787     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8788   case AMDGPU::BI__builtin_amdgcn_rsq:
8789   case AMDGPU::BI__builtin_amdgcn_rsqf:
8790   case AMDGPU::BI__builtin_amdgcn_rsqh:
8791     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8792   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8793   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8794     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8795   case AMDGPU::BI__builtin_amdgcn_sinf:
8796   case AMDGPU::BI__builtin_amdgcn_sinh:
8797     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8798   case AMDGPU::BI__builtin_amdgcn_cosf:
8799   case AMDGPU::BI__builtin_amdgcn_cosh:
8800     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8801   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8802     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8803   case AMDGPU::BI__builtin_amdgcn_ldexp:
8804   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8805   case AMDGPU::BI__builtin_amdgcn_ldexph:
8806     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8807   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8808   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8809   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8810     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8811   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8812   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8813     Value *Src0 = EmitScalarExpr(E->getArg(0));
8814     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8815                                 { Builder.getInt32Ty(), Src0->getType() });
8816     return Builder.CreateCall(F, Src0);
8817   }
8818   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8819     Value *Src0 = EmitScalarExpr(E->getArg(0));
8820     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8821                                 { Builder.getInt16Ty(), Src0->getType() });
8822     return Builder.CreateCall(F, Src0);
8823   }
8824   case AMDGPU::BI__builtin_amdgcn_fract:
8825   case AMDGPU::BI__builtin_amdgcn_fractf:
8826   case AMDGPU::BI__builtin_amdgcn_fracth:
8827     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8828   case AMDGPU::BI__builtin_amdgcn_lerp:
8829     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8830   case AMDGPU::BI__builtin_amdgcn_uicmp:
8831   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8832   case AMDGPU::BI__builtin_amdgcn_sicmp:
8833   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8834     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8835   case AMDGPU::BI__builtin_amdgcn_fcmp:
8836   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8837     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8838   case AMDGPU::BI__builtin_amdgcn_class:
8839   case AMDGPU::BI__builtin_amdgcn_classf:
8840   case AMDGPU::BI__builtin_amdgcn_classh:
8841     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8842   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8843   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8844     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8845   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8846     CallInst *CI = cast<CallInst>(
8847       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8848     CI->setConvergent();
8849     return CI;
8850   }
8851
8852   // amdgcn workitem
8853   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8854     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8855   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8856     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8857   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8858     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8859
8860   // r600 intrinsics
8861   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8862   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8863     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8864   case AMDGPU::BI__builtin_r600_read_tidig_x:
8865     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8866   case AMDGPU::BI__builtin_r600_read_tidig_y:
8867     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8868   case AMDGPU::BI__builtin_r600_read_tidig_z:
8869     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8870   default:
8871     return nullptr;
8872   }
8873 }
8874
8875 /// Handle a SystemZ function in which the final argument is a pointer
8876 /// to an int that receives the post-instruction CC value.  At the LLVM level
8877 /// this is represented as a function that returns a {result, cc} pair.
8878 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8879                                          unsigned IntrinsicID,
8880                                          const CallExpr *E) {
8881   unsigned NumArgs = E->getNumArgs() - 1;
8882   SmallVector<Value *, 8> Args(NumArgs);
8883   for (unsigned I = 0; I < NumArgs; ++I)
8884     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8885   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8886   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8887   Value *Call = CGF.Builder.CreateCall(F, Args);
8888   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8889   CGF.Builder.CreateStore(CC, CCPtr);
8890   return CGF.Builder.CreateExtractValue(Call, 0);
8891 }
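// Illustrative sketch (editorial): for a builtin such as __builtin_s390_vceqbs,
// the underlying intrinsic returns a { result, i32 } pair; the helper above
// forwards every source argument except the last, stores the extracted CC
// (element 1) through the trailing int* argument, and hands element 0 back as
// the builtin's value.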
8892
8893 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8894                                                const CallExpr *E) {
8895   switch (BuiltinID) {
8896   case SystemZ::BI__builtin_tbegin: {
8897     Value *TDB = EmitScalarExpr(E->getArg(0));
8898     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8899     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8900     return Builder.CreateCall(F, {TDB, Control});
8901   }
8902   case SystemZ::BI__builtin_tbegin_nofloat: {
8903     Value *TDB = EmitScalarExpr(E->getArg(0));
8904     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8905     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8906     return Builder.CreateCall(F, {TDB, Control});
8907   }
8908   case SystemZ::BI__builtin_tbeginc: {
8909     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8910     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8911     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8912     return Builder.CreateCall(F, {TDB, Control});
8913   }
8914   case SystemZ::BI__builtin_tabort: {
8915     Value *Data = EmitScalarExpr(E->getArg(0));
8916     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8917     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8918   }
8919   case SystemZ::BI__builtin_non_tx_store: {
8920     Value *Address = EmitScalarExpr(E->getArg(0));
8921     Value *Data = EmitScalarExpr(E->getArg(1));
8922     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8923     return Builder.CreateCall(F, {Data, Address});
8924   }
8925
8926   // Vector builtins.  Note that most vector builtins are mapped automatically
8927   // to target-specific LLVM intrinsics.  The ones handled specially here can
8928   // be represented via standard LLVM IR, which is preferable to enable common
8929   // LLVM optimizations.
8930
8931   case SystemZ::BI__builtin_s390_vpopctb:
8932   case SystemZ::BI__builtin_s390_vpopcth:
8933   case SystemZ::BI__builtin_s390_vpopctf:
8934   case SystemZ::BI__builtin_s390_vpopctg: {
8935     llvm::Type *ResultType = ConvertType(E->getType());
8936     Value *X = EmitScalarExpr(E->getArg(0));
8937     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8938     return Builder.CreateCall(F, X);
8939   }
8940
8941   case SystemZ::BI__builtin_s390_vclzb:
8942   case SystemZ::BI__builtin_s390_vclzh:
8943   case SystemZ::BI__builtin_s390_vclzf:
8944   case SystemZ::BI__builtin_s390_vclzg: {
8945     llvm::Type *ResultType = ConvertType(E->getType());
8946     Value *X = EmitScalarExpr(E->getArg(0));
8947     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8948     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8949     return Builder.CreateCall(F, {X, Undef});
8950   }
8951
8952   case SystemZ::BI__builtin_s390_vctzb:
8953   case SystemZ::BI__builtin_s390_vctzh:
8954   case SystemZ::BI__builtin_s390_vctzf:
8955   case SystemZ::BI__builtin_s390_vctzg: {
8956     llvm::Type *ResultType = ConvertType(E->getType());
8957     Value *X = EmitScalarExpr(E->getArg(0));
8958     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8959     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8960     return Builder.CreateCall(F, {X, Undef});
8961   }
8962
8963   case SystemZ::BI__builtin_s390_vfsqdb: {
8964     llvm::Type *ResultType = ConvertType(E->getType());
8965     Value *X = EmitScalarExpr(E->getArg(0));
8966     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8967     return Builder.CreateCall(F, X);
8968   }
8969   case SystemZ::BI__builtin_s390_vfmadb: {
8970     llvm::Type *ResultType = ConvertType(E->getType());
8971     Value *X = EmitScalarExpr(E->getArg(0));
8972     Value *Y = EmitScalarExpr(E->getArg(1));
8973     Value *Z = EmitScalarExpr(E->getArg(2));
8974     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8975     return Builder.CreateCall(F, {X, Y, Z});
8976   }
8977   case SystemZ::BI__builtin_s390_vfmsdb: {
8978     llvm::Type *ResultType = ConvertType(E->getType());
8979     Value *X = EmitScalarExpr(E->getArg(0));
8980     Value *Y = EmitScalarExpr(E->getArg(1));
8981     Value *Z = EmitScalarExpr(E->getArg(2));
8982     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8983     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8984     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8985   }
8986   case SystemZ::BI__builtin_s390_vflpdb: {
8987     llvm::Type *ResultType = ConvertType(E->getType());
8988     Value *X = EmitScalarExpr(E->getArg(0));
8989     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8990     return Builder.CreateCall(F, X);
8991   }
8992   case SystemZ::BI__builtin_s390_vflndb: {
8993     llvm::Type *ResultType = ConvertType(E->getType());
8994     Value *X = EmitScalarExpr(E->getArg(0));
8995     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8996     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8997     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8998   }
8999   case SystemZ::BI__builtin_s390_vfidb: {
9000     llvm::Type *ResultType = ConvertType(E->getType());
9001     Value *X = EmitScalarExpr(E->getArg(0));
9002     // Constant-fold the M4 and M5 mask arguments.
9003     llvm::APSInt M4, M5;
9004     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9005     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9006     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
9007     (void)IsConstM4; (void)IsConstM5;
9008     // Check whether this instance of vfidb can be represented via an LLVM
9009     // standard intrinsic.  We only support some combinations of M4 and M5.
9010     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9011     switch (M4.getZExtValue()) {
9012     default: break;
9013     case 0:  // IEEE-inexact exception allowed
9014       switch (M5.getZExtValue()) {
9015       default: break;
9016       case 0: ID = Intrinsic::rint; break;
9017       }
9018       break;
9019     case 4:  // IEEE-inexact exception suppressed
9020       switch (M5.getZExtValue()) {
9021       default: break;
9022       case 0: ID = Intrinsic::nearbyint; break;
9023       case 1: ID = Intrinsic::round; break;
9024       case 5: ID = Intrinsic::trunc; break;
9025       case 6: ID = Intrinsic::ceil; break;
9026       case 7: ID = Intrinsic::floor; break;
9027       }
9028       break;
9029     }
9030     if (ID != Intrinsic::not_intrinsic) {
9031       Function *F = CGM.getIntrinsic(ID, ResultType);
9032       return Builder.CreateCall(F, X);
9033     }
9034     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
9035     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9036     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9037     return Builder.CreateCall(F, {X, M4Value, M5Value});
9038   }
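  // Illustrative mapping (editorial summary of the M4/M5 handling above):
  //   (0, 0) -> @llvm.rint
  //   (4, 0) -> @llvm.nearbyint   (4, 1) -> @llvm.round
  //   (4, 5) -> @llvm.trunc       (4, 6) -> @llvm.ceil    (4, 7) -> @llvm.floor
  // Any other combination falls back to the target @llvm.s390.vfidb intrinsic
  // with the constant masks passed through.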
9039
9040   // Vector intrinsics that output the post-instruction CC value.
9041
9042 #define INTRINSIC_WITH_CC(NAME) \
9043     case SystemZ::BI__builtin_##NAME: \
9044       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
9045
9046   INTRINSIC_WITH_CC(s390_vpkshs);
9047   INTRINSIC_WITH_CC(s390_vpksfs);
9048   INTRINSIC_WITH_CC(s390_vpksgs);
9049
9050   INTRINSIC_WITH_CC(s390_vpklshs);
9051   INTRINSIC_WITH_CC(s390_vpklsfs);
9052   INTRINSIC_WITH_CC(s390_vpklsgs);
9053
9054   INTRINSIC_WITH_CC(s390_vceqbs);
9055   INTRINSIC_WITH_CC(s390_vceqhs);
9056   INTRINSIC_WITH_CC(s390_vceqfs);
9057   INTRINSIC_WITH_CC(s390_vceqgs);
9058
9059   INTRINSIC_WITH_CC(s390_vchbs);
9060   INTRINSIC_WITH_CC(s390_vchhs);
9061   INTRINSIC_WITH_CC(s390_vchfs);
9062   INTRINSIC_WITH_CC(s390_vchgs);
9063
9064   INTRINSIC_WITH_CC(s390_vchlbs);
9065   INTRINSIC_WITH_CC(s390_vchlhs);
9066   INTRINSIC_WITH_CC(s390_vchlfs);
9067   INTRINSIC_WITH_CC(s390_vchlgs);
9068
9069   INTRINSIC_WITH_CC(s390_vfaebs);
9070   INTRINSIC_WITH_CC(s390_vfaehs);
9071   INTRINSIC_WITH_CC(s390_vfaefs);
9072
9073   INTRINSIC_WITH_CC(s390_vfaezbs);
9074   INTRINSIC_WITH_CC(s390_vfaezhs);
9075   INTRINSIC_WITH_CC(s390_vfaezfs);
9076
9077   INTRINSIC_WITH_CC(s390_vfeebs);
9078   INTRINSIC_WITH_CC(s390_vfeehs);
9079   INTRINSIC_WITH_CC(s390_vfeefs);
9080
9081   INTRINSIC_WITH_CC(s390_vfeezbs);
9082   INTRINSIC_WITH_CC(s390_vfeezhs);
9083   INTRINSIC_WITH_CC(s390_vfeezfs);
9084
9085   INTRINSIC_WITH_CC(s390_vfenebs);
9086   INTRINSIC_WITH_CC(s390_vfenehs);
9087   INTRINSIC_WITH_CC(s390_vfenefs);
9088
9089   INTRINSIC_WITH_CC(s390_vfenezbs);
9090   INTRINSIC_WITH_CC(s390_vfenezhs);
9091   INTRINSIC_WITH_CC(s390_vfenezfs);
9092
9093   INTRINSIC_WITH_CC(s390_vistrbs);
9094   INTRINSIC_WITH_CC(s390_vistrhs);
9095   INTRINSIC_WITH_CC(s390_vistrfs);
9096
9097   INTRINSIC_WITH_CC(s390_vstrcbs);
9098   INTRINSIC_WITH_CC(s390_vstrchs);
9099   INTRINSIC_WITH_CC(s390_vstrcfs);
9100
9101   INTRINSIC_WITH_CC(s390_vstrczbs);
9102   INTRINSIC_WITH_CC(s390_vstrczhs);
9103   INTRINSIC_WITH_CC(s390_vstrczfs);
9104
9105   INTRINSIC_WITH_CC(s390_vfcedbs);
9106   INTRINSIC_WITH_CC(s390_vfchdbs);
9107   INTRINSIC_WITH_CC(s390_vfchedbs);
9108
9109   INTRINSIC_WITH_CC(s390_vftcidb);
9110
9111 #undef INTRINSIC_WITH_CC
9112
9113   default:
9114     return nullptr;
9115   }
9116 }
9117
9118 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9119                                              const CallExpr *E) {
9120   auto MakeLdg = [&](unsigned IntrinsicID) {
9121     Value *Ptr = EmitScalarExpr(E->getArg(0));
9122     clang::CharUnits Align =
9123         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9124     return Builder.CreateCall(
9125         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9126                                        Ptr->getType()}),
9127         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9128   };
9129   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9130     Value *Ptr = EmitScalarExpr(E->getArg(0));
9131     return Builder.CreateCall(
9132         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9133                                        Ptr->getType()}),
9134         {Ptr, EmitScalarExpr(E->getArg(1))});
9135   };
9136   switch (BuiltinID) {
9137   case NVPTX::BI__nvvm_atom_add_gen_i:
9138   case NVPTX::BI__nvvm_atom_add_gen_l:
9139   case NVPTX::BI__nvvm_atom_add_gen_ll:
9140     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9141
9142   case NVPTX::BI__nvvm_atom_sub_gen_i:
9143   case NVPTX::BI__nvvm_atom_sub_gen_l:
9144   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9145     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9146
9147   case NVPTX::BI__nvvm_atom_and_gen_i:
9148   case NVPTX::BI__nvvm_atom_and_gen_l:
9149   case NVPTX::BI__nvvm_atom_and_gen_ll:
9150     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9151
9152   case NVPTX::BI__nvvm_atom_or_gen_i:
9153   case NVPTX::BI__nvvm_atom_or_gen_l:
9154   case NVPTX::BI__nvvm_atom_or_gen_ll:
9155     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9156
9157   case NVPTX::BI__nvvm_atom_xor_gen_i:
9158   case NVPTX::BI__nvvm_atom_xor_gen_l:
9159   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9160     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9161
9162   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9163   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9164   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9165     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9166
9167   case NVPTX::BI__nvvm_atom_max_gen_i:
9168   case NVPTX::BI__nvvm_atom_max_gen_l:
9169   case NVPTX::BI__nvvm_atom_max_gen_ll:
9170     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9171
9172   case NVPTX::BI__nvvm_atom_max_gen_ui:
9173   case NVPTX::BI__nvvm_atom_max_gen_ul:
9174   case NVPTX::BI__nvvm_atom_max_gen_ull:
9175     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9176
9177   case NVPTX::BI__nvvm_atom_min_gen_i:
9178   case NVPTX::BI__nvvm_atom_min_gen_l:
9179   case NVPTX::BI__nvvm_atom_min_gen_ll:
9180     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9181
9182   case NVPTX::BI__nvvm_atom_min_gen_ui:
9183   case NVPTX::BI__nvvm_atom_min_gen_ul:
9184   case NVPTX::BI__nvvm_atom_min_gen_ull:
9185     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9186
9187   case NVPTX::BI__nvvm_atom_cas_gen_i:
9188   case NVPTX::BI__nvvm_atom_cas_gen_l:
9189   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9190     // __nvvm_atom_cas_gen_* should return the old value rather than the
9191     // success flag.
9192     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9193
9194   case NVPTX::BI__nvvm_atom_add_gen_f: {
9195     Value *Ptr = EmitScalarExpr(E->getArg(0));
9196     Value *Val = EmitScalarExpr(E->getArg(1));
9197     // atomicrmw only deals with integer arguments, so we need to use
9198     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9199     Value *FnALAF32 =
9200         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9201     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9202   }
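  // Illustrative sketch (editorial): the emitted call is roughly
  //   %old = call float @llvm.nvvm.atomic.load.add.f32(float* %ptr, float %val)
  // (the intrinsic is overloaded on the pointer type) and yields the value
  // previously stored at %ptr, since atomicrmw cannot express a
  // floating-point add here.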
9203
9204   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9205     Value *Ptr = EmitScalarExpr(E->getArg(0));
9206     Value *Val = EmitScalarExpr(E->getArg(1));
9207     Value *FnALI32 =
9208         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9209     return Builder.CreateCall(FnALI32, {Ptr, Val});
9210   }
9211
9212   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9213     Value *Ptr = EmitScalarExpr(E->getArg(0));
9214     Value *Val = EmitScalarExpr(E->getArg(1));
9215     Value *FnALD32 =
9216         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9217     return Builder.CreateCall(FnALD32, {Ptr, Val});
9218   }
9219
9220   case NVPTX::BI__nvvm_ldg_c:
9221   case NVPTX::BI__nvvm_ldg_c2:
9222   case NVPTX::BI__nvvm_ldg_c4:
9223   case NVPTX::BI__nvvm_ldg_s:
9224   case NVPTX::BI__nvvm_ldg_s2:
9225   case NVPTX::BI__nvvm_ldg_s4:
9226   case NVPTX::BI__nvvm_ldg_i:
9227   case NVPTX::BI__nvvm_ldg_i2:
9228   case NVPTX::BI__nvvm_ldg_i4:
9229   case NVPTX::BI__nvvm_ldg_l:
9230   case NVPTX::BI__nvvm_ldg_ll:
9231   case NVPTX::BI__nvvm_ldg_ll2:
9232   case NVPTX::BI__nvvm_ldg_uc:
9233   case NVPTX::BI__nvvm_ldg_uc2:
9234   case NVPTX::BI__nvvm_ldg_uc4:
9235   case NVPTX::BI__nvvm_ldg_us:
9236   case NVPTX::BI__nvvm_ldg_us2:
9237   case NVPTX::BI__nvvm_ldg_us4:
9238   case NVPTX::BI__nvvm_ldg_ui:
9239   case NVPTX::BI__nvvm_ldg_ui2:
9240   case NVPTX::BI__nvvm_ldg_ui4:
9241   case NVPTX::BI__nvvm_ldg_ul:
9242   case NVPTX::BI__nvvm_ldg_ull:
9243   case NVPTX::BI__nvvm_ldg_ull2:
9244     // PTX Interoperability section 2.2: "For a vector with an even number of
9245     // elements, its alignment is set to number of elements times the alignment
9246     // of its member: n*alignof(t)."
9247     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9248   case NVPTX::BI__nvvm_ldg_f:
9249   case NVPTX::BI__nvvm_ldg_f2:
9250   case NVPTX::BI__nvvm_ldg_f4:
9251   case NVPTX::BI__nvvm_ldg_d:
9252   case NVPTX::BI__nvvm_ldg_d2:
9253     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
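    // Illustrative sketch (editorial): e.g. __nvvm_ldg_f4 on a pointer to a
    // 4 x float vector lowers to a call of @llvm.nvvm.ldg.global.f (overloaded
    // on the pointee and pointer types) with an i32 alignment operand of 16,
    // i.e. the natural pointee alignment computed by MakeLdg, consistent with
    // the even-sized-vector rule quoted above.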
9254
9255   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9256   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9257   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9258     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9259   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9260   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9261   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9262     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9263   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9264   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9265     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9266   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9267   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9268     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9269   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9270   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9271   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9272     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9273   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9274   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9275   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9276     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9277   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9278   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9279   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9280   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9281   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9282   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9283     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9284   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9285   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9286   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9287   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9288   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9289   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9290     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9291   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9292   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9293   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9294   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9295   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9296   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9297     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9298   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9299   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9300   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9301   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9302   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9303   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9304     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9305   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9306     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9307   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9308     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9309   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9310     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9311   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9312     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9313   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9314   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9315   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9316     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9317   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9318   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9319   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9320     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9321   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9322   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9323   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9324     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9325   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9326   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9327   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9328     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9329   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9330   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9331   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9332     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9333   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9334   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9335   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9336     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9337   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9338   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9339   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9340     Value *Ptr = EmitScalarExpr(E->getArg(0));
9341     return Builder.CreateCall(
9342         CGM.getIntrinsic(
9343             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9344             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9345         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9346   }
9347   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9348   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9349   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9350     Value *Ptr = EmitScalarExpr(E->getArg(0));
9351     return Builder.CreateCall(
9352         CGM.getIntrinsic(
9353             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9354             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9355         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9356   }
9357   default:
9358     return nullptr;
9359   }
9360 }
9361
9362 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9363                                                    const CallExpr *E) {
9364   switch (BuiltinID) {
9365   case WebAssembly::BI__builtin_wasm_current_memory: {
9366     llvm::Type *ResultType = ConvertType(E->getType());
9367     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9368     return Builder.CreateCall(Callee);
9369   }
9370   case WebAssembly::BI__builtin_wasm_grow_memory: {
9371     Value *X = EmitScalarExpr(E->getArg(0));
9372     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9373     return Builder.CreateCall(Callee, X);
9374   }
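  // Illustrative sketch (editorial): __builtin_wasm_current_memory() lowers to
  // a call of @llvm.wasm.current.memory, overloaded on the builtin's result
  // type (i32 on wasm32), and __builtin_wasm_grow_memory(delta) to the
  // matching @llvm.wasm.grow.memory call taking the requested page delta.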
9375
9376   default:
9377     return nullptr;
9378   }
9379 }