1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
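// Worked example (illustrative, not in the original source):
//   clamp(70, 0, 63) == 63, clamp(-5, 0, 63) == 0, clamp(17, 0, 63) == 17.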
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
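  // (The "+ 10" below skips strlen("__builtin_"), so e.g. "__builtin_fabsf"
  // becomes "fabsf".)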
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
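// Illustrative sketch (not part of the original source): a call such as
// __sync_fetch_and_add(p, v) on an 'int *' is lowered through this helper to
// roughly
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
// with the old value converted back to the source type via EmitFromInt.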
120
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
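// Illustrative sketch (not part of the original source): __sync_add_and_fetch(p, v)
// goes through this helper and yields roughly
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
//   %new = add i32 %old, %v            ; the value returned to the caller
// The Invert flag additionally xors the result with -1, which the nand-style
// __sync builtins rely on.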
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return the success flag of the
195 ///                   cmpxchg or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
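// Illustrative sketch (not part of the original source): __sync_val_compare_and_swap
// and __sync_bool_compare_and_swap are lowered through this helper to roughly
//   %pair = cmpxchg i32* %p, i32 %expected, i32 %new seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0   ; ReturnBool == false
//   %ok   = extractvalue { i32, i1 } %pair, 1   ; ReturnBool == true (zext'ed to int)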
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating the value in order to extract the higher-order
304     // double, from which we will extract the sign.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
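// Illustrative sketch (not part of the original source): for a 32-bit signed add,
// the helper above emits roughly
//   %res   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum   = extractvalue { i32, i1 } %res, 0   ; returned
//   %carry = extractvalue { i32, i1 } %res, 1   ; written to Carry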
341
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
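// Illustrative sketch (not part of the original source): the !range metadata
// attached above tells the optimizer the call's result lies in [low, high), e.g.
//   %id = call i32 @some.intrinsic(), !range !0
//   !0 = !{i32 0, i32 1024}
// where @some.intrinsic stands in for whatever intrinsic the caller requests.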
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
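// Worked example (illustrative, not in the original source): encompassing
// {signed, width 32} and {unsigned, width 32} requires a signed type of width 33,
// while {unsigned, 16} and {unsigned, 32} need only an unsigned 32-bit type.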
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct.
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
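// Note (added commentary): with the GCC encoding, bit 1 of Type selects a minimum
// ("low estimate") result rather than a maximum, so the unknown-size fallback above
// is 0 for Type 2/3 and -1 (i.e. SIZE_MAX) for Type 0/1.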
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474
475   // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477   // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
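// Illustrative sketch (not part of the original source): the call built above
// looks roughly like
//   %size = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 true)
// where the two i1 operands are the "min" and "null is unknown size" flags.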
481
482 // Many of the MSVC builtins are available on both x64 and ARM; to avoid
483 // repeating code, we handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485   _BitScanForward,
486   _BitScanReverse,
487   _InterlockedAnd,
488   _InterlockedDecrement,
489   _InterlockedExchange,
490   _InterlockedExchangeAdd,
491   _InterlockedExchangeSub,
492   _InterlockedIncrement,
493   _InterlockedOr,
494   _InterlockedXor,
495   _interlockedbittestandset,
496   __fastfail,
497 };
498
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
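  // Worked example (illustrative, not in the original source):
  // _BitScanForward(&idx, 0x18) stores 3 in idx and returns 1;
  // _BitScanReverse(&idx, 0x18) stores 4 and returns 1; both return 0 (leaving
  // the index unwritten) when the input is 0.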
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
578
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
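  // Note (added commentary): atomicrmw returns the *old* value, while the MSVC
  // _InterlockedIncrement/_InterlockedDecrement contracts return the *new*
  // value, hence the extra add/sub of 1 after the atomic operation above.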
597
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676
677     llvm::Type *Type = Int8PtrTy;
678
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
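  // Worked example (illustrative, not in the original source): ffs(8) == 4,
  // since cttz(8) == 3 and the result is 1-based; ffs(0) == 0 via the select
  // on "iszero".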
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
881   case Builtin::BI_rotr8:
882   case Builtin::BI_rotr16:
883   case Builtin::BI_rotr:
884   case Builtin::BI_lrotr:
885   case Builtin::BI_rotr64: {
886     Value *Val = EmitScalarExpr(E->getArg(0));
887     Value *Shift = EmitScalarExpr(E->getArg(1));
888
889     llvm::Type *ArgType = Val->getType();
890     Shift = Builder.CreateIntCast(Shift, ArgType, false);
891     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894
895     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896     Shift = Builder.CreateAnd(Shift, Mask);
897     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898
899     Value *RightShifted = Builder.CreateLShr(Val, Shift);
900     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902
903     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905     return RValue::get(Result);
906   }
907   case Builtin::BI_rotl8:
908   case Builtin::BI_rotl16:
909   case Builtin::BI_rotl:
910   case Builtin::BI_lrotl:
911   case Builtin::BI_rotl64: {
912     Value *Val = EmitScalarExpr(E->getArg(0));
913     Value *Shift = EmitScalarExpr(E->getArg(1));
914
915     llvm::Type *ArgType = Val->getType();
916     Shift = Builder.CreateIntCast(Shift, ArgType, false);
917     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920
921     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922     Shift = Builder.CreateAnd(Shift, Mask);
923     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924
925     Value *LeftShifted = Builder.CreateShl(Val, Shift);
926     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928
929     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931     return RValue::get(Result);
932   }
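  // Note (added commentary): both rotate cases above compute the usual
  //   rot(v, s) = (v << s) | (v >> (width - s))   (mirrored for the _rotr family)
  // after masking s to the type width; the final select returns the original
  // value when the masked shift is zero, avoiding a shift by the full bit width.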
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
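  // Illustrative sketch (not part of the original source): at -O1 and above,
  // __builtin_expect(x, 1) on a 'long' becomes roughly
  //   %expval = call i64 @llvm.expect.i64(i64 %x, i64 1)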
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
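  // Illustrative sketch (assumed typical expansion, not part of the original
  // source): for __builtin_assume_aligned(p, 64), EmitAlignmentAssumption
  // usually produces roughly
  //   %ptrint    = ptrtoint i8* %p to i64
  //   %maskedptr = and i64 %ptrint, 63
  //   %maskcond  = icmp eq i64 %maskedptr, 0
  //   call void @llvm.assume(i1 %maskcond)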
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should be of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034
1035     return RValue::get(nullptr);
1036   }
1037
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
1109
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
1229
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251
1252   case Builtin::BI__builtin_char_memchr:
1253     BuiltinID = Builtin::BI__builtin_memchr;
1254     break;
1255
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
1270
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
1333   case Builtin::BI__builtin_dwarf_cfa: {
1334     // The offset in bytes from the first argument to the CFA.
1335     //
1336     // Why on earth is this in the frontend?  Is there any reason at
1337     // all that the backend can't reasonably determine this while
1338     // lowering llvm.eh.dwarf.cfa()?
1339     //
1340     // TODO: If there's a satisfactory reason, add a target hook for
1341     // this instead of hard-coding 0, which is correct for most targets.
1342     int32_t Offset = 0;
1343
1344     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345     return RValue::get(Builder.CreateCall(F,
1346                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1347   }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1422
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430
1431     // Otherwise, ask the codegen data what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
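  // For illustration, on a 32-bit target whose unwinder expects sign-extended
  // addresses this becomes roughly
  //   %t = ptrtoint i8* %p to i32
  //   %r = sext i32 %t to i64
  // while targets that zero-extend use zext instead; 64-bit targets return
  // the ptrtoint result unchanged.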
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
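  // For illustration, the buffer ends up laid out roughly as
  //   %fp = call i8* @llvm.frameaddress(i32 0)
  //   store i8* %fp, i8** %buf            ; slot 0: frame pointer
  //   %sp = call i8* @llvm.stacksave()
  //   store i8* %sp, i8** %buf.slot2      ; slot 2: saved stack pointer
  //   %r  = call i32 @llvm.eh.sjlj.setjmp(i8* %buf.i8)
  // with slot 1 typically filled in with the resume address when the
  // intrinsic is lowered.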
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
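  // For illustration: with int x, a call such as
  //   __sync_fetch_and_add(&x, 5);
  // (which sema rewrites to the _4 variant) lowers roughly to
  //   %old = atomicrmw add i32* %x, i32 5 seq_cst
  // returning the value held before the operation; the *_and_fetch forms
  // below additionally re-apply the operation to %old before returning.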
1528
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
1627
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
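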
1639
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
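  // For illustration, a call like
  //   __c11_atomic_is_lock_free(sizeof(_Atomic int));
  // becomes roughly
  //   call i1 @__atomic_is_lock_free(i64 4, i8* null)
  // with a real object pointer passed instead of null for the __atomic_
  // spelling.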
1666
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the argument type to determine whether this is a volatile
1669     // operation. The parameter type is always volatile.
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1698
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
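  // For illustration: when the ordering argument is a constant such as
  // __ATOMIC_ACQUIRE, this emits roughly
  //   %old = atomicrmw xchg i8* %p, i8 1 acquire
  //   %set = icmp ne i8 %old, 0
  // whereas a runtime ordering goes through the switch over the five basic
  // blocks built above.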
1751
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
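  // For illustration, __atomic_clear(&flag, __ATOMIC_RELEASE) on a
  // non-volatile flag emits roughly
  //   store atomic i8 0, i8* %flag release, align 1
  // and, as above, a non-constant ordering selects the store through a
  // switch.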
1808
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SynchronizationScope Scope;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       Scope = llvm::SingleThread;
1817     else
1818       Scope = llvm::CrossThread;
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1838                             Scope);
1839         break;
1840       }
1841       return RValue::get(nullptr);
1842     }
1843
1844     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1845     AcquireBB = createBasicBlock("acquire", CurFn);
1846     ReleaseBB = createBasicBlock("release", CurFn);
1847     AcqRelBB = createBasicBlock("acqrel", CurFn);
1848     SeqCstBB = createBasicBlock("seqcst", CurFn);
1849     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1850
1851     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1852     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1853
1854     Builder.SetInsertPoint(AcquireBB);
1855     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1856     Builder.CreateBr(ContBB);
1857     SI->addCase(Builder.getInt32(1), AcquireBB);
1858     SI->addCase(Builder.getInt32(2), AcquireBB);
1859
1860     Builder.SetInsertPoint(ReleaseBB);
1861     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1862     Builder.CreateBr(ContBB);
1863     SI->addCase(Builder.getInt32(3), ReleaseBB);
1864
1865     Builder.SetInsertPoint(AcqRelBB);
1866     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1867     Builder.CreateBr(ContBB);
1868     SI->addCase(Builder.getInt32(4), AcqRelBB);
1869
1870     Builder.SetInsertPoint(SeqCstBB);
1871     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1872     Builder.CreateBr(ContBB);
1873     SI->addCase(Builder.getInt32(5), SeqCstBB);
1874
1875     Builder.SetInsertPoint(ContBB);
1876     return RValue::get(nullptr);
1877   }
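  // For illustration, __atomic_thread_fence(__ATOMIC_ACQUIRE) becomes
  //   fence acquire
  // while the signal-fence variants use the single-thread scope, e.g.
  //   fence singlethread seq_cst
  // constraining only compiler reordering rather than emitting a hardware
  // barrier.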
1878
1879     // Library functions with special handling.
1880   case Builtin::BIsqrt:
1881   case Builtin::BIsqrtf:
1882   case Builtin::BIsqrtl: {
1883     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1884     // in finite- or unsafe-math mode (the intrinsic has different semantics
1885     // for handling negative numbers compared to the library function, so
1886     // -fmath-errno=0 is not enough).
1887     if (!FD->hasAttr<ConstAttr>())
1888       break;
1889     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1890           CGM.getCodeGenOpts().NoNaNsFPMath))
1891       break;
1892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1893     llvm::Type *ArgType = Arg0->getType();
1894     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1895     return RValue::get(Builder.CreateCall(F, Arg0));
1896   }
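  // For illustration, when sqrtf is declared const and the TU is compiled
  // with unsafe or no-NaNs FP math (e.g. -ffast-math), a call sqrtf(x)
  // lowers to
  //   %r = call float @llvm.sqrt.f32(float %x)
  // otherwise the break above keeps the ordinary libm call so errno and
  // negative-argument behavior are preserved.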
1897
1898   case Builtin::BI__builtin_pow:
1899   case Builtin::BI__builtin_powf:
1900   case Builtin::BI__builtin_powl:
1901   case Builtin::BIpow:
1902   case Builtin::BIpowf:
1903   case Builtin::BIpowl: {
1904     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1905     if (!FD->hasAttr<ConstAttr>())
1906       break;
1907     Value *Base = EmitScalarExpr(E->getArg(0));
1908     Value *Exponent = EmitScalarExpr(E->getArg(1));
1909     llvm::Type *ArgType = Base->getType();
1910     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1911     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1912   }
1913
1914   case Builtin::BIfma:
1915   case Builtin::BIfmaf:
1916   case Builtin::BIfmal:
1917   case Builtin::BI__builtin_fma:
1918   case Builtin::BI__builtin_fmaf:
1919   case Builtin::BI__builtin_fmal: {
1920     // Rewrite fma to intrinsic.
1921     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1922     llvm::Type *ArgType = FirstArg->getType();
1923     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1924     return RValue::get(
1925         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1926                                EmitScalarExpr(E->getArg(2))}));
1927   }
1928
1929   case Builtin::BI__builtin_signbit:
1930   case Builtin::BI__builtin_signbitf:
1931   case Builtin::BI__builtin_signbitl: {
1932     return RValue::get(
1933         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1934                            ConvertType(E->getType())));
1935   }
1936   case Builtin::BI__builtin_annotation: {
1937     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1938     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1939                                       AnnVal->getType());
1940
1941     // Get the annotation string, go through casts. Sema requires this to be a
1942     // non-wide string literal, potentially cast, so the cast<> is safe.
1943     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1944     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1945     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1946   }
1947   case Builtin::BI__builtin_addcb:
1948   case Builtin::BI__builtin_addcs:
1949   case Builtin::BI__builtin_addc:
1950   case Builtin::BI__builtin_addcl:
1951   case Builtin::BI__builtin_addcll:
1952   case Builtin::BI__builtin_subcb:
1953   case Builtin::BI__builtin_subcs:
1954   case Builtin::BI__builtin_subc:
1955   case Builtin::BI__builtin_subcl:
1956   case Builtin::BI__builtin_subcll: {
1957
1958     // We translate all of these builtins from expressions of the form:
1959     //   int x = ..., y = ..., carryin = ..., carryout, result;
1960     //   result = __builtin_addc(x, y, carryin, &carryout);
1961     //
1962     // to LLVM IR of the form:
1963     //
1964     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1965     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1966     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1967     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1968     //                                                       i32 %carryin)
1969     //   %result = extractvalue {i32, i1} %tmp2, 0
1970     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1971     //   %tmp3 = or i1 %carry1, %carry2
1972     //   %tmp4 = zext i1 %tmp3 to i32
1973     //   store i32 %tmp4, i32* %carryout
1974
1975     // Scalarize our inputs.
1976     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1977     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1978     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1979     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1980
1981     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1982     llvm::Intrinsic::ID IntrinsicId;
1983     switch (BuiltinID) {
1984     default: llvm_unreachable("Unknown multiprecision builtin id.");
1985     case Builtin::BI__builtin_addcb:
1986     case Builtin::BI__builtin_addcs:
1987     case Builtin::BI__builtin_addc:
1988     case Builtin::BI__builtin_addcl:
1989     case Builtin::BI__builtin_addcll:
1990       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_subcb:
1993     case Builtin::BI__builtin_subcs:
1994     case Builtin::BI__builtin_subc:
1995     case Builtin::BI__builtin_subcl:
1996     case Builtin::BI__builtin_subcll:
1997       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1998       break;
1999     }
2000
2001     // Construct our resulting LLVM IR expression.
2002     llvm::Value *Carry1;
2003     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2004                                               X, Y, Carry1);
2005     llvm::Value *Carry2;
2006     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2007                                               Sum1, Carryin, Carry2);
2008     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2009                                                X->getType());
2010     Builder.CreateStore(CarryOut, CarryOutPtr);
2011     return RValue::get(Sum2);
2012   }
2013
2014   case Builtin::BI__builtin_add_overflow:
2015   case Builtin::BI__builtin_sub_overflow:
2016   case Builtin::BI__builtin_mul_overflow: {
2017     const clang::Expr *LeftArg = E->getArg(0);
2018     const clang::Expr *RightArg = E->getArg(1);
2019     const clang::Expr *ResultArg = E->getArg(2);
2020
2021     clang::QualType ResultQTy =
2022         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2023
2024     WidthAndSignedness LeftInfo =
2025         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2026     WidthAndSignedness RightInfo =
2027         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2028     WidthAndSignedness ResultInfo =
2029         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2030     WidthAndSignedness EncompassingInfo =
2031         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2032
2033     llvm::Type *EncompassingLLVMTy =
2034         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2035
2036     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2037
2038     llvm::Intrinsic::ID IntrinsicId;
2039     switch (BuiltinID) {
2040     default:
2041       llvm_unreachable("Unknown overflow builtin id.");
2042     case Builtin::BI__builtin_add_overflow:
2043       IntrinsicId = EncompassingInfo.Signed
2044                         ? llvm::Intrinsic::sadd_with_overflow
2045                         : llvm::Intrinsic::uadd_with_overflow;
2046       break;
2047     case Builtin::BI__builtin_sub_overflow:
2048       IntrinsicId = EncompassingInfo.Signed
2049                         ? llvm::Intrinsic::ssub_with_overflow
2050                         : llvm::Intrinsic::usub_with_overflow;
2051       break;
2052     case Builtin::BI__builtin_mul_overflow:
2053       IntrinsicId = EncompassingInfo.Signed
2054                         ? llvm::Intrinsic::smul_with_overflow
2055                         : llvm::Intrinsic::umul_with_overflow;
2056       break;
2057     }
2058
2059     llvm::Value *Left = EmitScalarExpr(LeftArg);
2060     llvm::Value *Right = EmitScalarExpr(RightArg);
2061     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2062
2063     // Extend each operand to the encompassing type.
2064     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2065     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2066
2067     // Perform the operation on the extended values.
2068     llvm::Value *Overflow, *Result;
2069     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2070
2071     if (EncompassingInfo.Width > ResultInfo.Width) {
2072       // The encompassing type is wider than the result type, so we need to
2073       // truncate it.
2074       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2075
2076       // To see if the truncation caused an overflow, we will extend
2077       // the result and then compare it to the original result.
2078       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2079           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2080       llvm::Value *TruncationOverflow =
2081           Builder.CreateICmpNE(Result, ResultTruncExt);
2082
2083       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2084       Result = ResultTrunc;
2085     }
2086
2087     // Finally, store the result using the pointer.
2088     bool isVolatile =
2089       ResultArg->getType()->getPointeeType().isVolatileQualified();
2090     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2091
2092     return RValue::get(Overflow);
2093   }
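  // For illustration: with
  //   short r; bool o = __builtin_add_overflow((int)a, (int)b, &r);
  // the encompassing type is a signed 32-bit integer, so this emits roughly
  //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  // then truncates the sum to i16, sign-extends it back, and ORs the
  // mismatch into the overflow flag before storing the truncated result.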
2094
2095   case Builtin::BI__builtin_uadd_overflow:
2096   case Builtin::BI__builtin_uaddl_overflow:
2097   case Builtin::BI__builtin_uaddll_overflow:
2098   case Builtin::BI__builtin_usub_overflow:
2099   case Builtin::BI__builtin_usubl_overflow:
2100   case Builtin::BI__builtin_usubll_overflow:
2101   case Builtin::BI__builtin_umul_overflow:
2102   case Builtin::BI__builtin_umull_overflow:
2103   case Builtin::BI__builtin_umulll_overflow:
2104   case Builtin::BI__builtin_sadd_overflow:
2105   case Builtin::BI__builtin_saddl_overflow:
2106   case Builtin::BI__builtin_saddll_overflow:
2107   case Builtin::BI__builtin_ssub_overflow:
2108   case Builtin::BI__builtin_ssubl_overflow:
2109   case Builtin::BI__builtin_ssubll_overflow:
2110   case Builtin::BI__builtin_smul_overflow:
2111   case Builtin::BI__builtin_smull_overflow:
2112   case Builtin::BI__builtin_smulll_overflow: {
2113
2114     // We translate all of these builtins directly to the relevant llvm IR node.
2115
2116     // Scalarize our inputs.
2117     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2118     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2119     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2120
2121     // Decide which of the overflow intrinsics we are lowering to:
2122     llvm::Intrinsic::ID IntrinsicId;
2123     switch (BuiltinID) {
2124     default: llvm_unreachable("Unknown overflow builtin id.");
2125     case Builtin::BI__builtin_uadd_overflow:
2126     case Builtin::BI__builtin_uaddl_overflow:
2127     case Builtin::BI__builtin_uaddll_overflow:
2128       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2129       break;
2130     case Builtin::BI__builtin_usub_overflow:
2131     case Builtin::BI__builtin_usubl_overflow:
2132     case Builtin::BI__builtin_usubll_overflow:
2133       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2134       break;
2135     case Builtin::BI__builtin_umul_overflow:
2136     case Builtin::BI__builtin_umull_overflow:
2137     case Builtin::BI__builtin_umulll_overflow:
2138       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2139       break;
2140     case Builtin::BI__builtin_sadd_overflow:
2141     case Builtin::BI__builtin_saddl_overflow:
2142     case Builtin::BI__builtin_saddll_overflow:
2143       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2144       break;
2145     case Builtin::BI__builtin_ssub_overflow:
2146     case Builtin::BI__builtin_ssubl_overflow:
2147     case Builtin::BI__builtin_ssubll_overflow:
2148       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_smul_overflow:
2151     case Builtin::BI__builtin_smull_overflow:
2152     case Builtin::BI__builtin_smulll_overflow:
2153       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2154       break;
2155     }
2156
2157
2158     llvm::Value *Carry;
2159     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2160     Builder.CreateStore(Sum, SumOutPtr);
2161
2162     return RValue::get(Carry);
2163   }
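  // For illustration, __builtin_uadd_overflow(x, y, &sum) emits roughly
  //   %t   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  //   %sum = extractvalue { i32, i1 } %t, 0   ; stored through the pointer
  //   %c   = extractvalue { i32, i1 } %t, 1   ; returned as the carry flag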
2164   case Builtin::BI__builtin_addressof:
2165     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2166   case Builtin::BI__builtin_operator_new:
2167     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2168                                     E->getArg(0), false);
2169   case Builtin::BI__builtin_operator_delete:
2170     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2171                                     E->getArg(0), true);
2172   case Builtin::BI__noop:
2173     // __noop always evaluates to an integer literal zero.
2174     return RValue::get(ConstantInt::get(IntTy, 0));
2175   case Builtin::BI__builtin_call_with_static_chain: {
2176     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2177     const Expr *Chain = E->getArg(1);
2178     return EmitCall(Call->getCallee()->getType(),
2179                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2180                     EmitScalarExpr(Chain));
2181   }
2182   case Builtin::BI_InterlockedExchange8:
2183   case Builtin::BI_InterlockedExchange16:
2184   case Builtin::BI_InterlockedExchange:
2185   case Builtin::BI_InterlockedExchangePointer:
2186     return RValue::get(
2187         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2188   case Builtin::BI_InterlockedCompareExchangePointer: {
2189     llvm::Type *RTy;
2190     llvm::IntegerType *IntType =
2191       IntegerType::get(getLLVMContext(),
2192                        getContext().getTypeSize(E->getType()));
2193     llvm::Type *IntPtrType = IntType->getPointerTo();
2194
2195     llvm::Value *Destination =
2196       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2197
2198     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2199     RTy = Exchange->getType();
2200     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2201
2202     llvm::Value *Comparand =
2203       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2204
2205     auto Result =
2206         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2207                                     AtomicOrdering::SequentiallyConsistent,
2208                                     AtomicOrdering::SequentiallyConsistent);
2209     Result->setVolatile(true);
2210
2211     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2212                                                                          0),
2213                                               RTy));
2214   }
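  // For illustration, the pointer-sized exchange above amounts to
  //   %dst = bitcast i8** %Destination to i64*        ; 64-bit target
  //   %cmp = ptrtoint i8* %Comparand to i64
  //   %new = ptrtoint i8* %Exchange to i64
  //   %res = cmpxchg volatile i64* %dst, i64 %cmp, i64 %new seq_cst seq_cst
  // with the original value extracted and converted back with inttoptr.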
2215   case Builtin::BI_InterlockedCompareExchange8:
2216   case Builtin::BI_InterlockedCompareExchange16:
2217   case Builtin::BI_InterlockedCompareExchange:
2218   case Builtin::BI_InterlockedCompareExchange64: {
2219     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2220         EmitScalarExpr(E->getArg(0)),
2221         EmitScalarExpr(E->getArg(2)),
2222         EmitScalarExpr(E->getArg(1)),
2223         AtomicOrdering::SequentiallyConsistent,
2224         AtomicOrdering::SequentiallyConsistent);
2225     CXI->setVolatile(true);
2226     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2227   }
2228   case Builtin::BI_InterlockedIncrement16:
2229   case Builtin::BI_InterlockedIncrement:
2230     return RValue::get(
2231         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2232   case Builtin::BI_InterlockedDecrement16:
2233   case Builtin::BI_InterlockedDecrement:
2234     return RValue::get(
2235         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2236   case Builtin::BI_InterlockedAnd8:
2237   case Builtin::BI_InterlockedAnd16:
2238   case Builtin::BI_InterlockedAnd:
2239     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2240   case Builtin::BI_InterlockedExchangeAdd8:
2241   case Builtin::BI_InterlockedExchangeAdd16:
2242   case Builtin::BI_InterlockedExchangeAdd:
2243     return RValue::get(
2244         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2245   case Builtin::BI_InterlockedExchangeSub8:
2246   case Builtin::BI_InterlockedExchangeSub16:
2247   case Builtin::BI_InterlockedExchangeSub:
2248     return RValue::get(
2249         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2250   case Builtin::BI_InterlockedOr8:
2251   case Builtin::BI_InterlockedOr16:
2252   case Builtin::BI_InterlockedOr:
2253     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2254   case Builtin::BI_InterlockedXor8:
2255   case Builtin::BI_InterlockedXor16:
2256   case Builtin::BI_InterlockedXor:
2257     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2258   case Builtin::BI_interlockedbittestandset:
2259     return RValue::get(
2260         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2261
2262   case Builtin::BI__exception_code:
2263   case Builtin::BI_exception_code:
2264     return RValue::get(EmitSEHExceptionCode());
2265   case Builtin::BI__exception_info:
2266   case Builtin::BI_exception_info:
2267     return RValue::get(EmitSEHExceptionInfo());
2268   case Builtin::BI__abnormal_termination:
2269   case Builtin::BI_abnormal_termination:
2270     return RValue::get(EmitSEHAbnormalTermination());
2271   case Builtin::BI_setjmpex: {
2272     if (getTarget().getTriple().isOSMSVCRT()) {
2273       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2274       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2275           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2276           llvm::Attribute::ReturnsTwice);
2277       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2278           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2279           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2280       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2281           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2282       llvm::Value *FrameAddr =
2283           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2284                              ConstantInt::get(Int32Ty, 0));
2285       llvm::Value *Args[] = {Buf, FrameAddr};
2286       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2287       CS.setAttributes(ReturnsTwiceAttr);
2288       return RValue::get(CS.getInstruction());
2289     }
2290     break;
2291   }
2292   case Builtin::BI_setjmp: {
2293     if (getTarget().getTriple().isOSMSVCRT()) {
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2298           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2299       llvm::CallSite CS;
2300       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2301         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2302         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2303             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2304             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2305         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2306         llvm::Value *Args[] = {Buf, Count};
2307         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2308       } else {
2309         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2310         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2311             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2312             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2313         llvm::Value *FrameAddr =
2314             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2315                                ConstantInt::get(Int32Ty, 0));
2316         llvm::Value *Args[] = {Buf, FrameAddr};
2317         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2318       }
2319       CS.setAttributes(ReturnsTwiceAttr);
2320       return RValue::get(CS.getInstruction());
2321     }
2322     break;
2323   }
2324
2325   case Builtin::BI__GetExceptionInfo: {
2326     if (llvm::GlobalVariable *GV =
2327             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2328       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2329     break;
2330   }
2331
2332   case Builtin::BI__fastfail:
2333     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2334
2335   case Builtin::BI__builtin_coro_size: {
2336     auto &Context = getContext();
2337     auto SizeTy = Context.getSizeType();
2338     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2339     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2340     return RValue::get(Builder.CreateCall(F));
2341   }
2342
2343   case Builtin::BI__builtin_coro_id:
2344     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2345   case Builtin::BI__builtin_coro_promise:
2346     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2347   case Builtin::BI__builtin_coro_resume:
2348     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2349   case Builtin::BI__builtin_coro_frame:
2350     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2351   case Builtin::BI__builtin_coro_free:
2352     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2353   case Builtin::BI__builtin_coro_destroy:
2354     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2355   case Builtin::BI__builtin_coro_done:
2356     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2357   case Builtin::BI__builtin_coro_alloc:
2358     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2359   case Builtin::BI__builtin_coro_begin:
2360     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2361   case Builtin::BI__builtin_coro_end:
2362     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2363   case Builtin::BI__builtin_coro_suspend:
2364     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2365   case Builtin::BI__builtin_coro_param:
2366     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2367
2368   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2369   case Builtin::BIread_pipe:
2370   case Builtin::BIwrite_pipe: {
2371     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2372           *Arg1 = EmitScalarExpr(E->getArg(1));
2373     CGOpenCLRuntime OpenCLRT(CGM);
2374     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2375     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2376
2377     // Type of the generic packet parameter.
2378     unsigned GenericAS =
2379         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2380     llvm::Type *I8PTy = llvm::PointerType::get(
2381         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2382
2383     // Determine which overloaded version of the call to generate.
2384     if (2U == E->getNumArgs()) {
2385       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2386                                                              : "__write_pipe_2";
2387       // Create a generic function type so the call works with any builtin or
2388       // user-defined type.
2389       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2390       llvm::FunctionType *FTy = llvm::FunctionType::get(
2391           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2393       return RValue::get(
2394           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                              {Arg0, BCast, PacketSize, PacketAlign}));
2396     } else {
2397       assert(4 == E->getNumArgs() &&
2398              "Illegal number of parameters to pipe function");
2399       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2400                                                              : "__write_pipe_4";
2401
2402       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2403                               Int32Ty, Int32Ty};
2404       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2405             *Arg3 = EmitScalarExpr(E->getArg(3));
2406       llvm::FunctionType *FTy = llvm::FunctionType::get(
2407           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2408       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2409       // We know the third argument is an integer type, but we may need to cast
2410       // it to i32.
2411       if (Arg2->getType() != Int32Ty)
2412         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2413       return RValue::get(Builder.CreateCall(
2414           CGM.CreateRuntimeFunction(FTy, Name),
2415           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2416     }
2417   }
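  // For illustration, the two-argument form
  //   read_pipe(p, &val);
  // becomes roughly
  //   %r = call i32 @__read_pipe_2(%pipe %p, i8 addrspace(4)* %val.cast,
  //                                i32 <packet size>, i32 <packet align>)
  // while the four-argument (reservation) form calls __read_pipe_4 with the
  // reservation id and packet index inserted before the packet pointer.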
2418   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2419   // functions
2420   case Builtin::BIreserve_read_pipe:
2421   case Builtin::BIreserve_write_pipe:
2422   case Builtin::BIwork_group_reserve_read_pipe:
2423   case Builtin::BIwork_group_reserve_write_pipe:
2424   case Builtin::BIsub_group_reserve_read_pipe:
2425   case Builtin::BIsub_group_reserve_write_pipe: {
2426     // Choose the name of the runtime library function to call.
2427     const char *Name;
2428     if (BuiltinID == Builtin::BIreserve_read_pipe)
2429       Name = "__reserve_read_pipe";
2430     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2431       Name = "__reserve_write_pipe";
2432     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2433       Name = "__work_group_reserve_read_pipe";
2434     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2435       Name = "__work_group_reserve_write_pipe";
2436     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2437       Name = "__sub_group_reserve_read_pipe";
2438     else
2439       Name = "__sub_group_reserve_write_pipe";
2440
2441     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2442           *Arg1 = EmitScalarExpr(E->getArg(1));
2443     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2444     CGOpenCLRuntime OpenCLRT(CGM);
2445     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2446     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2447
2448     // Building the generic function prototype.
2449     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2450     llvm::FunctionType *FTy = llvm::FunctionType::get(
2451         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2452     // We know the second argument is an integer type, but we may need to cast
2453     // it to i32.
2454     if (Arg1->getType() != Int32Ty)
2455       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2456     return RValue::get(
2457         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2458                            {Arg0, Arg1, PacketSize, PacketAlign}));
2459   }
2460   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2461   // functions
2462   case Builtin::BIcommit_read_pipe:
2463   case Builtin::BIcommit_write_pipe:
2464   case Builtin::BIwork_group_commit_read_pipe:
2465   case Builtin::BIwork_group_commit_write_pipe:
2466   case Builtin::BIsub_group_commit_read_pipe:
2467   case Builtin::BIsub_group_commit_write_pipe: {
2468     const char *Name;
2469     if (BuiltinID == Builtin::BIcommit_read_pipe)
2470       Name = "__commit_read_pipe";
2471     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2472       Name = "__commit_write_pipe";
2473     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2474       Name = "__work_group_commit_read_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2476       Name = "__work_group_commit_write_pipe";
2477     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2478       Name = "__sub_group_commit_read_pipe";
2479     else
2480       Name = "__sub_group_commit_write_pipe";
2481
2482     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2483           *Arg1 = EmitScalarExpr(E->getArg(1));
2484     CGOpenCLRuntime OpenCLRT(CGM);
2485     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2486     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2487
2488     // Building the generic function prototype.
2489     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2490     llvm::FunctionType *FTy =
2491         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2492                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2493
2494     return RValue::get(
2495         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2496                            {Arg0, Arg1, PacketSize, PacketAlign}));
2497   }
2498   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2499   case Builtin::BIget_pipe_num_packets:
2500   case Builtin::BIget_pipe_max_packets: {
2501     const char *Name;
2502     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2503       Name = "__get_pipe_num_packets";
2504     else
2505       Name = "__get_pipe_max_packets";
2506
2507     // Building the generic function prototype.
2508     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2509     CGOpenCLRuntime OpenCLRT(CGM);
2510     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2511     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2512     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2513     llvm::FunctionType *FTy = llvm::FunctionType::get(
2514         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2515
2516     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2517                                           {Arg0, PacketSize, PacketAlign}));
2518   }
2519
2520   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2521   case Builtin::BIto_global:
2522   case Builtin::BIto_local:
2523   case Builtin::BIto_private: {
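    // These lower to calls to __to_global, __to_local and __to_private, which
    // take an i8* in the generic address space and return an i8* in the
    // destination address space; the argument and result are cast to and from
    // the builtin's pointer types around the call.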
2524     auto Arg0 = EmitScalarExpr(E->getArg(0));
2525     auto NewArgT = llvm::PointerType::get(Int8Ty,
2526       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2527     auto NewRetT = llvm::PointerType::get(Int8Ty,
2528       CGM.getContext().getTargetAddressSpace(
2529         E->getType()->getPointeeType().getAddressSpace()));
2530     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2531     llvm::Value *NewArg;
2532     if (Arg0->getType()->getPointerAddressSpace() !=
2533         NewArgT->getPointerAddressSpace())
2534       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2535     else
2536       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2537     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2538     auto NewCall =
2539         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2540     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2541       ConvertType(E->getType())));
2542   }
2543
2544   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2545   // It contains four different overload formats specified in Table 6.13.17.1.
2546   case Builtin::BIenqueue_kernel: {
2547     StringRef Name; // Generated function call name
2548     unsigned NumArgs = E->getNumArgs();
2549
2550     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2551     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2552         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2553
2554     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2555     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2556     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2557     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2558     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2559
2560     if (NumArgs == 4) {
2561       // The most basic form of the call with parameters:
2562       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2563       Name = "__enqueue_kernel_basic";
2564       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2565       llvm::FunctionType *FTy = llvm::FunctionType::get(
2566           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2567
2568       llvm::Value *Block = Builder.CreatePointerCast(
2569           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2570
2571       AttrBuilder B;
2572       B.addAttribute(Attribute::ByVal);
2573       llvm::AttributeList ByValAttrSet =
2574           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2575
2576       auto RTCall =
2577           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2578                              {Queue, Flags, Range, Block});
2579       RTCall->setAttributes(ByValAttrSet);
2580       return RValue::get(RTCall);
2581     }
2582     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2583
2584     // Could have events and/or variadic arguments.
2585     if (E->getArg(3)->getType()->isBlockPointerType()) {
2586       // No events passed, but has variadic arguments.
2587       Name = "__enqueue_kernel_vaargs";
2588       llvm::Value *Block = Builder.CreatePointerCast(
2589           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2590       // Create a vector of the arguments, as well as a constant value to
2591       // express to the runtime the number of variadic arguments.
2592       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2593                                          ConstantInt::get(IntTy, NumArgs - 4)};
2594       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2595                                           GenericVoidPtrTy, IntTy};
2596
2597       // Each of the following arguments specifies the size of the corresponding
2598       // argument passed to the enqueued block.
2599       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2600         Args.push_back(
2601             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2602
2603       llvm::FunctionType *FTy = llvm::FunctionType::get(
2604           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2605       return RValue::get(
2606           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2607                              llvm::ArrayRef<llvm::Value *>(Args)));
2608     }
2609     // Any remaining calls have event arguments passed.
2610     if (NumArgs >= 7) {
2611       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2612       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2613           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2614
2615       llvm::Value *NumEvents =
2616           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2617       llvm::Value *EventList =
2618           E->getArg(4)->getType()->isArrayType()
2619               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2620               : EmitScalarExpr(E->getArg(4));
2621       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2622       // Convert to generic address space.
2623       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2624       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2625       llvm::Value *Block = Builder.CreatePointerCast(
2626           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2627
2628       std::vector<llvm::Type *> ArgTys = {
2629           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2630           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2631
2632       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2633                                          EventList, ClkEvent, Block};
2634
2635       if (NumArgs == 7) {
2636         // Has events but no variadics.
2637         Name = "__enqueue_kernel_basic_events";
2638         llvm::FunctionType *FTy = llvm::FunctionType::get(
2639             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2640         return RValue::get(
2641             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2642                                llvm::ArrayRef<llvm::Value *>(Args)));
2643       }
2644       // Has event info and variadic arguments.
2645       // Pass the number of variadic arguments to the runtime function too.
2646       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2647       ArgTys.push_back(Int32Ty);
2648       Name = "__enqueue_kernel_events_vaargs";
2649
2650       // Each of the following arguments specifies the size of the corresponding
2651       // argument passed to the enqueued block.
2652       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2653         Args.push_back(
2654             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2655
2656       llvm::FunctionType *FTy = llvm::FunctionType::get(
2657           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2658       return RValue::get(
2659           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2660                              llvm::ArrayRef<llvm::Value *>(Args)));
2661     }
2662     LLVM_FALLTHROUGH;
2663   }
2664   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2665   // parameter.
2666   case Builtin::BIget_kernel_work_group_size: {
2667     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2668         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2669     Value *Arg = EmitScalarExpr(E->getArg(0));
2670     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2671     return RValue::get(Builder.CreateCall(
2672         CGM.CreateRuntimeFunction(
2673             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2674             "__get_kernel_work_group_size_impl"),
2675         Arg));
2676   }
2677   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2678     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2679         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2680     Value *Arg = EmitScalarExpr(E->getArg(0));
2681     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2682     return RValue::get(Builder.CreateCall(
2683         CGM.CreateRuntimeFunction(
2684             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2685             "__get_kernel_preferred_work_group_multiple_impl"),
2686         Arg));
2687   }
2688   case Builtin::BIprintf:
2689     if (getTarget().getTriple().isNVPTX())
2690       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2691     break;
2692   case Builtin::BI__builtin_canonicalize:
2693   case Builtin::BI__builtin_canonicalizef:
2694   case Builtin::BI__builtin_canonicalizel:
2695     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2696
2697   case Builtin::BI__builtin_thread_pointer: {
2698     if (!getContext().getTargetInfo().isTLSSupported())
2699       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2700     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2701     break;
2702   }
2703   case Builtin::BI__builtin_os_log_format: {
2704     assert(E->getNumArgs() >= 2 &&
2705            "__builtin_os_log_format takes at least 2 arguments");
2706     analyze_os_log::OSLogBufferLayout Layout;
2707     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2708     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2709     // Ignore argument 1, the format string. It is not currently used.
2710     CharUnits Offset;
2711     Builder.CreateStore(
2712         Builder.getInt8(Layout.getSummaryByte()),
2713         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2714     Builder.CreateStore(
2715         Builder.getInt8(Layout.getNumArgsByte()),
2716         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2717
2718     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2719     for (const auto &Item : Layout.Items) {
2720       Builder.CreateStore(
2721           Builder.getInt8(Item.getDescriptorByte()),
2722           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2723       Builder.CreateStore(
2724           Builder.getInt8(Item.getSizeByte()),
2725           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2726       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2727       if (const Expr *TheExpr = Item.getExpr()) {
2728         Addr = Builder.CreateElementBitCast(
2729             Addr, ConvertTypeForMem(TheExpr->getType()));
2730         // Check if this is a retainable type.
2731         if (TheExpr->getType()->isObjCRetainableType()) {
2732           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2733                  "Only scalar can be an ObjC retainable type");
2734           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2735           RValue RV = RValue::get(SV);
2736           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2737           EmitStoreThroughLValue(RV, LV);
2738           // Check if the object is constant; if not, save it in
2739           // RetainableOperands.
2740           if (!isa<Constant>(SV))
2741             RetainableOperands.push_back(SV);
2742         } else {
2743           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2744         }
2745       } else {
2746         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2747         Builder.CreateStore(
2748             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2749       }
2750       Offset += Item.size();
2751     }
2752
2753     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2754     // cleanup will cause the use to appear after the final log call, keeping
2755     // the object valid while it's held in the log buffer.  Note that if there's
2756     // a release cleanup on the object, it will already be active; since
2757     // cleanups are emitted in reverse order, the use will occur before the
2758     // object is released.
2759     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2760         CGM.getCodeGenOpts().OptimizationLevel != 0)
2761       for (llvm::Value *object : RetainableOperands)
2762         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2763
2764     return RValue::get(BufAddr.getPointer());
2765   }
2766
2767   case Builtin::BI__builtin_os_log_format_buffer_size: {
2768     analyze_os_log::OSLogBufferLayout Layout;
2769     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2770     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2771                                         Layout.size().getQuantity()));
2772   }
2773
2774   case Builtin::BI__xray_customevent: {
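    // Lower __xray_customevent to the llvm.xray.customevent intrinsic,
    // casting the pointer and size operands to the intrinsic's parameter
    // types. The call is dropped when XRay instrumentation is disabled for
    // this function or the function is marked never-instrument.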
2775     if (!ShouldXRayInstrumentFunction())
2776       return RValue::getIgnored();
2777     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2778       if (XRayAttr->neverXRayInstrument())
2779         return RValue::getIgnored();
2780     }
2781     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2782     auto FTy = F->getFunctionType();
2783     auto Arg0 = E->getArg(0);
2784     auto Arg0Val = EmitScalarExpr(Arg0);
2785     auto Arg0Ty = Arg0->getType();
2786     auto PTy0 = FTy->getParamType(0);
2787     if (PTy0 != Arg0Val->getType()) {
2788       if (Arg0Ty->isArrayType())
2789         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2790       else
2791         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2792     }
2793     auto Arg1 = EmitScalarExpr(E->getArg(1));
2794     auto PTy1 = FTy->getParamType(1);
2795     if (PTy1 != Arg1->getType())
2796       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2797     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2798   }
2799   }
2800
2801   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2802   // the call using the normal call path, but using the unmangled
2803   // version of the function name.
2804   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2805     return emitLibraryCall(*this, FD, E,
2806                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2807
2808   // If this is a predefined lib function (e.g. malloc), emit the call
2809   // using exactly the normal call path.
2810   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2811     return emitLibraryCall(*this, FD, E,
2812                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2813
2814   // Check that a call to a target specific builtin has the correct target
2815   // features.
2816   // This is done here so that non-target-specific builtins are not subject
2817   // to the check; however, if generic builtins start to require generic
2818   // target features, we can move this up to the beginning of the function.
2819   checkTargetFeatures(E, FD);
2820
2821   // See if we have a target specific intrinsic.
2822   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2823   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2824   StringRef Prefix =
2825       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2826   if (!Prefix.empty()) {
2827     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2828     // NOTE: we don't need to perform a compatibility flag check here since
2829     // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
2830     // filters the MS builtins via ALL_MS_LANGUAGES, so they are filtered earlier.
2831     if (IntrinsicID == Intrinsic::not_intrinsic)
2832       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2833   }
2834
2835   if (IntrinsicID != Intrinsic::not_intrinsic) {
2836     SmallVector<Value*, 16> Args;
2837
2838     // Find out if any arguments are required to be integer constant
2839     // expressions.
2840     unsigned ICEArguments = 0;
2841     ASTContext::GetBuiltinTypeError Error;
2842     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2843     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2844
2845     Function *F = CGM.getIntrinsic(IntrinsicID);
2846     llvm::FunctionType *FTy = F->getFunctionType();
2847
2848     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2849       Value *ArgValue;
2850       // If this is a normal argument, just emit it as a scalar.
2851       if ((ICEArguments & (1 << i)) == 0) {
2852         ArgValue = EmitScalarExpr(E->getArg(i));
2853       } else {
2854         // If this is required to be a constant, constant fold it so that we
2855         // know that the generated intrinsic gets a ConstantInt.
2856         llvm::APSInt Result;
2857         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2858         assert(IsConst && "Constant arg isn't actually constant?");
2859         (void)IsConst;
2860         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2861       }
2862
2863       // If the intrinsic arg type is different from the builtin arg type,
2864       // we need to do a bit cast.
2865       llvm::Type *PTy = FTy->getParamType(i);
2866       if (PTy != ArgValue->getType()) {
2867         assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
2868                "Must be able to losslessly bit cast to param");
2869         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2870       }
2871
2872       Args.push_back(ArgValue);
2873     }
2874
2875     Value *V = Builder.CreateCall(F, Args);
2876     QualType BuiltinRetType = E->getType();
2877
2878     llvm::Type *RetTy = VoidTy;
2879     if (!BuiltinRetType->isVoidType())
2880       RetTy = ConvertType(BuiltinRetType);
2881
2882     if (RetTy != V->getType()) {
2883       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2884              "Must be able to losslessly bit cast result type");
2885       V = Builder.CreateBitCast(V, RetTy);
2886     }
2887
2888     return RValue::get(V);
2889   }
2890
2891   // See if we have a target specific builtin that needs to be lowered.
2892   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2893     return RValue::get(V);
2894
2895   ErrorUnsupported(E, "builtin function");
2896
2897   // Unknown builtin; for now, just dump it out and return undef.
2898   return GetUndefRValue(E->getType());
2899 }
2900
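/// Dispatch a target-specific builtin to the appropriate per-architecture
/// emitter. Returns nullptr for architectures without custom builtin handling.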
2901 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2902                                         unsigned BuiltinID, const CallExpr *E,
2903                                         llvm::Triple::ArchType Arch) {
2904   switch (Arch) {
2905   case llvm::Triple::arm:
2906   case llvm::Triple::armeb:
2907   case llvm::Triple::thumb:
2908   case llvm::Triple::thumbeb:
2909     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2910   case llvm::Triple::aarch64:
2911   case llvm::Triple::aarch64_be:
2912     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2913   case llvm::Triple::x86:
2914   case llvm::Triple::x86_64:
2915     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2916   case llvm::Triple::ppc:
2917   case llvm::Triple::ppc64:
2918   case llvm::Triple::ppc64le:
2919     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2920   case llvm::Triple::r600:
2921   case llvm::Triple::amdgcn:
2922     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2923   case llvm::Triple::systemz:
2924     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2925   case llvm::Triple::nvptx:
2926   case llvm::Triple::nvptx64:
2927     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2928   case llvm::Triple::wasm32:
2929   case llvm::Triple::wasm64:
2930     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2931   default:
2932     return nullptr;
2933   }
2934 }
2935
2936 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2937                                               const CallExpr *E) {
2938   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2939     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2940     return EmitTargetArchBuiltinExpr(
2941         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2942         getContext().getAuxTargetInfo()->getTriple().getArch());
2943   }
2944
2945   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2946                                    getTarget().getTriple().getArch());
2947 }
2948
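/// Return the LLVM vector type used to represent a NEON vector with the given
/// element type and width flags (e.g. Int8 with the quad bit set becomes
/// <16 x i8>). With V1Ty set, a single-element vector is returned instead.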
2949 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2950                                      NeonTypeFlags TypeFlags,
2951                                      bool V1Ty=false) {
2952   int IsQuad = TypeFlags.isQuad();
2953   switch (TypeFlags.getEltType()) {
2954   case NeonTypeFlags::Int8:
2955   case NeonTypeFlags::Poly8:
2956     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2957   case NeonTypeFlags::Int16:
2958   case NeonTypeFlags::Poly16:
2959   case NeonTypeFlags::Float16:
2960     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2961   case NeonTypeFlags::Int32:
2962     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2963   case NeonTypeFlags::Int64:
2964   case NeonTypeFlags::Poly64:
2965     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2966   case NeonTypeFlags::Poly128:
2967     // FIXME: i128 and f128 don't get full support in Clang and LLVM, and
2968     // a lot of the i128 and f128 API is missing,
2969     // so we use v16i8 to represent poly128 and rely on pattern matching.
2970     return llvm::VectorType::get(CGF->Int8Ty, 16);
2971   case NeonTypeFlags::Float32:
2972     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2973   case NeonTypeFlags::Float64:
2974     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2975   }
2976   llvm_unreachable("Unknown vector element type!");
2977 }
2978
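/// Return the floating-point vector type with the same total width as the
/// given integer NEON type: i32 vectors map to float vectors and i64 vectors
/// map to double vectors, preserving the quad bit.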
2979 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2980                                           NeonTypeFlags IntTypeFlags) {
2981   int IsQuad = IntTypeFlags.isQuad();
2982   switch (IntTypeFlags.getEltType()) {
2983   case NeonTypeFlags::Int32:
2984     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2985   case NeonTypeFlags::Int64:
2986     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2987   default:
2988     llvm_unreachable("Type can't be converted to floating-point!");
2989   }
2990 }
2991
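/// Splat the lane selected by the constant C across every element of V using
/// a shufflevector with a constant splat mask.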
2992 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2993   unsigned nElts = V->getType()->getVectorNumElements();
2994   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2995   return Builder.CreateShuffleVector(V, V, SV, "lane");
2996 }
2997
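/// Emit a call to the NEON intrinsic F. Each operand is bitcast to the
/// corresponding parameter type, except the operand at index 'shift' (when
/// non-zero), which is turned into a constant shift-amount vector, negated
/// for right shifts.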
2998 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2999                                      const char *name,
3000                                      unsigned shift, bool rightshift) {
3001   unsigned j = 0;
3002   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3003        ai != ae; ++ai, ++j)
3004     if (shift > 0 && shift == j)
3005       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3006     else
3007       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3008
3009   return Builder.CreateCall(F, Ops, name);
3010 }
3011
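/// Build the constant vector of shift amounts for a NEON shift: the scalar
/// constant V is (negated for right shifts and) splatted over the vector
/// type Ty.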
3012 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3013                                             bool neg) {
3014   int SV = cast<ConstantInt>(V)->getSExtValue();
3015   return ConstantInt::get(Ty, neg ? -SV : SV);
3016 }
3017
3018 /// Right-shift a vector by a constant.
3019 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3020                                           llvm::Type *Ty, bool usgn,
3021                                           const char *name) {
3022   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3023
3024   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3025   int EltSize = VTy->getScalarSizeInBits();
3026
3027   Vec = Builder.CreateBitCast(Vec, Ty);
3028
3029   // lshr/ashr are undefined when the shift amount is equal to the vector
3030   // element size.
3031   if (ShiftAmt == EltSize) {
3032     if (usgn) {
3033       // Right-shifting an unsigned value by its size yields 0.
3034       return llvm::ConstantAggregateZero::get(VTy);
3035     } else {
3036       // Right-shifting a signed value by its size is equivalent
3037       // to a shift of size-1.
3038       --ShiftAmt;
3039       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3040     }
3041   }
3042
3043   Shift = EmitNeonShiftVector(Shift, Ty, false);
3044   if (usgn)
3045     return Builder.CreateLShr(Vec, Shift, name);
3046   else
3047     return Builder.CreateAShr(Vec, Shift, name);
3048 }
3049
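// Flags used by the NeonIntrinsicInfo tables below to describe how the call
// to the LLVM intrinsic is built from the builtin call: which types are added
// to the intrinsic's overloaded type list (return and/or argument types),
// whether they are vectorized, whether an unsigned alternative intrinsic
// exists, and which vector width to use.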
3050 enum {
3051   AddRetType = (1 << 0),
3052   Add1ArgType = (1 << 1),
3053   Add2ArgTypes = (1 << 2),
3054
3055   VectorizeRetType = (1 << 3),
3056   VectorizeArgTypes = (1 << 4),
3057
3058   InventFloatType = (1 << 5),
3059   UnsignedAlts = (1 << 6),
3060
3061   Use64BitVectors = (1 << 7),
3062   Use128BitVectors = (1 << 8),
3063
3064   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3065   VectorRet = AddRetType | VectorizeRetType,
3066   VectorRetGetArgs01 =
3067       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3068   FpCmpzModifiers =
3069       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3070 };
3071
3072 namespace {
3073 struct NeonIntrinsicInfo {
3074   const char *NameHint;
3075   unsigned BuiltinID;
3076   unsigned LLVMIntrinsic;
3077   unsigned AltLLVMIntrinsic;
3078   unsigned TypeModifier;
3079
3080   bool operator<(unsigned RHSBuiltinID) const {
3081     return BuiltinID < RHSBuiltinID;
3082   }
3083   bool operator<(const NeonIntrinsicInfo &TE) const {
3084     return BuiltinID < TE.BuiltinID;
3085   }
3086 };
3087 } // end anonymous namespace
3088
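// Helpers for building NeonIntrinsicInfo entries that map a NEON builtin to
// zero, one, or two LLVM intrinsics (the second entry being an alternate
// form, e.g. the signed variant when UnsignedAlts is set) plus its
// type-modifier flags.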
3089 #define NEONMAP0(NameBase) \
3090   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3091
3092 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3093   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3094       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3095
3096 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3097   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3098       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3099       TypeModifier }
3100
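// Table mapping ARM NEON builtins to LLVM intrinsics and type-modifier flags.
// Entries are kept sorted by builtin ID so the table can be binary-searched
// (see the operator< overloads on NeonIntrinsicInfo).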
3101 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3102   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3103   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3104   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3105   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3106   NEONMAP0(vaddhn_v),
3107   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3108   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3109   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3110   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3111   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3112   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3113   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3114   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3115   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3116   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3117   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3118   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3119   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3120   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3121   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3122   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3123   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3124   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3125   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3126   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3127   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3128   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3129   NEONMAP0(vcvt_f32_v),
3130   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3131   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3132   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3133   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3134   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3135   NEONMAP0(vcvt_s32_v),
3136   NEONMAP0(vcvt_s64_v),
3137   NEONMAP0(vcvt_u32_v),
3138   NEONMAP0(vcvt_u64_v),
3139   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3140   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3141   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3142   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3143   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3144   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3145   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3146   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3147   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3148   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3149   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3150   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3151   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3152   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3153   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3154   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3155   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3156   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3157   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3158   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3159   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3160   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3161   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3162   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3163   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3164   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3165   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3166   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3167   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3168   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3169   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3170   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3171   NEONMAP0(vcvtq_f32_v),
3172   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3173   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3174   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3175   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3176   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3177   NEONMAP0(vcvtq_s32_v),
3178   NEONMAP0(vcvtq_s64_v),
3179   NEONMAP0(vcvtq_u32_v),
3180   NEONMAP0(vcvtq_u64_v),
3181   NEONMAP0(vext_v),
3182   NEONMAP0(vextq_v),
3183   NEONMAP0(vfma_v),
3184   NEONMAP0(vfmaq_v),
3185   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3186   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3187   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3188   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3189   NEONMAP0(vld1_dup_v),
3190   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3191   NEONMAP0(vld1q_dup_v),
3192   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3193   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3194   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3195   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3196   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3197   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3198   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3199   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3200   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3201   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3202   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3203   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3204   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3205   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3206   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3207   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3208   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3209   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3210   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3211   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3212   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3213   NEONMAP0(vmovl_v),
3214   NEONMAP0(vmovn_v),
3215   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3216   NEONMAP0(vmull_v),
3217   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3218   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3219   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3220   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3221   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3222   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3223   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3224   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3225   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3226   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3227   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3228   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3229   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3230   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3231   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3232   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3233   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3234   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3235   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3236   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3237   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3238   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3239   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3240   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3241   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3242   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3243   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3244   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3245   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3246   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3247   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3248   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3249   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3250   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3251   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3252   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3253   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3254   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3255   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3256   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3257   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3258   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3259   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3260   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3261   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3262   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3263   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3264   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3265   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3266   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3267   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3268   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3269   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3270   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3271   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3272   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3273   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3274   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3275   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3276   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3277   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3278   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3279   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3280   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3281   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3282   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3283   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3284   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3285   NEONMAP0(vshl_n_v),
3286   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3287   NEONMAP0(vshll_n_v),
3288   NEONMAP0(vshlq_n_v),
3289   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3290   NEONMAP0(vshr_n_v),
3291   NEONMAP0(vshrn_n_v),
3292   NEONMAP0(vshrq_n_v),
3293   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3294   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3295   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3296   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3297   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3298   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3299   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3300   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3301   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3302   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3303   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3304   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3305   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3306   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3307   NEONMAP0(vsubhn_v),
3308   NEONMAP0(vtrn_v),
3309   NEONMAP0(vtrnq_v),
3310   NEONMAP0(vtst_v),
3311   NEONMAP0(vtstq_v),
3312   NEONMAP0(vuzp_v),
3313   NEONMAP0(vuzpq_v),
3314   NEONMAP0(vzip_v),
3315   NEONMAP0(vzipq_v)
3316 };
3317
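// Corresponding table for the AArch64 NEON vector builtins.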
3318 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3319   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3320   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3321   NEONMAP0(vaddhn_v),
3322   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3323   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3324   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3325   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3326   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3327   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3328   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3329   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3330   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3331   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3332   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3333   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3334   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3335   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3336   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3337   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3338   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3339   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3340   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3341   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3342   NEONMAP0(vcvt_f32_v),
3343   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3344   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3345   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3346   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3347   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3348   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3349   NEONMAP0(vcvtq_f32_v),
3350   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3351   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3352   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3353   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3354   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3355   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3356   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3357   NEONMAP0(vext_v),
3358   NEONMAP0(vextq_v),
3359   NEONMAP0(vfma_v),
3360   NEONMAP0(vfmaq_v),
3361   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3362   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3363   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3364   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3365   NEONMAP0(vmovl_v),
3366   NEONMAP0(vmovn_v),
3367   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3368   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3369   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3370   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3371   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3372   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3373   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3374   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3375   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3376   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3377   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3378   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3379   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3380   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3381   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3382   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3383   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3384   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3385   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3386   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3387   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3388   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3389   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3390   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3391   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3392   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3393   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3394   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3395   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3396   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3397   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3398   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3399   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3400   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3401   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3402   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3403   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3404   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3405   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3406   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3407   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3408   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3409   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3410   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3411   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3412   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3413   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3414   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3415   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3416   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3417   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3418   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3419   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3420   NEONMAP0(vshl_n_v),
3421   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3422   NEONMAP0(vshll_n_v),
3423   NEONMAP0(vshlq_n_v),
3424   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3425   NEONMAP0(vshr_n_v),
3426   NEONMAP0(vshrn_n_v),
3427   NEONMAP0(vshrq_n_v),
3428   NEONMAP0(vsubhn_v),
3429   NEONMAP0(vtst_v),
3430   NEONMAP0(vtstq_v),
3431 };
3432
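// Table for the AArch64 scalar (SISD) builtins, which operate on single
// elements rather than whole vectors.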
3433 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3434   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3435   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3436   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3437   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3438   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3439   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3440   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3441   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3442   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3443   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3444   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3445   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3446   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3447   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3448   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3449   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3450   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3451   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3452   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3453   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3454   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3455   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3456   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3457   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3458   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3459   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3460   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3461   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3462   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3463   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3464   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3465   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3466   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3467   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3468   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3469   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3470   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3471   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3472   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3473   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3474   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3475   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3476   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3477   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3478   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3479   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3480   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3481   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3482   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3483   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3484   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3485   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3486   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3487   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3488   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3489   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3490   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3491   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3492   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3493   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3494   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3495   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3496   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3497   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3498   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3499   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3500   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3501   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3502   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3503   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3504   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3505   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3506   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3507   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3508   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3509   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3510   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3511   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3512   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3513   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3514   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3515   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3516   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3517   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3518   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3519   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3520   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3522   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3523   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3524   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3525   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3526   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3527   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3528   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3529   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3530   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3531   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3532   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3533   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3534   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3535   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3536   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3537   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3538   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3539   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3540   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3541   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3542   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3543   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3544   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3545   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3546   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3547   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3548   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3549   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3550   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3551   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3552   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3553   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3554   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3555   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3556   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3557   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3558   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3559   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3560   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3561   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3562   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3563   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3564   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3565   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3566   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3567   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3568   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3569   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3570   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3571   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3572   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3573   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3574   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3575   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3576   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3577   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3578   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3579   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3580   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3581   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3582   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3583   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3584   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3585   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3586   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3587   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3588   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3589   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3590   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3591   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3592   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3593   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3594   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3595   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3596   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3597   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3598   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3599   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3600   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3601   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3602   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3603   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3604   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3605   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3606   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3607   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3608   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3609   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3610   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3611   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3612   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3613   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3614   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3615   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3616   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3617   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3618   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3619   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3620   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3621   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3622   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3623   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3624   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3625   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3626 };
3627
3628 #undef NEONMAP0
3629 #undef NEONMAP1
3630 #undef NEONMAP2
3631
3632 static bool NEONSIMDIntrinsicsProvenSorted = false;
3633
3634 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3635 static bool AArch64SISDIntrinsicsProvenSorted = false;
3636
3637
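     // Look up BuiltinID in a table of NeonIntrinsicInfo entries. The table must
     // be sorted by BuiltinID; in assert builds this is verified once per table
     // via MapProvenSorted. The lookup itself is a binary search, and nullptr is
     // returned when the builtin has no entry.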
3638 static const NeonIntrinsicInfo *
3639 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3640                        unsigned BuiltinID, bool &MapProvenSorted) {
3641
3642 #ifndef NDEBUG
3643   if (!MapProvenSorted) {
3644     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3645     MapProvenSorted = true;
3646   }
3647 #endif
3648
3649   const NeonIntrinsicInfo *Builtin =
3650       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3651
3652   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3653     return Builtin;
3654
3655   return nullptr;
3656 }
3657
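     // Assemble the overload type list for a NEON LLVM intrinsic from the
     // TypeModifier flags of its table entry: optionally the call's return type,
     // one or two copies of the argument type, and an invented float type, with
     // types vectorized to 64- or 128-bit vectors where the flags request it.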
3658 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3659                                                    unsigned Modifier,
3660                                                    llvm::Type *ArgType,
3661                                                    const CallExpr *E) {
3662   int VectorSize = 0;
3663   if (Modifier & Use64BitVectors)
3664     VectorSize = 64;
3665   else if (Modifier & Use128BitVectors)
3666     VectorSize = 128;
3667
3668   // Return type.
3669   SmallVector<llvm::Type *, 3> Tys;
3670   if (Modifier & AddRetType) {
3671     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3672     if (Modifier & VectorizeRetType)
3673       Ty = llvm::VectorType::get(
3674           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3675
3676     Tys.push_back(Ty);
3677   }
3678
3679   // Arguments.
3680   if (Modifier & VectorizeArgTypes) {
3681     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3682     ArgType = llvm::VectorType::get(ArgType, Elts);
3683   }
3684
3685   if (Modifier & (Add1ArgType | Add2ArgTypes))
3686     Tys.push_back(ArgType);
3687
3688   if (Modifier & Add2ArgTypes)
3689     Tys.push_back(ArgType);
3690
3691   if (Modifier & InventFloatType)
3692     Tys.push_back(FloatTy);
3693
3694   return CGM.getIntrinsic(IntrinsicID, Tys);
3695 }
3696
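     // Emit a scalar (SISD) NEON builtin through its vector LLVM intrinsic:
     // scalar operands narrower than the intrinsic's parameters are inserted
     // into lane 0 of an undef vector, and a wider one-element vector result is
     // extracted back to a scalar before returning.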
3697 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3698                                             const NeonIntrinsicInfo &SISDInfo,
3699                                             SmallVectorImpl<Value *> &Ops,
3700                                             const CallExpr *E) {
3701   unsigned BuiltinID = SISDInfo.BuiltinID;
3702   unsigned int Int = SISDInfo.LLVMIntrinsic;
3703   unsigned Modifier = SISDInfo.TypeModifier;
3704   const char *s = SISDInfo.NameHint;
3705
3706   switch (BuiltinID) {
3707   case NEON::BI__builtin_neon_vcled_s64:
3708   case NEON::BI__builtin_neon_vcled_u64:
3709   case NEON::BI__builtin_neon_vcles_f32:
3710   case NEON::BI__builtin_neon_vcled_f64:
3711   case NEON::BI__builtin_neon_vcltd_s64:
3712   case NEON::BI__builtin_neon_vcltd_u64:
3713   case NEON::BI__builtin_neon_vclts_f32:
3714   case NEON::BI__builtin_neon_vcltd_f64:
3715   case NEON::BI__builtin_neon_vcales_f32:
3716   case NEON::BI__builtin_neon_vcaled_f64:
3717   case NEON::BI__builtin_neon_vcalts_f32:
3718   case NEON::BI__builtin_neon_vcaltd_f64:
3719     // Only one direction of comparisons actually exists; cmle is actually a cmge
3720     // with swapped operands. The table gives us the right intrinsic, but we
3721     // still need to do the swap.
3722     std::swap(Ops[0], Ops[1]);
3723     break;
3724   }
3725
3726   assert(Int && "Generic code assumes a valid intrinsic");
3727
3728   // Determine the type(s) of this overloaded AArch64 intrinsic.
3729   const Expr *Arg = E->getArg(0);
3730   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3731   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3732
3733   int j = 0;
3734   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3735   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3736        ai != ae; ++ai, ++j) {
3737     llvm::Type *ArgTy = ai->getType();
3738     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3739              ArgTy->getPrimitiveSizeInBits())
3740       continue;
3741
3742     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3743     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3744     // it before inserting.
3745     Ops[j] =
3746         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3747     Ops[j] =
3748         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3749   }
3750
3751   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3752   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3753   if (ResultType->getPrimitiveSizeInBits() <
3754       Result->getType()->getPrimitiveSizeInBits())
3755     return CGF.Builder.CreateExtractElement(Result, C0);
3756
3757   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3758 }
3759
3760 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3761     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3762     const char *NameHint, unsigned Modifier, const CallExpr *E,
3763     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3764   // Get the last argument, which specifies the vector type.
3765   llvm::APSInt NeonTypeConst;
3766   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3767   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3768     return nullptr;
3769
3770   // Determine the type of this overloaded NEON intrinsic.
3771   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3772   bool Usgn = Type.isUnsigned();
3773   bool Quad = Type.isQuad();
3774
3775   llvm::VectorType *VTy = GetNeonType(this, Type);
3776   llvm::Type *Ty = VTy;
3777   if (!Ty)
3778     return nullptr;
3779
3780   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3781     return Builder.getInt32(addr.getAlignment().getQuantity());
3782   };
3783
3784   unsigned Int = LLVMIntrinsic;
3785   if ((Modifier & UnsignedAlts) && !Usgn)
3786     Int = AltLLVMIntrinsic;
3787
3788   switch (BuiltinID) {
3789   default: break;
3790   case NEON::BI__builtin_neon_vabs_v:
3791   case NEON::BI__builtin_neon_vabsq_v:
3792     if (VTy->getElementType()->isFloatingPointTy())
3793       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3794     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3795   case NEON::BI__builtin_neon_vaddhn_v: {
3796     llvm::VectorType *SrcTy =
3797         llvm::VectorType::getExtendedElementVectorType(VTy);
3798
3799     // %sum = add <4 x i32> %lhs, %rhs
3800     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3801     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3802     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3803
3804     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3805     Constant *ShiftAmt =
3806         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3807     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3808
3809     // %res = trunc <4 x i32> %high to <4 x i16>
3810     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3811   }
3812   case NEON::BI__builtin_neon_vcale_v:
3813   case NEON::BI__builtin_neon_vcaleq_v:
3814   case NEON::BI__builtin_neon_vcalt_v:
3815   case NEON::BI__builtin_neon_vcaltq_v:
3816     std::swap(Ops[0], Ops[1]);
3817     LLVM_FALLTHROUGH;
3818   case NEON::BI__builtin_neon_vcage_v:
3819   case NEON::BI__builtin_neon_vcageq_v:
3820   case NEON::BI__builtin_neon_vcagt_v:
3821   case NEON::BI__builtin_neon_vcagtq_v: {
3822     llvm::Type *VecFlt = llvm::VectorType::get(
3823         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3824         VTy->getNumElements());
3825     llvm::Type *Tys[] = { VTy, VecFlt };
3826     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3827     return EmitNeonCall(F, Ops, NameHint);
3828   }
3829   case NEON::BI__builtin_neon_vclz_v:
3830   case NEON::BI__builtin_neon_vclzq_v:
3831     // We generate a target-independent intrinsic, which needs a second argument
3832     // for whether or not clz of zero is undefined; on ARM it isn't.
3833     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3834     break;
3835   case NEON::BI__builtin_neon_vcvt_f32_v:
3836   case NEON::BI__builtin_neon_vcvtq_f32_v:
3837     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3838     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3839     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3840                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3841   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3842   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3843   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3844   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3845     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3846     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3847     Function *F = CGM.getIntrinsic(Int, Tys);
3848     return EmitNeonCall(F, Ops, "vcvt_n");
3849   }
3850   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3851   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3852   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3853   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3854   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3855   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3856   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3857   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3858     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3859     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3860     return EmitNeonCall(F, Ops, "vcvt_n");
3861   }
3862   case NEON::BI__builtin_neon_vcvt_s32_v:
3863   case NEON::BI__builtin_neon_vcvt_u32_v:
3864   case NEON::BI__builtin_neon_vcvt_s64_v:
3865   case NEON::BI__builtin_neon_vcvt_u64_v:
3866   case NEON::BI__builtin_neon_vcvtq_s32_v:
3867   case NEON::BI__builtin_neon_vcvtq_u32_v:
3868   case NEON::BI__builtin_neon_vcvtq_s64_v:
3869   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3870     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3871     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3872                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3873   }
3874   case NEON::BI__builtin_neon_vcvta_s32_v:
3875   case NEON::BI__builtin_neon_vcvta_s64_v:
3876   case NEON::BI__builtin_neon_vcvta_u32_v:
3877   case NEON::BI__builtin_neon_vcvta_u64_v:
3878   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3879   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3880   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3881   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3882   case NEON::BI__builtin_neon_vcvtn_s32_v:
3883   case NEON::BI__builtin_neon_vcvtn_s64_v:
3884   case NEON::BI__builtin_neon_vcvtn_u32_v:
3885   case NEON::BI__builtin_neon_vcvtn_u64_v:
3886   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3887   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3888   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3889   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3890   case NEON::BI__builtin_neon_vcvtp_s32_v:
3891   case NEON::BI__builtin_neon_vcvtp_s64_v:
3892   case NEON::BI__builtin_neon_vcvtp_u32_v:
3893   case NEON::BI__builtin_neon_vcvtp_u64_v:
3894   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3895   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3896   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3897   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3898   case NEON::BI__builtin_neon_vcvtm_s32_v:
3899   case NEON::BI__builtin_neon_vcvtm_s64_v:
3900   case NEON::BI__builtin_neon_vcvtm_u32_v:
3901   case NEON::BI__builtin_neon_vcvtm_u64_v:
3902   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3903   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3904   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3905   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3906     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3907     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3908   }
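       // vext extracts VTy's worth of consecutive elements from the
       // concatenation of the two inputs, starting at the immediate lane index;
       // this maps directly onto a shufflevector.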
3909   case NEON::BI__builtin_neon_vext_v:
3910   case NEON::BI__builtin_neon_vextq_v: {
3911     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3912     SmallVector<uint32_t, 16> Indices;
3913     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3914       Indices.push_back(i+CV);
3915
3916     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3917     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3918     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3919   }
3920   case NEON::BI__builtin_neon_vfma_v:
3921   case NEON::BI__builtin_neon_vfmaq_v: {
3922     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3923     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3925     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3926
3927     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3928     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3929   }
3930   case NEON::BI__builtin_neon_vld1_v:
3931   case NEON::BI__builtin_neon_vld1q_v: {
3932     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3933     Ops.push_back(getAlignmentValue32(PtrOp0));
3934     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3935   }
3936   case NEON::BI__builtin_neon_vld2_v:
3937   case NEON::BI__builtin_neon_vld2q_v:
3938   case NEON::BI__builtin_neon_vld3_v:
3939   case NEON::BI__builtin_neon_vld3q_v:
3940   case NEON::BI__builtin_neon_vld4_v:
3941   case NEON::BI__builtin_neon_vld4q_v: {
3942     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3943     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3944     Value *Align = getAlignmentValue32(PtrOp1);
3945     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3946     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3947     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3948     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3949   }
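       // vld1_dup loads a single element from memory and splats it across every
       // lane of the result vector.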
3950   case NEON::BI__builtin_neon_vld1_dup_v:
3951   case NEON::BI__builtin_neon_vld1q_dup_v: {
3952     Value *V = UndefValue::get(Ty);
3953     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3954     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3955     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3956     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3957     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3958     return EmitNeonSplat(Ops[0], CI);
3959   }
3960   case NEON::BI__builtin_neon_vld2_lane_v:
3961   case NEON::BI__builtin_neon_vld2q_lane_v:
3962   case NEON::BI__builtin_neon_vld3_lane_v:
3963   case NEON::BI__builtin_neon_vld3q_lane_v:
3964   case NEON::BI__builtin_neon_vld4_lane_v:
3965   case NEON::BI__builtin_neon_vld4q_lane_v: {
3966     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3967     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3968     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3969       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3970     Ops.push_back(getAlignmentValue32(PtrOp1));
3971     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3972     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3973     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3974     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3975   }
3976   case NEON::BI__builtin_neon_vmovl_v: {
3977     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3978     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3979     if (Usgn)
3980       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3981     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3982   }
3983   case NEON::BI__builtin_neon_vmovn_v: {
3984     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3985     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3986     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3987   }
3988   case NEON::BI__builtin_neon_vmull_v:
3989     // FIXME: the integer vmull operations could be emitted in terms of pure
3990     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3991     // hoisting the exts outside loops. Until global ISel comes along that can
3992     // see through such movement this leads to bad CodeGen. So we need an
3993     // intrinsic for now.
3994     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3995     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3996     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3997   case NEON::BI__builtin_neon_vpadal_v:
3998   case NEON::BI__builtin_neon_vpadalq_v: {
3999     // The source operand type has twice as many elements of half the size.
4000     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4001     llvm::Type *EltTy =
4002       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4003     llvm::Type *NarrowTy =
4004       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4005     llvm::Type *Tys[2] = { Ty, NarrowTy };
4006     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4007   }
4008   case NEON::BI__builtin_neon_vpaddl_v:
4009   case NEON::BI__builtin_neon_vpaddlq_v: {
4010     // The source operand type has twice as many elements of half the size.
4011     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4012     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4013     llvm::Type *NarrowTy =
4014       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4015     llvm::Type *Tys[2] = { Ty, NarrowTy };
4016     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4017   }
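       // vqdmlal/vqdmlsl: first emit the widening saturating-doubling multiply
       // of the last two operands, then the saturating accumulate with Ops[0].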
4018   case NEON::BI__builtin_neon_vqdmlal_v:
4019   case NEON::BI__builtin_neon_vqdmlsl_v: {
4020     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4021     Ops[1] =
4022         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4023     Ops.resize(2);
4024     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4025   }
4026   case NEON::BI__builtin_neon_vqshl_n_v:
4027   case NEON::BI__builtin_neon_vqshlq_n_v:
4028     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4029                         1, false);
4030   case NEON::BI__builtin_neon_vqshlu_n_v:
4031   case NEON::BI__builtin_neon_vqshluq_n_v:
4032     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4033                         1, false);
4034   case NEON::BI__builtin_neon_vrecpe_v:
4035   case NEON::BI__builtin_neon_vrecpeq_v:
4036   case NEON::BI__builtin_neon_vrsqrte_v:
4037   case NEON::BI__builtin_neon_vrsqrteq_v:
4038     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4039     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4040
4041   case NEON::BI__builtin_neon_vrshr_n_v:
4042   case NEON::BI__builtin_neon_vrshrq_n_v:
4043     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4044                         1, true);
4045   case NEON::BI__builtin_neon_vshl_n_v:
4046   case NEON::BI__builtin_neon_vshlq_n_v:
4047     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4048     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4049                              "vshl_n");
4050   case NEON::BI__builtin_neon_vshll_n_v: {
4051     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4052     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4053     if (Usgn)
4054       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4055     else
4056       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4057     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4058     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4059   }
4060   case NEON::BI__builtin_neon_vshrn_n_v: {
4061     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4062     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4063     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4064     if (Usgn)
4065       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4066     else
4067       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4068     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4069   }
4070   case NEON::BI__builtin_neon_vshr_n_v:
4071   case NEON::BI__builtin_neon_vshrq_n_v:
4072     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4073   case NEON::BI__builtin_neon_vst1_v:
4074   case NEON::BI__builtin_neon_vst1q_v:
4075   case NEON::BI__builtin_neon_vst2_v:
4076   case NEON::BI__builtin_neon_vst2q_v:
4077   case NEON::BI__builtin_neon_vst3_v:
4078   case NEON::BI__builtin_neon_vst3q_v:
4079   case NEON::BI__builtin_neon_vst4_v:
4080   case NEON::BI__builtin_neon_vst4q_v:
4081   case NEON::BI__builtin_neon_vst2_lane_v:
4082   case NEON::BI__builtin_neon_vst2q_lane_v:
4083   case NEON::BI__builtin_neon_vst3_lane_v:
4084   case NEON::BI__builtin_neon_vst3q_lane_v:
4085   case NEON::BI__builtin_neon_vst4_lane_v:
4086   case NEON::BI__builtin_neon_vst4q_lane_v: {
4087     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4088     Ops.push_back(getAlignmentValue32(PtrOp0));
4089     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4090   }
4091   case NEON::BI__builtin_neon_vsubhn_v: {
4092     llvm::VectorType *SrcTy =
4093         llvm::VectorType::getExtendedElementVectorType(VTy);
4094
4095     // %diff = sub <4 x i32> %lhs, %rhs
4096     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4097     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4098     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4099
4100     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4101     Constant *ShiftAmt =
4102         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4103     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4104
4105     // %res = trunc <4 x i32> %high to <4 x i16>
4106     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4107   }
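       // vtrn/vuzp/vzip produce two result vectors; they are returned indirectly
       // through the pointer in Ops[0], storing one shuffled half per iteration.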
4108   case NEON::BI__builtin_neon_vtrn_v:
4109   case NEON::BI__builtin_neon_vtrnq_v: {
4110     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4111     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4112     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4113     Value *SV = nullptr;
4114
4115     for (unsigned vi = 0; vi != 2; ++vi) {
4116       SmallVector<uint32_t, 16> Indices;
4117       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4118         Indices.push_back(i+vi);
4119         Indices.push_back(i+e+vi);
4120       }
4121       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4122       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4123       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4124     }
4125     return SV;
4126   }
4127   case NEON::BI__builtin_neon_vtst_v:
4128   case NEON::BI__builtin_neon_vtstq_v: {
4129     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4130     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4131     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4132     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4133                                 ConstantAggregateZero::get(Ty));
4134     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4135   }
4136   case NEON::BI__builtin_neon_vuzp_v:
4137   case NEON::BI__builtin_neon_vuzpq_v: {
4138     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4139     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4140     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4141     Value *SV = nullptr;
4142
4143     for (unsigned vi = 0; vi != 2; ++vi) {
4144       SmallVector<uint32_t, 16> Indices;
4145       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4146         Indices.push_back(2*i+vi);
4147
4148       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4149       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4150       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4151     }
4152     return SV;
4153   }
4154   case NEON::BI__builtin_neon_vzip_v:
4155   case NEON::BI__builtin_neon_vzipq_v: {
4156     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4157     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4158     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4159     Value *SV = nullptr;
4160
4161     for (unsigned vi = 0; vi != 2; ++vi) {
4162       SmallVector<uint32_t, 16> Indices;
4163       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4164         Indices.push_back((i + vi*e) >> 1);
4165         Indices.push_back(((i + vi*e) >> 1)+e);
4166       }
4167       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4168       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4169       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4170     }
4171     return SV;
4172   }
4173   }
4174
4175   assert(Int && "Expected valid intrinsic number");
4176
4177   // Determine the type(s) of this overloaded AArch64 intrinsic.
4178   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4179
4180   Value *Result = EmitNeonCall(F, Ops, NameHint);
4181   llvm::Type *ResultType = ConvertType(E->getType());
4182   // Cast the one-element vector result of the AArch64 intrinsic back to the
4183   // scalar type expected by the builtin.
4184   return Builder.CreateBitCast(Result, ResultType, NameHint);
4185 }
4186
4187 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4188     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4189     const CmpInst::Predicate Ip, const Twine &Name) {
4190   llvm::Type *OTy = Op->getType();
4191
4192   // FIXME: this is utterly horrific. We should not be looking at previous
4193   // codegen context to find out what needs doing. Unfortunately TableGen
4194   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4195   // (etc).
4196   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4197     OTy = BI->getOperand(0)->getType();
4198
4199   Op = Builder.CreateBitCast(Op, OTy);
4200   if (OTy->getScalarType()->isFloatingPointTy()) {
4201     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4202   } else {
4203     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4204   }
4205   return Builder.CreateSExt(Op, Ty, Name);
4206 }
4207
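     // Helper for the AArch64 vtbl/vtbx family: concatenate the 64-bit table
     // vectors pairwise into 128-bit tables (zero-padding an odd trailing
     // vector), append the index operand, and emit the requested intrinsic. An
     // optional ExtOp is passed through as the first operand when present.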
4208 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4209                                  Value *ExtOp, Value *IndexOp,
4210                                  llvm::Type *ResTy, unsigned IntID,
4211                                  const char *Name) {
4212   SmallVector<Value *, 2> TblOps;
4213   if (ExtOp)
4214     TblOps.push_back(ExtOp);
4215
4216   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4217   SmallVector<uint32_t, 16> Indices;
4218   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4219   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4220     Indices.push_back(2*i);
4221     Indices.push_back(2*i+1);
4222   }
4223
4224   int PairPos = 0, End = Ops.size() - 1;
4225   while (PairPos < End) {
4226     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4227                                                      Ops[PairPos+1], Indices,
4228                                                      Name));
4229     PairPos += 2;
4230   }
4231
4232   // If there's an odd number of 64-bit lookup table vectors, fill the high
4233   // 64 bits of the last 128-bit lookup table with zero.
4234   if (PairPos == End) {
4235     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4236     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4237                                                      ZeroTbl, Indices, Name));
4238   }
4239
4240   Function *TblF;
4241   TblOps.push_back(IndexOp);
4242   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4243
4244   return CGF.EmitNeonCall(TblF, TblOps, Name);
4245 }
4246
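     // Map the ARM hint builtins (nop, yield, wfe, wfi, sev, sevl) onto the
     // llvm.arm.hint intrinsic with the matching immediate; return null for
     // anything that is not a hint so the caller falls through to normal
     // handling.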
4247 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4248   unsigned Value;
4249   switch (BuiltinID) {
4250   default:
4251     return nullptr;
4252   case ARM::BI__builtin_arm_nop:
4253     Value = 0;
4254     break;
4255   case ARM::BI__builtin_arm_yield:
4256   case ARM::BI__yield:
4257     Value = 1;
4258     break;
4259   case ARM::BI__builtin_arm_wfe:
4260   case ARM::BI__wfe:
4261     Value = 2;
4262     break;
4263   case ARM::BI__builtin_arm_wfi:
4264   case ARM::BI__wfi:
4265     Value = 3;
4266     break;
4267   case ARM::BI__builtin_arm_sev:
4268   case ARM::BI__sev:
4269     Value = 4;
4270     break;
4271   case ARM::BI__builtin_arm_sevl:
4272   case ARM::BI__sevl:
4273     Value = 5;
4274     break;
4275   }
4276
4277   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4278                             llvm::ConstantInt::get(Int32Ty, Value));
4279 }
4280
4281 // Generates the IR for the read/write special register builtin,
4282 // ValueType is the type of the value that is to be written or read,
4283 // RegisterType is the type of the register being written to or read from.
4284 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4285                                          const CallExpr *E,
4286                                          llvm::Type *RegisterType,
4287                                          llvm::Type *ValueType,
4288                                          bool IsRead,
4289                                          StringRef SysReg = "") {
4290   // Read and write register intrinsics only support 32- and 64-bit operations.
4291   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4292           && "Unsupported size for register.");
4293
4294   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4295   CodeGen::CodeGenModule &CGM = CGF.CGM;
4296   LLVMContext &Context = CGM.getLLVMContext();
4297
4298   if (SysReg.empty()) {
4299     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4300     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4301   }
4302
4303   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4304   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4305   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4306
4307   llvm::Type *Types[] = { RegisterType };
4308
4309   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4310   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4311             && "Can't fit 64-bit value in 32-bit register");
4312
4313   if (IsRead) {
4314     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4315     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4316
4317     if (MixedTypes)
4318       // Read into 64 bit register and then truncate result to 32 bit.
4319       return Builder.CreateTrunc(Call, ValueType);
4320
4321     if (ValueType->isPointerTy())
4322       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4323       return Builder.CreateIntToPtr(Call, ValueType);
4324
4325     return Call;
4326   }
4327
4328   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4329   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4330   if (MixedTypes) {
4331     // Extend 32 bit write value to 64 bit to pass to write.
4332     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4333     return Builder.CreateCall(F, { Metadata, ArgValue });
4334   }
4335
4336   if (ValueType->isPointerTy()) {
4337     // Have VoidPtrTy ArgValue but the intrinsic wants an i32/i64.
4338     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4339     return Builder.CreateCall(F, { Metadata, ArgValue });
4340   }
4341
4342   return Builder.CreateCall(F, { Metadata, ArgValue });
4343 }
4344
4345 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4346 /// argument that specifies the vector type.
4347 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4348   switch (BuiltinID) {
4349   default: break;
4350   case NEON::BI__builtin_neon_vget_lane_i8:
4351   case NEON::BI__builtin_neon_vget_lane_i16:
4352   case NEON::BI__builtin_neon_vget_lane_i32:
4353   case NEON::BI__builtin_neon_vget_lane_i64:
4354   case NEON::BI__builtin_neon_vget_lane_f32:
4355   case NEON::BI__builtin_neon_vgetq_lane_i8:
4356   case NEON::BI__builtin_neon_vgetq_lane_i16:
4357   case NEON::BI__builtin_neon_vgetq_lane_i32:
4358   case NEON::BI__builtin_neon_vgetq_lane_i64:
4359   case NEON::BI__builtin_neon_vgetq_lane_f32:
4360   case NEON::BI__builtin_neon_vset_lane_i8:
4361   case NEON::BI__builtin_neon_vset_lane_i16:
4362   case NEON::BI__builtin_neon_vset_lane_i32:
4363   case NEON::BI__builtin_neon_vset_lane_i64:
4364   case NEON::BI__builtin_neon_vset_lane_f32:
4365   case NEON::BI__builtin_neon_vsetq_lane_i8:
4366   case NEON::BI__builtin_neon_vsetq_lane_i16:
4367   case NEON::BI__builtin_neon_vsetq_lane_i32:
4368   case NEON::BI__builtin_neon_vsetq_lane_i64:
4369   case NEON::BI__builtin_neon_vsetq_lane_f32:
4370   case NEON::BI__builtin_neon_vsha1h_u32:
4371   case NEON::BI__builtin_neon_vsha1cq_u32:
4372   case NEON::BI__builtin_neon_vsha1pq_u32:
4373   case NEON::BI__builtin_neon_vsha1mq_u32:
4374   case ARM::BI_MoveToCoprocessor:
4375   case ARM::BI_MoveToCoprocessor2:
4376     return false;
4377   }
4378   return true;
4379 }
4380
4381 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4382                                            const CallExpr *E) {
4383   if (auto Hint = GetValueForARMHint(BuiltinID))
4384     return Hint;
4385
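       // __emit(<encoding>) injects a raw instruction encoding into the output
       // via inline assembly: ".inst.n" for Thumb (16-bit) and ".inst" for ARM
       // (32-bit).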
4386   if (BuiltinID == ARM::BI__emit) {
4387     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4388     llvm::FunctionType *FTy =
4389         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4390
4391     APSInt Value;
4392     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4393       llvm_unreachable("Sema will ensure that the parameter is constant");
4394
4395     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4396
4397     llvm::InlineAsm *Emit =
4398         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4399                                  /*SideEffects=*/true)
4400                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4401                                  /*SideEffects=*/true);
4402
4403     return Builder.CreateCall(Emit);
4404   }
4405
4406   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4407     Value *Option = EmitScalarExpr(E->getArg(0));
4408     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4409   }
4410
4411   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4412     Value *Address = EmitScalarExpr(E->getArg(0));
4413     Value *RW      = EmitScalarExpr(E->getArg(1));
4414     Value *IsData  = EmitScalarExpr(E->getArg(2));
4415
4416     // Locality is not supported on the ARM target.
4417     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4418
4419     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4420     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4421   }
4422
4423   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4424     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4425     return Builder.CreateCall(
4426         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4427   }
4428
4429   if (BuiltinID == ARM::BI__clear_cache) {
4430     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4431     const FunctionDecl *FD = E->getDirectCallee();
4432     Value *Ops[2];
4433     for (unsigned i = 0; i < 2; i++)
4434       Ops[i] = EmitScalarExpr(E->getArg(i));
4435     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4436     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4437     StringRef Name = FD->getName();
4438     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4439   }
4440
4441   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4442       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4443     Function *F;
4444
4445     switch (BuiltinID) {
4446     default: llvm_unreachable("unexpected builtin");
4447     case ARM::BI__builtin_arm_mcrr:
4448       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4449       break;
4450     case ARM::BI__builtin_arm_mcrr2:
4451       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4452       break;
4453     }
4454
4455     // The MCRR{2} instruction has 5 operands, but the builtin takes only 4
4456     // because Rt and Rt2 are packed into a single unsigned 64-bit integer.
4457     // The LLVM intrinsic still expects Rt and Rt2 as two separate 32-bit
4458     // values, so the 64-bit argument is split into its halves below.
4461
4462     Value *Coproc = EmitScalarExpr(E->getArg(0));
4463     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4464     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4465     Value *CRm = EmitScalarExpr(E->getArg(3));
4466
4467     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4468     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4469     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4470     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4471
4472     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4473   }
4474
4475   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4476       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4477     Function *F;
4478
4479     switch (BuiltinID) {
4480     default: llvm_unreachable("unexpected builtin");
4481     case ARM::BI__builtin_arm_mrrc:
4482       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4483       break;
4484     case ARM::BI__builtin_arm_mrrc2:
4485       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4486       break;
4487     }
4488
4489     Value *Coproc = EmitScalarExpr(E->getArg(0));
4490     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4491     Value *CRm  = EmitScalarExpr(E->getArg(2));
4492     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4493
4494     // The intrinsic returns the two 32-bit halves; reassemble them into the
4495     // unsigned 64-bit integer expected by the builtin.
4496
4497     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4498     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4499     Rt = Builder.CreateZExt(Rt, Int64Ty);
4500     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4501
4502     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4503     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4504     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4505
4506     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4507   }
4508
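       // 64-bit exclusive loads use ldrexd/ldaexd, which return the value as two
       // 32-bit halves; reassemble them into a single i64 below.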
4509   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4510       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4511         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4512        getContext().getTypeSize(E->getType()) == 64) ||
4513       BuiltinID == ARM::BI__ldrexd) {
4514     Function *F;
4515
4516     switch (BuiltinID) {
4517     default: llvm_unreachable("unexpected builtin");
4518     case ARM::BI__builtin_arm_ldaex:
4519       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4520       break;
4521     case ARM::BI__builtin_arm_ldrexd:
4522     case ARM::BI__builtin_arm_ldrex:
4523     case ARM::BI__ldrexd:
4524       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4525       break;
4526     }
4527
4528     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4529     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4530                                     "ldrexd");
4531
4532     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4533     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4534     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4535     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4536
4537     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4538     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4539     Val = Builder.CreateOr(Val, Val1);
4540     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4541   }
4542
4543   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4544       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4545     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4546
4547     QualType Ty = E->getType();
4548     llvm::Type *RealResTy = ConvertType(Ty);
4549     llvm::Type *PtrTy = llvm::IntegerType::get(
4550         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4551     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4552
4553     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4554                                        ? Intrinsic::arm_ldaex
4555                                        : Intrinsic::arm_ldrex,
4556                                    PtrTy);
4557     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4558
4559     if (RealResTy->isPointerTy())
4560       return Builder.CreateIntToPtr(Val, RealResTy);
4561     else {
4562       llvm::Type *IntResTy = llvm::IntegerType::get(
4563           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4564       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4565       return Builder.CreateBitCast(Val, RealResTy);
4566     }
4567   }
4568
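       // 64-bit exclusive stores use strexd/stlexd, which take the value as two
       // 32-bit halves; split the 64-bit operand through a memory temporary.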
4569   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4570       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4571         BuiltinID == ARM::BI__builtin_arm_strex) &&
4572        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4573     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4574                                        ? Intrinsic::arm_stlexd
4575                                        : Intrinsic::arm_strexd);
4576     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4577
4578     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4579     Value *Val = EmitScalarExpr(E->getArg(0));
4580     Builder.CreateStore(Val, Tmp);
4581
4582     Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4583     Val = Builder.CreateLoad(LdPtr);
4584
4585     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4586     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4587     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4588     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4589   }
4590
4591   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4592       BuiltinID == ARM::BI__builtin_arm_stlex) {
4593     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4594     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4595
4596     QualType Ty = E->getArg(0)->getType();
4597     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4598                                                  getContext().getTypeSize(Ty));
4599     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4600
4601     if (StoreVal->getType()->isPointerTy())
4602       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4603     else {
4604       llvm::Type *IntTy = llvm::IntegerType::get(
4605           getLLVMContext(),
4606           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4607       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4608       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4609     }
4610
4611     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4612                                        ? Intrinsic::arm_stlex
4613                                        : Intrinsic::arm_strex,
4614                                    StoreAddr->getType());
4615     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4616   }
4617
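       // The __iso_volatile_{load,store} builtins are emitted as ordinary
       // volatile loads and stores of the matching integer width.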
4618   switch (BuiltinID) {
4619   case ARM::BI__iso_volatile_load8:
4620   case ARM::BI__iso_volatile_load16:
4621   case ARM::BI__iso_volatile_load32:
4622   case ARM::BI__iso_volatile_load64: {
4623     Value *Ptr = EmitScalarExpr(E->getArg(0));
4624     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4625     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4626     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4627                                              LoadSize.getQuantity() * 8);
4628     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4629     llvm::LoadInst *Load =
4630       Builder.CreateAlignedLoad(Ptr, LoadSize);
4631     Load->setVolatile(true);
4632     return Load;
4633   }
4634   case ARM::BI__iso_volatile_store8:
4635   case ARM::BI__iso_volatile_store16:
4636   case ARM::BI__iso_volatile_store32:
4637   case ARM::BI__iso_volatile_store64: {
4638     Value *Ptr = EmitScalarExpr(E->getArg(0));
4639     Value *Value = EmitScalarExpr(E->getArg(1));
4640     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4641     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4642     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4643                                              StoreSize.getQuantity() * 8);
4644     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4645     llvm::StoreInst *Store =
4646       Builder.CreateAlignedStore(Value, Ptr,
4647                                  StoreSize);
4648     Store->setVolatile(true);
4649     return Store;
4650   }
4651   }
4652
4653   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4654     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4655     return Builder.CreateCall(F);
4656   }
4657
4658   // CRC32
4659   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4660   switch (BuiltinID) {
4661   case ARM::BI__builtin_arm_crc32b:
4662     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4663   case ARM::BI__builtin_arm_crc32cb:
4664     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4665   case ARM::BI__builtin_arm_crc32h:
4666     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4667   case ARM::BI__builtin_arm_crc32ch:
4668     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4669   case ARM::BI__builtin_arm_crc32w:
4670   case ARM::BI__builtin_arm_crc32d:
4671     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4672   case ARM::BI__builtin_arm_crc32cw:
4673   case ARM::BI__builtin_arm_crc32cd:
4674     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4675   }
4676
4677   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4678     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4679     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4680
4681     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4682     // intrinsics, hence we need different codegen for these cases.
4683     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4684         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4685       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4686       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4687       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4688       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4689
4690       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4691       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4692       return Builder.CreateCall(F, {Res, Arg1b});
4693     } else {
4694       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4695
4696       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4697       return Builder.CreateCall(F, {Arg0, Arg1});
4698     }
4699   }
4700
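       // __builtin_arm_{rsr,wsr}{,64,p} read or write a system register named by
       // a string-literal argument; choose the value and register types here and
       // let EmitSpecialRegisterBuiltin do the rest.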
4701   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4702       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4703       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4704       BuiltinID == ARM::BI__builtin_arm_wsr ||
4705       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4706       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4707
4708     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4709                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4710                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4711
4712     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4713                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4714
4715     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4716                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4717
4718     llvm::Type *ValueType;
4719     llvm::Type *RegisterType;
4720     if (IsPointerBuiltin) {
4721       ValueType = VoidPtrTy;
4722       RegisterType = Int32Ty;
4723     } else if (Is64Bit) {
4724       ValueType = RegisterType = Int64Ty;
4725     } else {
4726       ValueType = RegisterType = Int32Ty;
4727     }
4728
4729     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4730   }
4731
4732   // Find out if any arguments are required to be integer constant
4733   // expressions.
4734   unsigned ICEArguments = 0;
4735   ASTContext::GetBuiltinTypeError Error;
4736   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4737   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4738
4739   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4740     return Builder.getInt32(addr.getAlignment().getQuantity());
4741   };
4742
4743   Address PtrOp0 = Address::invalid();
4744   Address PtrOp1 = Address::invalid();
4745   SmallVector<Value*, 4> Ops;
4746   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4747   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4748   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4749     if (i == 0) {
4750       switch (BuiltinID) {
4751       case NEON::BI__builtin_neon_vld1_v:
4752       case NEON::BI__builtin_neon_vld1q_v:
4753       case NEON::BI__builtin_neon_vld1q_lane_v:
4754       case NEON::BI__builtin_neon_vld1_lane_v:
4755       case NEON::BI__builtin_neon_vld1_dup_v:
4756       case NEON::BI__builtin_neon_vld1q_dup_v:
4757       case NEON::BI__builtin_neon_vst1_v:
4758       case NEON::BI__builtin_neon_vst1q_v:
4759       case NEON::BI__builtin_neon_vst1q_lane_v:
4760       case NEON::BI__builtin_neon_vst1_lane_v:
4761       case NEON::BI__builtin_neon_vst2_v:
4762       case NEON::BI__builtin_neon_vst2q_v:
4763       case NEON::BI__builtin_neon_vst2_lane_v:
4764       case NEON::BI__builtin_neon_vst2q_lane_v:
4765       case NEON::BI__builtin_neon_vst3_v:
4766       case NEON::BI__builtin_neon_vst3q_v:
4767       case NEON::BI__builtin_neon_vst3_lane_v:
4768       case NEON::BI__builtin_neon_vst3q_lane_v:
4769       case NEON::BI__builtin_neon_vst4_v:
4770       case NEON::BI__builtin_neon_vst4q_v:
4771       case NEON::BI__builtin_neon_vst4_lane_v:
4772       case NEON::BI__builtin_neon_vst4q_lane_v:
4773         // Get the alignment for the argument in addition to the value;
4774         // we'll use it later.
4775         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4776         Ops.push_back(PtrOp0.getPointer());
4777         continue;
4778       }
4779     }
4780     if (i == 1) {
4781       switch (BuiltinID) {
4782       case NEON::BI__builtin_neon_vld2_v:
4783       case NEON::BI__builtin_neon_vld2q_v:
4784       case NEON::BI__builtin_neon_vld3_v:
4785       case NEON::BI__builtin_neon_vld3q_v:
4786       case NEON::BI__builtin_neon_vld4_v:
4787       case NEON::BI__builtin_neon_vld4q_v:
4788       case NEON::BI__builtin_neon_vld2_lane_v:
4789       case NEON::BI__builtin_neon_vld2q_lane_v:
4790       case NEON::BI__builtin_neon_vld3_lane_v:
4791       case NEON::BI__builtin_neon_vld3q_lane_v:
4792       case NEON::BI__builtin_neon_vld4_lane_v:
4793       case NEON::BI__builtin_neon_vld4q_lane_v:
4794       case NEON::BI__builtin_neon_vld2_dup_v:
4795       case NEON::BI__builtin_neon_vld3_dup_v:
4796       case NEON::BI__builtin_neon_vld4_dup_v:
4797         // Get the alignment for the argument in addition to the value;
4798         // we'll use it later.
4799         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4800         Ops.push_back(PtrOp1.getPointer());
4801         continue;
4802       }
4803     }
4804
4805     if ((ICEArguments & (1 << i)) == 0) {
4806       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4807     } else {
4808       // If this is required to be a constant, constant fold it so that we know
4809       // that the generated intrinsic gets a ConstantInt.
4810       llvm::APSInt Result;
4811       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4812       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4813       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4814     }
4815   }
4816
4817   switch (BuiltinID) {
4818   default: break;
4819
4820   case NEON::BI__builtin_neon_vget_lane_i8:
4821   case NEON::BI__builtin_neon_vget_lane_i16:
4822   case NEON::BI__builtin_neon_vget_lane_i32:
4823   case NEON::BI__builtin_neon_vget_lane_i64:
4824   case NEON::BI__builtin_neon_vget_lane_f32:
4825   case NEON::BI__builtin_neon_vgetq_lane_i8:
4826   case NEON::BI__builtin_neon_vgetq_lane_i16:
4827   case NEON::BI__builtin_neon_vgetq_lane_i32:
4828   case NEON::BI__builtin_neon_vgetq_lane_i64:
4829   case NEON::BI__builtin_neon_vgetq_lane_f32:
4830     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4831
4832   case NEON::BI__builtin_neon_vset_lane_i8:
4833   case NEON::BI__builtin_neon_vset_lane_i16:
4834   case NEON::BI__builtin_neon_vset_lane_i32:
4835   case NEON::BI__builtin_neon_vset_lane_i64:
4836   case NEON::BI__builtin_neon_vset_lane_f32:
4837   case NEON::BI__builtin_neon_vsetq_lane_i8:
4838   case NEON::BI__builtin_neon_vsetq_lane_i16:
4839   case NEON::BI__builtin_neon_vsetq_lane_i32:
4840   case NEON::BI__builtin_neon_vsetq_lane_i64:
4841   case NEON::BI__builtin_neon_vsetq_lane_f32:
4842     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4843
4844   case NEON::BI__builtin_neon_vsha1h_u32:
4845     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4846                         "vsha1h");
4847   case NEON::BI__builtin_neon_vsha1cq_u32:
4848     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4849                         "vsha1c");
4850   case NEON::BI__builtin_neon_vsha1pq_u32:
4851     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4852                         "vsha1p");
4853   case NEON::BI__builtin_neon_vsha1mq_u32:
4854     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4855                         "vsha1m");
4856
4857   // The ARM _MoveToCoprocessor builtins put the input register value as
4858   // the first argument, but the LLVM intrinsic expects it as the third one.
4859   case ARM::BI_MoveToCoprocessor:
4860   case ARM::BI_MoveToCoprocessor2: {
4861     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4862                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4863     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4864                                   Ops[3], Ops[4], Ops[5]});
4865   }
4866   case ARM::BI_BitScanForward:
4867   case ARM::BI_BitScanForward64:
4868     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4869   case ARM::BI_BitScanReverse:
4870   case ARM::BI_BitScanReverse64:
4871     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4872
4873   case ARM::BI_InterlockedAnd64:
4874     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4875   case ARM::BI_InterlockedExchange64:
4876     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4877   case ARM::BI_InterlockedExchangeAdd64:
4878     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4879   case ARM::BI_InterlockedExchangeSub64:
4880     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4881   case ARM::BI_InterlockedOr64:
4882     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4883   case ARM::BI_InterlockedXor64:
4884     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4885   case ARM::BI_InterlockedDecrement64:
4886     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4887   case ARM::BI_InterlockedIncrement64:
4888     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4889   }
4890
4891   // Get the last argument, which specifies the vector type.
4892   assert(HasExtraArg);
4893   llvm::APSInt Result;
4894   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4895   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4896     return nullptr;
4897
4898   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4899       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4900     // Determine the overloaded type of this builtin.
4901     llvm::Type *Ty;
4902     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4903       Ty = FloatTy;
4904     else
4905       Ty = DoubleTy;
4906
4907     // Determine whether this is an unsigned conversion or not.
4908     bool usgn = Result.getZExtValue() == 1;
4909     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4910
4911     // Call the appropriate intrinsic.
4912     Function *F = CGM.getIntrinsic(Int, Ty);
4913     return Builder.CreateCall(F, Ops, "vcvtr");
4914   }
4915
4916   // Determine the type of this overloaded NEON intrinsic.
4917   NeonTypeFlags Type(Result.getZExtValue());
4918   bool usgn = Type.isUnsigned();
4919   bool rightShift = false;
4920
4921   llvm::VectorType *VTy = GetNeonType(this, Type);
4922   llvm::Type *Ty = VTy;
4923   if (!Ty)
4924     return nullptr;
4925
4926   // Many NEON builtins have identical semantics and uses in ARM and
4927   // AArch64. Emit these in a single function.
4928   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4929   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4930       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4931   if (Builtin)
4932     return EmitCommonNeonBuiltinExpr(
4933         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4934         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4935
4936   unsigned Int;
4937   switch (BuiltinID) {
4938   default: return nullptr;
4939   case NEON::BI__builtin_neon_vld1q_lane_v:
4940     // Handle 64-bit integer elements as a special case.  Use shuffles of
4941     // one-element vectors to avoid poor code for i64 in the backend.
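         // Roughly: extract the untouched lane and the newly loaded lane as
         // <1 x i64> values, then shufflevector them back into a <2 x i64> in
         // the original lane order.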
4942     if (VTy->getElementType()->isIntegerTy(64)) {
4943       // Extract the other lane.
4944       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4945       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4946       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4947       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4948       // Load the value as a one-element vector.
4949       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4950       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4951       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4952       Value *Align = getAlignmentValue32(PtrOp0);
4953       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4954       // Combine them.
4955       uint32_t Indices[] = {1 - Lane, Lane};
4956       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4957       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4958     }
4959     // fall through
4960   case NEON::BI__builtin_neon_vld1_lane_v: {
4961     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4962     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4963     Value *Ld = Builder.CreateLoad(PtrOp0);
4964     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4965   }
4966   case NEON::BI__builtin_neon_vld2_dup_v:
4967   case NEON::BI__builtin_neon_vld3_dup_v:
4968   case NEON::BI__builtin_neon_vld4_dup_v: {
4969     // Handle 64-bit elements as a special case.  There is no "dup" needed.
4970     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4971       switch (BuiltinID) {
4972       case NEON::BI__builtin_neon_vld2_dup_v:
4973         Int = Intrinsic::arm_neon_vld2;
4974         break;
4975       case NEON::BI__builtin_neon_vld3_dup_v:
4976         Int = Intrinsic::arm_neon_vld3;
4977         break;
4978       case NEON::BI__builtin_neon_vld4_dup_v:
4979         Int = Intrinsic::arm_neon_vld4;
4980         break;
4981       default: llvm_unreachable("unknown vld_dup intrinsic?");
4982       }
4983       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4984       Function *F = CGM.getIntrinsic(Int, Tys);
4985       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4986       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4987       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4988       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4989       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4990     }
4991     switch (BuiltinID) {
4992     case NEON::BI__builtin_neon_vld2_dup_v:
4993       Int = Intrinsic::arm_neon_vld2lane;
4994       break;
4995     case NEON::BI__builtin_neon_vld3_dup_v:
4996       Int = Intrinsic::arm_neon_vld3lane;
4997       break;
4998     case NEON::BI__builtin_neon_vld4_dup_v:
4999       Int = Intrinsic::arm_neon_vld4lane;
5000       break;
5001     default: llvm_unreachable("unknown vld_dup intrinsic?");
5002     }
5003     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5004     Function *F = CGM.getIntrinsic(Int, Tys);
5005     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5006
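         // Build the vldN_lane call: base pointer, N undef input vectors (only
         // lane 0 is loaded, and it is broadcast to every lane below, so the
         // incoming lane values do not matter), the lane index 0, and the
         // alignment.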
5007     SmallVector<Value*, 6> Args;
5008     Args.push_back(Ops[1]);
5009     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5010
5011     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5012     Args.push_back(CI);
5013     Args.push_back(getAlignmentValue32(PtrOp1));
5014
5015     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5016     // splat lane 0 to all elts in each vector of the result.
5017     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5018       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5019       Value *Elt = Builder.CreateBitCast(Val, Ty);
5020       Elt = EmitNeonSplat(Elt, CI);
5021       Elt = Builder.CreateBitCast(Elt, Val->getType());
5022       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5023     }
5024     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5025     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5026     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5027   }
5028   case NEON::BI__builtin_neon_vqrshrn_n_v:
5029     Int =
5030       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5031     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5032                         1, true);
5033   case NEON::BI__builtin_neon_vqrshrun_n_v:
5034     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5035                         Ops, "vqrshrun_n", 1, true);
5036   case NEON::BI__builtin_neon_vqshrn_n_v:
5037     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5038     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5039                         1, true);
5040   case NEON::BI__builtin_neon_vqshrun_n_v:
5041     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5042                         Ops, "vqshrun_n", 1, true);
5043   case NEON::BI__builtin_neon_vrecpe_v:
5044   case NEON::BI__builtin_neon_vrecpeq_v:
5045     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5046                         Ops, "vrecpe");
5047   case NEON::BI__builtin_neon_vrshrn_n_v:
5048     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5049                         Ops, "vrshrn_n", 1, true);
5050   case NEON::BI__builtin_neon_vrsra_n_v:
5051   case NEON::BI__builtin_neon_vrsraq_n_v:
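         // NEON models rounding right shifts as rounding left shifts by a
         // negative amount, so EmitNeonShiftVector negates the shift count
         // here; the rounded result is then accumulated into the first operand.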
5052     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5053     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5054     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5055     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5056     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5057     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5058   case NEON::BI__builtin_neon_vsri_n_v:
5059   case NEON::BI__builtin_neon_vsriq_n_v:
5060     rightShift = true;
5061     LLVM_FALLTHROUGH;
5062   case NEON::BI__builtin_neon_vsli_n_v:
5063   case NEON::BI__builtin_neon_vsliq_n_v:
5064     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5065     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5066                         Ops, "vsli_n");
5067   case NEON::BI__builtin_neon_vsra_n_v:
5068   case NEON::BI__builtin_neon_vsraq_n_v:
5069     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5070     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5071     return Builder.CreateAdd(Ops[0], Ops[1]);
5072   case NEON::BI__builtin_neon_vst1q_lane_v:
5073     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5074     // a one-element vector and avoid poor code for i64 in the backend.
5075     if (VTy->getElementType()->isIntegerTy(64)) {
5076       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5077       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5078       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5079       Ops[2] = getAlignmentValue32(PtrOp0);
5080       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5081       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5082                                                  Tys), Ops);
5083     }
5084     // fall through
5085   case NEON::BI__builtin_neon_vst1_lane_v: {
5086     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5087     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5088     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5089     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5090     return St;
5091   }
5092   case NEON::BI__builtin_neon_vtbl1_v:
5093     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5094                         Ops, "vtbl1");
5095   case NEON::BI__builtin_neon_vtbl2_v:
5096     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5097                         Ops, "vtbl2");
5098   case NEON::BI__builtin_neon_vtbl3_v:
5099     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5100                         Ops, "vtbl3");
5101   case NEON::BI__builtin_neon_vtbl4_v:
5102     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5103                         Ops, "vtbl4");
5104   case NEON::BI__builtin_neon_vtbx1_v:
5105     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5106                         Ops, "vtbx1");
5107   case NEON::BI__builtin_neon_vtbx2_v:
5108     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5109                         Ops, "vtbx2");
5110   case NEON::BI__builtin_neon_vtbx3_v:
5111     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5112                         Ops, "vtbx3");
5113   case NEON::BI__builtin_neon_vtbx4_v:
5114     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5115                         Ops, "vtbx4");
5116   }
5117 }
5118
5119 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5120                                       const CallExpr *E,
5121                                       SmallVectorImpl<Value *> &Ops) {
5122   unsigned int Int = 0;
5123   const char *s = nullptr;
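       // For the vqtbl*/vqtbx* cases at the end of the second switch below, Int
       // and s accumulate the intrinsic ID and value name for a single
       // EmitNeonCall at the bottom of the function.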
5124
5125   switch (BuiltinID) {
5126   default:
5127     return nullptr;
5128   case NEON::BI__builtin_neon_vtbl1_v:
5129   case NEON::BI__builtin_neon_vqtbl1_v:
5130   case NEON::BI__builtin_neon_vqtbl1q_v:
5131   case NEON::BI__builtin_neon_vtbl2_v:
5132   case NEON::BI__builtin_neon_vqtbl2_v:
5133   case NEON::BI__builtin_neon_vqtbl2q_v:
5134   case NEON::BI__builtin_neon_vtbl3_v:
5135   case NEON::BI__builtin_neon_vqtbl3_v:
5136   case NEON::BI__builtin_neon_vqtbl3q_v:
5137   case NEON::BI__builtin_neon_vtbl4_v:
5138   case NEON::BI__builtin_neon_vqtbl4_v:
5139   case NEON::BI__builtin_neon_vqtbl4q_v:
5140     break;
5141   case NEON::BI__builtin_neon_vtbx1_v:
5142   case NEON::BI__builtin_neon_vqtbx1_v:
5143   case NEON::BI__builtin_neon_vqtbx1q_v:
5144   case NEON::BI__builtin_neon_vtbx2_v:
5145   case NEON::BI__builtin_neon_vqtbx2_v:
5146   case NEON::BI__builtin_neon_vqtbx2q_v:
5147   case NEON::BI__builtin_neon_vtbx3_v:
5148   case NEON::BI__builtin_neon_vqtbx3_v:
5149   case NEON::BI__builtin_neon_vqtbx3q_v:
5150   case NEON::BI__builtin_neon_vtbx4_v:
5151   case NEON::BI__builtin_neon_vqtbx4_v:
5152   case NEON::BI__builtin_neon_vqtbx4q_v:
5153     break;
5154   }
5155
5156   assert(E->getNumArgs() >= 3);
5157
5158   // Get the last argument, which specifies the vector type.
5159   llvm::APSInt Result;
5160   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5161   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5162     return nullptr;
5163
5164   // Determine the type of this overloaded NEON intrinsic.
5165   NeonTypeFlags Type(Result.getZExtValue());
5166   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5167   if (!Ty)
5168     return nullptr;
5169
5170   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5171
5172   // AArch64 scalar builtins are not overloaded and do not have an extra
5173   // argument that specifies the vector type, so each case is handled separately.
5174   switch (BuiltinID) {
5175   case NEON::BI__builtin_neon_vtbl1_v: {
5176     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5177                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5178                               "vtbl1");
5179   }
5180   case NEON::BI__builtin_neon_vtbl2_v: {
5181     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5182                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5183                               "vtbl1");
5184   }
5185   case NEON::BI__builtin_neon_vtbl3_v: {
5186     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5187                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5188                               "vtbl2");
5189   }
5190   case NEON::BI__builtin_neon_vtbl4_v: {
5191     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5192                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5193                               "vtbl2");
5194   }
5195   case NEON::BI__builtin_neon_vtbx1_v: {
5196     Value *TblRes =
5197         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5198                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5199
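         // tbl1 zeroes lanes whose index is out of range (>= 8 for one 64-bit
         // table), but vtbx1 must keep the original destination lane instead,
         // so select between Ops[0] and the table result based on the index.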
5200     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5201     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5202     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5203
5204     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5205     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5206     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5207   }
5208   case NEON::BI__builtin_neon_vtbx2_v: {
5209     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5210                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5211                               "vtbx1");
5212   }
5213   case NEON::BI__builtin_neon_vtbx3_v: {
5214     Value *TblRes =
5215         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5216                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5217
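         // As for vtbx1: indices >= 24 (three 8-byte tables) are out of range
         // for vtbx3, so those lanes are taken from the destination operand
         // Ops[0] instead of the tbl2-based lookup result.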
5218     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5219     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5220                                            TwentyFourV);
5221     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5222
5223     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5224     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5225     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5226   }
5227   case NEON::BI__builtin_neon_vtbx4_v: {
5228     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5229                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5230                               "vtbx2");
5231   }
5232   case NEON::BI__builtin_neon_vqtbl1_v:
5233   case NEON::BI__builtin_neon_vqtbl1q_v:
5234     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5235   case NEON::BI__builtin_neon_vqtbl2_v:
5236   case NEON::BI__builtin_neon_vqtbl2q_v:
5237     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5238   case NEON::BI__builtin_neon_vqtbl3_v:
5239   case NEON::BI__builtin_neon_vqtbl3q_v:
5240     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5241   case NEON::BI__builtin_neon_vqtbl4_v:
5242   case NEON::BI__builtin_neon_vqtbl4q_v:
5243     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5244   case NEON::BI__builtin_neon_vqtbx1_v:
5245   case NEON::BI__builtin_neon_vqtbx1q_v:
5246     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5247   case NEON::BI__builtin_neon_vqtbx2_v:
5248   case NEON::BI__builtin_neon_vqtbx2q_v:
5249     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5250   case NEON::BI__builtin_neon_vqtbx3_v:
5251   case NEON::BI__builtin_neon_vqtbx3q_v:
5252     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5253   case NEON::BI__builtin_neon_vqtbx4_v:
5254   case NEON::BI__builtin_neon_vqtbx4q_v:
5255     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5256   }
5258
5259   if (!Int)
5260     return nullptr;
5261
5262   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5263   return CGF.EmitNeonCall(F, Ops, s);
5264 }
5265
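     // Widen a scalar i16 into lane 0 of a v4i16 vector so that scalar
     // saturating operations can be emitted through the vector sqdmull
     // intrinsic; callers extract lane 0 of the result afterwards.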
5266 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5267   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5268   Op = Builder.CreateBitCast(Op, Int16Ty);
5269   Value *V = UndefValue::get(VTy);
5270   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5271   Op = Builder.CreateInsertElement(V, Op, CI);
5272   return Op;
5273 }
5274
5275 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5276                                                const CallExpr *E) {
5277   unsigned HintID = static_cast<unsigned>(-1);
5278   switch (BuiltinID) {
5279   default: break;
5280   case AArch64::BI__builtin_arm_nop:
5281     HintID = 0;
5282     break;
5283   case AArch64::BI__builtin_arm_yield:
5284     HintID = 1;
5285     break;
5286   case AArch64::BI__builtin_arm_wfe:
5287     HintID = 2;
5288     break;
5289   case AArch64::BI__builtin_arm_wfi:
5290     HintID = 3;
5291     break;
5292   case AArch64::BI__builtin_arm_sev:
5293     HintID = 4;
5294     break;
5295   case AArch64::BI__builtin_arm_sevl:
5296     HintID = 5;
5297     break;
5298   }
5299
5300   if (HintID != static_cast<unsigned>(-1)) {
5301     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5302     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5303   }
5304
5305   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5306     Value *Address         = EmitScalarExpr(E->getArg(0));
5307     Value *RW              = EmitScalarExpr(E->getArg(1));
5308     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5309     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5310     Value *IsData          = EmitScalarExpr(E->getArg(4));
5311
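         // llvm.prefetch expresses temporal locality on an inverted 0-3 scale
         // (3 = most temporal / keep), so the builtin's cache-level operand is
         // flipped below; streaming (non-temporal) prefetches always use
         // locality 0.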
5312     Value *Locality = nullptr;
5313     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5314       // Temporal fetch: convert the cache level to an llvm.prefetch locality.
5315       Locality = llvm::ConstantInt::get(Int32Ty,
5316         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5317     } else {
5318       // Streaming fetch.
5319       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5320     }
5321
5322     // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
5323     // PLDL3STRM or PLDL2STRM.
5324     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5325     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5326   }
5327
5328   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5329     assert((getContext().getTypeSize(E->getType()) == 32) &&
5330            "rbit of unusual size!");
5331     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5332     return Builder.CreateCall(
5333         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5334   }
5335   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5336     assert((getContext().getTypeSize(E->getType()) == 64) &&
5337            "rbit of unusual size!");
5338     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5339     return Builder.CreateCall(
5340         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5341   }
5342
5343   if (BuiltinID == AArch64::BI__clear_cache) {
5344     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5345     const FunctionDecl *FD = E->getDirectCallee();
5346     Value *Ops[2];
5347     for (unsigned i = 0; i < 2; i++)
5348       Ops[i] = EmitScalarExpr(E->getArg(i));
5349     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5350     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5351     StringRef Name = FD->getName();
5352     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5353   }
5354
5355   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5356       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5357       getContext().getTypeSize(E->getType()) == 128) {
5358     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5359                                        ? Intrinsic::aarch64_ldaxp
5360                                        : Intrinsic::aarch64_ldxp);
5361
5362     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5363     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5364                                     "ldxp");
5365
5366     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5367     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5368     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5369     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5370     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5371
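         // Reassemble the i128: the second element of the ldxp/ldaxp result
         // pair becomes the high 64 bits and the first element the low 64 bits.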
5372     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5373     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5374     Val = Builder.CreateOr(Val, Val1);
5375     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5376   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5377              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5378     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5379
5380     QualType Ty = E->getType();
5381     llvm::Type *RealResTy = ConvertType(Ty);
5382     llvm::Type *PtrTy = llvm::IntegerType::get(
5383         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5384     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5385
5386     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5387                                        ? Intrinsic::aarch64_ldaxr
5388                                        : Intrinsic::aarch64_ldxr,
5389                                    PtrTy);
5390     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5391
5392     if (RealResTy->isPointerTy())
5393       return Builder.CreateIntToPtr(Val, RealResTy);
5394
5395     llvm::Type *IntResTy = llvm::IntegerType::get(
5396         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5397     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5398     return Builder.CreateBitCast(Val, RealResTy);
5399   }
5400
5401   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5402        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5403       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5404     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5405                                        ? Intrinsic::aarch64_stlxp
5406                                        : Intrinsic::aarch64_stxp);
5407     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5408
5409     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5410     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5411
5412     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5413     llvm::Value *Val = Builder.CreateLoad(Tmp);
5414
5415     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5416     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5417     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5418                                          Int8PtrTy);
5419     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5420   }
5421
5422   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5423       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5424     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5425     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5426
5427     QualType Ty = E->getArg(0)->getType();
5428     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5429                                                  getContext().getTypeSize(Ty));
5430     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5431
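         // The stxr/stlxr intrinsics take the value operand as an i64, so
         // pointers and narrower integers are converted up to i64 first.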
5432     if (StoreVal->getType()->isPointerTy())
5433       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5434     else {
5435       llvm::Type *IntTy = llvm::IntegerType::get(
5436           getLLVMContext(),
5437           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5438       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5439       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5440     }
5441
5442     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5443                                        ? Intrinsic::aarch64_stlxr
5444                                        : Intrinsic::aarch64_stxr,
5445                                    StoreAddr->getType());
5446     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5447   }
5448
5449   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5450     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5451     return Builder.CreateCall(F);
5452   }
5453
5454   // CRC32
5455   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5456   switch (BuiltinID) {
5457   case AArch64::BI__builtin_arm_crc32b:
5458     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5459   case AArch64::BI__builtin_arm_crc32cb:
5460     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5461   case AArch64::BI__builtin_arm_crc32h:
5462     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5463   case AArch64::BI__builtin_arm_crc32ch:
5464     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5465   case AArch64::BI__builtin_arm_crc32w:
5466     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5467   case AArch64::BI__builtin_arm_crc32cw:
5468     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5469   case AArch64::BI__builtin_arm_crc32d:
5470     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5471   case AArch64::BI__builtin_arm_crc32cd:
5472     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5473   }
5474
5475   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5476     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5477     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5478     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5479
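         // The 8- and 16-bit CRC intrinsics take their data argument as an i32
         // (and the 64-bit forms as an i64), so widen the value to whatever the
         // selected intrinsic expects.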
5480     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5481     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5482
5483     return Builder.CreateCall(F, {Arg0, Arg1});
5484   }
5485
5486   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5487       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5488       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5489       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5490       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5491       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5492
5493     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5494                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5495                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5496
5497     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5498                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5499
5500     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5501                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5502
5503     llvm::Type *ValueType;
5504     llvm::Type *RegisterType = Int64Ty;
5505     if (IsPointerBuiltin) {
5506       ValueType = VoidPtrTy;
5507     } else if (Is64Bit) {
5508       ValueType = Int64Ty;
5509     } else {
5510       ValueType = Int32Ty;
5511     }
5512
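       // AArch64 system register moves always use a 64-bit register operand;
       // EmitSpecialRegisterBuiltin is expected to truncate or extend between a
       // 32-bit value type and the i64 register type where they differ.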
5513     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5514   }
5515
5516   // Find out if any arguments are required to be integer constant
5517   // expressions.
5518   unsigned ICEArguments = 0;
5519   ASTContext::GetBuiltinTypeError Error;
5520   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5521   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5522
5523   llvm::SmallVector<Value*, 4> Ops;
5524   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5525     if ((ICEArguments & (1 << i)) == 0) {
5526       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5527     } else {
5528       // If this is required to be a constant, constant fold it so that we know
5529       // that the generated intrinsic gets a ConstantInt.
5530       llvm::APSInt Result;
5531       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5532       assert(IsConst && "Constant arg isn't actually constant?");
5533       (void)IsConst;
5534       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5535     }
5536   }
5537
5538   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5539   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5540       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5541
5542   if (Builtin) {
5543     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5544     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5545     assert(Result && "SISD intrinsic should have been handled");
5546     return Result;
5547   }
5548
5549   llvm::APSInt Result;
5550   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5551   NeonTypeFlags Type(0);
5552   if (Arg->isIntegerConstantExpr(Result, getContext()))
5553     // Determine the type of this overloaded NEON intrinsic.
5554     Type = NeonTypeFlags(Result.getZExtValue());
5555
5556   bool usgn = Type.isUnsigned();
5557   bool quad = Type.isQuad();
5558
5559   // Handle non-overloaded intrinsics first.
5560   switch (BuiltinID) {
5561   default: break;
5562   case NEON::BI__builtin_neon_vldrq_p128: {
5563     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5564     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5565     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5566     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5567                                      CharUnits::fromQuantity(16));
5568   }
5569   case NEON::BI__builtin_neon_vstrq_p128: {
5570     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5571     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5572     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5573   }
5574   case NEON::BI__builtin_neon_vcvts_u32_f32:
5575   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5576     usgn = true;
5577     // FALL THROUGH
5578   case NEON::BI__builtin_neon_vcvts_s32_f32:
5579   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5580     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5581     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5582     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5583     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5584     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5585     if (usgn)
5586       return Builder.CreateFPToUI(Ops[0], InTy);
5587     return Builder.CreateFPToSI(Ops[0], InTy);
5588   }
5589   case NEON::BI__builtin_neon_vcvts_f32_u32:
5590   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5591     usgn = true;
5592     // FALL THROUGH
5593   case NEON::BI__builtin_neon_vcvts_f32_s32:
5594   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5595     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5596     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5597     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5598     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5599     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5600     if (usgn)
5601       return Builder.CreateUIToFP(Ops[0], FTy);
5602     return Builder.CreateSIToFP(Ops[0], FTy);
5603   }
5604   case NEON::BI__builtin_neon_vpaddd_s64: {
5605     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5606     Value *Vec = EmitScalarExpr(E->getArg(0));
5607     // The vector is v2i64, so make sure it's bitcast to that.
5608     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5609     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5610     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5611     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5612     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5613     // Pairwise addition of a v2i64 into a scalar i64.
5614     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5615   }
5616   case NEON::BI__builtin_neon_vpaddd_f64: {
5617     llvm::Type *Ty =
5618       llvm::VectorType::get(DoubleTy, 2);
5619     Value *Vec = EmitScalarExpr(E->getArg(0));
5620     // The vector is v2f64, so make sure it's bitcast to that.
5621     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5622     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5623     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5624     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5625     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5626     // Pairwise addition of a v2f64 into a scalar f64.
5627     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5628   }
5629   case NEON::BI__builtin_neon_vpadds_f32: {
5630     llvm::Type *Ty =
5631       llvm::VectorType::get(FloatTy, 2);
5632     Value *Vec = EmitScalarExpr(E->getArg(0));
5633     // The vector is v2f32, so make sure it's bitcast to that.
5634     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5635     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5636     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5637     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5638     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5639     // Pairwise addition of a v2f32 into a scalar f32.
5640     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5641   }
5642   case NEON::BI__builtin_neon_vceqzd_s64:
5643   case NEON::BI__builtin_neon_vceqzd_f64:
5644   case NEON::BI__builtin_neon_vceqzs_f32:
5645     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5646     return EmitAArch64CompareBuiltinExpr(
5647         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5648         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5649   case NEON::BI__builtin_neon_vcgezd_s64:
5650   case NEON::BI__builtin_neon_vcgezd_f64:
5651   case NEON::BI__builtin_neon_vcgezs_f32:
5652     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5653     return EmitAArch64CompareBuiltinExpr(
5654         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5655         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5656   case NEON::BI__builtin_neon_vclezd_s64:
5657   case NEON::BI__builtin_neon_vclezd_f64:
5658   case NEON::BI__builtin_neon_vclezs_f32:
5659     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5660     return EmitAArch64CompareBuiltinExpr(
5661         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5662         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5663   case NEON::BI__builtin_neon_vcgtzd_s64:
5664   case NEON::BI__builtin_neon_vcgtzd_f64:
5665   case NEON::BI__builtin_neon_vcgtzs_f32:
5666     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5667     return EmitAArch64CompareBuiltinExpr(
5668         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5669         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5670   case NEON::BI__builtin_neon_vcltzd_s64:
5671   case NEON::BI__builtin_neon_vcltzd_f64:
5672   case NEON::BI__builtin_neon_vcltzs_f32:
5673     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5674     return EmitAArch64CompareBuiltinExpr(
5675         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5676         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5677
5678   case NEON::BI__builtin_neon_vceqzd_u64: {
5679     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5680     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5681     Ops[0] =
5682         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5683     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5684   }
5685   case NEON::BI__builtin_neon_vceqd_f64:
5686   case NEON::BI__builtin_neon_vcled_f64:
5687   case NEON::BI__builtin_neon_vcltd_f64:
5688   case NEON::BI__builtin_neon_vcged_f64:
5689   case NEON::BI__builtin_neon_vcgtd_f64: {
5690     llvm::CmpInst::Predicate P;
5691     switch (BuiltinID) {
5692     default: llvm_unreachable("missing builtin ID in switch!");
5693     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5694     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5695     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5696     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5697     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5698     }
5699     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5700     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5701     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5702     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5703     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5704   }
5705   case NEON::BI__builtin_neon_vceqs_f32:
5706   case NEON::BI__builtin_neon_vcles_f32:
5707   case NEON::BI__builtin_neon_vclts_f32:
5708   case NEON::BI__builtin_neon_vcges_f32:
5709   case NEON::BI__builtin_neon_vcgts_f32: {
5710     llvm::CmpInst::Predicate P;
5711     switch (BuiltinID) {
5712     default: llvm_unreachable("missing builtin ID in switch!");
5713     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5714     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5715     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5716     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5717     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5718     }
5719     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5720     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5721     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5722     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5723     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5724   }
5725   case NEON::BI__builtin_neon_vceqd_s64:
5726   case NEON::BI__builtin_neon_vceqd_u64:
5727   case NEON::BI__builtin_neon_vcgtd_s64:
5728   case NEON::BI__builtin_neon_vcgtd_u64:
5729   case NEON::BI__builtin_neon_vcltd_s64:
5730   case NEON::BI__builtin_neon_vcltd_u64:
5731   case NEON::BI__builtin_neon_vcged_u64:
5732   case NEON::BI__builtin_neon_vcged_s64:
5733   case NEON::BI__builtin_neon_vcled_u64:
5734   case NEON::BI__builtin_neon_vcled_s64: {
5735     llvm::CmpInst::Predicate P;
5736     switch (BuiltinID) {
5737     default: llvm_unreachable("missing builtin ID in switch!");
5738     case NEON::BI__builtin_neon_vceqd_s64:
5739     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5740     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5741     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5742     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5743     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5744     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5745     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5746     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5747     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5748     }
5749     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5750     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5751     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5752     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5753     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5754   }
5755   case NEON::BI__builtin_neon_vtstd_s64:
5756   case NEON::BI__builtin_neon_vtstd_u64: {
5757     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5758     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5759     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5760     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5761     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5762                                 llvm::Constant::getNullValue(Int64Ty));
5763     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5764   }
5765   case NEON::BI__builtin_neon_vset_lane_i8:
5766   case NEON::BI__builtin_neon_vset_lane_i16:
5767   case NEON::BI__builtin_neon_vset_lane_i32:
5768   case NEON::BI__builtin_neon_vset_lane_i64:
5769   case NEON::BI__builtin_neon_vset_lane_f32:
5770   case NEON::BI__builtin_neon_vsetq_lane_i8:
5771   case NEON::BI__builtin_neon_vsetq_lane_i16:
5772   case NEON::BI__builtin_neon_vsetq_lane_i32:
5773   case NEON::BI__builtin_neon_vsetq_lane_i64:
5774   case NEON::BI__builtin_neon_vsetq_lane_f32:
5775     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5776     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5777   case NEON::BI__builtin_neon_vset_lane_f64:
5778     // The vector type needs a cast for the v1f64 variant.
5779     Ops[1] = Builder.CreateBitCast(Ops[1],
5780                                    llvm::VectorType::get(DoubleTy, 1));
5781     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5782     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5783   case NEON::BI__builtin_neon_vsetq_lane_f64:
5784     // The vector type needs a cast for the v2f64 variant.
5785     Ops[1] = Builder.CreateBitCast(Ops[1],
5786         llvm::VectorType::get(DoubleTy, 2));
5787     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5788     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5789
5790   case NEON::BI__builtin_neon_vget_lane_i8:
5791   case NEON::BI__builtin_neon_vdupb_lane_i8:
5792     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5793     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5794                                         "vget_lane");
5795   case NEON::BI__builtin_neon_vgetq_lane_i8:
5796   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5797     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5798     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5799                                         "vgetq_lane");
5800   case NEON::BI__builtin_neon_vget_lane_i16:
5801   case NEON::BI__builtin_neon_vduph_lane_i16:
5802     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5803     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5804                                         "vget_lane");
5805   case NEON::BI__builtin_neon_vgetq_lane_i16:
5806   case NEON::BI__builtin_neon_vduph_laneq_i16:
5807     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5808     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5809                                         "vgetq_lane");
5810   case NEON::BI__builtin_neon_vget_lane_i32:
5811   case NEON::BI__builtin_neon_vdups_lane_i32:
5812     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5813     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5814                                         "vget_lane");
5815   case NEON::BI__builtin_neon_vdups_lane_f32:
5816     Ops[0] = Builder.CreateBitCast(Ops[0],
5817         llvm::VectorType::get(FloatTy, 2));
5818     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5819                                         "vdups_lane");
5820   case NEON::BI__builtin_neon_vgetq_lane_i32:
5821   case NEON::BI__builtin_neon_vdups_laneq_i32:
5822     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5823     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5824                                         "vgetq_lane");
5825   case NEON::BI__builtin_neon_vget_lane_i64:
5826   case NEON::BI__builtin_neon_vdupd_lane_i64:
5827     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5828     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5829                                         "vget_lane");
5830   case NEON::BI__builtin_neon_vdupd_lane_f64:
5831     Ops[0] = Builder.CreateBitCast(Ops[0],
5832         llvm::VectorType::get(DoubleTy, 1));
5833     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5834                                         "vdupd_lane");
5835   case NEON::BI__builtin_neon_vgetq_lane_i64:
5836   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5837     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5838     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5839                                         "vgetq_lane");
5840   case NEON::BI__builtin_neon_vget_lane_f32:
5841     Ops[0] = Builder.CreateBitCast(Ops[0],
5842         llvm::VectorType::get(FloatTy, 2));
5843     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5844                                         "vget_lane");
5845   case NEON::BI__builtin_neon_vget_lane_f64:
5846     Ops[0] = Builder.CreateBitCast(Ops[0],
5847         llvm::VectorType::get(DoubleTy, 1));
5848     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5849                                         "vget_lane");
5850   case NEON::BI__builtin_neon_vgetq_lane_f32:
5851   case NEON::BI__builtin_neon_vdups_laneq_f32:
5852     Ops[0] = Builder.CreateBitCast(Ops[0],
5853         llvm::VectorType::get(FloatTy, 4));
5854     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5855                                         "vgetq_lane");
5856   case NEON::BI__builtin_neon_vgetq_lane_f64:
5857   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5858     Ops[0] = Builder.CreateBitCast(Ops[0],
5859         llvm::VectorType::get(DoubleTy, 2));
5860     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5861                                         "vgetq_lane");
5862   case NEON::BI__builtin_neon_vaddd_s64:
5863   case NEON::BI__builtin_neon_vaddd_u64:
5864     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5865   case NEON::BI__builtin_neon_vsubd_s64:
5866   case NEON::BI__builtin_neon_vsubd_u64:
5867     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5868   case NEON::BI__builtin_neon_vqdmlalh_s16:
5869   case NEON::BI__builtin_neon_vqdmlslh_s16: {
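         // The 16-bit saturating doubling multiply is done on v4i16 operands
         // (see vectorWrapScalar16); lane 0 of the v4i32 product then feeds the
         // scalar sqadd/sqsub accumulation.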
5870     SmallVector<Value *, 2> ProductOps;
5871     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5872     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5873     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5874     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5875                           ProductOps, "vqdmlXl");
5876     Constant *CI = ConstantInt::get(SizeTy, 0);
5877     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5878
5879     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5880                                         ? Intrinsic::aarch64_neon_sqadd
5881                                         : Intrinsic::aarch64_neon_sqsub;
5882     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5883   }
5884   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5885     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5886     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5887     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5888                         Ops, "vqshlu_n");
5889   }
5890   case NEON::BI__builtin_neon_vqshld_n_u64:
5891   case NEON::BI__builtin_neon_vqshld_n_s64: {
5892     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5893                                    ? Intrinsic::aarch64_neon_uqshl
5894                                    : Intrinsic::aarch64_neon_sqshl;
5895     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5896     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5897     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5898   }
5899   case NEON::BI__builtin_neon_vrshrd_n_u64:
5900   case NEON::BI__builtin_neon_vrshrd_n_s64: {
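         // A rounding right shift by N is emitted as the rounding left shift
         // intrinsic ({s,u}rshl) with a shift amount of -N.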
5901     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5902                                    ? Intrinsic::aarch64_neon_urshl
5903                                    : Intrinsic::aarch64_neon_srshl;
5904     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5905     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5906     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5907     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5908   }
5909   case NEON::BI__builtin_neon_vrsrad_n_u64:
5910   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5911     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5912                                    ? Intrinsic::aarch64_neon_urshl
5913                                    : Intrinsic::aarch64_neon_srshl;
5914     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5915     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5916     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5917                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5918     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5919   }
5920   case NEON::BI__builtin_neon_vshld_n_s64:
5921   case NEON::BI__builtin_neon_vshld_n_u64: {
5922     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5923     return Builder.CreateShl(
5924         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5925   }
5926   case NEON::BI__builtin_neon_vshrd_n_s64: {
5927     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5928     return Builder.CreateAShr(
5929         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5930                                                    Amt->getZExtValue())),
5931         "shrd_n");
5932   }
5933   case NEON::BI__builtin_neon_vshrd_n_u64: {
5934     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5935     uint64_t ShiftAmt = Amt->getZExtValue();
5936     // Right-shifting an unsigned value by its size yields 0.
5937     if (ShiftAmt == 64)
5938       return ConstantInt::get(Int64Ty, 0);
5939     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5940                               "shrd_n");
5941   }
5942   case NEON::BI__builtin_neon_vsrad_n_s64: {
5943     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5944     Ops[1] = Builder.CreateAShr(
5945         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5946                                                    Amt->getZExtValue())),
5947         "shrd_n");
5948     return Builder.CreateAdd(Ops[0], Ops[1]);
5949   }
5950   case NEON::BI__builtin_neon_vsrad_n_u64: {
5951     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5952     uint64_t ShiftAmt = Amt->getZExtValue();
5953     // Right-shifting an unsigned value by its size yields 0.
5954     // As Op + 0 = Op, return Ops[0] directly.
5955     if (ShiftAmt == 64)
5956       return Ops[0];
5957     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5958                                 "shrd_n");
5959     return Builder.CreateAdd(Ops[0], Ops[1]);
5960   }
5961   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5962   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5963   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5964   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
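    // Scalar saturating doubling multiply-accumulate by lane: the i16 operands
    // are wrapped into short vectors (vectorWrapScalar16, presumably lane 0 of
    // a <4 x i16>), multiplied with the vector sqdmull intrinsic, lane 0 of the
    // <4 x i32> product is extracted, and the product is folded into the
    // accumulator with sqadd/sqsub.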
5965     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5966                                           "lane");
5967     SmallVector<Value *, 2> ProductOps;
5968     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5969     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5970     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5971     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5972                           ProductOps, "vqdmlXl");
5973     Constant *CI = ConstantInt::get(SizeTy, 0);
5974     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5975     Ops.pop_back();
5976
5977     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5978                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5979                           ? Intrinsic::aarch64_neon_sqadd
5980                           : Intrinsic::aarch64_neon_sqsub;
5981     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5982   }
5983   case NEON::BI__builtin_neon_vqdmlals_s32:
5984   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5985     SmallVector<Value *, 2> ProductOps;
5986     ProductOps.push_back(Ops[1]);
5987     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5988     Ops[1] =
5989         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5990                      ProductOps, "vqdmlXl");
5991
5992     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5993                                         ? Intrinsic::aarch64_neon_sqadd
5994                                         : Intrinsic::aarch64_neon_sqsub;
5995     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5996   }
5997   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5998   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5999   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6000   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6001     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6002                                           "lane");
6003     SmallVector<Value *, 2> ProductOps;
6004     ProductOps.push_back(Ops[1]);
6005     ProductOps.push_back(Ops[2]);
6006     Ops[1] =
6007         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6008                      ProductOps, "vqdmlXl");
6009     Ops.pop_back();
6010
6011     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6012                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6013                           ? Intrinsic::aarch64_neon_sqadd
6014                           : Intrinsic::aarch64_neon_sqsub;
6015     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6016   }
6017   }
6018
6019   llvm::VectorType *VTy = GetNeonType(this, Type);
6020   llvm::Type *Ty = VTy;
6021   if (!Ty)
6022     return nullptr;
6023
6024   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6025   // defer to common code if it's been added to our special map.
6026   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6027                                    AArch64SIMDIntrinsicsProvenSorted);
6028
6029   if (Builtin)
6030     return EmitCommonNeonBuiltinExpr(
6031         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6032         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6033         /*never use addresses*/ Address::invalid(), Address::invalid());
6034
6035   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6036     return V;
6037
6038   unsigned Int;
6039   switch (BuiltinID) {
6040   default: return nullptr;
6041   case NEON::BI__builtin_neon_vbsl_v:
6042   case NEON::BI__builtin_neon_vbslq_v: {
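    // Bitwise select is open-coded on the integer view of the operands:
    //   result = (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2])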
6043     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6044     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6045     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6046     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6047
6048     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6049     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6050     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6051     return Builder.CreateBitCast(Ops[0], Ty);
6052   }
6053   case NEON::BI__builtin_neon_vfma_lane_v:
6054   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6055     // The ARM builtins (and instructions) have the addend as the first
6056     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6057     Value *Addend = Ops[0];
6058     Value *Multiplicand = Ops[1];
6059     Value *LaneSource = Ops[2];
6060     Ops[0] = Multiplicand;
6061     Ops[1] = LaneSource;
6062     Ops[2] = Addend;
6063
6064     // Now adjust things to handle the lane access.
6065     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6066       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6067       VTy;
6068     llvm::Constant *cst = cast<Constant>(Ops[3]);
6069     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6070     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6071     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6072
6073     Ops.pop_back();
6074     Int = Intrinsic::fma;
6075     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6076   }
6077   case NEON::BI__builtin_neon_vfma_laneq_v: {
6078     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6079     // v1f64 fma should be mapped to Neon scalar f64 fma
6080     if (VTy && VTy->getElementType() == DoubleTy) {
6081       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6082       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6083       llvm::Type *VTy = GetNeonType(this,
6084         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6085       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6086       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6087       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6088       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6089       return Builder.CreateBitCast(Result, Ty);
6090     }
6091     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6092     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6093     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6094
6095     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6096                                             VTy->getNumElements() * 2);
6097     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6098     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6099                                                cast<ConstantInt>(Ops[3]));
6100     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6101
6102     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6103   }
6104   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6105     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6106     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6107     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6108
6109     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6110     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6111     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6112   }
6113   case NEON::BI__builtin_neon_vfmas_lane_f32:
6114   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6115   case NEON::BI__builtin_neon_vfmad_lane_f64:
6116   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6117     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6118     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6119     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6120     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6121     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6122   }
6123   case NEON::BI__builtin_neon_vmull_v:
6124     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6125     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6126     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6127     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6128   case NEON::BI__builtin_neon_vmax_v:
6129   case NEON::BI__builtin_neon_vmaxq_v:
6130     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6131     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6132     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6133     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6134   case NEON::BI__builtin_neon_vmin_v:
6135   case NEON::BI__builtin_neon_vminq_v:
6136     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6137     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6138     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6139     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6140   case NEON::BI__builtin_neon_vabd_v:
6141   case NEON::BI__builtin_neon_vabdq_v:
6142     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6143     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6144     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6145     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6146   case NEON::BI__builtin_neon_vpadal_v:
6147   case NEON::BI__builtin_neon_vpadalq_v: {
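    // Pairwise add-and-accumulate: emit [su]addlp on the narrow source vector,
    // then add the widened result to the accumulator with an ordinary vector add.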
6148     unsigned ArgElts = VTy->getNumElements();
6149     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6150     unsigned BitWidth = EltTy->getBitWidth();
6151     llvm::Type *ArgTy = llvm::VectorType::get(
6152         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6153     llvm::Type* Tys[2] = { VTy, ArgTy };
6154     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6155     SmallVector<llvm::Value*, 1> TmpOps;
6156     TmpOps.push_back(Ops[1]);
6157     Function *F = CGM.getIntrinsic(Int, Tys);
6158     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6159     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6160     return Builder.CreateAdd(tmp, addend);
6161   }
6162   case NEON::BI__builtin_neon_vpmin_v:
6163   case NEON::BI__builtin_neon_vpminq_v:
6164     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6165     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6166     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6167     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6168   case NEON::BI__builtin_neon_vpmax_v:
6169   case NEON::BI__builtin_neon_vpmaxq_v:
6170     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6171     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6172     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6173     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6174   case NEON::BI__builtin_neon_vminnm_v:
6175   case NEON::BI__builtin_neon_vminnmq_v:
6176     Int = Intrinsic::aarch64_neon_fminnm;
6177     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6178   case NEON::BI__builtin_neon_vmaxnm_v:
6179   case NEON::BI__builtin_neon_vmaxnmq_v:
6180     Int = Intrinsic::aarch64_neon_fmaxnm;
6181     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6182   case NEON::BI__builtin_neon_vrecpss_f32: {
6183     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6184     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6185                         Ops, "vrecps");
6186   }
6187   case NEON::BI__builtin_neon_vrecpsd_f64: {
6188     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6189     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6190                         Ops, "vrecps");
6191   }
6192   case NEON::BI__builtin_neon_vqshrun_n_v:
6193     Int = Intrinsic::aarch64_neon_sqshrun;
6194     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6195   case NEON::BI__builtin_neon_vqrshrun_n_v:
6196     Int = Intrinsic::aarch64_neon_sqrshrun;
6197     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6198   case NEON::BI__builtin_neon_vqshrn_n_v:
6199     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6200     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6201   case NEON::BI__builtin_neon_vrshrn_n_v:
6202     Int = Intrinsic::aarch64_neon_rshrn;
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6204   case NEON::BI__builtin_neon_vqrshrn_n_v:
6205     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6206     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6207   case NEON::BI__builtin_neon_vrnda_v:
6208   case NEON::BI__builtin_neon_vrndaq_v: {
6209     Int = Intrinsic::round;
6210     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6211   }
6212   case NEON::BI__builtin_neon_vrndi_v:
6213   case NEON::BI__builtin_neon_vrndiq_v: {
6214     Int = Intrinsic::nearbyint;
6215     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6216   }
6217   case NEON::BI__builtin_neon_vrndm_v:
6218   case NEON::BI__builtin_neon_vrndmq_v: {
6219     Int = Intrinsic::floor;
6220     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6221   }
6222   case NEON::BI__builtin_neon_vrndn_v:
6223   case NEON::BI__builtin_neon_vrndnq_v: {
6224     Int = Intrinsic::aarch64_neon_frintn;
6225     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6226   }
6227   case NEON::BI__builtin_neon_vrndp_v:
6228   case NEON::BI__builtin_neon_vrndpq_v: {
6229     Int = Intrinsic::ceil;
6230     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6231   }
6232   case NEON::BI__builtin_neon_vrndx_v:
6233   case NEON::BI__builtin_neon_vrndxq_v: {
6234     Int = Intrinsic::rint;
6235     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6236   }
6237   case NEON::BI__builtin_neon_vrnd_v:
6238   case NEON::BI__builtin_neon_vrndq_v: {
6239     Int = Intrinsic::trunc;
6240     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6241   }
6242   case NEON::BI__builtin_neon_vceqz_v:
6243   case NEON::BI__builtin_neon_vceqzq_v:
6244     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6245                                          ICmpInst::ICMP_EQ, "vceqz");
6246   case NEON::BI__builtin_neon_vcgez_v:
6247   case NEON::BI__builtin_neon_vcgezq_v:
6248     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6249                                          ICmpInst::ICMP_SGE, "vcgez");
6250   case NEON::BI__builtin_neon_vclez_v:
6251   case NEON::BI__builtin_neon_vclezq_v:
6252     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6253                                          ICmpInst::ICMP_SLE, "vclez");
6254   case NEON::BI__builtin_neon_vcgtz_v:
6255   case NEON::BI__builtin_neon_vcgtzq_v:
6256     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6257                                          ICmpInst::ICMP_SGT, "vcgtz");
6258   case NEON::BI__builtin_neon_vcltz_v:
6259   case NEON::BI__builtin_neon_vcltzq_v:
6260     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6261                                          ICmpInst::ICMP_SLT, "vcltz");
6262   case NEON::BI__builtin_neon_vcvt_f64_v:
6263   case NEON::BI__builtin_neon_vcvtq_f64_v:
6264     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6265     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6266     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6267                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6268   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6269     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6270            "unexpected vcvt_f64_f32 builtin");
6271     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6272     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6273
6274     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6275   }
6276   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6277     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6278            "unexpected vcvt_f32_f64 builtin");
6279     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6280     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6281
6282     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6283   }
6284   case NEON::BI__builtin_neon_vcvt_s32_v:
6285   case NEON::BI__builtin_neon_vcvt_u32_v:
6286   case NEON::BI__builtin_neon_vcvt_s64_v:
6287   case NEON::BI__builtin_neon_vcvt_u64_v:
6288   case NEON::BI__builtin_neon_vcvtq_s32_v:
6289   case NEON::BI__builtin_neon_vcvtq_u32_v:
6290   case NEON::BI__builtin_neon_vcvtq_s64_v:
6291   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6292     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6293     if (usgn)
6294       return Builder.CreateFPToUI(Ops[0], Ty);
6295     return Builder.CreateFPToSI(Ops[0], Ty);
6296   }
6297   case NEON::BI__builtin_neon_vcvta_s32_v:
6298   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6299   case NEON::BI__builtin_neon_vcvta_u32_v:
6300   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6301   case NEON::BI__builtin_neon_vcvta_s64_v:
6302   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6303   case NEON::BI__builtin_neon_vcvta_u64_v:
6304   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6305     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6306     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6307     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6308   }
6309   case NEON::BI__builtin_neon_vcvtm_s32_v:
6310   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6311   case NEON::BI__builtin_neon_vcvtm_u32_v:
6312   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6313   case NEON::BI__builtin_neon_vcvtm_s64_v:
6314   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6315   case NEON::BI__builtin_neon_vcvtm_u64_v:
6316   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6317     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6318     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6319     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6320   }
6321   case NEON::BI__builtin_neon_vcvtn_s32_v:
6322   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6323   case NEON::BI__builtin_neon_vcvtn_u32_v:
6324   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6325   case NEON::BI__builtin_neon_vcvtn_s64_v:
6326   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6327   case NEON::BI__builtin_neon_vcvtn_u64_v:
6328   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6329     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6330     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6331     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6332   }
6333   case NEON::BI__builtin_neon_vcvtp_s32_v:
6334   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6335   case NEON::BI__builtin_neon_vcvtp_u32_v:
6336   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6337   case NEON::BI__builtin_neon_vcvtp_s64_v:
6338   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6339   case NEON::BI__builtin_neon_vcvtp_u64_v:
6340   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6341     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6342     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6343     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6344   }
6345   case NEON::BI__builtin_neon_vmulx_v:
6346   case NEON::BI__builtin_neon_vmulxq_v: {
6347     Int = Intrinsic::aarch64_neon_fmulx;
6348     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6349   }
6350   case NEON::BI__builtin_neon_vmul_lane_v:
6351   case NEON::BI__builtin_neon_vmul_laneq_v: {
6352     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6353     bool Quad = false;
6354     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6355       Quad = true;
6356     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6357     llvm::Type *VTy = GetNeonType(this,
6358       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6359     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6360     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6361     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6362     return Builder.CreateBitCast(Result, Ty);
6363   }
6364   case NEON::BI__builtin_neon_vnegd_s64:
6365     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6366   case NEON::BI__builtin_neon_vpmaxnm_v:
6367   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6368     Int = Intrinsic::aarch64_neon_fmaxnmp;
6369     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6370   }
6371   case NEON::BI__builtin_neon_vpminnm_v:
6372   case NEON::BI__builtin_neon_vpminnmq_v: {
6373     Int = Intrinsic::aarch64_neon_fminnmp;
6374     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6375   }
6376   case NEON::BI__builtin_neon_vsqrt_v:
6377   case NEON::BI__builtin_neon_vsqrtq_v: {
6378     Int = Intrinsic::sqrt;
6379     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6380     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6381   }
6382   case NEON::BI__builtin_neon_vrbit_v:
6383   case NEON::BI__builtin_neon_vrbitq_v: {
6384     Int = Intrinsic::aarch64_neon_rbit;
6385     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6386   }
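  // The across-vector reductions below (vaddv/vmaxv/vminv) use intrinsics that
  // return an i32, so each result is truncated back to the element width.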
6387   case NEON::BI__builtin_neon_vaddv_u8:
6388     // FIXME: These are handled by the AArch64 scalar code.
6389     usgn = true;
6390     // FALLTHROUGH
6391   case NEON::BI__builtin_neon_vaddv_s8: {
6392     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6393     Ty = Int32Ty;
6394     VTy = llvm::VectorType::get(Int8Ty, 8);
6395     llvm::Type *Tys[2] = { Ty, VTy };
6396     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6397     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6398     return Builder.CreateTrunc(Ops[0], Int8Ty);
6399   }
6400   case NEON::BI__builtin_neon_vaddv_u16:
6401     usgn = true;
6402     // FALLTHROUGH
6403   case NEON::BI__builtin_neon_vaddv_s16: {
6404     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6405     Ty = Int32Ty;
6406     VTy = llvm::VectorType::get(Int16Ty, 4);
6407     llvm::Type *Tys[2] = { Ty, VTy };
6408     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6409     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6410     return Builder.CreateTrunc(Ops[0], Int16Ty);
6411   }
6412   case NEON::BI__builtin_neon_vaddvq_u8:
6413     usgn = true;
6414     // FALLTHROUGH
6415   case NEON::BI__builtin_neon_vaddvq_s8: {
6416     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6417     Ty = Int32Ty;
6418     VTy = llvm::VectorType::get(Int8Ty, 16);
6419     llvm::Type *Tys[2] = { Ty, VTy };
6420     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6421     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6422     return Builder.CreateTrunc(Ops[0], Int8Ty);
6423   }
6424   case NEON::BI__builtin_neon_vaddvq_u16:
6425     usgn = true;
6426     // FALLTHROUGH
6427   case NEON::BI__builtin_neon_vaddvq_s16: {
6428     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6429     Ty = Int32Ty;
6430     VTy = llvm::VectorType::get(Int16Ty, 8);
6431     llvm::Type *Tys[2] = { Ty, VTy };
6432     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6433     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6434     return Builder.CreateTrunc(Ops[0], Int16Ty);
6435   }
6436   case NEON::BI__builtin_neon_vmaxv_u8: {
6437     Int = Intrinsic::aarch64_neon_umaxv;
6438     Ty = Int32Ty;
6439     VTy = llvm::VectorType::get(Int8Ty, 8);
6440     llvm::Type *Tys[2] = { Ty, VTy };
6441     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6442     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6443     return Builder.CreateTrunc(Ops[0], Int8Ty);
6444   }
6445   case NEON::BI__builtin_neon_vmaxv_u16: {
6446     Int = Intrinsic::aarch64_neon_umaxv;
6447     Ty = Int32Ty;
6448     VTy = llvm::VectorType::get(Int16Ty, 4);
6449     llvm::Type *Tys[2] = { Ty, VTy };
6450     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6451     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6452     return Builder.CreateTrunc(Ops[0], Int16Ty);
6453   }
6454   case NEON::BI__builtin_neon_vmaxvq_u8: {
6455     Int = Intrinsic::aarch64_neon_umaxv;
6456     Ty = Int32Ty;
6457     VTy = llvm::VectorType::get(Int8Ty, 16);
6458     llvm::Type *Tys[2] = { Ty, VTy };
6459     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6460     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6461     return Builder.CreateTrunc(Ops[0], Int8Ty);
6462   }
6463   case NEON::BI__builtin_neon_vmaxvq_u16: {
6464     Int = Intrinsic::aarch64_neon_umaxv;
6465     Ty = Int32Ty;
6466     VTy = llvm::VectorType::get(Int16Ty, 8);
6467     llvm::Type *Tys[2] = { Ty, VTy };
6468     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6469     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6470     return Builder.CreateTrunc(Ops[0], Int16Ty);
6471   }
6472   case NEON::BI__builtin_neon_vmaxv_s8: {
6473     Int = Intrinsic::aarch64_neon_smaxv;
6474     Ty = Int32Ty;
6475     VTy = llvm::VectorType::get(Int8Ty, 8);
6476     llvm::Type *Tys[2] = { Ty, VTy };
6477     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6478     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6479     return Builder.CreateTrunc(Ops[0], Int8Ty);
6480   }
6481   case NEON::BI__builtin_neon_vmaxv_s16: {
6482     Int = Intrinsic::aarch64_neon_smaxv;
6483     Ty = Int32Ty;
6484     VTy = llvm::VectorType::get(Int16Ty, 4);
6485     llvm::Type *Tys[2] = { Ty, VTy };
6486     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6487     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6488     return Builder.CreateTrunc(Ops[0], Int16Ty);
6489   }
6490   case NEON::BI__builtin_neon_vmaxvq_s8: {
6491     Int = Intrinsic::aarch64_neon_smaxv;
6492     Ty = Int32Ty;
6493     VTy = llvm::VectorType::get(Int8Ty, 16);
6494     llvm::Type *Tys[2] = { Ty, VTy };
6495     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6496     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6497     return Builder.CreateTrunc(Ops[0], Int8Ty);
6498   }
6499   case NEON::BI__builtin_neon_vmaxvq_s16: {
6500     Int = Intrinsic::aarch64_neon_smaxv;
6501     Ty = Int32Ty;
6502     VTy = llvm::VectorType::get(Int16Ty, 8);
6503     llvm::Type *Tys[2] = { Ty, VTy };
6504     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6505     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6506     return Builder.CreateTrunc(Ops[0], Int16Ty);
6507   }
6508   case NEON::BI__builtin_neon_vminv_u8: {
6509     Int = Intrinsic::aarch64_neon_uminv;
6510     Ty = Int32Ty;
6511     VTy = llvm::VectorType::get(Int8Ty, 8);
6512     llvm::Type *Tys[2] = { Ty, VTy };
6513     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6514     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6515     return Builder.CreateTrunc(Ops[0], Int8Ty);
6516   }
6517   case NEON::BI__builtin_neon_vminv_u16: {
6518     Int = Intrinsic::aarch64_neon_uminv;
6519     Ty = Int32Ty;
6520     VTy = llvm::VectorType::get(Int16Ty, 4);
6521     llvm::Type *Tys[2] = { Ty, VTy };
6522     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6523     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6524     return Builder.CreateTrunc(Ops[0], Int16Ty);
6525   }
6526   case NEON::BI__builtin_neon_vminvq_u8: {
6527     Int = Intrinsic::aarch64_neon_uminv;
6528     Ty = Int32Ty;
6529     VTy = llvm::VectorType::get(Int8Ty, 16);
6530     llvm::Type *Tys[2] = { Ty, VTy };
6531     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6532     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6533     return Builder.CreateTrunc(Ops[0], Int8Ty);
6534   }
6535   case NEON::BI__builtin_neon_vminvq_u16: {
6536     Int = Intrinsic::aarch64_neon_uminv;
6537     Ty = Int32Ty;
6538     VTy = llvm::VectorType::get(Int16Ty, 8);
6539     llvm::Type *Tys[2] = { Ty, VTy };
6540     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6541     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6542     return Builder.CreateTrunc(Ops[0], Int16Ty);
6543   }
6544   case NEON::BI__builtin_neon_vminv_s8: {
6545     Int = Intrinsic::aarch64_neon_sminv;
6546     Ty = Int32Ty;
6547     VTy = llvm::VectorType::get(Int8Ty, 8);
6548     llvm::Type *Tys[2] = { Ty, VTy };
6549     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6550     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6551     return Builder.CreateTrunc(Ops[0], Int8Ty);
6552   }
6553   case NEON::BI__builtin_neon_vminv_s16: {
6554     Int = Intrinsic::aarch64_neon_sminv;
6555     Ty = Int32Ty;
6556     VTy = llvm::VectorType::get(Int16Ty, 4);
6557     llvm::Type *Tys[2] = { Ty, VTy };
6558     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6559     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6560     return Builder.CreateTrunc(Ops[0], Int16Ty);
6561   }
6562   case NEON::BI__builtin_neon_vminvq_s8: {
6563     Int = Intrinsic::aarch64_neon_sminv;
6564     Ty = Int32Ty;
6565     VTy = llvm::VectorType::get(Int8Ty, 16);
6566     llvm::Type *Tys[2] = { Ty, VTy };
6567     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6568     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6569     return Builder.CreateTrunc(Ops[0], Int8Ty);
6570   }
6571   case NEON::BI__builtin_neon_vminvq_s16: {
6572     Int = Intrinsic::aarch64_neon_sminv;
6573     Ty = Int32Ty;
6574     VTy = llvm::VectorType::get(Int16Ty, 8);
6575     llvm::Type *Tys[2] = { Ty, VTy };
6576     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6577     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6578     return Builder.CreateTrunc(Ops[0], Int16Ty);
6579   }
6580   case NEON::BI__builtin_neon_vmul_n_f64: {
6581     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6582     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6583     return Builder.CreateFMul(Ops[0], RHS);
6584   }
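  // vaddlv widens as it reduces: the i8-element variants produce a 16-bit sum
  // (still returned as an i32 by the intrinsic, hence the trunc to i16), while
  // the i16-element variants return the full i32 directly.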
6585   case NEON::BI__builtin_neon_vaddlv_u8: {
6586     Int = Intrinsic::aarch64_neon_uaddlv;
6587     Ty = Int32Ty;
6588     VTy = llvm::VectorType::get(Int8Ty, 8);
6589     llvm::Type *Tys[2] = { Ty, VTy };
6590     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6591     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6592     return Builder.CreateTrunc(Ops[0], Int16Ty);
6593   }
6594   case NEON::BI__builtin_neon_vaddlv_u16: {
6595     Int = Intrinsic::aarch64_neon_uaddlv;
6596     Ty = Int32Ty;
6597     VTy = llvm::VectorType::get(Int16Ty, 4);
6598     llvm::Type *Tys[2] = { Ty, VTy };
6599     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6600     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6601   }
6602   case NEON::BI__builtin_neon_vaddlvq_u8: {
6603     Int = Intrinsic::aarch64_neon_uaddlv;
6604     Ty = Int32Ty;
6605     VTy = llvm::VectorType::get(Int8Ty, 16);
6606     llvm::Type *Tys[2] = { Ty, VTy };
6607     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6608     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6609     return Builder.CreateTrunc(Ops[0], Int16Ty);
6610   }
6611   case NEON::BI__builtin_neon_vaddlvq_u16: {
6612     Int = Intrinsic::aarch64_neon_uaddlv;
6613     Ty = Int32Ty;
6614     VTy = llvm::VectorType::get(Int16Ty, 8);
6615     llvm::Type *Tys[2] = { Ty, VTy };
6616     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6617     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6618   }
6619   case NEON::BI__builtin_neon_vaddlv_s8: {
6620     Int = Intrinsic::aarch64_neon_saddlv;
6621     Ty = Int32Ty;
6622     VTy = llvm::VectorType::get(Int8Ty, 8);
6623     llvm::Type *Tys[2] = { Ty, VTy };
6624     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6625     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6626     return Builder.CreateTrunc(Ops[0], Int16Ty);
6627   }
6628   case NEON::BI__builtin_neon_vaddlv_s16: {
6629     Int = Intrinsic::aarch64_neon_saddlv;
6630     Ty = Int32Ty;
6631     VTy = llvm::VectorType::get(Int16Ty, 4);
6632     llvm::Type *Tys[2] = { Ty, VTy };
6633     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6634     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6635   }
6636   case NEON::BI__builtin_neon_vaddlvq_s8: {
6637     Int = Intrinsic::aarch64_neon_saddlv;
6638     Ty = Int32Ty;
6639     VTy = llvm::VectorType::get(Int8Ty, 16);
6640     llvm::Type *Tys[2] = { Ty, VTy };
6641     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6642     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6643     return Builder.CreateTrunc(Ops[0], Int16Ty);
6644   }
6645   case NEON::BI__builtin_neon_vaddlvq_s16: {
6646     Int = Intrinsic::aarch64_neon_saddlv;
6647     Ty = Int32Ty;
6648     VTy = llvm::VectorType::get(Int16Ty, 8);
6649     llvm::Type *Tys[2] = { Ty, VTy };
6650     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6652   }
6653   case NEON::BI__builtin_neon_vsri_n_v:
6654   case NEON::BI__builtin_neon_vsriq_n_v: {
6655     Int = Intrinsic::aarch64_neon_vsri;
6656     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6657     return EmitNeonCall(Intrin, Ops, "vsri_n");
6658   }
6659   case NEON::BI__builtin_neon_vsli_n_v:
6660   case NEON::BI__builtin_neon_vsliq_n_v: {
6661     Int = Intrinsic::aarch64_neon_vsli;
6662     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6663     return EmitNeonCall(Intrin, Ops, "vsli_n");
6664   }
6665   case NEON::BI__builtin_neon_vsra_n_v:
6666   case NEON::BI__builtin_neon_vsraq_n_v:
6667     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6668     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6669     return Builder.CreateAdd(Ops[0], Ops[1]);
6670   case NEON::BI__builtin_neon_vrsra_n_v:
6671   case NEON::BI__builtin_neon_vrsraq_n_v: {
6672     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6673     SmallVector<llvm::Value*,2> TmpOps;
6674     TmpOps.push_back(Ops[1]);
6675     TmpOps.push_back(Ops[2]);
6676     Function* F = CGM.getIntrinsic(Int, Ty);
6677     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6678     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6679     return Builder.CreateAdd(Ops[0], tmp);
6680   }
6681     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6682     // of an Align parameter here.
6683   case NEON::BI__builtin_neon_vld1_x2_v:
6684   case NEON::BI__builtin_neon_vld1q_x2_v:
6685   case NEON::BI__builtin_neon_vld1_x3_v:
6686   case NEON::BI__builtin_neon_vld1q_x3_v:
6687   case NEON::BI__builtin_neon_vld1_x4_v:
6688   case NEON::BI__builtin_neon_vld1q_x4_v: {
6689     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6690     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6691     llvm::Type *Tys[2] = { VTy, PTy };
6692     unsigned Int;
6693     switch (BuiltinID) {
6694     case NEON::BI__builtin_neon_vld1_x2_v:
6695     case NEON::BI__builtin_neon_vld1q_x2_v:
6696       Int = Intrinsic::aarch64_neon_ld1x2;
6697       break;
6698     case NEON::BI__builtin_neon_vld1_x3_v:
6699     case NEON::BI__builtin_neon_vld1q_x3_v:
6700       Int = Intrinsic::aarch64_neon_ld1x3;
6701       break;
6702     case NEON::BI__builtin_neon_vld1_x4_v:
6703     case NEON::BI__builtin_neon_vld1q_x4_v:
6704       Int = Intrinsic::aarch64_neon_ld1x4;
6705       break;
6706     }
6707     Function *F = CGM.getIntrinsic(Int, Tys);
6708     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6709     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6710     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6711     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6712   }
6713   case NEON::BI__builtin_neon_vst1_x2_v:
6714   case NEON::BI__builtin_neon_vst1q_x2_v:
6715   case NEON::BI__builtin_neon_vst1_x3_v:
6716   case NEON::BI__builtin_neon_vst1q_x3_v:
6717   case NEON::BI__builtin_neon_vst1_x4_v:
6718   case NEON::BI__builtin_neon_vst1q_x4_v: {
6719     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6720     llvm::Type *Tys[2] = { VTy, PTy };
6721     unsigned Int;
6722     switch (BuiltinID) {
6723     case NEON::BI__builtin_neon_vst1_x2_v:
6724     case NEON::BI__builtin_neon_vst1q_x2_v:
6725       Int = Intrinsic::aarch64_neon_st1x2;
6726       break;
6727     case NEON::BI__builtin_neon_vst1_x3_v:
6728     case NEON::BI__builtin_neon_vst1q_x3_v:
6729       Int = Intrinsic::aarch64_neon_st1x3;
6730       break;
6731     case NEON::BI__builtin_neon_vst1_x4_v:
6732     case NEON::BI__builtin_neon_vst1q_x4_v:
6733       Int = Intrinsic::aarch64_neon_st1x4;
6734       break;
6735     }
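    // Move the pointer operand (Ops[0]) to the back so the operand order matches
    // the st1xN intrinsics, which take the vectors first and the address last.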
6736     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6737     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6738   }
6739   case NEON::BI__builtin_neon_vld1_v:
6740   case NEON::BI__builtin_neon_vld1q_v: {
6741     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6742     auto Alignment = CharUnits::fromQuantity(
6743         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6744     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6745   }
6746   case NEON::BI__builtin_neon_vst1_v:
6747   case NEON::BI__builtin_neon_vst1q_v:
6748     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6749     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6750     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6751   case NEON::BI__builtin_neon_vld1_lane_v:
6752   case NEON::BI__builtin_neon_vld1q_lane_v: {
6753     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6754     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6755     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6756     auto Alignment = CharUnits::fromQuantity(
6757         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6758     Ops[0] =
6759         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6760     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6761   }
6762   case NEON::BI__builtin_neon_vld1_dup_v:
6763   case NEON::BI__builtin_neon_vld1q_dup_v: {
6764     Value *V = UndefValue::get(Ty);
6765     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6766     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6767     auto Alignment = CharUnits::fromQuantity(
6768         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6769     Ops[0] =
6770         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6771     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6772     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6773     return EmitNeonSplat(Ops[0], CI);
6774   }
6775   case NEON::BI__builtin_neon_vst1_lane_v:
6776   case NEON::BI__builtin_neon_vst1q_lane_v:
6777     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6778     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6779     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6780     return Builder.CreateDefaultAlignedStore(Ops[1],
6781                                              Builder.CreateBitCast(Ops[0], Ty));
6782   case NEON::BI__builtin_neon_vld2_v:
6783   case NEON::BI__builtin_neon_vld2q_v: {
6784     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6785     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6786     llvm::Type *Tys[2] = { VTy, PTy };
6787     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6788     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6789     Ops[0] = Builder.CreateBitCast(Ops[0],
6790                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6791     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6792   }
6793   case NEON::BI__builtin_neon_vld3_v:
6794   case NEON::BI__builtin_neon_vld3q_v: {
6795     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6796     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6797     llvm::Type *Tys[2] = { VTy, PTy };
6798     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6799     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6800     Ops[0] = Builder.CreateBitCast(Ops[0],
6801                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6802     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6803   }
6804   case NEON::BI__builtin_neon_vld4_v:
6805   case NEON::BI__builtin_neon_vld4q_v: {
6806     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6807     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6808     llvm::Type *Tys[2] = { VTy, PTy };
6809     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6810     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6811     Ops[0] = Builder.CreateBitCast(Ops[0],
6812                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6813     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6814   }
6815   case NEON::BI__builtin_neon_vld2_dup_v:
6816   case NEON::BI__builtin_neon_vld2q_dup_v: {
6817     llvm::Type *PTy =
6818       llvm::PointerType::getUnqual(VTy->getElementType());
6819     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6820     llvm::Type *Tys[2] = { VTy, PTy };
6821     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6822     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6823     Ops[0] = Builder.CreateBitCast(Ops[0],
6824                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6825     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6826   }
6827   case NEON::BI__builtin_neon_vld3_dup_v:
6828   case NEON::BI__builtin_neon_vld3q_dup_v: {
6829     llvm::Type *PTy =
6830       llvm::PointerType::getUnqual(VTy->getElementType());
6831     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6832     llvm::Type *Tys[2] = { VTy, PTy };
6833     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6834     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6835     Ops[0] = Builder.CreateBitCast(Ops[0],
6836                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6837     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6838   }
6839   case NEON::BI__builtin_neon_vld4_dup_v:
6840   case NEON::BI__builtin_neon_vld4q_dup_v: {
6841     llvm::Type *PTy =
6842       llvm::PointerType::getUnqual(VTy->getElementType());
6843     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6844     llvm::Type *Tys[2] = { VTy, PTy };
6845     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6846     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6847     Ops[0] = Builder.CreateBitCast(Ops[0],
6848                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6849     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6850   }
6851   case NEON::BI__builtin_neon_vld2_lane_v:
6852   case NEON::BI__builtin_neon_vld2q_lane_v: {
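    // Reorder for the ld2lane intrinsic: the source pointer (Ops[1]) is moved to
    // the back, the two vectors are bitcast to the right vector type, and the
    // lane index is zero-extended to i64; Ops[0] keeps the address the result is
    // stored through.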
6853     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6854     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6855     Ops.push_back(Ops[1]);
6856     Ops.erase(Ops.begin()+1);
6857     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6858     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6859     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6860     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6861     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6862     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6863     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6864   }
6865   case NEON::BI__builtin_neon_vld3_lane_v:
6866   case NEON::BI__builtin_neon_vld3q_lane_v: {
6867     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6868     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6869     Ops.push_back(Ops[1]);
6870     Ops.erase(Ops.begin()+1);
6871     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6872     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6873     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6874     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6875     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6876     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6877     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6878     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6879   }
6880   case NEON::BI__builtin_neon_vld4_lane_v:
6881   case NEON::BI__builtin_neon_vld4q_lane_v: {
6882     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6883     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6884     Ops.push_back(Ops[1]);
6885     Ops.erase(Ops.begin()+1);
6886     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6887     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6888     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6889     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6890     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6891     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6892     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6893     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6894     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6895   }
6896   case NEON::BI__builtin_neon_vst2_v:
6897   case NEON::BI__builtin_neon_vst2q_v: {
6898     Ops.push_back(Ops[0]);
6899     Ops.erase(Ops.begin());
6900     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6901     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6902                         Ops, "");
6903   }
6904   case NEON::BI__builtin_neon_vst2_lane_v:
6905   case NEON::BI__builtin_neon_vst2q_lane_v: {
6906     Ops.push_back(Ops[0]);
6907     Ops.erase(Ops.begin());
6908     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6909     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6910     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6911                         Ops, "");
6912   }
6913   case NEON::BI__builtin_neon_vst3_v:
6914   case NEON::BI__builtin_neon_vst3q_v: {
6915     Ops.push_back(Ops[0]);
6916     Ops.erase(Ops.begin());
6917     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6918     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6919                         Ops, "");
6920   }
6921   case NEON::BI__builtin_neon_vst3_lane_v:
6922   case NEON::BI__builtin_neon_vst3q_lane_v: {
6923     Ops.push_back(Ops[0]);
6924     Ops.erase(Ops.begin());
6925     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6926     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6927     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6928                         Ops, "");
6929   }
6930   case NEON::BI__builtin_neon_vst4_v:
6931   case NEON::BI__builtin_neon_vst4q_v: {
6932     Ops.push_back(Ops[0]);
6933     Ops.erase(Ops.begin());
6934     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6935     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6936                         Ops, "");
6937   }
6938   case NEON::BI__builtin_neon_vst4_lane_v:
6939   case NEON::BI__builtin_neon_vst4q_lane_v: {
6940     Ops.push_back(Ops[0]);
6941     Ops.erase(Ops.begin());
6942     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6943     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6944     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6945                         Ops, "");
6946   }
6947   case NEON::BI__builtin_neon_vtrn_v:
6948   case NEON::BI__builtin_neon_vtrnq_v: {
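    // Transpose: each half vi of the result interleaves the even (vi == 0) or
    // odd (vi == 1) lanes of the two inputs; e.g. for a 4-element vector the
    // shuffle masks are {0,4,2,6} and {1,5,3,7}. Each half is stored through the
    // result pointer in Ops[0].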
6949     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6950     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6951     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6952     Value *SV = nullptr;
6953
6954     for (unsigned vi = 0; vi != 2; ++vi) {
6955       SmallVector<uint32_t, 16> Indices;
6956       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6957         Indices.push_back(i+vi);
6958         Indices.push_back(i+e+vi);
6959       }
6960       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6961       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6962       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6963     }
6964     return SV;
6965   }
6966   case NEON::BI__builtin_neon_vuzp_v:
6967   case NEON::BI__builtin_neon_vuzpq_v: {
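    // Unzip: half 0 gathers the even-indexed elements of the concatenated inputs
    // ({0,2,4,...}), half 1 the odd-indexed ones ({1,3,5,...}).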
6968     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6969     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6970     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6971     Value *SV = nullptr;
6972
6973     for (unsigned vi = 0; vi != 2; ++vi) {
6974       SmallVector<uint32_t, 16> Indices;
6975       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6976         Indices.push_back(2*i+vi);
6977
6978       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6979       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6980       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6981     }
6982     return SV;
6983   }
6984   case NEON::BI__builtin_neon_vzip_v:
6985   case NEON::BI__builtin_neon_vzipq_v: {
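    // Zip: each half interleaves corresponding elements of the two inputs; e.g.
    // for a 4-element vector the shuffle masks are {0,4,1,5} and {2,6,3,7}.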
6986     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6987     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6988     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6989     Value *SV = nullptr;
6990
6991     for (unsigned vi = 0; vi != 2; ++vi) {
6992       SmallVector<uint32_t, 16> Indices;
6993       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6994         Indices.push_back((i + vi*e) >> 1);
6995         Indices.push_back(((i + vi*e) >> 1)+e);
6996       }
6997       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6998       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6999       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7000     }
7001     return SV;
7002   }
7003   case NEON::BI__builtin_neon_vqtbl1q_v: {
7004     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7005                         Ops, "vtbl1");
7006   }
7007   case NEON::BI__builtin_neon_vqtbl2q_v: {
7008     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7009                         Ops, "vtbl2");
7010   }
7011   case NEON::BI__builtin_neon_vqtbl3q_v: {
7012     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7013                         Ops, "vtbl3");
7014   }
7015   case NEON::BI__builtin_neon_vqtbl4q_v: {
7016     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7017                         Ops, "vtbl4");
7018   }
7019   case NEON::BI__builtin_neon_vqtbx1q_v: {
7020     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7021                         Ops, "vtbx1");
7022   }
7023   case NEON::BI__builtin_neon_vqtbx2q_v: {
7024     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7025                         Ops, "vtbx2");
7026   }
7027   case NEON::BI__builtin_neon_vqtbx3q_v: {
7028     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7029                         Ops, "vtbx3");
7030   }
7031   case NEON::BI__builtin_neon_vqtbx4q_v: {
7032     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7033                         Ops, "vtbx4");
7034   }
7035   case NEON::BI__builtin_neon_vsqadd_v:
7036   case NEON::BI__builtin_neon_vsqaddq_v: {
7037     Int = Intrinsic::aarch64_neon_usqadd;
7038     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7039   }
7040   case NEON::BI__builtin_neon_vuqadd_v:
7041   case NEON::BI__builtin_neon_vuqaddq_v: {
7042     Int = Intrinsic::aarch64_neon_suqadd;
7043     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7044   }
7045   }
7046 }
7047
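// Build a vector out of the given scalar operands. For example, four constant
// i32 values fold directly to a ConstantVector, while any non-constant element
// forces a chain of insertelement instructions.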
7048 llvm::Value *CodeGenFunction::
7049 BuildVector(ArrayRef<llvm::Value*> Ops) {
7050   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7051          "Not a power-of-two sized vector!");
7052   bool AllConstants = true;
7053   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7054     AllConstants &= isa<Constant>(Ops[i]);
7055
7056   // If this is a constant vector, create a ConstantVector.
7057   if (AllConstants) {
7058     SmallVector<llvm::Constant*, 16> CstOps;
7059     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7060       CstOps.push_back(cast<Constant>(Ops[i]));
7061     return llvm::ConstantVector::get(CstOps);
7062   }
7063
7064   // Otherwise, insertelement the values to build the vector.
7065   Value *Result =
7066     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7067
7068   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7069     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7070
7071   return Result;
7072 }
7073
7074 // Convert the mask from an integer type to a vector of i1.
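// For example, an i8 mask used with a 4-element operation is bitcast to
// <8 x i1> and then narrowed to its first four lanes with a shuffle.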
7075 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7076                               unsigned NumElts) {
7077
7078   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7079                          cast<IntegerType>(Mask->getType())->getBitWidth());
7080   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7081
7082   // If we have fewer than 8 elements, the starting mask was an i8 and we need
7083   // to extract down to the right number of elements.
7084   if (NumElts < 8) {
7085     uint32_t Indices[4];
7086     for (unsigned i = 0; i != NumElts; ++i)
7087       Indices[i] = i;
7088     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7089                                              makeArrayRef(Indices, NumElts),
7090                                              "extract");
7091   }
7092   return MaskVec;
7093 }
7094
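// Helper for the x86 masked-store builtins: a constant all-ones mask lowers to
// an ordinary aligned store; any other mask is converted to an i1 vector and
// the store is emitted via the llvm.masked.store intrinsic.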
7095 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7096                                  SmallVectorImpl<Value *> &Ops,
7097                                  unsigned Align) {
7098   // Cast the pointer to the right type.
7099   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7100                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7101
7102   // If the mask is all ones, just emit a regular store.
7103   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7104     if (C->isAllOnesValue())
7105       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7106
7107   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7108                                    Ops[1]->getType()->getVectorNumElements());
7109
7110   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7111 }
7112
7113 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7114                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7115   // Cast the pointer to the right type.
7116   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7117                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7118
7119   // If the mask is all ones, just emit a regular load.
7120   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7121     if (C->isAllOnesValue())
7122       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7123
7124   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7125                                    Ops[1]->getType()->getVectorNumElements());
7126
7127   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7128 }
7129
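// Broadcast a loaded subvector across a wider destination vector. For example,
// with DstTy = <8 x i32> and a 128-bit source, NumSrcElts is 4 and the shuffle
// mask becomes {0,1,2,3,0,1,2,3}.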
7130 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7131                                         SmallVectorImpl<Value *> &Ops,
7132                                         llvm::Type *DstTy,
7133                                         unsigned SrcSizeInBits,
7134                                         unsigned Align) {
7135   // Load the subvector.
7136   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7137
7138   // Create broadcast mask.
7139   unsigned NumDstElts = DstTy->getVectorNumElements();
7140   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7141
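       // Repeating the source indices 0..NumSrcElts-1 for each chunk of the
       // destination splats the loaded subvector across the whole destination
       // vector.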
7142   SmallVector<uint32_t, 8> Mask;
7143   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7144     for (unsigned j = 0; j != NumSrcElts; ++j)
7145       Mask.push_back(j);
7146
7147   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7148 }
7149
7150 static Value *EmitX86Select(CodeGenFunction &CGF,
7151                             Value *Mask, Value *Op0, Value *Op1) {
7152
7153   // If the mask is all ones, just return the first argument.
7154   if (const auto *C = dyn_cast<Constant>(Mask))
7155     if (C->isAllOnesValue())
7156       return Op0;
7157
7158   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7159
7160   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7161 }
7162
7163 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7164                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7165   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7166   Value *Cmp;
7167
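       // CC 3 encodes an always-false compare and CC 7 an always-true compare,
       // so emit constant results for those directly.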
7168   if (CC == 3) {
7169     Cmp = Constant::getNullValue(
7170                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7171   } else if (CC == 7) {
7172     Cmp = Constant::getAllOnesValue(
7173                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7174   } else {
7175     ICmpInst::Predicate Pred;
7176     switch (CC) {
7177     default: llvm_unreachable("Unknown condition code");
7178     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7179     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7180     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7181     case 4: Pred = ICmpInst::ICMP_NE;  break;
7182     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7183     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7184     }
7185     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7186   }
7187
7188   const auto *C = dyn_cast<Constant>(Ops.back());
7189   if (!C || !C->isAllOnesValue())
7190     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7191
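       // Compare masks are returned as an integer of at least 8 bits. If there
       // are fewer than 8 elements, pad the i1 vector with zeros (taken from the
       // null second shuffle operand) before bitcasting to the integer type.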
7192   if (NumElts < 8) {
7193     uint32_t Indices[8];
7194     for (unsigned i = 0; i != NumElts; ++i)
7195       Indices[i] = i;
7196     for (unsigned i = NumElts; i != 8; ++i)
7197       Indices[i] = i % NumElts + NumElts;
7198     Cmp = CGF.Builder.CreateShuffleVector(
7199         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7200   }
7201   return CGF.Builder.CreateBitCast(Cmp,
7202                                    IntegerType::get(CGF.getLLVMContext(),
7203                                                     std::max(NumElts, 8U)));
7204 }
7205
7206 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7207                             ArrayRef<Value *> Ops) {
7208   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7209   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7210
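       // The unmasked builtins pass only the two source vectors; the masked
       // AVX-512 forms also pass a passthru vector (Ops[2]) and a mask (Ops[3]).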
7211   if (Ops.size() == 2)
7212     return Res;
7213
7214   assert(Ops.size() == 4);
7215   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7216 }
7217
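     // Sign-extend an integer mask to a vector whose elements are all-ones where
     // the corresponding mask bit is set and all-zeros elsewhere.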
7218 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7219                               llvm::Type *DstTy) {
7220   unsigned NumberOfElements = DstTy->getVectorNumElements();
7221   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7222   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7223 }
7224
7225 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7226                                            const CallExpr *E) {
7227   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7228       BuiltinID == X86::BI__builtin_ms_va_end)
7229     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7230                           BuiltinID == X86::BI__builtin_ms_va_start);
7231   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7232     // Lower this manually. We can't reliably determine whether or not any
7233     // given va_copy() is for a Win64 va_list from the calling convention
7234     // alone, because it's legal to do this from a System V ABI function.
7235     // With opaque pointer types, we won't have enough information in LLVM
7236     // IR to determine this from the argument types, either. Best to do it
7237     // now, while we have enough information.
7238     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7239     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7240
7241     llvm::Type *BPP = Int8PtrPtrTy;
7242
7243     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7244                        DestAddr.getAlignment());
7245     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7246                       SrcAddr.getAlignment());
7247
7248     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7249     return Builder.CreateStore(ArgPtr, DestAddr);
7250   }
7251
7252   SmallVector<Value*, 4> Ops;
7253
7254   // Find out if any arguments are required to be integer constant expressions.
7255   unsigned ICEArguments = 0;
7256   ASTContext::GetBuiltinTypeError Error;
7257   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7258   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7259
7260   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7261     // If this is a normal argument, just emit it as a scalar.
7262     if ((ICEArguments & (1 << i)) == 0) {
7263       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7264       continue;
7265     }
7266
7267     // If this is required to be a constant, constant fold it so that we know
7268     // that the generated intrinsic gets a ConstantInt.
7269     llvm::APSInt Result;
7270     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7271     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7272     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7273   }
7274
7275   // These exist so that the builtin that takes an immediate can be bounds
7276   // checked by clang to avoid passing bad immediates to the backend. Since
7277   // AVX has a larger immediate than SSE we would need separate builtins to
7278   // do the different bounds checking. Rather than create a clang-specific,
7279   // SSE-only builtin, this implements eight separate builtins to match the
7280   // gcc implementation.
7281   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7282     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7283     llvm::Function *F = CGM.getIntrinsic(ID);
7284     return Builder.CreateCall(F, Ops);
7285   };
7286
7287   // For the vector forms of FP comparisons, translate the builtins directly to
7288   // IR.
7289   // TODO: The builtins could be removed if the SSE header files used vector
7290   // extension comparisons directly (vector ordered/unordered may need
7291   // additional support via __builtin_isnan()).
7292   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7293     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
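         // The IR compare yields a vector of i1, so sign-extend to the matching
         // integer vector and bitcast back to the FP type; each lane becomes
         // all-ones or all-zeros, as the SSE/AVX compare builtins return.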
7294     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7295     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7296     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7297     return Builder.CreateBitCast(Sext, FPVecTy);
7298   };
7299
7300   switch (BuiltinID) {
7301   default: return nullptr;
7302   case X86::BI__builtin_cpu_supports: {
7303     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7304     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7305
7306     // TODO: When/if this becomes more than x86-specific, use a
7307     // TargetInfo-based mapping.
7308     // Processor features and mapping to processor feature value.
7309     enum X86Features {
7310       CMOV = 0,
7311       MMX,
7312       POPCNT,
7313       SSE,
7314       SSE2,
7315       SSE3,
7316       SSSE3,
7317       SSE4_1,
7318       SSE4_2,
7319       AVX,
7320       AVX2,
7321       SSE4_A,
7322       FMA4,
7323       XOP,
7324       FMA,
7325       AVX512F,
7326       BMI,
7327       BMI2,
7328       AES,
7329       PCLMUL,
7330       AVX512VL,
7331       AVX512BW,
7332       AVX512DQ,
7333       AVX512CD,
7334       AVX512ER,
7335       AVX512PF,
7336       AVX512VBMI,
7337       AVX512IFMA,
7338       AVX512VPOPCNTDQ,
7339       MAX
7340     };
7341
7342     X86Features Feature =
7343         StringSwitch<X86Features>(FeatureStr)
7344             .Case("cmov", X86Features::CMOV)
7345             .Case("mmx", X86Features::MMX)
7346             .Case("popcnt", X86Features::POPCNT)
7347             .Case("sse", X86Features::SSE)
7348             .Case("sse2", X86Features::SSE2)
7349             .Case("sse3", X86Features::SSE3)
7350             .Case("ssse3", X86Features::SSSE3)
7351             .Case("sse4.1", X86Features::SSE4_1)
7352             .Case("sse4.2", X86Features::SSE4_2)
7353             .Case("avx", X86Features::AVX)
7354             .Case("avx2", X86Features::AVX2)
7355             .Case("sse4a", X86Features::SSE4_A)
7356             .Case("fma4", X86Features::FMA4)
7357             .Case("xop", X86Features::XOP)
7358             .Case("fma", X86Features::FMA)
7359             .Case("avx512f", X86Features::AVX512F)
7360             .Case("bmi", X86Features::BMI)
7361             .Case("bmi2", X86Features::BMI2)
7362             .Case("aes", X86Features::AES)
7363             .Case("pclmul", X86Features::PCLMUL)
7364             .Case("avx512vl", X86Features::AVX512VL)
7365             .Case("avx512bw", X86Features::AVX512BW)
7366             .Case("avx512dq", X86Features::AVX512DQ)
7367             .Case("avx512cd", X86Features::AVX512CD)
7368             .Case("avx512er", X86Features::AVX512ER)
7369             .Case("avx512pf", X86Features::AVX512PF)
7370             .Case("avx512vbmi", X86Features::AVX512VBMI)
7371             .Case("avx512ifma", X86Features::AVX512IFMA)
7372             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7373             .Default(X86Features::MAX);
7374     assert(Feature != X86Features::MAX && "Invalid feature!");
7375
7376     // Matching the struct layout from the compiler-rt/libgcc structure that is
7377     // filled in:
7378     // unsigned int __cpu_vendor;
7379     // unsigned int __cpu_type;
7380     // unsigned int __cpu_subtype;
7381     // unsigned int __cpu_features[1];
7382     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7383                                             llvm::ArrayType::get(Int32Ty, 1));
7384
7385     // Grab the global __cpu_model.
7386     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7387
7388     // Grab the first (0th) element from the field __cpu_features off of the
7389     // global in the struct STy.
7390     Value *Idxs[] = {
7391       ConstantInt::get(Int32Ty, 0),
7392       ConstantInt::get(Int32Ty, 3),
7393       ConstantInt::get(Int32Ty, 0)
7394     };
7395     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7396     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7397                                                 CharUnits::fromQuantity(4));
7398
7399     // Check the value of the bit corresponding to the feature requested.
7400     Value *Bitset = Builder.CreateAnd(
7401         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7402     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7403   }
7404   case X86::BI_mm_prefetch: {
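         // llvm.prefetch takes (address, rw, locality, cache type). _mm_prefetch
         // is always a read prefetch of the data cache, so rw is 0 and cache type
         // is 1; the locality hint comes from the builtin's second argument.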
7405     Value *Address = Ops[0];
7406     Value *RW = ConstantInt::get(Int32Ty, 0);
7407     Value *Locality = Ops[1];
7408     Value *Data = ConstantInt::get(Int32Ty, 1);
7409     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7410     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7411   }
7412   case X86::BI_mm_clflush: {
7413     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7414                               Ops[0]);
7415   }
7416   case X86::BI_mm_lfence: {
7417     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7418   }
7419   case X86::BI_mm_mfence: {
7420     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7421   }
7422   case X86::BI_mm_sfence: {
7423     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7424   }
7425   case X86::BI_mm_pause: {
7426     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7427   }
7428   case X86::BI__rdtsc: {
7429     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7430   }
7431   case X86::BI__builtin_ia32_undef128:
7432   case X86::BI__builtin_ia32_undef256:
7433   case X86::BI__builtin_ia32_undef512:
7434     // The x86 definition of "undef" is not the same as the LLVM definition
7435     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7436     // IR optimizer and backend.
7437     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7438     // value, we should use that here instead of a zero.
7439     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7440   case X86::BI__builtin_ia32_vec_init_v8qi:
7441   case X86::BI__builtin_ia32_vec_init_v4hi:
7442   case X86::BI__builtin_ia32_vec_init_v2si:
7443     return Builder.CreateBitCast(BuildVector(Ops),
7444                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7445   case X86::BI__builtin_ia32_vec_ext_v2si:
7446     return Builder.CreateExtractElement(Ops[0],
7447                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7448   case X86::BI_mm_setcsr:
7449   case X86::BI__builtin_ia32_ldmxcsr: {
7450     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7451     Builder.CreateStore(Ops[0], Tmp);
7452     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7453                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7454   }
7455   case X86::BI_mm_getcsr:
7456   case X86::BI__builtin_ia32_stmxcsr: {
7457     Address Tmp = CreateMemTemp(E->getType());
7458     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7459                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7460     return Builder.CreateLoad(Tmp, "stmxcsr");
7461   }
7462   case X86::BI__builtin_ia32_xsave:
7463   case X86::BI__builtin_ia32_xsave64:
7464   case X86::BI__builtin_ia32_xrstor:
7465   case X86::BI__builtin_ia32_xrstor64:
7466   case X86::BI__builtin_ia32_xsaveopt:
7467   case X86::BI__builtin_ia32_xsaveopt64:
7468   case X86::BI__builtin_ia32_xrstors:
7469   case X86::BI__builtin_ia32_xrstors64:
7470   case X86::BI__builtin_ia32_xsavec:
7471   case X86::BI__builtin_ia32_xsavec64:
7472   case X86::BI__builtin_ia32_xsaves:
7473   case X86::BI__builtin_ia32_xsaves64: {
7474     Intrinsic::ID ID;
7475 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7476     case X86::BI__builtin_ia32_##NAME: \
7477       ID = Intrinsic::x86_##NAME; \
7478       break
7479     switch (BuiltinID) {
7480     default: llvm_unreachable("Unsupported intrinsic!");
7481     INTRINSIC_X86_XSAVE_ID(xsave);
7482     INTRINSIC_X86_XSAVE_ID(xsave64);
7483     INTRINSIC_X86_XSAVE_ID(xrstor);
7484     INTRINSIC_X86_XSAVE_ID(xrstor64);
7485     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7486     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7487     INTRINSIC_X86_XSAVE_ID(xrstors);
7488     INTRINSIC_X86_XSAVE_ID(xrstors64);
7489     INTRINSIC_X86_XSAVE_ID(xsavec);
7490     INTRINSIC_X86_XSAVE_ID(xsavec64);
7491     INTRINSIC_X86_XSAVE_ID(xsaves);
7492     INTRINSIC_X86_XSAVE_ID(xsaves64);
7493     }
7494 #undef INTRINSIC_X86_XSAVE_ID
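         // The builtin takes a single 64-bit mask; the intrinsics expect it split
         // into two i32 halves (high half first), so break it apart here.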
7495     Value *Mhi = Builder.CreateTrunc(
7496       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7497     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7498     Ops[1] = Mhi;
7499     Ops.push_back(Mlo);
7500     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7501   }
7502   case X86::BI__builtin_ia32_storedqudi128_mask:
7503   case X86::BI__builtin_ia32_storedqusi128_mask:
7504   case X86::BI__builtin_ia32_storedquhi128_mask:
7505   case X86::BI__builtin_ia32_storedquqi128_mask:
7506   case X86::BI__builtin_ia32_storeupd128_mask:
7507   case X86::BI__builtin_ia32_storeups128_mask:
7508   case X86::BI__builtin_ia32_storedqudi256_mask:
7509   case X86::BI__builtin_ia32_storedqusi256_mask:
7510   case X86::BI__builtin_ia32_storedquhi256_mask:
7511   case X86::BI__builtin_ia32_storedquqi256_mask:
7512   case X86::BI__builtin_ia32_storeupd256_mask:
7513   case X86::BI__builtin_ia32_storeups256_mask:
7514   case X86::BI__builtin_ia32_storedqudi512_mask:
7515   case X86::BI__builtin_ia32_storedqusi512_mask:
7516   case X86::BI__builtin_ia32_storedquhi512_mask:
7517   case X86::BI__builtin_ia32_storedquqi512_mask:
7518   case X86::BI__builtin_ia32_storeupd512_mask:
7519   case X86::BI__builtin_ia32_storeups512_mask:
7520     return EmitX86MaskedStore(*this, Ops, 1);
7521
7522   case X86::BI__builtin_ia32_storess128_mask:
7523   case X86::BI__builtin_ia32_storesd128_mask: {
7524     return EmitX86MaskedStore(*this, Ops, 16);
7525   }
7526   case X86::BI__builtin_ia32_vpopcntd_512:
7527   case X86::BI__builtin_ia32_vpopcntq_512: {
7528     llvm::Type *ResultType = ConvertType(E->getType());
7529     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7530     return Builder.CreateCall(F, Ops);
7531   }
7532   case X86::BI__builtin_ia32_cvtmask2b128:
7533   case X86::BI__builtin_ia32_cvtmask2b256:
7534   case X86::BI__builtin_ia32_cvtmask2b512:
7535   case X86::BI__builtin_ia32_cvtmask2w128:
7536   case X86::BI__builtin_ia32_cvtmask2w256:
7537   case X86::BI__builtin_ia32_cvtmask2w512:
7538   case X86::BI__builtin_ia32_cvtmask2d128:
7539   case X86::BI__builtin_ia32_cvtmask2d256:
7540   case X86::BI__builtin_ia32_cvtmask2d512:
7541   case X86::BI__builtin_ia32_cvtmask2q128:
7542   case X86::BI__builtin_ia32_cvtmask2q256:
7543   case X86::BI__builtin_ia32_cvtmask2q512:
7544     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7545
7546   case X86::BI__builtin_ia32_movdqa32store128_mask:
7547   case X86::BI__builtin_ia32_movdqa64store128_mask:
7548   case X86::BI__builtin_ia32_storeaps128_mask:
7549   case X86::BI__builtin_ia32_storeapd128_mask:
7550   case X86::BI__builtin_ia32_movdqa32store256_mask:
7551   case X86::BI__builtin_ia32_movdqa64store256_mask:
7552   case X86::BI__builtin_ia32_storeaps256_mask:
7553   case X86::BI__builtin_ia32_storeapd256_mask:
7554   case X86::BI__builtin_ia32_movdqa32store512_mask:
7555   case X86::BI__builtin_ia32_movdqa64store512_mask:
7556   case X86::BI__builtin_ia32_storeaps512_mask:
7557   case X86::BI__builtin_ia32_storeapd512_mask: {
7558     unsigned Align =
7559       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7560     return EmitX86MaskedStore(*this, Ops, Align);
7561   }
7562   case X86::BI__builtin_ia32_loadups128_mask:
7563   case X86::BI__builtin_ia32_loadups256_mask:
7564   case X86::BI__builtin_ia32_loadups512_mask:
7565   case X86::BI__builtin_ia32_loadupd128_mask:
7566   case X86::BI__builtin_ia32_loadupd256_mask:
7567   case X86::BI__builtin_ia32_loadupd512_mask:
7568   case X86::BI__builtin_ia32_loaddquqi128_mask:
7569   case X86::BI__builtin_ia32_loaddquqi256_mask:
7570   case X86::BI__builtin_ia32_loaddquqi512_mask:
7571   case X86::BI__builtin_ia32_loaddquhi128_mask:
7572   case X86::BI__builtin_ia32_loaddquhi256_mask:
7573   case X86::BI__builtin_ia32_loaddquhi512_mask:
7574   case X86::BI__builtin_ia32_loaddqusi128_mask:
7575   case X86::BI__builtin_ia32_loaddqusi256_mask:
7576   case X86::BI__builtin_ia32_loaddqusi512_mask:
7577   case X86::BI__builtin_ia32_loaddqudi128_mask:
7578   case X86::BI__builtin_ia32_loaddqudi256_mask:
7579   case X86::BI__builtin_ia32_loaddqudi512_mask:
7580     return EmitX86MaskedLoad(*this, Ops, 1);
7581
7582   case X86::BI__builtin_ia32_loadss128_mask:
7583   case X86::BI__builtin_ia32_loadsd128_mask:
7584     return EmitX86MaskedLoad(*this, Ops, 16);
7585
7586   case X86::BI__builtin_ia32_loadaps128_mask:
7587   case X86::BI__builtin_ia32_loadaps256_mask:
7588   case X86::BI__builtin_ia32_loadaps512_mask:
7589   case X86::BI__builtin_ia32_loadapd128_mask:
7590   case X86::BI__builtin_ia32_loadapd256_mask:
7591   case X86::BI__builtin_ia32_loadapd512_mask:
7592   case X86::BI__builtin_ia32_movdqa32load128_mask:
7593   case X86::BI__builtin_ia32_movdqa32load256_mask:
7594   case X86::BI__builtin_ia32_movdqa32load512_mask:
7595   case X86::BI__builtin_ia32_movdqa64load128_mask:
7596   case X86::BI__builtin_ia32_movdqa64load256_mask:
7597   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7598     unsigned Align =
7599       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7600     return EmitX86MaskedLoad(*this, Ops, Align);
7601   }
7602
7603   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7604   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7605     llvm::Type *DstTy = ConvertType(E->getType());
7606     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7607   }
7608
7609   case X86::BI__builtin_ia32_storehps:
7610   case X86::BI__builtin_ia32_storelps: {
7611     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7612     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7613
7614     // Cast the value to v2i64.
7615     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7616
7617     // Extract element 0 (storelps) or element 1 (storehps).
7618     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7619     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7620     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7621
7622     // Cast the pointer to i64* and store.
7623     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7624     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7625   }
7626   case X86::BI__builtin_ia32_palignr128:
7627   case X86::BI__builtin_ia32_palignr256:
7628   case X86::BI__builtin_ia32_palignr512_mask: {
7629     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7630
7631     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7632     assert(NumElts % 16 == 0);
7633
7634     // If palignr is shifting the pair of vectors more than the size of two
7635     // lanes, emit zero.
7636     if (ShiftVal >= 32)
7637       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7638
7639     // If palignr is shifting the pair of input vectors more than one lane,
7640     // but less than two lanes, convert to shifting in zeroes.
7641     if (ShiftVal > 16) {
7642       ShiftVal -= 16;
7643       Ops[1] = Ops[0];
7644       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7645     }
7646
7647     uint32_t Indices[64];
7648     // 256-bit and 512-bit palignr operate on 128-bit lanes, so handle each lane separately.
7649     for (unsigned l = 0; l != NumElts; l += 16) {
7650       for (unsigned i = 0; i != 16; ++i) {
7651         unsigned Idx = ShiftVal + i;
7652         if (Idx >= 16)
7653           Idx += NumElts - 16; // End of lane, switch operand.
7654         Indices[l + i] = Idx + l;
7655       }
7656     }
7657
7658     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7659                                                makeArrayRef(Indices, NumElts),
7660                                                "palignr");
7661
7662     // If this isn't a masked builtin, just return the align operation.
7663     if (Ops.size() == 3)
7664       return Align;
7665
7666     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7667   }
7668
7669   case X86::BI__builtin_ia32_movnti:
7670   case X86::BI__builtin_ia32_movnti64:
7671   case X86::BI__builtin_ia32_movntsd:
7672   case X86::BI__builtin_ia32_movntss: {
7673     llvm::MDNode *Node = llvm::MDNode::get(
7674         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7675
7676     Value *Ptr = Ops[0];
7677     Value *Src = Ops[1];
7678
7679     // Extract the 0'th element of the source vector.
7680     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7681         BuiltinID == X86::BI__builtin_ia32_movntss)
7682       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7683
7684     // Convert the type of the pointer to a pointer to the stored type.
7685     Value *BC = Builder.CreateBitCast(
7686         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7687
7688     // Unaligned nontemporal store of the scalar value.
7689     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7690     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7691     SI->setAlignment(1);
7692     return SI;
7693   }
7694
7695   case X86::BI__builtin_ia32_selectb_128:
7696   case X86::BI__builtin_ia32_selectb_256:
7697   case X86::BI__builtin_ia32_selectb_512:
7698   case X86::BI__builtin_ia32_selectw_128:
7699   case X86::BI__builtin_ia32_selectw_256:
7700   case X86::BI__builtin_ia32_selectw_512:
7701   case X86::BI__builtin_ia32_selectd_128:
7702   case X86::BI__builtin_ia32_selectd_256:
7703   case X86::BI__builtin_ia32_selectd_512:
7704   case X86::BI__builtin_ia32_selectq_128:
7705   case X86::BI__builtin_ia32_selectq_256:
7706   case X86::BI__builtin_ia32_selectq_512:
7707   case X86::BI__builtin_ia32_selectps_128:
7708   case X86::BI__builtin_ia32_selectps_256:
7709   case X86::BI__builtin_ia32_selectps_512:
7710   case X86::BI__builtin_ia32_selectpd_128:
7711   case X86::BI__builtin_ia32_selectpd_256:
7712   case X86::BI__builtin_ia32_selectpd_512:
7713     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7714   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7715   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7716   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7717   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7718   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7719   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7720   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7721   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7722   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7723   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7724   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7725   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7726     return EmitX86MaskedCompare(*this, 0, false, Ops);
7727   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7728   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7729   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7730   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7731   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7732   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7733   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7734   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7735   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7736   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7737   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7738   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7739     return EmitX86MaskedCompare(*this, 6, true, Ops);
7740   case X86::BI__builtin_ia32_cmpb128_mask:
7741   case X86::BI__builtin_ia32_cmpb256_mask:
7742   case X86::BI__builtin_ia32_cmpb512_mask:
7743   case X86::BI__builtin_ia32_cmpw128_mask:
7744   case X86::BI__builtin_ia32_cmpw256_mask:
7745   case X86::BI__builtin_ia32_cmpw512_mask:
7746   case X86::BI__builtin_ia32_cmpd128_mask:
7747   case X86::BI__builtin_ia32_cmpd256_mask:
7748   case X86::BI__builtin_ia32_cmpd512_mask:
7749   case X86::BI__builtin_ia32_cmpq128_mask:
7750   case X86::BI__builtin_ia32_cmpq256_mask:
7751   case X86::BI__builtin_ia32_cmpq512_mask: {
7752     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7753     return EmitX86MaskedCompare(*this, CC, true, Ops);
7754   }
7755   case X86::BI__builtin_ia32_ucmpb128_mask:
7756   case X86::BI__builtin_ia32_ucmpb256_mask:
7757   case X86::BI__builtin_ia32_ucmpb512_mask:
7758   case X86::BI__builtin_ia32_ucmpw128_mask:
7759   case X86::BI__builtin_ia32_ucmpw256_mask:
7760   case X86::BI__builtin_ia32_ucmpw512_mask:
7761   case X86::BI__builtin_ia32_ucmpd128_mask:
7762   case X86::BI__builtin_ia32_ucmpd256_mask:
7763   case X86::BI__builtin_ia32_ucmpd512_mask:
7764   case X86::BI__builtin_ia32_ucmpq128_mask:
7765   case X86::BI__builtin_ia32_ucmpq256_mask:
7766   case X86::BI__builtin_ia32_ucmpq512_mask: {
7767     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7768     return EmitX86MaskedCompare(*this, CC, false, Ops);
7769   }
7770
7771   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7772   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7773   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7774   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7775   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7776   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7777     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7778     return EmitX86Select(*this, Ops[2],
7779                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7780                          Ops[1]);
7781   }
7782
7783   case X86::BI__builtin_ia32_pmaxsb128:
7784   case X86::BI__builtin_ia32_pmaxsw128:
7785   case X86::BI__builtin_ia32_pmaxsd128:
7786   case X86::BI__builtin_ia32_pmaxsq128_mask:
7787   case X86::BI__builtin_ia32_pmaxsb256:
7788   case X86::BI__builtin_ia32_pmaxsw256:
7789   case X86::BI__builtin_ia32_pmaxsd256:
7790   case X86::BI__builtin_ia32_pmaxsq256_mask:
7791   case X86::BI__builtin_ia32_pmaxsb512_mask:
7792   case X86::BI__builtin_ia32_pmaxsw512_mask:
7793   case X86::BI__builtin_ia32_pmaxsd512_mask:
7794   case X86::BI__builtin_ia32_pmaxsq512_mask:
7795     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7796   case X86::BI__builtin_ia32_pmaxub128:
7797   case X86::BI__builtin_ia32_pmaxuw128:
7798   case X86::BI__builtin_ia32_pmaxud128:
7799   case X86::BI__builtin_ia32_pmaxuq128_mask:
7800   case X86::BI__builtin_ia32_pmaxub256:
7801   case X86::BI__builtin_ia32_pmaxuw256:
7802   case X86::BI__builtin_ia32_pmaxud256:
7803   case X86::BI__builtin_ia32_pmaxuq256_mask:
7804   case X86::BI__builtin_ia32_pmaxub512_mask:
7805   case X86::BI__builtin_ia32_pmaxuw512_mask:
7806   case X86::BI__builtin_ia32_pmaxud512_mask:
7807   case X86::BI__builtin_ia32_pmaxuq512_mask:
7808     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7809   case X86::BI__builtin_ia32_pminsb128:
7810   case X86::BI__builtin_ia32_pminsw128:
7811   case X86::BI__builtin_ia32_pminsd128:
7812   case X86::BI__builtin_ia32_pminsq128_mask:
7813   case X86::BI__builtin_ia32_pminsb256:
7814   case X86::BI__builtin_ia32_pminsw256:
7815   case X86::BI__builtin_ia32_pminsd256:
7816   case X86::BI__builtin_ia32_pminsq256_mask:
7817   case X86::BI__builtin_ia32_pminsb512_mask:
7818   case X86::BI__builtin_ia32_pminsw512_mask:
7819   case X86::BI__builtin_ia32_pminsd512_mask:
7820   case X86::BI__builtin_ia32_pminsq512_mask:
7821     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7822   case X86::BI__builtin_ia32_pminub128:
7823   case X86::BI__builtin_ia32_pminuw128:
7824   case X86::BI__builtin_ia32_pminud128:
7825   case X86::BI__builtin_ia32_pminuq128_mask:
7826   case X86::BI__builtin_ia32_pminub256:
7827   case X86::BI__builtin_ia32_pminuw256:
7828   case X86::BI__builtin_ia32_pminud256:
7829   case X86::BI__builtin_ia32_pminuq256_mask:
7830   case X86::BI__builtin_ia32_pminub512_mask:
7831   case X86::BI__builtin_ia32_pminuw512_mask:
7832   case X86::BI__builtin_ia32_pminud512_mask:
7833   case X86::BI__builtin_ia32_pminuq512_mask:
7834     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7835
7836   // 3DNow!
7837   case X86::BI__builtin_ia32_pswapdsf:
7838   case X86::BI__builtin_ia32_pswapdsi: {
7839     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7840     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7841     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7842     return Builder.CreateCall(F, Ops, "pswapd");
7843   }
7844   case X86::BI__builtin_ia32_rdrand16_step:
7845   case X86::BI__builtin_ia32_rdrand32_step:
7846   case X86::BI__builtin_ia32_rdrand64_step:
7847   case X86::BI__builtin_ia32_rdseed16_step:
7848   case X86::BI__builtin_ia32_rdseed32_step:
7849   case X86::BI__builtin_ia32_rdseed64_step: {
7850     Intrinsic::ID ID;
7851     switch (BuiltinID) {
7852     default: llvm_unreachable("Unsupported intrinsic!");
7853     case X86::BI__builtin_ia32_rdrand16_step:
7854       ID = Intrinsic::x86_rdrand_16;
7855       break;
7856     case X86::BI__builtin_ia32_rdrand32_step:
7857       ID = Intrinsic::x86_rdrand_32;
7858       break;
7859     case X86::BI__builtin_ia32_rdrand64_step:
7860       ID = Intrinsic::x86_rdrand_64;
7861       break;
7862     case X86::BI__builtin_ia32_rdseed16_step:
7863       ID = Intrinsic::x86_rdseed_16;
7864       break;
7865     case X86::BI__builtin_ia32_rdseed32_step:
7866       ID = Intrinsic::x86_rdseed_32;
7867       break;
7868     case X86::BI__builtin_ia32_rdseed64_step:
7869       ID = Intrinsic::x86_rdseed_64;
7870       break;
7871     }
7872
7873     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7874     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7875                                       Ops[0]);
7876     return Builder.CreateExtractValue(Call, 1);
7877   }
7878
7879   // SSE packed comparison intrinsics
7880   case X86::BI__builtin_ia32_cmpeqps:
7881   case X86::BI__builtin_ia32_cmpeqpd:
7882     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7883   case X86::BI__builtin_ia32_cmpltps:
7884   case X86::BI__builtin_ia32_cmpltpd:
7885     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7886   case X86::BI__builtin_ia32_cmpleps:
7887   case X86::BI__builtin_ia32_cmplepd:
7888     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7889   case X86::BI__builtin_ia32_cmpunordps:
7890   case X86::BI__builtin_ia32_cmpunordpd:
7891     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7892   case X86::BI__builtin_ia32_cmpneqps:
7893   case X86::BI__builtin_ia32_cmpneqpd:
7894     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7895   case X86::BI__builtin_ia32_cmpnltps:
7896   case X86::BI__builtin_ia32_cmpnltpd:
7897     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7898   case X86::BI__builtin_ia32_cmpnleps:
7899   case X86::BI__builtin_ia32_cmpnlepd:
7900     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7901   case X86::BI__builtin_ia32_cmpordps:
7902   case X86::BI__builtin_ia32_cmpordpd:
7903     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7904   case X86::BI__builtin_ia32_cmpps:
7905   case X86::BI__builtin_ia32_cmpps256:
7906   case X86::BI__builtin_ia32_cmppd:
7907   case X86::BI__builtin_ia32_cmppd256: {
7908     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7909     // If this is one of the SSE immediates, we can use native IR.
7910     if (CC < 8) {
7911       FCmpInst::Predicate Pred;
7912       switch (CC) {
7913       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7914       case 1: Pred = FCmpInst::FCMP_OLT; break;
7915       case 2: Pred = FCmpInst::FCMP_OLE; break;
7916       case 3: Pred = FCmpInst::FCMP_UNO; break;
7917       case 4: Pred = FCmpInst::FCMP_UNE; break;
7918       case 5: Pred = FCmpInst::FCMP_UGE; break;
7919       case 6: Pred = FCmpInst::FCMP_UGT; break;
7920       case 7: Pred = FCmpInst::FCMP_ORD; break;
7921       }
7922       return getVectorFCmpIR(Pred);
7923     }
7924
7925     // We can't handle immediates 8-31 with native IR, so use the intrinsic,
7926     // except for the predicates that always produce constants.
7927     Intrinsic::ID ID;
7928     switch (BuiltinID) {
7929     default: llvm_unreachable("Unsupported intrinsic!");
7930     case X86::BI__builtin_ia32_cmpps:
7931       ID = Intrinsic::x86_sse_cmp_ps;
7932       break;
7933     case X86::BI__builtin_ia32_cmpps256:
7934       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
7935       // and _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
7936       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7937          Value *Constant = (CC == 0xf || CC == 0x1f) ?
7938                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
7939                 llvm::Constant::getNullValue(Builder.getInt32Ty());
7940          Value *Vec = Builder.CreateVectorSplat(
7941                         Ops[0]->getType()->getVectorNumElements(), Constant);
7942          return Builder.CreateBitCast(Vec, Ops[0]->getType());
7943       }
7944       ID = Intrinsic::x86_avx_cmp_ps_256;
7945       break;
7946     case X86::BI__builtin_ia32_cmppd:
7947       ID = Intrinsic::x86_sse2_cmp_pd;
7948       break;
7949     case X86::BI__builtin_ia32_cmppd256:
7950       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
7951       // and _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
7952       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7953          Value *Constant = (CC == 0xf || CC == 0x1f) ?
7954                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
7955                 llvm::Constant::getNullValue(Builder.getInt64Ty());
7956          Value *Vec = Builder.CreateVectorSplat(
7957                         Ops[0]->getType()->getVectorNumElements(), Constant);
7958          return Builder.CreateBitCast(Vec, Ops[0]->getType());
7959       }
7960       ID = Intrinsic::x86_avx_cmp_pd_256;
7961       break;
7962     }
7963
7964     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7965   }
7966
7967   // SSE scalar comparison intrinsics
7968   case X86::BI__builtin_ia32_cmpeqss:
7969     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7970   case X86::BI__builtin_ia32_cmpltss:
7971     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7972   case X86::BI__builtin_ia32_cmpless:
7973     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7974   case X86::BI__builtin_ia32_cmpunordss:
7975     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7976   case X86::BI__builtin_ia32_cmpneqss:
7977     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7978   case X86::BI__builtin_ia32_cmpnltss:
7979     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7980   case X86::BI__builtin_ia32_cmpnless:
7981     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7982   case X86::BI__builtin_ia32_cmpordss:
7983     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7984   case X86::BI__builtin_ia32_cmpeqsd:
7985     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7986   case X86::BI__builtin_ia32_cmpltsd:
7987     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7988   case X86::BI__builtin_ia32_cmplesd:
7989     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7990   case X86::BI__builtin_ia32_cmpunordsd:
7991     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7992   case X86::BI__builtin_ia32_cmpneqsd:
7993     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7994   case X86::BI__builtin_ia32_cmpnltsd:
7995     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7996   case X86::BI__builtin_ia32_cmpnlesd:
7997     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7998   case X86::BI__builtin_ia32_cmpordsd:
7999     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8000
8001   case X86::BI__emul:
8002   case X86::BI__emulu: {
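         // __emul/__emulu return the full 64-bit product of two 32-bit operands,
         // so widen the operands (sign- or zero-extending as appropriate) and
         // multiply in 64 bits.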
8003     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8004     bool isSigned = (BuiltinID == X86::BI__emul);
8005     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8006     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8007     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8008   }
8009   case X86::BI__mulh:
8010   case X86::BI__umulh:
8011   case X86::BI_mul128:
8012   case X86::BI_umul128: {
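         // Compute the full 128-bit product. __mulh/__umulh return only the high
         // 64 bits; _mul128/_umul128 also store the high bits through the third
         // argument and return the low 64 bits.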
8013     llvm::Type *ResType = ConvertType(E->getType());
8014     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8015
8016     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8017     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8018     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8019
8020     Value *MulResult, *HigherBits;
8021     if (IsSigned) {
8022       MulResult = Builder.CreateNSWMul(LHS, RHS);
8023       HigherBits = Builder.CreateAShr(MulResult, 64);
8024     } else {
8025       MulResult = Builder.CreateNUWMul(LHS, RHS);
8026       HigherBits = Builder.CreateLShr(MulResult, 64);
8027     }
8028     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8029
8030     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8031       return HigherBits;
8032
8033     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8034     Builder.CreateStore(HigherBits, HighBitsAddress);
8035     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8036   }
8037
8038   case X86::BI__faststorefence: {
8039     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8040                                llvm::CrossThread);
8041   }
8042   case X86::BI_ReadWriteBarrier:
8043   case X86::BI_ReadBarrier:
8044   case X86::BI_WriteBarrier: {
8045     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8046                                llvm::SingleThread);
8047   }
8048   case X86::BI_BitScanForward:
8049   case X86::BI_BitScanForward64:
8050     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8051   case X86::BI_BitScanReverse:
8052   case X86::BI_BitScanReverse64:
8053     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8054
8055   case X86::BI_InterlockedAnd64:
8056     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8057   case X86::BI_InterlockedExchange64:
8058     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8059   case X86::BI_InterlockedExchangeAdd64:
8060     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8061   case X86::BI_InterlockedExchangeSub64:
8062     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8063   case X86::BI_InterlockedOr64:
8064     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8065   case X86::BI_InterlockedXor64:
8066     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8067   case X86::BI_InterlockedDecrement64:
8068     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8069   case X86::BI_InterlockedIncrement64:
8070     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8071
8072   case X86::BI_AddressOfReturnAddress: {
8073     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8074     return Builder.CreateCall(F);
8075   }
8076   case X86::BI__stosb: {
8077     // We treat __stosb as a volatile memset; it may not generate a "rep stosb"
8078     // instruction, but it will create a memset that won't be optimized away.
8079     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8080   }
8081   case X86::BI__ud2:
8082     // llvm.trap lowers to a ud2a instruction on x86.
8083     return EmitTrapCall(Intrinsic::trap);
8084   case X86::BI__int2c: {
8085     // This syscall signals a driver assertion failure in x86 NT kernels.
8086     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8087     llvm::InlineAsm *IA =
8088         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8089     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8090         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8091         llvm::Attribute::NoReturn);
8092     CallSite CS = Builder.CreateCall(IA);
8093     CS.setAttributes(NoReturnAttr);
8094     return CS.getInstruction();
8095   }
8096   case X86::BI__readfsbyte:
8097   case X86::BI__readfsword:
8098   case X86::BI__readfsdword:
8099   case X86::BI__readfsqword: {
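         // Address space 257 is the FS segment in the x86 backend; the load is
         // marked volatile so it cannot be folded or reordered.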
8100     llvm::Type *IntTy = ConvertType(E->getType());
8101     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8102                                         llvm::PointerType::get(IntTy, 257));
8103     LoadInst *Load = Builder.CreateAlignedLoad(
8104         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8105     Load->setVolatile(true);
8106     return Load;
8107   }
8108   case X86::BI__readgsbyte:
8109   case X86::BI__readgsword:
8110   case X86::BI__readgsdword:
8111   case X86::BI__readgsqword: {
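         // Likewise, address space 256 is the GS segment.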
8112     llvm::Type *IntTy = ConvertType(E->getType());
8113     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8114                                         llvm::PointerType::get(IntTy, 256));
8115     LoadInst *Load = Builder.CreateAlignedLoad(
8116         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8117     Load->setVolatile(true);
8118     return Load;
8119   }
8120   }
8121 }
8122
8123
8124 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8125                                            const CallExpr *E) {
8126   SmallVector<Value*, 4> Ops;
8127
8128   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8129     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8130
8131   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8132
8133   switch (BuiltinID) {
8134   default: return nullptr;
8135
8136   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8137   // call __builtin_readcyclecounter.
8138   case PPC::BI__builtin_ppc_get_timebase:
8139     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8140
8141   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8142   case PPC::BI__builtin_altivec_lvx:
8143   case PPC::BI__builtin_altivec_lvxl:
8144   case PPC::BI__builtin_altivec_lvebx:
8145   case PPC::BI__builtin_altivec_lvehx:
8146   case PPC::BI__builtin_altivec_lvewx:
8147   case PPC::BI__builtin_altivec_lvsl:
8148   case PPC::BI__builtin_altivec_lvsr:
8149   case PPC::BI__builtin_vsx_lxvd2x:
8150   case PPC::BI__builtin_vsx_lxvw4x:
8151   case PPC::BI__builtin_vsx_lxvd2x_be:
8152   case PPC::BI__builtin_vsx_lxvw4x_be:
8153   case PPC::BI__builtin_vsx_lxvl:
8154   case PPC::BI__builtin_vsx_lxvll:
8155   {
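         // lxvl/lxvll take (pointer, length), so only the pointer needs a cast.
         // The other load builtins take (offset, pointer); fold the offset into
         // the pointer with a GEP and drop it, since the intrinsics take a single
         // pointer operand.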
8156     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8157         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
8158       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8159     } else {
8160       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8161       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8162       Ops.pop_back();
8163     }
8164
8165     switch (BuiltinID) {
8166     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8167     case PPC::BI__builtin_altivec_lvx:
8168       ID = Intrinsic::ppc_altivec_lvx;
8169       break;
8170     case PPC::BI__builtin_altivec_lvxl:
8171       ID = Intrinsic::ppc_altivec_lvxl;
8172       break;
8173     case PPC::BI__builtin_altivec_lvebx:
8174       ID = Intrinsic::ppc_altivec_lvebx;
8175       break;
8176     case PPC::BI__builtin_altivec_lvehx:
8177       ID = Intrinsic::ppc_altivec_lvehx;
8178       break;
8179     case PPC::BI__builtin_altivec_lvewx:
8180       ID = Intrinsic::ppc_altivec_lvewx;
8181       break;
8182     case PPC::BI__builtin_altivec_lvsl:
8183       ID = Intrinsic::ppc_altivec_lvsl;
8184       break;
8185     case PPC::BI__builtin_altivec_lvsr:
8186       ID = Intrinsic::ppc_altivec_lvsr;
8187       break;
8188     case PPC::BI__builtin_vsx_lxvd2x:
8189       ID = Intrinsic::ppc_vsx_lxvd2x;
8190       break;
8191     case PPC::BI__builtin_vsx_lxvw4x:
8192       ID = Intrinsic::ppc_vsx_lxvw4x;
8193       break;
8194     case PPC::BI__builtin_vsx_lxvd2x_be:
8195       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8196       break;
8197     case PPC::BI__builtin_vsx_lxvw4x_be:
8198       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8199       break;
8200     case PPC::BI__builtin_vsx_lxvl:
8201       ID = Intrinsic::ppc_vsx_lxvl;
8202       break;
8203     case PPC::BI__builtin_vsx_lxvll:
8204       ID = Intrinsic::ppc_vsx_lxvll;
8205       break;
8206     }
8207     llvm::Function *F = CGM.getIntrinsic(ID);
8208     return Builder.CreateCall(F, Ops, "");
8209   }
8210
8211   // vec_st, vec_xst_be
8212   case PPC::BI__builtin_altivec_stvx:
8213   case PPC::BI__builtin_altivec_stvxl:
8214   case PPC::BI__builtin_altivec_stvebx:
8215   case PPC::BI__builtin_altivec_stvehx:
8216   case PPC::BI__builtin_altivec_stvewx:
8217   case PPC::BI__builtin_vsx_stxvd2x:
8218   case PPC::BI__builtin_vsx_stxvw4x:
8219   case PPC::BI__builtin_vsx_stxvd2x_be:
8220   case PPC::BI__builtin_vsx_stxvw4x_be:
8221   case PPC::BI__builtin_vsx_stxvl:
8222   case PPC::BI__builtin_vsx_stxvll:
8223   {
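         // As with the loads: stxvl/stxvll take (value, pointer, length), while
         // the other store builtins take (value, offset, pointer) and have the
         // offset folded into the pointer.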
8224     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8225         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8226       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8227     } else {
8228       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8229       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8230       Ops.pop_back();
8231     }
8232
8233     switch (BuiltinID) {
8234     default: llvm_unreachable("Unsupported st intrinsic!");
8235     case PPC::BI__builtin_altivec_stvx:
8236       ID = Intrinsic::ppc_altivec_stvx;
8237       break;
8238     case PPC::BI__builtin_altivec_stvxl:
8239       ID = Intrinsic::ppc_altivec_stvxl;
8240       break;
8241     case PPC::BI__builtin_altivec_stvebx:
8242       ID = Intrinsic::ppc_altivec_stvebx;
8243       break;
8244     case PPC::BI__builtin_altivec_stvehx:
8245       ID = Intrinsic::ppc_altivec_stvehx;
8246       break;
8247     case PPC::BI__builtin_altivec_stvewx:
8248       ID = Intrinsic::ppc_altivec_stvewx;
8249       break;
8250     case PPC::BI__builtin_vsx_stxvd2x:
8251       ID = Intrinsic::ppc_vsx_stxvd2x;
8252       break;
8253     case PPC::BI__builtin_vsx_stxvw4x:
8254       ID = Intrinsic::ppc_vsx_stxvw4x;
8255       break;
8256     case PPC::BI__builtin_vsx_stxvd2x_be:
8257       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8258       break;
8259     case PPC::BI__builtin_vsx_stxvw4x_be:
8260       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8261       break;
8262     case PPC::BI__builtin_vsx_stxvl:
8263       ID = Intrinsic::ppc_vsx_stxvl;
8264       break;
8265     case PPC::BI__builtin_vsx_stxvll:
8266       ID = Intrinsic::ppc_vsx_stxvll;
8267       break;
8268     }
8269     llvm::Function *F = CGM.getIntrinsic(ID);
8270     return Builder.CreateCall(F, Ops, "");
8271   }
8272   // Square root
8273   case PPC::BI__builtin_vsx_xvsqrtsp:
8274   case PPC::BI__builtin_vsx_xvsqrtdp: {
8275     llvm::Type *ResultType = ConvertType(E->getType());
8276     Value *X = EmitScalarExpr(E->getArg(0));
8277     ID = Intrinsic::sqrt;
8278     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8279     return Builder.CreateCall(F, X);
8280   }
8281   // Count leading zeros
8282   case PPC::BI__builtin_altivec_vclzb:
8283   case PPC::BI__builtin_altivec_vclzh:
8284   case PPC::BI__builtin_altivec_vclzw:
8285   case PPC::BI__builtin_altivec_vclzd: {
8286     llvm::Type *ResultType = ConvertType(E->getType());
8287     Value *X = EmitScalarExpr(E->getArg(0));
8288     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8289     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8290     return Builder.CreateCall(F, {X, Undef});
8291   }
8292   case PPC::BI__builtin_altivec_vctzb:
8293   case PPC::BI__builtin_altivec_vctzh:
8294   case PPC::BI__builtin_altivec_vctzw:
8295   case PPC::BI__builtin_altivec_vctzd: {
8296     llvm::Type *ResultType = ConvertType(E->getType());
8297     Value *X = EmitScalarExpr(E->getArg(0));
8298     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8299     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8300     return Builder.CreateCall(F, {X, Undef});
8301   }
8302   case PPC::BI__builtin_altivec_vpopcntb:
8303   case PPC::BI__builtin_altivec_vpopcnth:
8304   case PPC::BI__builtin_altivec_vpopcntw:
8305   case PPC::BI__builtin_altivec_vpopcntd: {
8306     llvm::Type *ResultType = ConvertType(E->getType());
8307     Value *X = EmitScalarExpr(E->getArg(0));
8308     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8309     return Builder.CreateCall(F, X);
8310   }
8311   // Copy sign
8312   case PPC::BI__builtin_vsx_xvcpsgnsp:
8313   case PPC::BI__builtin_vsx_xvcpsgndp: {
8314     llvm::Type *ResultType = ConvertType(E->getType());
8315     Value *X = EmitScalarExpr(E->getArg(0));
8316     Value *Y = EmitScalarExpr(E->getArg(1));
8317     ID = Intrinsic::copysign;
8318     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8319     return Builder.CreateCall(F, {X, Y});
8320   }
8321   // Rounding/truncation
8322   case PPC::BI__builtin_vsx_xvrspip:
8323   case PPC::BI__builtin_vsx_xvrdpip:
8324   case PPC::BI__builtin_vsx_xvrdpim:
8325   case PPC::BI__builtin_vsx_xvrspim:
8326   case PPC::BI__builtin_vsx_xvrdpi:
8327   case PPC::BI__builtin_vsx_xvrspi:
8328   case PPC::BI__builtin_vsx_xvrdpic:
8329   case PPC::BI__builtin_vsx_xvrspic:
8330   case PPC::BI__builtin_vsx_xvrdpiz:
8331   case PPC::BI__builtin_vsx_xvrspiz: {
8332     llvm::Type *ResultType = ConvertType(E->getType());
8333     Value *X = EmitScalarExpr(E->getArg(0));
8334     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8335         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8336       ID = Intrinsic::floor;
8337     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8338              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8339       ID = Intrinsic::round;
8340     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8341              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8342       ID = Intrinsic::nearbyint;
8343     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8344              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8345       ID = Intrinsic::ceil;
8346     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8347              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8348       ID = Intrinsic::trunc;
8349     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8350     return Builder.CreateCall(F, X);
8351   }
8352
8353   // Absolute value
8354   case PPC::BI__builtin_vsx_xvabsdp:
8355   case PPC::BI__builtin_vsx_xvabssp: {
8356     llvm::Type *ResultType = ConvertType(E->getType());
8357     Value *X = EmitScalarExpr(E->getArg(0));
8358     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8359     return Builder.CreateCall(F, X);
8360   }
8361
8362   // FMA variations
8363   case PPC::BI__builtin_vsx_xvmaddadp:
8364   case PPC::BI__builtin_vsx_xvmaddasp:
8365   case PPC::BI__builtin_vsx_xvnmaddadp:
8366   case PPC::BI__builtin_vsx_xvnmaddasp:
8367   case PPC::BI__builtin_vsx_xvmsubadp:
8368   case PPC::BI__builtin_vsx_xvmsubasp:
8369   case PPC::BI__builtin_vsx_xvnmsubadp:
8370   case PPC::BI__builtin_vsx_xvnmsubasp: {
8371     llvm::Type *ResultType = ConvertType(E->getType());
8372     Value *X = EmitScalarExpr(E->getArg(0));
8373     Value *Y = EmitScalarExpr(E->getArg(1));
8374     Value *Z = EmitScalarExpr(E->getArg(2));
8375     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8376     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
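         // All of these are built from llvm.fma: madd is fma(X, Y, Z), nmadd is
         // -fma(X, Y, Z), msub is fma(X, Y, -Z), and nmsub is -fma(X, Y, -Z).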
8377     switch (BuiltinID) {
8378       case PPC::BI__builtin_vsx_xvmaddadp:
8379       case PPC::BI__builtin_vsx_xvmaddasp:
8380         return Builder.CreateCall(F, {X, Y, Z});
8381       case PPC::BI__builtin_vsx_xvnmaddadp:
8382       case PPC::BI__builtin_vsx_xvnmaddasp:
8383         return Builder.CreateFSub(Zero,
8384                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8385       case PPC::BI__builtin_vsx_xvmsubadp:
8386       case PPC::BI__builtin_vsx_xvmsubasp:
8387         return Builder.CreateCall(F,
8388                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8389       case PPC::BI__builtin_vsx_xvnmsubadp:
8390       case PPC::BI__builtin_vsx_xvnmsubasp:
8391         Value *FsubRes =
8392           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8393         return Builder.CreateFSub(Zero, FsubRes, "sub");
8394     }
8395     llvm_unreachable("Unknown FMA operation");
8396     return nullptr; // Suppress no-return warning
8397   }
8398
8399   case PPC::BI__builtin_vsx_insertword: {
8400     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8401
8402     // Third argument is a compile-time constant int. It must be clamped to
8403     // the range [0, 12].
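         // (xxinsertw inserts a 4-byte word into a 16-byte vector register, so
         // 12 is the largest byte offset that keeps the word in bounds.)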
8404     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8405     assert(ArgCI &&
8406            "Third arg to xxinsertw intrinsic must be constant integer");
8407     const int64_t MaxIndex = 12;
8408     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8409
8410     // The builtin semantics don't exactly match the xxinsertw instruction's
8411     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8412     // word from the first argument and inserts it into the second argument. The
8413     // instruction extracts the word from its second input register and inserts
8414     // it into its first input register, so swap the first and second arguments.
8415     std::swap(Ops[0], Ops[1]);
8416
8417     // Need to cast the second argument from a vector of unsigned int to a
8418     // vector of long long.
8419     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8420
8421     if (getTarget().isLittleEndian()) {
8422       // Create a shuffle mask of (1, 0)
8423       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8424                                    ConstantInt::get(Int32Ty, 0)
8425                                  };
8426       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8427
8428       // Reverse the double words in the vector we will extract from.
8429       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8430       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8431
8432       // Reverse the index.
8433       Index = MaxIndex - Index;
8434     }
8435
8436     // Intrinsic expects the first arg to be a vector of int.
8437     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8438     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8439     return Builder.CreateCall(F, Ops);
8440   }
8441
8442   case PPC::BI__builtin_vsx_extractuword: {
8443     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8444
8445     // Intrinsic expects the first argument to be a vector of doublewords.
8446     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8447
8448     // The second argument is a compile time constant int that needs to
8449     // be clamped to the range [0, 12].
8450     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8451     assert(ArgCI &&
8452            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8453     const int64_t MaxIndex = 12;
8454     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8455
8456     if (getTarget().isLittleEndian()) {
8457       // Reverse the index.
8458       Index = MaxIndex - Index;
8459       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8460
8461       // Emit the call, then reverse the double words of the results vector.
8462       Value *Call = Builder.CreateCall(F, Ops);
8463
8464       // Create a shuffle mask of (1, 0)
8465       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8466                                    ConstantInt::get(Int32Ty, 0)
8467                                  };
8468       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8469
8470       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8471       return ShuffleCall;
8472     } else {
8473       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8474       return Builder.CreateCall(F, Ops);
8475     }
8476   }
8477
8478   case PPC::BI__builtin_vsx_xxpermdi: {
8479     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8480     assert(ArgCI && "Third arg must be constant integer!");
8481
8482     unsigned Index = ArgCI->getZExtValue();
8483     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8484     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8485
8486     // Element zero comes from the first input vector and element one from the
8487     // second. The element indices within each vector are numbered in big-endian
8488     // order, so the shuffle mask must be adjusted on little-endian platforms
8489     // (i.e. the index bits are complemented and the source vectors swapped).
8490     unsigned ElemIdx0;
8491     unsigned ElemIdx1;
8492     if (getTarget().isLittleEndian()) {
8493       ElemIdx0 = (~Index & 1) + 2;
8494       ElemIdx1 = (~Index & 2) >> 1;
8495     } else { // BigEndian
8496       ElemIdx0 = (Index & 2) >> 1;
8497       ElemIdx1 = 2 + (Index & 1);
8498     }
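         // For example, with Index == 1 the big-endian mask is (0, 3), i.e.
         // element 0 of the first vector and element 1 of the second; the
         // adjusted little-endian mask is (2, 1).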
8499
8500     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8501                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8502     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8503
8504     Value *ShuffleCall =
8505         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8506     QualType BIRetType = E->getType();
8507     auto RetTy = ConvertType(BIRetType);
8508     return Builder.CreateBitCast(ShuffleCall, RetTy);
8509   }
8510
8511   case PPC::BI__builtin_vsx_xxsldwi: {
8512     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8513     assert(ArgCI && "Third argument must be a compile time constant");
8514     unsigned Index = ArgCI->getZExtValue() & 0x3;
8515     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8516     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8517
8518     // Create a shuffle mask
8519     unsigned ElemIdx0;
8520     unsigned ElemIdx1;
8521     unsigned ElemIdx2;
8522     unsigned ElemIdx3;
8523     if (getTarget().isLittleEndian()) {
8524       // Little endian element N comes from element 8+N-Index of the
8525       // concatenated wide vector (of course, using modulo arithmetic on
8526       // the total number of elements).
8527       ElemIdx0 = (8 - Index) % 8;
8528       ElemIdx1 = (9 - Index) % 8;
8529       ElemIdx2 = (10 - Index) % 8;
8530       ElemIdx3 = (11 - Index) % 8;
8531     } else {
8532       // Big endian ElemIdx<N> = Index + N
8533       ElemIdx0 = Index;
8534       ElemIdx1 = Index + 1;
8535       ElemIdx2 = Index + 2;
8536       ElemIdx3 = Index + 3;
8537     }
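         // For example, with Index == 1 the big-endian mask is (1, 2, 3, 4),
         // while the little-endian mask becomes (7, 0, 1, 2).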
8538
8539     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8540                                 ConstantInt::get(Int32Ty, ElemIdx1),
8541                                 ConstantInt::get(Int32Ty, ElemIdx2),
8542                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8543
8544     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8545     Value *ShuffleCall =
8546         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8547     QualType BIRetType = E->getType();
8548     auto RetTy = ConvertType(BIRetType);
8549     return Builder.CreateBitCast(ShuffleCall, RetTy);
8550   }
8551   }
8552 }
8553
8554 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8555                                               const CallExpr *E) {
8556   switch (BuiltinID) {
8557   case AMDGPU::BI__builtin_amdgcn_div_scale:
8558   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8559     // Translate from the intrinsic's struct return to the builtin's out
8560     // argument.
8561
8562     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8563
8564     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8565     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8566     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8567
8568     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8569                                            X->getType());
8570
8571     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8572
8573     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8574     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8575
8576     llvm::Type *RealFlagType
8577       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8578
8579     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8580     Builder.CreateStore(FlagExt, FlagOutPtr);
8581     return Result;
8582   }
8583   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8584   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8585     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8586     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8587     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8588     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8589
8590     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8591                                       Src0->getType());
8592     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8593     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8594   }
8595
8596   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8597     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8598   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8599     llvm::SmallVector<llvm::Value *, 5> Args;
8600     for (unsigned I = 0; I != 5; ++I)
8601       Args.push_back(EmitScalarExpr(E->getArg(I)));
8602     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8603                                     Args[0]->getType());
8604     return Builder.CreateCall(F, Args);
8605   }
8606   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8607   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8608   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8609     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8610   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8611   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8612     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8613   case AMDGPU::BI__builtin_amdgcn_rcp:
8614   case AMDGPU::BI__builtin_amdgcn_rcpf:
8615   case AMDGPU::BI__builtin_amdgcn_rcph:
8616     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8617   case AMDGPU::BI__builtin_amdgcn_rsq:
8618   case AMDGPU::BI__builtin_amdgcn_rsqf:
8619   case AMDGPU::BI__builtin_amdgcn_rsqh:
8620     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8621   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8622   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8623     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8624   case AMDGPU::BI__builtin_amdgcn_sinf:
8625   case AMDGPU::BI__builtin_amdgcn_sinh:
8626     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8627   case AMDGPU::BI__builtin_amdgcn_cosf:
8628   case AMDGPU::BI__builtin_amdgcn_cosh:
8629     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8630   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8631     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8632   case AMDGPU::BI__builtin_amdgcn_ldexp:
8633   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8634   case AMDGPU::BI__builtin_amdgcn_ldexph:
8635     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8636   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8637   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8638   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8639     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8640   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8641   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8642     Value *Src0 = EmitScalarExpr(E->getArg(0));
8643     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8644                                 { Builder.getInt32Ty(), Src0->getType() });
8645     return Builder.CreateCall(F, Src0);
8646   }
8647   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8648     Value *Src0 = EmitScalarExpr(E->getArg(0));
8649     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8650                                 { Builder.getInt16Ty(), Src0->getType() });
8651     return Builder.CreateCall(F, Src0);
8652   }
8653   case AMDGPU::BI__builtin_amdgcn_fract:
8654   case AMDGPU::BI__builtin_amdgcn_fractf:
8655   case AMDGPU::BI__builtin_amdgcn_fracth:
8656     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8657   case AMDGPU::BI__builtin_amdgcn_lerp:
8658     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8659   case AMDGPU::BI__builtin_amdgcn_uicmp:
8660   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8661   case AMDGPU::BI__builtin_amdgcn_sicmp:
8662   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8663     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8664   case AMDGPU::BI__builtin_amdgcn_fcmp:
8665   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8666     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8667   case AMDGPU::BI__builtin_amdgcn_class:
8668   case AMDGPU::BI__builtin_amdgcn_classf:
8669   case AMDGPU::BI__builtin_amdgcn_classh:
8670     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8671   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8672   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8673     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8674   case AMDGPU::BI__builtin_amdgcn_read_exec: {
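         // Read the 64-bit EXEC mask via the special-register path and mark the
         // call convergent so it cannot be moved across divergent control flow.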
8675     CallInst *CI = cast<CallInst>(
8676       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8677     CI->setConvergent();
8678     return CI;
8679   }
8680
8681   // amdgcn workitem
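       // Work-item IDs are emitted through emitRangedBuiltin with bounds
       // (0, 1024), reflecting the maximum workgroup dimension.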
8682   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8683     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8684   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8685     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8686   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8687     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8688
8689   // r600 intrinsics
8690   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8691   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8692     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8693   case AMDGPU::BI__builtin_r600_read_tidig_x:
8694     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8695   case AMDGPU::BI__builtin_r600_read_tidig_y:
8696     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8697   case AMDGPU::BI__builtin_r600_read_tidig_z:
8698     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8699   default:
8700     return nullptr;
8701   }
8702 }
8703
8704 /// Handle a SystemZ function in which the final argument is a pointer
8705 /// to an int that receives the post-instruction CC value.  At the LLVM level
8706 /// this is represented as a function that returns a {result, cc} pair.
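     /// For example, __builtin_s390_vceqbs(a, b, &cc) lowers to a call of the
     /// llvm.s390.vceqbs intrinsic returning a {vector, i32} pair; the i32 CC is
     /// stored through the pointer argument and the vector result is returned.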
8707 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8708                                          unsigned IntrinsicID,
8709                                          const CallExpr *E) {
8710   unsigned NumArgs = E->getNumArgs() - 1;
8711   SmallVector<Value *, 8> Args(NumArgs);
8712   for (unsigned I = 0; I < NumArgs; ++I)
8713     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8714   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8715   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8716   Value *Call = CGF.Builder.CreateCall(F, Args);
8717   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8718   CGF.Builder.CreateStore(CC, CCPtr);
8719   return CGF.Builder.CreateExtractValue(Call, 0);
8720 }
8721
8722 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8723                                                const CallExpr *E) {
8724   switch (BuiltinID) {
8725   case SystemZ::BI__builtin_tbegin: {
8726     Value *TDB = EmitScalarExpr(E->getArg(0));
8727     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8728     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8729     return Builder.CreateCall(F, {TDB, Control});
8730   }
8731   case SystemZ::BI__builtin_tbegin_nofloat: {
8732     Value *TDB = EmitScalarExpr(E->getArg(0));
8733     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8734     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8735     return Builder.CreateCall(F, {TDB, Control});
8736   }
8737   case SystemZ::BI__builtin_tbeginc: {
8738     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8739     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8740     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8741     return Builder.CreateCall(F, {TDB, Control});
8742   }
8743   case SystemZ::BI__builtin_tabort: {
8744     Value *Data = EmitScalarExpr(E->getArg(0));
8745     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8746     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8747   }
8748   case SystemZ::BI__builtin_non_tx_store: {
8749     Value *Address = EmitScalarExpr(E->getArg(0));
8750     Value *Data = EmitScalarExpr(E->getArg(1));
8751     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8752     return Builder.CreateCall(F, {Data, Address});
8753   }
8754
8755   // Vector builtins.  Note that most vector builtins are mapped automatically
8756   // to target-specific LLVM intrinsics.  The ones handled specially here can
8757   // be represented via standard LLVM IR, which is preferable to enable common
8758   // LLVM optimizations.
8759
8760   case SystemZ::BI__builtin_s390_vpopctb:
8761   case SystemZ::BI__builtin_s390_vpopcth:
8762   case SystemZ::BI__builtin_s390_vpopctf:
8763   case SystemZ::BI__builtin_s390_vpopctg: {
8764     llvm::Type *ResultType = ConvertType(E->getType());
8765     Value *X = EmitScalarExpr(E->getArg(0));
8766     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8767     return Builder.CreateCall(F, X);
8768   }
8769
8770   case SystemZ::BI__builtin_s390_vclzb:
8771   case SystemZ::BI__builtin_s390_vclzh:
8772   case SystemZ::BI__builtin_s390_vclzf:
8773   case SystemZ::BI__builtin_s390_vclzg: {
8774     llvm::Type *ResultType = ConvertType(E->getType());
8775     Value *X = EmitScalarExpr(E->getArg(0));
8776     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8777     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8778     return Builder.CreateCall(F, {X, Undef});
8779   }
8780
8781   case SystemZ::BI__builtin_s390_vctzb:
8782   case SystemZ::BI__builtin_s390_vctzh:
8783   case SystemZ::BI__builtin_s390_vctzf:
8784   case SystemZ::BI__builtin_s390_vctzg: {
8785     llvm::Type *ResultType = ConvertType(E->getType());
8786     Value *X = EmitScalarExpr(E->getArg(0));
8787     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8788     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8789     return Builder.CreateCall(F, {X, Undef});
8790   }
8791
8792   case SystemZ::BI__builtin_s390_vfsqdb: {
8793     llvm::Type *ResultType = ConvertType(E->getType());
8794     Value *X = EmitScalarExpr(E->getArg(0));
8795     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8796     return Builder.CreateCall(F, X);
8797   }
8798   case SystemZ::BI__builtin_s390_vfmadb: {
8799     llvm::Type *ResultType = ConvertType(E->getType());
8800     Value *X = EmitScalarExpr(E->getArg(0));
8801     Value *Y = EmitScalarExpr(E->getArg(1));
8802     Value *Z = EmitScalarExpr(E->getArg(2));
8803     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8804     return Builder.CreateCall(F, {X, Y, Z});
8805   }
8806   case SystemZ::BI__builtin_s390_vfmsdb: {
8807     llvm::Type *ResultType = ConvertType(E->getType());
8808     Value *X = EmitScalarExpr(E->getArg(0));
8809     Value *Y = EmitScalarExpr(E->getArg(1));
8810     Value *Z = EmitScalarExpr(E->getArg(2));
8811     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8812     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8813     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8814   }
8815   case SystemZ::BI__builtin_s390_vflpdb: {
8816     llvm::Type *ResultType = ConvertType(E->getType());
8817     Value *X = EmitScalarExpr(E->getArg(0));
8818     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8819     return Builder.CreateCall(F, X);
8820   }
8821   case SystemZ::BI__builtin_s390_vflndb: {
8822     llvm::Type *ResultType = ConvertType(E->getType());
8823     Value *X = EmitScalarExpr(E->getArg(0));
8824     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8825     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8826     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8827   }
8828   case SystemZ::BI__builtin_s390_vfidb: {
8829     llvm::Type *ResultType = ConvertType(E->getType());
8830     Value *X = EmitScalarExpr(E->getArg(0));
8831     // Constant-fold the M4 and M5 mask arguments.
8832     llvm::APSInt M4, M5;
8833     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8834     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8835     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8836     (void)IsConstM4; (void)IsConstM5;
8837     // Check whether this instance of vfidb can be represented via an LLVM
8838     // standard intrinsic.  We only support some combinations of M4 and M5.
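         // The combinations handled below are:
         //   (M4, M5) = (0, 0) -> rint     (4, 0) -> nearbyint   (4, 1) -> round
         //              (4, 5) -> trunc    (4, 6) -> ceil        (4, 7) -> floor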
8839     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8840     switch (M4.getZExtValue()) {
8841     default: break;
8842     case 0:  // IEEE-inexact exception allowed
8843       switch (M5.getZExtValue()) {
8844       default: break;
8845       case 0: ID = Intrinsic::rint; break;
8846       }
8847       break;
8848     case 4:  // IEEE-inexact exception suppressed
8849       switch (M5.getZExtValue()) {
8850       default: break;
8851       case 0: ID = Intrinsic::nearbyint; break;
8852       case 1: ID = Intrinsic::round; break;
8853       case 5: ID = Intrinsic::trunc; break;
8854       case 6: ID = Intrinsic::ceil; break;
8855       case 7: ID = Intrinsic::floor; break;
8856       }
8857       break;
8858     }
8859     if (ID != Intrinsic::not_intrinsic) {
8860       Function *F = CGM.getIntrinsic(ID, ResultType);
8861       return Builder.CreateCall(F, X);
8862     }
8863     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8864     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8865     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8866     return Builder.CreateCall(F, {X, M4Value, M5Value});
8867   }
8868
8869   // Vector intrinsics that output the post-instruction CC value.
8870
8871 #define INTRINSIC_WITH_CC(NAME) \
8872     case SystemZ::BI__builtin_##NAME: \
8873       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8874
8875   INTRINSIC_WITH_CC(s390_vpkshs);
8876   INTRINSIC_WITH_CC(s390_vpksfs);
8877   INTRINSIC_WITH_CC(s390_vpksgs);
8878
8879   INTRINSIC_WITH_CC(s390_vpklshs);
8880   INTRINSIC_WITH_CC(s390_vpklsfs);
8881   INTRINSIC_WITH_CC(s390_vpklsgs);
8882
8883   INTRINSIC_WITH_CC(s390_vceqbs);
8884   INTRINSIC_WITH_CC(s390_vceqhs);
8885   INTRINSIC_WITH_CC(s390_vceqfs);
8886   INTRINSIC_WITH_CC(s390_vceqgs);
8887
8888   INTRINSIC_WITH_CC(s390_vchbs);
8889   INTRINSIC_WITH_CC(s390_vchhs);
8890   INTRINSIC_WITH_CC(s390_vchfs);
8891   INTRINSIC_WITH_CC(s390_vchgs);
8892
8893   INTRINSIC_WITH_CC(s390_vchlbs);
8894   INTRINSIC_WITH_CC(s390_vchlhs);
8895   INTRINSIC_WITH_CC(s390_vchlfs);
8896   INTRINSIC_WITH_CC(s390_vchlgs);
8897
8898   INTRINSIC_WITH_CC(s390_vfaebs);
8899   INTRINSIC_WITH_CC(s390_vfaehs);
8900   INTRINSIC_WITH_CC(s390_vfaefs);
8901
8902   INTRINSIC_WITH_CC(s390_vfaezbs);
8903   INTRINSIC_WITH_CC(s390_vfaezhs);
8904   INTRINSIC_WITH_CC(s390_vfaezfs);
8905
8906   INTRINSIC_WITH_CC(s390_vfeebs);
8907   INTRINSIC_WITH_CC(s390_vfeehs);
8908   INTRINSIC_WITH_CC(s390_vfeefs);
8909
8910   INTRINSIC_WITH_CC(s390_vfeezbs);
8911   INTRINSIC_WITH_CC(s390_vfeezhs);
8912   INTRINSIC_WITH_CC(s390_vfeezfs);
8913
8914   INTRINSIC_WITH_CC(s390_vfenebs);
8915   INTRINSIC_WITH_CC(s390_vfenehs);
8916   INTRINSIC_WITH_CC(s390_vfenefs);
8917
8918   INTRINSIC_WITH_CC(s390_vfenezbs);
8919   INTRINSIC_WITH_CC(s390_vfenezhs);
8920   INTRINSIC_WITH_CC(s390_vfenezfs);
8921
8922   INTRINSIC_WITH_CC(s390_vistrbs);
8923   INTRINSIC_WITH_CC(s390_vistrhs);
8924   INTRINSIC_WITH_CC(s390_vistrfs);
8925
8926   INTRINSIC_WITH_CC(s390_vstrcbs);
8927   INTRINSIC_WITH_CC(s390_vstrchs);
8928   INTRINSIC_WITH_CC(s390_vstrcfs);
8929
8930   INTRINSIC_WITH_CC(s390_vstrczbs);
8931   INTRINSIC_WITH_CC(s390_vstrczhs);
8932   INTRINSIC_WITH_CC(s390_vstrczfs);
8933
8934   INTRINSIC_WITH_CC(s390_vfcedbs);
8935   INTRINSIC_WITH_CC(s390_vfchdbs);
8936   INTRINSIC_WITH_CC(s390_vfchedbs);
8937
8938   INTRINSIC_WITH_CC(s390_vftcidb);
8939
8940 #undef INTRINSIC_WITH_CC
8941
8942   default:
8943     return nullptr;
8944   }
8945 }
8946
8947 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8948                                              const CallExpr *E) {
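       // MakeLdg wraps a pointer argument in an ldg ("load global non-coherent")
       // intrinsic call, passing the natural alignment of the pointee type;
       // MakeScopedAtomic emits a scoped (cta/sys) atomic intrinsic that takes
       // the pointer and a single value operand.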
8949   auto MakeLdg = [&](unsigned IntrinsicID) {
8950     Value *Ptr = EmitScalarExpr(E->getArg(0));
8951     clang::CharUnits Align =
8952         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
8953     return Builder.CreateCall(
8954         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8955                                        Ptr->getType()}),
8956         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8957   };
8958   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8959     Value *Ptr = EmitScalarExpr(E->getArg(0));
8960     return Builder.CreateCall(
8961         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8962                                        Ptr->getType()}),
8963         {Ptr, EmitScalarExpr(E->getArg(1))});
8964   };
8965   switch (BuiltinID) {
8966   case NVPTX::BI__nvvm_atom_add_gen_i:
8967   case NVPTX::BI__nvvm_atom_add_gen_l:
8968   case NVPTX::BI__nvvm_atom_add_gen_ll:
8969     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8970
8971   case NVPTX::BI__nvvm_atom_sub_gen_i:
8972   case NVPTX::BI__nvvm_atom_sub_gen_l:
8973   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8974     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8975
8976   case NVPTX::BI__nvvm_atom_and_gen_i:
8977   case NVPTX::BI__nvvm_atom_and_gen_l:
8978   case NVPTX::BI__nvvm_atom_and_gen_ll:
8979     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8980
8981   case NVPTX::BI__nvvm_atom_or_gen_i:
8982   case NVPTX::BI__nvvm_atom_or_gen_l:
8983   case NVPTX::BI__nvvm_atom_or_gen_ll:
8984     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8985
8986   case NVPTX::BI__nvvm_atom_xor_gen_i:
8987   case NVPTX::BI__nvvm_atom_xor_gen_l:
8988   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8989     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8990
8991   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8992   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8993   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8994     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8995
8996   case NVPTX::BI__nvvm_atom_max_gen_i:
8997   case NVPTX::BI__nvvm_atom_max_gen_l:
8998   case NVPTX::BI__nvvm_atom_max_gen_ll:
8999     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9000
9001   case NVPTX::BI__nvvm_atom_max_gen_ui:
9002   case NVPTX::BI__nvvm_atom_max_gen_ul:
9003   case NVPTX::BI__nvvm_atom_max_gen_ull:
9004     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9005
9006   case NVPTX::BI__nvvm_atom_min_gen_i:
9007   case NVPTX::BI__nvvm_atom_min_gen_l:
9008   case NVPTX::BI__nvvm_atom_min_gen_ll:
9009     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9010
9011   case NVPTX::BI__nvvm_atom_min_gen_ui:
9012   case NVPTX::BI__nvvm_atom_min_gen_ul:
9013   case NVPTX::BI__nvvm_atom_min_gen_ull:
9014     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9015
9016   case NVPTX::BI__nvvm_atom_cas_gen_i:
9017   case NVPTX::BI__nvvm_atom_cas_gen_l:
9018   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9019     // __nvvm_atom_cas_gen_* should return the old value rather than the
9020     // success flag.
9021     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9022
9023   case NVPTX::BI__nvvm_atom_add_gen_f: {
9024     Value *Ptr = EmitScalarExpr(E->getArg(0));
9025     Value *Val = EmitScalarExpr(E->getArg(1));
9026     // atomicrmw only deals with integer arguments, so we use LLVM's
9027     // nvvm_atomic_load_add_f32 intrinsic for the floating-point add instead.
9028     Value *FnALAF32 =
9029         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9030     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9031   }
9032
9033   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9034     Value *Ptr = EmitScalarExpr(E->getArg(0));
9035     Value *Val = EmitScalarExpr(E->getArg(1));
9036     Value *FnALI32 =
9037         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9038     return Builder.CreateCall(FnALI32, {Ptr, Val});
9039   }
9040
9041   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9042     Value *Ptr = EmitScalarExpr(E->getArg(0));
9043     Value *Val = EmitScalarExpr(E->getArg(1));
9044     Value *FnALD32 =
9045         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9046     return Builder.CreateCall(FnALD32, {Ptr, Val});
9047   }
9048
9049   case NVPTX::BI__nvvm_ldg_c:
9050   case NVPTX::BI__nvvm_ldg_c2:
9051   case NVPTX::BI__nvvm_ldg_c4:
9052   case NVPTX::BI__nvvm_ldg_s:
9053   case NVPTX::BI__nvvm_ldg_s2:
9054   case NVPTX::BI__nvvm_ldg_s4:
9055   case NVPTX::BI__nvvm_ldg_i:
9056   case NVPTX::BI__nvvm_ldg_i2:
9057   case NVPTX::BI__nvvm_ldg_i4:
9058   case NVPTX::BI__nvvm_ldg_l:
9059   case NVPTX::BI__nvvm_ldg_ll:
9060   case NVPTX::BI__nvvm_ldg_ll2:
9061   case NVPTX::BI__nvvm_ldg_uc:
9062   case NVPTX::BI__nvvm_ldg_uc2:
9063   case NVPTX::BI__nvvm_ldg_uc4:
9064   case NVPTX::BI__nvvm_ldg_us:
9065   case NVPTX::BI__nvvm_ldg_us2:
9066   case NVPTX::BI__nvvm_ldg_us4:
9067   case NVPTX::BI__nvvm_ldg_ui:
9068   case NVPTX::BI__nvvm_ldg_ui2:
9069   case NVPTX::BI__nvvm_ldg_ui4:
9070   case NVPTX::BI__nvvm_ldg_ul:
9071   case NVPTX::BI__nvvm_ldg_ull:
9072   case NVPTX::BI__nvvm_ldg_ull2:
9073     // PTX Interoperability section 2.2: "For a vector with an even number of
9074     // elements, its alignment is set to number of elements times the alignment
9075     // of its member: n*alignof(t)."
9076     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9077   case NVPTX::BI__nvvm_ldg_f:
9078   case NVPTX::BI__nvvm_ldg_f2:
9079   case NVPTX::BI__nvvm_ldg_f4:
9080   case NVPTX::BI__nvvm_ldg_d:
9081   case NVPTX::BI__nvvm_ldg_d2:
9082     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9083
9084   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9085   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9086   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9087     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9088   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9089   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9090   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9091     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9092   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9093   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9094     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9095   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9096   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9097     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9098   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9099   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9100   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9101     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9102   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9103   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9104   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9105     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9106   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9107   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9108   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9109   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9110   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9111   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9112     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9113   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9114   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9115   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9116   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9117   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9118   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9119     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9120   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9121   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9122   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9123   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9124   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9125   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9126     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9127   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9128   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9129   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9130   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9131   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9132   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9133     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9134   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9135     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9136   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9137     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9138   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9139     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9140   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9141     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9142   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9143   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9144   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9145     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9146   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9147   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9148   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9149     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9150   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9151   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9152   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9153     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9154   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9155   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9156   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9157     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9158   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9159   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9160   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9161     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9162   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9163   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9164   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9165     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9166   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9167   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9168   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9169     Value *Ptr = EmitScalarExpr(E->getArg(0));
9170     return Builder.CreateCall(
9171         CGM.getIntrinsic(
9172             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9173             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9174         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9175   }
9176   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9177   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9178   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9179     Value *Ptr = EmitScalarExpr(E->getArg(0));
9180     return Builder.CreateCall(
9181         CGM.getIntrinsic(
9182             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9183             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9184         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9185   }
9186   default:
9187     return nullptr;
9188   }
9189 }
9190
9191 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9192                                                    const CallExpr *E) {
9193   switch (BuiltinID) {
9194   case WebAssembly::BI__builtin_wasm_current_memory: {
9195     llvm::Type *ResultType = ConvertType(E->getType());
9196     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9197     return Builder.CreateCall(Callee);
9198   }
9199   case WebAssembly::BI__builtin_wasm_grow_memory: {
9200     Value *X = EmitScalarExpr(E->getArg(0));
9201     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9202     return Builder.CreateCall(Callee, X);
9203   }
9204
9205   default:
9206     return nullptr;
9207   }
9208 }