1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
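  // (The builtin's name has the form "__builtin_fabsf"; the +10 below skips
  // the 10-character "__builtin_" prefix, leaving the plain library name.)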
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
120
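// Emit a store of arg0 through the pointer in arg1, tagged with !nontemporal
// metadata so the backend can use cache-bypassing stores (this backs builtins
// such as __builtin_nontemporal_store).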
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
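/// For example, the __sync_*_and_fetch builtins are emitted through this
/// helper; the Invert flag covers __sync_nand_and_fetch, whose result must be
/// bitwise-negated after the atomic operation.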
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return the success flag of the
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
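/// For example, __sync_val_compare_and_swap is emitted with ReturnBool=false
/// (yielding the old value), while __sync_bool_compare_and_swap uses
/// ReturnBool=true (yielding the success flag).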
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has one float or double operand and one integer operand.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
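/// (Bitcasting the value to a same-width integer and doing a signed compare
/// against zero tests the top bit, which is the IEEE sign bit.)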
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating the value in order to extract the higher-order
304     // double, from which we will extract the sign.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
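/// For example, __builtin_sadd_overflow can be lowered through this helper
/// with IntrinsicID = llvm::Intrinsic::sadd_with_overflow.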
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
341
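// Emit a call to a no-argument intrinsic and attach !range metadata asserting
// that its result lies in the half-open interval [low, high).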
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
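// For example, encompassing an unsigned 32-bit type and a signed 16-bit type
// requires a signed 33-bit type, since both UINT32_MAX and negative values
// must be representable.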
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383 // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct.
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474
475   // LLVM only supports 0 and 2, so make sure we pass that along as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477   // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
481
482 // Many MSVC builtins are available on both x64 and ARM; to avoid repeating
483 // code, we handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485   _BitScanForward,
486   _BitScanReverse,
487   _InterlockedAnd,
488   _InterlockedDecrement,
489   _InterlockedExchange,
490   _InterlockedExchangeAdd,
491   _InterlockedExchangeSub,
492   _InterlockedIncrement,
493   _InterlockedOr,
494   _InterlockedXor,
495   _interlockedbittestandset,
496   __fastfail,
497 };
498
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
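    // These intrinsics return 0 when the input is zero; otherwise they store
    // the index of the lowest (_BitScanForward) or highest (_BitScanReverse)
    // set bit through the Index pointer (arg 0) and return 1. Model that with
    // a PHI across two basic blocks.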
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
578
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
597
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676
677     llvm::Type *Type = Int8PtrTy;
678
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
881   case Builtin::BI_rotr8:
882   case Builtin::BI_rotr16:
883   case Builtin::BI_rotr:
884   case Builtin::BI_lrotr:
885   case Builtin::BI_rotr64: {
886     Value *Val = EmitScalarExpr(E->getArg(0));
887     Value *Shift = EmitScalarExpr(E->getArg(1));
888
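    // Lower the rotate as (Val >> Shift) | (Val << (Width - Shift)). The shift
    // amount is masked to Width-1, and the final select returns Val unchanged
    // when the masked shift is zero, so we never emit a shift by the full bit
    // width (which would be poison in LLVM IR).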
889     llvm::Type *ArgType = Val->getType();
890     Shift = Builder.CreateIntCast(Shift, ArgType, false);
891     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894
895     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896     Shift = Builder.CreateAnd(Shift, Mask);
897     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898
899     Value *RightShifted = Builder.CreateLShr(Val, Shift);
900     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902
903     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905     return RValue::get(Result);
906   }
907   case Builtin::BI_rotl8:
908   case Builtin::BI_rotl16:
909   case Builtin::BI_rotl:
910   case Builtin::BI_lrotl:
911   case Builtin::BI_rotl64: {
912     Value *Val = EmitScalarExpr(E->getArg(0));
913     Value *Shift = EmitScalarExpr(E->getArg(1));
914
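    // Same lowering as the rotate-right case above, with the two shift
    // directions swapped.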
915     llvm::Type *ArgType = Val->getType();
916     Shift = Builder.CreateIntCast(Shift, ArgType, false);
917     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920
921     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922     Shift = Builder.CreateAnd(Shift, Mask);
923     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924
925     Value *LeftShifted = Builder.CreateShl(Val, Shift);
926     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928
929     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931     return RValue::get(Result);
932   }
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should be of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034
1035     return RValue::get(nullptr);
1036   }
1037
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
1109
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
1229
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251
1252   case Builtin::BI__builtin_char_memchr:
1253     BuiltinID = Builtin::BI__builtin_memchr;
1254     break;
1255
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
1270
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // Fold __builtin___memmove_chk(x, y, cst1, cst2) to memmove iff cst1 <= cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // Fold __builtin___memset_chk(x, y, cst1, cst2) to memset iff cst1 <= cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
1333   case Builtin::BI__builtin_dwarf_cfa: {
1334     // The offset in bytes from the first argument to the CFA.
1335     //
1336     // Why on earth is this in the frontend?  Is there any reason at
1337     // all that the backend can't reasonably determine this while
1338     // lowering llvm.eh.dwarf.cfa()?
1339     //
1340     // TODO: If there's a satisfactory reason, add a target hook for
1341     // this instead of hard-coding 0, which is correct for most targets.
1342     int32_t Offset = 0;
1343
1344     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345     return RValue::get(Builder.CreateCall(F,
1346                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1347   }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1422
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430
1431     // Otherwise, ask the target hooks what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
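  // Illustrative sketch (editorial note): a typical use is the GCC-style
  // lightweight setjmp with a five-word buffer,
  //
  //   void *buf[5];
  //   if (__builtin_setjmp(buf) == 0) { ... } else { /* longjmp target */ }
  //
  // The code above fills slot 0 with the frame pointer and slot 2 with the
  // stack pointer; the remaining slots are left to llvm.eh.sjlj.setjmp.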
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
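  // Illustrative sketch (editorial note): each sized __sync_fetch_and_* builtin
  // above lowers to a single sequentially-consistent atomicrmw that returns the
  // old value, roughly
  //
  //   __sync_fetch_and_add(&x, 1);
  //   // ==> %old = atomicrmw add i32* %x, i32 1 seq_cst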
1528
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
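  // Illustrative sketch (editorial note): __sync_lock_release(&flag) therefore
  // becomes a plain store of zero with release ordering, sized to the pointee
  // type, roughly
  //
  //   store atomic i8 0, i8* %flag release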
1627
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
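  // Illustrative sketch (editorial note): __sync_synchronize() is simply
  //
  //   fence seq_cst
  //
  // with no operands; its acquire/release effects come from the fence alone.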
1639
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
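  // Illustrative sketch (editorial note): when this path is reached the query
  // is answered by the atomic runtime library rather than in the frontend, e.g.
  //
  //   __atomic_is_lock_free(sizeof(T), &obj)
  //   // ==> call to the __atomic_is_lock_free(size, ptr) libcall
  //
  // with ptr == null for the __c11 form, as noted above.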
1666
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the argument type to determine whether this is a volatile
1669     // operation. The parameter type is always volatile.
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1698
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
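  // Illustrative sketch (editorial note): with a constant order, e.g.
  //
  //   bool was_set = __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE);
  //
  // a single atomicrmw xchg of the i8 value 1 is emitted with the matching
  // ordering; a runtime order instead produces the switch over all five
  // orderings built above.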
1751
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
1808
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SynchronizationScope Scope;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       Scope = llvm::SingleThread;
1817     else
1818       Scope = llvm::CrossThread;
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1838                             Scope);
1839         break;
1840       }
1841       return RValue::get(nullptr);
1842     }
1843
1844     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1845     AcquireBB = createBasicBlock("acquire", CurFn);
1846     ReleaseBB = createBasicBlock("release", CurFn);
1847     AcqRelBB = createBasicBlock("acqrel", CurFn);
1848     SeqCstBB = createBasicBlock("seqcst", CurFn);
1849     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1850
1851     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1852     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1853
1854     Builder.SetInsertPoint(AcquireBB);
1855     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1856     Builder.CreateBr(ContBB);
1857     SI->addCase(Builder.getInt32(1), AcquireBB);
1858     SI->addCase(Builder.getInt32(2), AcquireBB);
1859
1860     Builder.SetInsertPoint(ReleaseBB);
1861     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1862     Builder.CreateBr(ContBB);
1863     SI->addCase(Builder.getInt32(3), ReleaseBB);
1864
1865     Builder.SetInsertPoint(AcqRelBB);
1866     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1867     Builder.CreateBr(ContBB);
1868     SI->addCase(Builder.getInt32(4), AcqRelBB);
1869
1870     Builder.SetInsertPoint(SeqCstBB);
1871     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1872     Builder.CreateBr(ContBB);
1873     SI->addCase(Builder.getInt32(5), SeqCstBB);
1874
1875     Builder.SetInsertPoint(ContBB);
1876     return RValue::get(nullptr);
1877   }
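  // Illustrative sketch (editorial note): for a constant order the whole
  // builtin collapses to a single fence (or nothing for relaxed), e.g.
  //
  //   __atomic_thread_fence(__ATOMIC_RELEASE);   // ==> fence release
  //
  // while __atomic_signal_fence uses the single-thread scope selected above.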
1878
1879     // Library functions with special handling.
1880   case Builtin::BIsqrt:
1881   case Builtin::BIsqrtf:
1882   case Builtin::BIsqrtl: {
1883     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1884     // in finite- or unsafe-math mode (the intrinsic has different semantics
1885     // for handling negative numbers compared to the library function, so
1886     // -fmath-errno=0 is not enough).
1887     if (!FD->hasAttr<ConstAttr>())
1888       break;
1889     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1890           CGM.getCodeGenOpts().NoNaNsFPMath))
1891       break;
1892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1893     llvm::Type *ArgType = Arg0->getType();
1894     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1895     return RValue::get(Builder.CreateCall(F, Arg0));
1896   }
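  // Editorial note: plain sqrt(x) stays a libcall by default because the
  // intrinsic need not set errno or match libm for negative inputs; only under
  // the unsafe/finite-math options checked above (and a const-qualified
  // declaration) does it become @llvm.sqrt.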
1897
1898   case Builtin::BI__builtin_pow:
1899   case Builtin::BI__builtin_powf:
1900   case Builtin::BI__builtin_powl:
1901   case Builtin::BIpow:
1902   case Builtin::BIpowf:
1903   case Builtin::BIpowl: {
1904     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1905     if (!FD->hasAttr<ConstAttr>())
1906       break;
1907     Value *Base = EmitScalarExpr(E->getArg(0));
1908     Value *Exponent = EmitScalarExpr(E->getArg(1));
1909     llvm::Type *ArgType = Base->getType();
1910     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1911     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1912   }
1913
1914   case Builtin::BIfma:
1915   case Builtin::BIfmaf:
1916   case Builtin::BIfmal:
1917   case Builtin::BI__builtin_fma:
1918   case Builtin::BI__builtin_fmaf:
1919   case Builtin::BI__builtin_fmal: {
1920     // Rewrite fma to intrinsic.
1921     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1922     llvm::Type *ArgType = FirstArg->getType();
1923     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1924     return RValue::get(
1925         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1926                                EmitScalarExpr(E->getArg(2))}));
1927   }
1928
1929   case Builtin::BI__builtin_signbit:
1930   case Builtin::BI__builtin_signbitf:
1931   case Builtin::BI__builtin_signbitl: {
1932     return RValue::get(
1933         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1934                            ConvertType(E->getType())));
1935   }
1936   case Builtin::BI__builtin_annotation: {
1937     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1938     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1939                                       AnnVal->getType());
1940
1941     // Get the annotation string, go through casts. Sema requires this to be a
1942     // non-wide string literal, potentially cast, so the cast<> is safe.
1943     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1944     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1945     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1946   }
1947   case Builtin::BI__builtin_addcb:
1948   case Builtin::BI__builtin_addcs:
1949   case Builtin::BI__builtin_addc:
1950   case Builtin::BI__builtin_addcl:
1951   case Builtin::BI__builtin_addcll:
1952   case Builtin::BI__builtin_subcb:
1953   case Builtin::BI__builtin_subcs:
1954   case Builtin::BI__builtin_subc:
1955   case Builtin::BI__builtin_subcl:
1956   case Builtin::BI__builtin_subcll: {
1957
1958     // We translate all of these builtins from expressions of the form:
1959     //   int x = ..., y = ..., carryin = ..., carryout, result;
1960     //   result = __builtin_addc(x, y, carryin, &carryout);
1961     //
1962     // to LLVM IR of the form:
1963     //
1964     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1965     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1966     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1967     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1968     //                                                       i32 %carryin)
1969     //   %result = extractvalue {i32, i1} %tmp2, 0
1970     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1971     //   %tmp3 = or i1 %carry1, %carry2
1972     //   %tmp4 = zext i1 %tmp3 to i32
1973     //   store i32 %tmp4, i32* %carryout
1974
1975     // Scalarize our inputs.
1976     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1977     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1978     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1979     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1980
1981     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1982     llvm::Intrinsic::ID IntrinsicId;
1983     switch (BuiltinID) {
1984     default: llvm_unreachable("Unknown multiprecision builtin id.");
1985     case Builtin::BI__builtin_addcb:
1986     case Builtin::BI__builtin_addcs:
1987     case Builtin::BI__builtin_addc:
1988     case Builtin::BI__builtin_addcl:
1989     case Builtin::BI__builtin_addcll:
1990       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_subcb:
1993     case Builtin::BI__builtin_subcs:
1994     case Builtin::BI__builtin_subc:
1995     case Builtin::BI__builtin_subcl:
1996     case Builtin::BI__builtin_subcll:
1997       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1998       break;
1999     }
2000
2001     // Construct our resulting LLVM IR expression.
2002     llvm::Value *Carry1;
2003     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2004                                               X, Y, Carry1);
2005     llvm::Value *Carry2;
2006     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2007                                               Sum1, Carryin, Carry2);
2008     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2009                                                X->getType());
2010     Builder.CreateStore(CarryOut, CarryOutPtr);
2011     return RValue::get(Sum2);
2012   }
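  // Illustrative sketch (editorial note): a typical multiprecision addition
  // chains the carry through these builtins, e.g. two 64-bit values held as
  // 32-bit limbs:
  //
  //   unsigned c1, c2;
  //   unsigned lo = __builtin_addc(a0, b0, 0,  &c1);
  //   unsigned hi = __builtin_addc(a1, b1, c1, &c2);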
2013
2014   case Builtin::BI__builtin_add_overflow:
2015   case Builtin::BI__builtin_sub_overflow:
2016   case Builtin::BI__builtin_mul_overflow: {
2017     const clang::Expr *LeftArg = E->getArg(0);
2018     const clang::Expr *RightArg = E->getArg(1);
2019     const clang::Expr *ResultArg = E->getArg(2);
2020
2021     clang::QualType ResultQTy =
2022         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2023
2024     WidthAndSignedness LeftInfo =
2025         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2026     WidthAndSignedness RightInfo =
2027         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2028     WidthAndSignedness ResultInfo =
2029         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2030     WidthAndSignedness EncompassingInfo =
2031         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2032
2033     llvm::Type *EncompassingLLVMTy =
2034         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2035
2036     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2037
2038     llvm::Intrinsic::ID IntrinsicId;
2039     switch (BuiltinID) {
2040     default:
2041       llvm_unreachable("Unknown overflow builtin id.");
2042     case Builtin::BI__builtin_add_overflow:
2043       IntrinsicId = EncompassingInfo.Signed
2044                         ? llvm::Intrinsic::sadd_with_overflow
2045                         : llvm::Intrinsic::uadd_with_overflow;
2046       break;
2047     case Builtin::BI__builtin_sub_overflow:
2048       IntrinsicId = EncompassingInfo.Signed
2049                         ? llvm::Intrinsic::ssub_with_overflow
2050                         : llvm::Intrinsic::usub_with_overflow;
2051       break;
2052     case Builtin::BI__builtin_mul_overflow:
2053       IntrinsicId = EncompassingInfo.Signed
2054                         ? llvm::Intrinsic::smul_with_overflow
2055                         : llvm::Intrinsic::umul_with_overflow;
2056       break;
2057     }
2058
2059     llvm::Value *Left = EmitScalarExpr(LeftArg);
2060     llvm::Value *Right = EmitScalarExpr(RightArg);
2061     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2062
2063     // Extend each operand to the encompassing type.
2064     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2065     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2066
2067     // Perform the operation on the extended values.
2068     llvm::Value *Overflow, *Result;
2069     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2070
2071     if (EncompassingInfo.Width > ResultInfo.Width) {
2072       // The encompassing type is wider than the result type, so we need to
2073       // truncate it.
2074       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2075
2076       // To see if the truncation caused an overflow, we will extend
2077       // the result and then compare it to the original result.
2078       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2079           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2080       llvm::Value *TruncationOverflow =
2081           Builder.CreateICmpNE(Result, ResultTruncExt);
2082
2083       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2084       Result = ResultTrunc;
2085     }
2086
2087     // Finally, store the result using the pointer.
2088     bool isVolatile =
2089       ResultArg->getType()->getPointeeType().isVolatileQualified();
2090     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2091
2092     return RValue::get(Overflow);
2093   }
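  // Illustrative sketch (editorial note): the generic forms accept operands and
  // result of different integer types, e.g.
  //
  //   long long r;
  //   if (__builtin_add_overflow(a, b, &r)) { /* handle overflow */ }
  //
  // The arithmetic is done in the encompassing type computed above, with a
  // second check for overflow introduced by truncating to the result type.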
2094
2095   case Builtin::BI__builtin_uadd_overflow:
2096   case Builtin::BI__builtin_uaddl_overflow:
2097   case Builtin::BI__builtin_uaddll_overflow:
2098   case Builtin::BI__builtin_usub_overflow:
2099   case Builtin::BI__builtin_usubl_overflow:
2100   case Builtin::BI__builtin_usubll_overflow:
2101   case Builtin::BI__builtin_umul_overflow:
2102   case Builtin::BI__builtin_umull_overflow:
2103   case Builtin::BI__builtin_umulll_overflow:
2104   case Builtin::BI__builtin_sadd_overflow:
2105   case Builtin::BI__builtin_saddl_overflow:
2106   case Builtin::BI__builtin_saddll_overflow:
2107   case Builtin::BI__builtin_ssub_overflow:
2108   case Builtin::BI__builtin_ssubl_overflow:
2109   case Builtin::BI__builtin_ssubll_overflow:
2110   case Builtin::BI__builtin_smul_overflow:
2111   case Builtin::BI__builtin_smull_overflow:
2112   case Builtin::BI__builtin_smulll_overflow: {
2113
2114     // We translate all of these builtins directly to the relevant LLVM IR node.
2115
2116     // Scalarize our inputs.
2117     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2118     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2119     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2120
2121     // Decide which of the overflow intrinsics we are lowering to:
2122     llvm::Intrinsic::ID IntrinsicId;
2123     switch (BuiltinID) {
2124     default: llvm_unreachable("Unknown overflow builtin id.");
2125     case Builtin::BI__builtin_uadd_overflow:
2126     case Builtin::BI__builtin_uaddl_overflow:
2127     case Builtin::BI__builtin_uaddll_overflow:
2128       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2129       break;
2130     case Builtin::BI__builtin_usub_overflow:
2131     case Builtin::BI__builtin_usubl_overflow:
2132     case Builtin::BI__builtin_usubll_overflow:
2133       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2134       break;
2135     case Builtin::BI__builtin_umul_overflow:
2136     case Builtin::BI__builtin_umull_overflow:
2137     case Builtin::BI__builtin_umulll_overflow:
2138       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2139       break;
2140     case Builtin::BI__builtin_sadd_overflow:
2141     case Builtin::BI__builtin_saddl_overflow:
2142     case Builtin::BI__builtin_saddll_overflow:
2143       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2144       break;
2145     case Builtin::BI__builtin_ssub_overflow:
2146     case Builtin::BI__builtin_ssubl_overflow:
2147     case Builtin::BI__builtin_ssubll_overflow:
2148       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_smul_overflow:
2151     case Builtin::BI__builtin_smull_overflow:
2152     case Builtin::BI__builtin_smulll_overflow:
2153       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2154       break;
2155     }
2156
2157
2158     llvm::Value *Carry;
2159     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2160     Builder.CreateStore(Sum, SumOutPtr);
2161
2162     return RValue::get(Carry);
2163   }
2164   case Builtin::BI__builtin_addressof:
2165     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2166   case Builtin::BI__builtin_operator_new:
2167     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2168                                     E->getArg(0), false);
2169   case Builtin::BI__builtin_operator_delete:
2170     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2171                                     E->getArg(0), true);
2172   case Builtin::BI__noop:
2173     // __noop always evaluates to an integer literal zero.
2174     return RValue::get(ConstantInt::get(IntTy, 0));
2175   case Builtin::BI__builtin_call_with_static_chain: {
2176     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2177     const Expr *Chain = E->getArg(1);
2178     return EmitCall(Call->getCallee()->getType(),
2179                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2180                     EmitScalarExpr(Chain));
2181   }
2182   case Builtin::BI_InterlockedExchange8:
2183   case Builtin::BI_InterlockedExchange16:
2184   case Builtin::BI_InterlockedExchange:
2185   case Builtin::BI_InterlockedExchangePointer:
2186     return RValue::get(
2187         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2188   case Builtin::BI_InterlockedCompareExchangePointer: {
2189     llvm::Type *RTy;
2190     llvm::IntegerType *IntType =
2191       IntegerType::get(getLLVMContext(),
2192                        getContext().getTypeSize(E->getType()));
2193     llvm::Type *IntPtrType = IntType->getPointerTo();
2194
2195     llvm::Value *Destination =
2196       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2197
2198     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2199     RTy = Exchange->getType();
2200     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2201
2202     llvm::Value *Comparand =
2203       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2204
2205     auto Result =
2206         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2207                                     AtomicOrdering::SequentiallyConsistent,
2208                                     AtomicOrdering::SequentiallyConsistent);
2209     Result->setVolatile(true);
2210
2211     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2212                                                                          0),
2213                                               RTy));
2214   }
2215   case Builtin::BI_InterlockedCompareExchange8:
2216   case Builtin::BI_InterlockedCompareExchange16:
2217   case Builtin::BI_InterlockedCompareExchange:
2218   case Builtin::BI_InterlockedCompareExchange64: {
2219     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2220         EmitScalarExpr(E->getArg(0)),
2221         EmitScalarExpr(E->getArg(2)),
2222         EmitScalarExpr(E->getArg(1)),
2223         AtomicOrdering::SequentiallyConsistent,
2224         AtomicOrdering::SequentiallyConsistent);
2225     CXI->setVolatile(true);
2226     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2227   }
2228   case Builtin::BI_InterlockedIncrement16:
2229   case Builtin::BI_InterlockedIncrement:
2230     return RValue::get(
2231         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2232   case Builtin::BI_InterlockedDecrement16:
2233   case Builtin::BI_InterlockedDecrement:
2234     return RValue::get(
2235         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2236   case Builtin::BI_InterlockedAnd8:
2237   case Builtin::BI_InterlockedAnd16:
2238   case Builtin::BI_InterlockedAnd:
2239     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2240   case Builtin::BI_InterlockedExchangeAdd8:
2241   case Builtin::BI_InterlockedExchangeAdd16:
2242   case Builtin::BI_InterlockedExchangeAdd:
2243     return RValue::get(
2244         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2245   case Builtin::BI_InterlockedExchangeSub8:
2246   case Builtin::BI_InterlockedExchangeSub16:
2247   case Builtin::BI_InterlockedExchangeSub:
2248     return RValue::get(
2249         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2250   case Builtin::BI_InterlockedOr8:
2251   case Builtin::BI_InterlockedOr16:
2252   case Builtin::BI_InterlockedOr:
2253     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2254   case Builtin::BI_InterlockedXor8:
2255   case Builtin::BI_InterlockedXor16:
2256   case Builtin::BI_InterlockedXor:
2257     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2258   case Builtin::BI_interlockedbittestandset:
2259     return RValue::get(
2260         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2261
2262   case Builtin::BI__exception_code:
2263   case Builtin::BI_exception_code:
2264     return RValue::get(EmitSEHExceptionCode());
2265   case Builtin::BI__exception_info:
2266   case Builtin::BI_exception_info:
2267     return RValue::get(EmitSEHExceptionInfo());
2268   case Builtin::BI__abnormal_termination:
2269   case Builtin::BI_abnormal_termination:
2270     return RValue::get(EmitSEHAbnormalTermination());
2271   case Builtin::BI_setjmpex: {
2272     if (getTarget().getTriple().isOSMSVCRT()) {
2273       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2274       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2275           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2276           llvm::Attribute::ReturnsTwice);
2277       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2278           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2279           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2280       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2281           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2282       llvm::Value *FrameAddr =
2283           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2284                              ConstantInt::get(Int32Ty, 0));
2285       llvm::Value *Args[] = {Buf, FrameAddr};
2286       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2287       CS.setAttributes(ReturnsTwiceAttr);
2288       return RValue::get(CS.getInstruction());
2289     }
2290     break;
2291   }
2292   case Builtin::BI_setjmp: {
2293     if (getTarget().getTriple().isOSMSVCRT()) {
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2298           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2299       llvm::CallSite CS;
2300       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2301         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2302         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2303             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2304             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2305         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2306         llvm::Value *Args[] = {Buf, Count};
2307         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2308       } else {
2309         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2310         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2311             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2312             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2313         llvm::Value *FrameAddr =
2314             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2315                                ConstantInt::get(Int32Ty, 0));
2316         llvm::Value *Args[] = {Buf, FrameAddr};
2317         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2318       }
2319       CS.setAttributes(ReturnsTwiceAttr);
2320       return RValue::get(CS.getInstruction());
2321     }
2322     break;
2323   }
2324
2325   case Builtin::BI__GetExceptionInfo: {
2326     if (llvm::GlobalVariable *GV =
2327             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2328       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2329     break;
2330   }
2331
2332   case Builtin::BI__fastfail:
2333     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2334
2335   case Builtin::BI__builtin_coro_size: {
2336     auto &Context = getContext();
2337     auto SizeTy = Context.getSizeType();
2338     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2339     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2340     return RValue::get(Builder.CreateCall(F));
2341   }
2342
2343   case Builtin::BI__builtin_coro_id:
2344     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2345   case Builtin::BI__builtin_coro_promise:
2346     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2347   case Builtin::BI__builtin_coro_resume:
2348     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2349   case Builtin::BI__builtin_coro_frame:
2350     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2351   case Builtin::BI__builtin_coro_free:
2352     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2353   case Builtin::BI__builtin_coro_destroy:
2354     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2355   case Builtin::BI__builtin_coro_done:
2356     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2357   case Builtin::BI__builtin_coro_alloc:
2358     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2359   case Builtin::BI__builtin_coro_begin:
2360     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2361   case Builtin::BI__builtin_coro_end:
2362     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2363   case Builtin::BI__builtin_coro_suspend:
2364     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2365   case Builtin::BI__builtin_coro_param:
2366     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2367
2368   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2369   case Builtin::BIread_pipe:
2370   case Builtin::BIwrite_pipe: {
2371     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2372           *Arg1 = EmitScalarExpr(E->getArg(1));
2373     CGOpenCLRuntime OpenCLRT(CGM);
2374     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2375     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2376
2377     // Type of the generic packet parameter.
2378     unsigned GenericAS =
2379         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2380     llvm::Type *I8PTy = llvm::PointerType::get(
2381         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2382
2383     // Determine which overloaded version we should generate the call for.
2384     if (2U == E->getNumArgs()) {
2385       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2386                                                              : "__write_pipe_2";
2387       // Create a generic function type so the call works with any builtin or
2388       // user-defined type.
2389       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2390       llvm::FunctionType *FTy = llvm::FunctionType::get(
2391           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2393       return RValue::get(
2394           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                              {Arg0, BCast, PacketSize, PacketAlign}));
2396     } else {
2397       assert(4 == E->getNumArgs() &&
2398              "Illegal number of parameters to pipe function");
2399       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2400                                                              : "__write_pipe_4";
2401
2402       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2403                               Int32Ty, Int32Ty};
2404       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2405             *Arg3 = EmitScalarExpr(E->getArg(3));
2406       llvm::FunctionType *FTy = llvm::FunctionType::get(
2407           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2408       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2409       // We know the third argument is an integer type, but we may need to cast
2410       // it to i32.
2411       if (Arg2->getType() != Int32Ty)
2412         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2413       return RValue::get(Builder.CreateCall(
2414           CGM.CreateRuntimeFunction(FTy, Name),
2415           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2416     }
2417   }
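  // Editorial note: the two-argument OpenCL forms, e.g. read_pipe(p, &v), map
  // to the __read_pipe_2 / __write_pipe_2 runtime entry points with the packet
  // size and alignment appended, while the four-argument reservation forms map
  // to __read_pipe_4 / __write_pipe_4 as constructed above.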
2418   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2419   // functions
2420   case Builtin::BIreserve_read_pipe:
2421   case Builtin::BIreserve_write_pipe:
2422   case Builtin::BIwork_group_reserve_read_pipe:
2423   case Builtin::BIwork_group_reserve_write_pipe:
2424   case Builtin::BIsub_group_reserve_read_pipe:
2425   case Builtin::BIsub_group_reserve_write_pipe: {
2426     // Composing the mangled name for the function.
2427     const char *Name;
2428     if (BuiltinID == Builtin::BIreserve_read_pipe)
2429       Name = "__reserve_read_pipe";
2430     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2431       Name = "__reserve_write_pipe";
2432     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2433       Name = "__work_group_reserve_read_pipe";
2434     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2435       Name = "__work_group_reserve_write_pipe";
2436     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2437       Name = "__sub_group_reserve_read_pipe";
2438     else
2439       Name = "__sub_group_reserve_write_pipe";
2440
2441     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2442           *Arg1 = EmitScalarExpr(E->getArg(1));
2443     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2444     CGOpenCLRuntime OpenCLRT(CGM);
2445     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2446     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2447
2448     // Building the generic function prototype.
2449     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2450     llvm::FunctionType *FTy = llvm::FunctionType::get(
2451         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2452     // We know the second argument is an integer type, but we may need to cast
2453     // it to i32.
2454     if (Arg1->getType() != Int32Ty)
2455       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2456     return RValue::get(
2457         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2458                            {Arg0, Arg1, PacketSize, PacketAlign}));
2459   }
2460   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2461   // functions
2462   case Builtin::BIcommit_read_pipe:
2463   case Builtin::BIcommit_write_pipe:
2464   case Builtin::BIwork_group_commit_read_pipe:
2465   case Builtin::BIwork_group_commit_write_pipe:
2466   case Builtin::BIsub_group_commit_read_pipe:
2467   case Builtin::BIsub_group_commit_write_pipe: {
2468     const char *Name;
2469     if (BuiltinID == Builtin::BIcommit_read_pipe)
2470       Name = "__commit_read_pipe";
2471     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2472       Name = "__commit_write_pipe";
2473     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2474       Name = "__work_group_commit_read_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2476       Name = "__work_group_commit_write_pipe";
2477     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2478       Name = "__sub_group_commit_read_pipe";
2479     else
2480       Name = "__sub_group_commit_write_pipe";
2481
2482     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2483           *Arg1 = EmitScalarExpr(E->getArg(1));
2484     CGOpenCLRuntime OpenCLRT(CGM);
2485     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2486     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2487
2488     // Building the generic function prototype.
2489     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2490     llvm::FunctionType *FTy =
2491         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2492                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2493
2494     return RValue::get(
2495         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2496                            {Arg0, Arg1, PacketSize, PacketAlign}));
2497   }
2498   // OpenCL v2.0 s6.13.16.4 - Built-in pipe query functions
2499   case Builtin::BIget_pipe_num_packets:
2500   case Builtin::BIget_pipe_max_packets: {
2501     const char *Name;
2502     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2503       Name = "__get_pipe_num_packets";
2504     else
2505       Name = "__get_pipe_max_packets";
2506
2507     // Building the generic function prototype.
2508     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2509     CGOpenCLRuntime OpenCLRT(CGM);
2510     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2511     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2512     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2513     llvm::FunctionType *FTy = llvm::FunctionType::get(
2514         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2515
2516     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2517                                           {Arg0, PacketSize, PacketAlign}));
2518   }
2519
2520   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
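       // Each of these is lowered to a call to a runtime helper named "__"
       // followed by the builtin's name (e.g. to_global(p) becomes a call to
       // __to_global), taking an i8* in the generic address space and returning
       // an i8* in the builtin's result address space; pointer/address-space
       // casts are inserted around the call as needed.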
2521   case Builtin::BIto_global:
2522   case Builtin::BIto_local:
2523   case Builtin::BIto_private: {
2524     auto Arg0 = EmitScalarExpr(E->getArg(0));
2525     auto NewArgT = llvm::PointerType::get(Int8Ty,
2526       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2527     auto NewRetT = llvm::PointerType::get(Int8Ty,
2528       CGM.getContext().getTargetAddressSpace(
2529         E->getType()->getPointeeType().getAddressSpace()));
2530     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2531     llvm::Value *NewArg;
2532     if (Arg0->getType()->getPointerAddressSpace() !=
2533         NewArgT->getPointerAddressSpace())
2534       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2535     else
2536       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2537     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2538     auto NewCall =
2539         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2540     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2541       ConvertType(E->getType())));
2542   }
2543
2544   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2545   // It contains four different overload formats specified in Table 6.13.17.1.
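       // Depending on the overload, the call is lowered below to one of the
       // runtime helpers __enqueue_kernel_basic, __enqueue_kernel_vaargs,
       // __enqueue_kernel_basic_events, or __enqueue_kernel_events_vaargs.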
2546   case Builtin::BIenqueue_kernel: {
2547     StringRef Name; // Generated function call name
2548     unsigned NumArgs = E->getNumArgs();
2549
2550     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2551     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2552         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2553
2554     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2555     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2556     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2557     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2558     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2559
2560     if (NumArgs == 4) {
2561       // The most basic form of the call with parameters:
2562       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2563       Name = "__enqueue_kernel_basic";
2564       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2565       llvm::FunctionType *FTy = llvm::FunctionType::get(
2566           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2567
2568       llvm::Value *Block = Builder.CreatePointerCast(
2569           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2570
2571       AttrBuilder B;
2572       B.addAttribute(Attribute::ByVal);
2573       llvm::AttributeList ByValAttrSet =
2574           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2575
2576       auto RTCall =
2577           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2578                              {Queue, Flags, Range, Block});
2579       RTCall->setAttributes(ByValAttrSet);
2580       return RValue::get(RTCall);
2581     }
2582     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2583
2584     // Could have events and/or vaargs.
2585     if (E->getArg(3)->getType()->isBlockPointerType()) {
2586       // No events passed, but has variadic arguments.
2587       Name = "__enqueue_kernel_vaargs";
2588       llvm::Value *Block = Builder.CreatePointerCast(
2589           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2590       // Create a vector of the arguments, as well as a constant value to
2591       // express to the runtime the number of variadic arguments.
2592       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2593                                          ConstantInt::get(IntTy, NumArgs - 4)};
2594       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2595                                           GenericVoidPtrTy, IntTy};
2596
2597       // Each of the following arguments specifies the size of the corresponding
2598       // argument passed to the enqueued block.
2599       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2600         Args.push_back(
2601             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2602
2603       llvm::FunctionType *FTy = llvm::FunctionType::get(
2604           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2605       return RValue::get(
2606           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2607                              llvm::ArrayRef<llvm::Value *>(Args)));
2608     }
2609     // At this point any remaining call form has event arguments.
2610     if (NumArgs >= 7) {
2611       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2612       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2613           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2614
2615       llvm::Value *NumEvents =
2616           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2617       llvm::Value *EventList =
2618           E->getArg(4)->getType()->isArrayType()
2619               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2620               : EmitScalarExpr(E->getArg(4));
2621       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2622       // Convert to generic address space.
2623       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2624       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2625       llvm::Value *Block = Builder.CreatePointerCast(
2626           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2627
2628       std::vector<llvm::Type *> ArgTys = {
2629           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2630           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2631
2632       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2633                                          EventList, ClkEvent, Block};
2634
2635       if (NumArgs == 7) {
2636         // Has events but no variadics.
2637         Name = "__enqueue_kernel_basic_events";
2638         llvm::FunctionType *FTy = llvm::FunctionType::get(
2639             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2640         return RValue::get(
2641             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2642                                llvm::ArrayRef<llvm::Value *>(Args)));
2643       }
2644       // Has event info and variadics
2645       // Pass the number of variadics to the runtime function too.
2646       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2647       ArgTys.push_back(Int32Ty);
2648       Name = "__enqueue_kernel_events_vaargs";
2649
2650       // Each of the following arguments specifies the size of the corresponding
2651       // argument passed to the enqueued block.
2652       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2653         Args.push_back(
2654             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2655
2656       llvm::FunctionType *FTy = llvm::FunctionType::get(
2657           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2658       return RValue::get(
2659           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2660                              llvm::ArrayRef<llvm::Value *>(Args)));
2661     }
2662   }
2663   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2664   // parameter.
2665   case Builtin::BIget_kernel_work_group_size: {
2666     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2667         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2668     Value *Arg = EmitScalarExpr(E->getArg(0));
2669     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2670     return RValue::get(Builder.CreateCall(
2671         CGM.CreateRuntimeFunction(
2672             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2673             "__get_kernel_work_group_size_impl"),
2674         Arg));
2675   }
2676   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2677     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2678         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2679     Value *Arg = EmitScalarExpr(E->getArg(0));
2680     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2681     return RValue::get(Builder.CreateCall(
2682         CGM.CreateRuntimeFunction(
2683             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2684             "__get_kernel_preferred_work_group_multiple_impl"),
2685         Arg));
2686   }
2687   case Builtin::BIprintf:
2688     if (getTarget().getTriple().isNVPTX())
2689       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2690     break;
2691   case Builtin::BI__builtin_canonicalize:
2692   case Builtin::BI__builtin_canonicalizef:
2693   case Builtin::BI__builtin_canonicalizel:
2694     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2695
2696   case Builtin::BI__builtin_thread_pointer: {
2697     if (!getContext().getTargetInfo().isTLSSupported())
2698       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2699     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2700     break;
2701   }
2702   case Builtin::BI__builtin_os_log_format: {
2703     assert(E->getNumArgs() >= 2 &&
2704            "__builtin_os_log_format takes at least 2 arguments");
2705     analyze_os_log::OSLogBufferLayout Layout;
2706     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2707     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2708     // Ignore argument 1, the format string. It is not currently used.
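         // The buffer written below starts with a summary byte and a byte giving
         // the number of arguments, followed, for each argument, by a descriptor
         // byte, a size byte, and the argument's data.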
2709     CharUnits Offset;
2710     Builder.CreateStore(
2711         Builder.getInt8(Layout.getSummaryByte()),
2712         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2713     Builder.CreateStore(
2714         Builder.getInt8(Layout.getNumArgsByte()),
2715         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2716
2717     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2718     for (const auto &Item : Layout.Items) {
2719       Builder.CreateStore(
2720           Builder.getInt8(Item.getDescriptorByte()),
2721           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2722       Builder.CreateStore(
2723           Builder.getInt8(Item.getSizeByte()),
2724           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2725       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2726       if (const Expr *TheExpr = Item.getExpr()) {
2727         Addr = Builder.CreateElementBitCast(
2728             Addr, ConvertTypeForMem(TheExpr->getType()));
2729         // Check if this is a retainable type.
2730         if (TheExpr->getType()->isObjCRetainableType()) {
2731           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2732                  "Only a scalar can be an ObjC retainable type");
2733           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2734           RValue RV = RValue::get(SV);
2735           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2736           EmitStoreThroughLValue(RV, LV);
2737           // Check if the object is constant; if not, save it in
2738           // RetainableOperands.
2739           if (!isa<Constant>(SV))
2740             RetainableOperands.push_back(SV);
2741         } else {
2742           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2743         }
2744       } else {
2745         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2746         Builder.CreateStore(
2747             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2748       }
2749       Offset += Item.size();
2750     }
2751
2752     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2753     // cleanup will cause the use to appear after the final log call, keeping
2754     // the object valid while it's held in the log buffer.  Note that if there's
2755     // a release cleanup on the object, it will already be active; since
2756     // cleanups are emitted in reverse order, the use will occur before the
2757     // object is released.
2758     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2759         CGM.getCodeGenOpts().OptimizationLevel != 0)
2760       for (llvm::Value *object : RetainableOperands)
2761         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2762
2763     return RValue::get(BufAddr.getPointer());
2764   }
2765
2766   case Builtin::BI__builtin_os_log_format_buffer_size: {
2767     analyze_os_log::OSLogBufferLayout Layout;
2768     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2769     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2770                                         Layout.size().getQuantity()));
2771   }
2772   }
2773
2774   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2775   // the call using the normal call path, but using the unmangled
2776   // version of the function name.
2777   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2778     return emitLibraryCall(*this, FD, E,
2779                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2780
2781   // If this is a predefined lib function (e.g. malloc), emit the call
2782   // using exactly the normal call path.
2783   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2784     return emitLibraryCall(*this, FD, E,
2785                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2786
2787   // Check that a call to a target specific builtin has the correct target
2788   // features.
2789   // This is placed down here so that non-target-specific builtins avoid the
2790   // check; if generic builtins ever start to require generic target features,
2791   // then we can move this up to the beginning of the function.
2792   checkTargetFeatures(E, FD);
2793
2794   // See if we have a target specific intrinsic.
2795   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2796   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2797   StringRef Prefix =
2798       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2799   if (!Prefix.empty()) {
2800     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2801     // NOTE: we don't need to perform a compatibility flag check here since the
2802     // MS builtins are declared in Builtins*.def via LANGBUILTIN with
2803     // ALL_MS_LANGUAGES, so they have already been filtered out earlier.
2804     if (IntrinsicID == Intrinsic::not_intrinsic)
2805       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2806   }
2807
2808   if (IntrinsicID != Intrinsic::not_intrinsic) {
2809     SmallVector<Value*, 16> Args;
2810
2811     // Find out if any arguments are required to be integer constant
2812     // expressions.
2813     unsigned ICEArguments = 0;
2814     ASTContext::GetBuiltinTypeError Error;
2815     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2816     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2817
2818     Function *F = CGM.getIntrinsic(IntrinsicID);
2819     llvm::FunctionType *FTy = F->getFunctionType();
2820
2821     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2822       Value *ArgValue;
2823       // If this is a normal argument, just emit it as a scalar.
2824       if ((ICEArguments & (1 << i)) == 0) {
2825         ArgValue = EmitScalarExpr(E->getArg(i));
2826       } else {
2827         // If this is required to be a constant, constant fold it so that we
2828         // know that the generated intrinsic gets a ConstantInt.
2829         llvm::APSInt Result;
2830         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2831         assert(IsConst && "Constant arg isn't actually constant?");
2832         (void)IsConst;
2833         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2834       }
2835
2836       // If the intrinsic arg type is different from the builtin arg type
2837       // we need to do a bit cast.
2838       llvm::Type *PTy = FTy->getParamType(i);
2839       if (PTy != ArgValue->getType()) {
2840         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2841                "Must be able to losslessly bit cast to param");
2842         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2843       }
2844
2845       Args.push_back(ArgValue);
2846     }
2847
2848     Value *V = Builder.CreateCall(F, Args);
2849     QualType BuiltinRetType = E->getType();
2850
2851     llvm::Type *RetTy = VoidTy;
2852     if (!BuiltinRetType->isVoidType())
2853       RetTy = ConvertType(BuiltinRetType);
2854
2855     if (RetTy != V->getType()) {
2856       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2857              "Must be able to losslessly bit cast result type");
2858       V = Builder.CreateBitCast(V, RetTy);
2859     }
2860
2861     return RValue::get(V);
2862   }
2863
2864   // See if we have a target specific builtin that needs to be lowered.
2865   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2866     return RValue::get(V);
2867
2868   ErrorUnsupported(E, "builtin function");
2869
2870   // Unknown builtin; for now just report it as unsupported and return undef.
2871   return GetUndefRValue(E->getType());
2872 }
2873
2874 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2875                                         unsigned BuiltinID, const CallExpr *E,
2876                                         llvm::Triple::ArchType Arch) {
2877   switch (Arch) {
2878   case llvm::Triple::arm:
2879   case llvm::Triple::armeb:
2880   case llvm::Triple::thumb:
2881   case llvm::Triple::thumbeb:
2882     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2883   case llvm::Triple::aarch64:
2884   case llvm::Triple::aarch64_be:
2885     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2886   case llvm::Triple::x86:
2887   case llvm::Triple::x86_64:
2888     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2889   case llvm::Triple::ppc:
2890   case llvm::Triple::ppc64:
2891   case llvm::Triple::ppc64le:
2892     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2893   case llvm::Triple::r600:
2894   case llvm::Triple::amdgcn:
2895     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2896   case llvm::Triple::systemz:
2897     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2898   case llvm::Triple::nvptx:
2899   case llvm::Triple::nvptx64:
2900     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2901   case llvm::Triple::wasm32:
2902   case llvm::Triple::wasm64:
2903     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2904   default:
2905     return nullptr;
2906   }
2907 }
2908
2909 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2910                                               const CallExpr *E) {
2911   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2912     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2913     return EmitTargetArchBuiltinExpr(
2914         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2915         getContext().getAuxTargetInfo()->getTriple().getArch());
2916   }
2917
2918   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2919                                    getTarget().getTriple().getArch());
2920 }
2921
2922 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2923                                      NeonTypeFlags TypeFlags,
2924                                      bool V1Ty=false) {
2925   int IsQuad = TypeFlags.isQuad();
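       // A "quad" (Q-register) type has twice as many lanes as the corresponding
       // D-register type, e.g. Int8 yields <8 x i8>, or <16 x i8> when IsQuad is
       // set.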
2926   switch (TypeFlags.getEltType()) {
2927   case NeonTypeFlags::Int8:
2928   case NeonTypeFlags::Poly8:
2929     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2930   case NeonTypeFlags::Int16:
2931   case NeonTypeFlags::Poly16:
2932   case NeonTypeFlags::Float16:
2933     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2934   case NeonTypeFlags::Int32:
2935     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2936   case NeonTypeFlags::Int64:
2937   case NeonTypeFlags::Poly64:
2938     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2939   case NeonTypeFlags::Poly128:
2940     // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
2941     // a lot of the i128 and f128 API is missing, so we use v16i8 to
2942     // represent poly128 and rely on pattern matching.
2943     return llvm::VectorType::get(CGF->Int8Ty, 16);
2944   case NeonTypeFlags::Float32:
2945     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2946   case NeonTypeFlags::Float64:
2947     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2948   }
2949   llvm_unreachable("Unknown vector element type!");
2950 }
2951
2952 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2953                                           NeonTypeFlags IntTypeFlags) {
2954   int IsQuad = IntTypeFlags.isQuad();
2955   switch (IntTypeFlags.getEltType()) {
2956   case NeonTypeFlags::Int32:
2957     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2958   case NeonTypeFlags::Int64:
2959     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2960   default:
2961     llvm_unreachable("Type can't be converted to floating-point!");
2962   }
2963 }
2964
2965 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2966   unsigned nElts = V->getType()->getVectorNumElements();
2967   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2968   return Builder.CreateShuffleVector(V, V, SV, "lane");
2969 }
2970
2971 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2972                                      const char *name,
2973                                      unsigned shift, bool rightshift) {
2974   unsigned j = 0;
2975   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2976        ai != ae; ++ai, ++j)
2977     if (shift > 0 && shift == j)
2978       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2979     else
2980       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2981
2982   return Builder.CreateCall(F, Ops, name);
2983 }
2984
2985 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2986                                             bool neg) {
2987   int SV = cast<ConstantInt>(V)->getSExtValue();
2988   return ConstantInt::get(Ty, neg ? -SV : SV);
2989 }
2990
2991 // \brief Right-shift a vector by a constant.
2992 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2993                                           llvm::Type *Ty, bool usgn,
2994                                           const char *name) {
2995   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2996
2997   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2998   int EltSize = VTy->getScalarSizeInBits();
2999
3000   Vec = Builder.CreateBitCast(Vec, Ty);
3001
3002   // lshr/ashr are undefined when the shift amount is equal to the vector
3003   // element size.
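       // For example, an i8 lane shifted right by 8: an unsigned shift yields 0,
       // while a signed shift is treated as a shift by 7 instead.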
3004   if (ShiftAmt == EltSize) {
3005     if (usgn) {
3006       // Right-shifting an unsigned value by its size yields 0.
3007       return llvm::ConstantAggregateZero::get(VTy);
3008     } else {
3009       // Right-shifting a signed value by its size is equivalent
3010       // to a shift of size-1.
3011       --ShiftAmt;
3012       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3013     }
3014   }
3015
3016   Shift = EmitNeonShiftVector(Shift, Ty, false);
3017   if (usgn)
3018     return Builder.CreateLShr(Vec, Shift, name);
3019   else
3020     return Builder.CreateAShr(Vec, Shift, name);
3021 }
3022
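     // Flags used in the TypeModifier field of the NeonIntrinsicInfo entries
     // below; they describe how the overloaded types and operands of the
     // corresponding LLVM intrinsic are derived from the builtin call.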
3023 enum {
3024   AddRetType = (1 << 0),
3025   Add1ArgType = (1 << 1),
3026   Add2ArgTypes = (1 << 2),
3027
3028   VectorizeRetType = (1 << 3),
3029   VectorizeArgTypes = (1 << 4),
3030
3031   InventFloatType = (1 << 5),
3032   UnsignedAlts = (1 << 6),
3033
3034   Use64BitVectors = (1 << 7),
3035   Use128BitVectors = (1 << 8),
3036
3037   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3038   VectorRet = AddRetType | VectorizeRetType,
3039   VectorRetGetArgs01 =
3040       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3041   FpCmpzModifiers =
3042       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3043 };
3044
3045 namespace {
3046 struct NeonIntrinsicInfo {
3047   const char *NameHint;
3048   unsigned BuiltinID;
3049   unsigned LLVMIntrinsic;
3050   unsigned AltLLVMIntrinsic;
3051   unsigned TypeModifier;
3052
3053   bool operator<(unsigned RHSBuiltinID) const {
3054     return BuiltinID < RHSBuiltinID;
3055   }
3056   bool operator<(const NeonIntrinsicInfo &TE) const {
3057     return BuiltinID < TE.BuiltinID;
3058   }
3059 };
3060 } // end anonymous namespace
3061
3062 #define NEONMAP0(NameBase) \
3063   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3064
3065 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3066   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3067       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3068
3069 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3070   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3071       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3072       TypeModifier }
3073
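     // For reference, NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType) expands to
     //   { "vcls_v", NEON::BI__builtin_neon_vcls_v, Intrinsic::arm_neon_vcls, 0,
     //     Add1ArgType },
     // i.e. each entry pairs a NEON builtin with its LLVM intrinsic (plus an
     // alternate intrinsic for NEONMAP2) and a set of type-modifier flags.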
3074 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
3075   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3076   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3077   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3078   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3079   NEONMAP0(vaddhn_v),
3080   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3081   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3082   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3083   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3084   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3085   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3086   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3087   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3088   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3089   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3090   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3091   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3092   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3093   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3094   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3095   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3096   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3097   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3098   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3099   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3100   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3101   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3102   NEONMAP0(vcvt_f32_v),
3103   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3104   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3105   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3106   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3107   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3108   NEONMAP0(vcvt_s32_v),
3109   NEONMAP0(vcvt_s64_v),
3110   NEONMAP0(vcvt_u32_v),
3111   NEONMAP0(vcvt_u64_v),
3112   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3113   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3114   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3115   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3116   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3117   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3118   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3119   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3120   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3121   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3122   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3123   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3124   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3125   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3126   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3127   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3128   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3129   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3130   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3131   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3132   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3133   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3134   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3135   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3136   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3137   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3138   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3139   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3140   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3141   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3142   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3143   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3144   NEONMAP0(vcvtq_f32_v),
3145   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3146   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3147   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3148   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3149   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3150   NEONMAP0(vcvtq_s32_v),
3151   NEONMAP0(vcvtq_s64_v),
3152   NEONMAP0(vcvtq_u32_v),
3153   NEONMAP0(vcvtq_u64_v),
3154   NEONMAP0(vext_v),
3155   NEONMAP0(vextq_v),
3156   NEONMAP0(vfma_v),
3157   NEONMAP0(vfmaq_v),
3158   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3159   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3160   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3161   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3162   NEONMAP0(vld1_dup_v),
3163   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3164   NEONMAP0(vld1q_dup_v),
3165   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3166   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3167   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3168   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3169   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3170   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3171   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3172   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3173   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3174   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3175   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3176   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3177   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3178   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3179   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3180   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3181   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3182   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3183   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3184   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3185   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3186   NEONMAP0(vmovl_v),
3187   NEONMAP0(vmovn_v),
3188   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3189   NEONMAP0(vmull_v),
3190   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3191   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3192   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3193   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3194   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3195   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3196   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3197   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3198   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3199   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3200   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3201   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3202   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3203   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3204   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3205   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3206   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3207   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3208   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3209   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3210   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3211   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3212   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3213   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3214   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3215   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3216   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3217   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3218   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3219   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3220   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3221   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3222   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3223   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3224   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3225   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3226   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3227   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3228   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3229   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3230   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3231   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3232   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3233   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3234   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3235   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3236   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3237   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3238   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3239   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3240   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3241   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3242   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3243   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3244   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3245   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3246   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3247   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3248   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3249   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3250   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3251   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3252   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3253   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3254   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3255   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3256   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3257   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3258   NEONMAP0(vshl_n_v),
3259   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3260   NEONMAP0(vshll_n_v),
3261   NEONMAP0(vshlq_n_v),
3262   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3263   NEONMAP0(vshr_n_v),
3264   NEONMAP0(vshrn_n_v),
3265   NEONMAP0(vshrq_n_v),
3266   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3267   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3268   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3269   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3270   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3271   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3272   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3273   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3274   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3275   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3276   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3277   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3278   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3279   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3280   NEONMAP0(vsubhn_v),
3281   NEONMAP0(vtrn_v),
3282   NEONMAP0(vtrnq_v),
3283   NEONMAP0(vtst_v),
3284   NEONMAP0(vtstq_v),
3285   NEONMAP0(vuzp_v),
3286   NEONMAP0(vuzpq_v),
3287   NEONMAP0(vzip_v),
3288   NEONMAP0(vzipq_v)
3289 };
3290
3291 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3292   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3293   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3294   NEONMAP0(vaddhn_v),
3295   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3296   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3297   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3298   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3299   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3300   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3301   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3302   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3303   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3304   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3305   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3306   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3307   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3308   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3309   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3310   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3311   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3312   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3313   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3314   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3315   NEONMAP0(vcvt_f32_v),
3316   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3317   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3318   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3319   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3320   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3321   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3322   NEONMAP0(vcvtq_f32_v),
3323   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3324   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3325   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3326   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3327   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3328   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3329   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3330   NEONMAP0(vext_v),
3331   NEONMAP0(vextq_v),
3332   NEONMAP0(vfma_v),
3333   NEONMAP0(vfmaq_v),
3334   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3335   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3336   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3337   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3338   NEONMAP0(vmovl_v),
3339   NEONMAP0(vmovn_v),
3340   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3341   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3342   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3343   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3344   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3345   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3346   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3347   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3348   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3349   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3350   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3351   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3352   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3353   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3354   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3355   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3356   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3357   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3358   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3359   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3360   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3361   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3362   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3363   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3364   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3365   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3366   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3367   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3368   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3369   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3370   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3371   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3372   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3373   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3374   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3375   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3376   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3377   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3378   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3379   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3380   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3381   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3382   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3383   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3384   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3385   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3386   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3387   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3388   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3389   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3390   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3391   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3392   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3393   NEONMAP0(vshl_n_v),
3394   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3395   NEONMAP0(vshll_n_v),
3396   NEONMAP0(vshlq_n_v),
3397   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3398   NEONMAP0(vshr_n_v),
3399   NEONMAP0(vshrn_n_v),
3400   NEONMAP0(vshrq_n_v),
3401   NEONMAP0(vsubhn_v),
3402   NEONMAP0(vtst_v),
3403   NEONMAP0(vtstq_v),
3404 };
3405
3406 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3407   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3408   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3409   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3410   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3411   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3412   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3413   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3414   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3415   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3416   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3417   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3418   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3419   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3420   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3421   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3422   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3423   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3424   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3425   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3426   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3427   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3428   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3429   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3430   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3431   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3432   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3433   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3434   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3435   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3436   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3437   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3438   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3439   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3440   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3441   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3442   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3443   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3444   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3445   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3446   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3447   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3448   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3449   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3450   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3451   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3452   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3453   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3454   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3455   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3456   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3457   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3458   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3459   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3460   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3461   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3462   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3463   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3464   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3465   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3466   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3467   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3468   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3469   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3470   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3471   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3472   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3473   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3474   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3475   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3476   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3477   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3478   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3479   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3480   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3481   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3482   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3483   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3484   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3485   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3486   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3487   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3488   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3489   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3490   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3491   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3492   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3493   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3494   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3495   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3496   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3497   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3498   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3499   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3500   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3501   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3502   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3503   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3504   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3505   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3506   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3507   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3508   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3509   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3510   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3511   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3512   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3513   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3514   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3515   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3516   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3517   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3518   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3519   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3520   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3522   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3523   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3524   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3525   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3526   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3527   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3528   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3529   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3530   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3531   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3532   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3533   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3534   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3535   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3536   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3537   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3538   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3539   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3540   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3541   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3542   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3543   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3544   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3545   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3546   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3547   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3548   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3549   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3550   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3551   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3552   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3553   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3554   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3555   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3556   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3557   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3558   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3559   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3560   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3561   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3562   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3563   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3564   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3565   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3566   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3567   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3568   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3569   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3570   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3571   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3572   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3573   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3574   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3575   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3576   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3577   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3578   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3579   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3580   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3581   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3582   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3583   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3584   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3585   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3586   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3587   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3588   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3589   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3590   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3591   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3592   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3593   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3594   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3595   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3596   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3597   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3598   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3599 };
3600
3601 #undef NEONMAP0
3602 #undef NEONMAP1
3603 #undef NEONMAP2
3604
3605 static bool NEONSIMDIntrinsicsProvenSorted = false;
3606
3607 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3608 static bool AArch64SISDIntrinsicsProvenSorted = false;
3609
3610
3611 static const NeonIntrinsicInfo *
3612 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3613                        unsigned BuiltinID, bool &MapProvenSorted) {
3614
3615 #ifndef NDEBUG
3616   if (!MapProvenSorted) {
3617     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3618     MapProvenSorted = true;
3619   }
3620 #endif
3621
3622   const NeonIntrinsicInfo *Builtin =
3623       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3624
3625   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3626     return Builtin;
3627
3628   return nullptr;
3629 }
3630
3631 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3632                                                    unsigned Modifier,
3633                                                    llvm::Type *ArgType,
3634                                                    const CallExpr *E) {
3635   int VectorSize = 0;
3636   if (Modifier & Use64BitVectors)
3637     VectorSize = 64;
3638   else if (Modifier & Use128BitVectors)
3639     VectorSize = 128;
3640
3641   // Return type.
3642   SmallVector<llvm::Type *, 3> Tys;
3643   if (Modifier & AddRetType) {
3644     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3645     if (Modifier & VectorizeRetType)
3646       Ty = llvm::VectorType::get(
3647           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3648
3649     Tys.push_back(Ty);
3650   }
3651
3652   // Arguments.
3653   if (Modifier & VectorizeArgTypes) {
3654     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3655     ArgType = llvm::VectorType::get(ArgType, Elts);
3656   }
3657
3658   if (Modifier & (Add1ArgType | Add2ArgTypes))
3659     Tys.push_back(ArgType);
3660
3661   if (Modifier & Add2ArgTypes)
3662     Tys.push_back(ArgType);
3663
3664   if (Modifier & InventFloatType)
3665     Tys.push_back(FloatTy);
3666
3667   return CGM.getIntrinsic(IntrinsicID, Tys);
3668 }
3669
3670 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3671                                             const NeonIntrinsicInfo &SISDInfo,
3672                                             SmallVectorImpl<Value *> &Ops,
3673                                             const CallExpr *E) {
3674   unsigned BuiltinID = SISDInfo.BuiltinID;
3675   unsigned int Int = SISDInfo.LLVMIntrinsic;
3676   unsigned Modifier = SISDInfo.TypeModifier;
3677   const char *s = SISDInfo.NameHint;
3678
3679   switch (BuiltinID) {
3680   case NEON::BI__builtin_neon_vcled_s64:
3681   case NEON::BI__builtin_neon_vcled_u64:
3682   case NEON::BI__builtin_neon_vcles_f32:
3683   case NEON::BI__builtin_neon_vcled_f64:
3684   case NEON::BI__builtin_neon_vcltd_s64:
3685   case NEON::BI__builtin_neon_vcltd_u64:
3686   case NEON::BI__builtin_neon_vclts_f32:
3687   case NEON::BI__builtin_neon_vcltd_f64:
3688   case NEON::BI__builtin_neon_vcales_f32:
3689   case NEON::BI__builtin_neon_vcaled_f64:
3690   case NEON::BI__builtin_neon_vcalts_f32:
3691   case NEON::BI__builtin_neon_vcaltd_f64:
3692     // Only one direction of comparisons actually exists; cmle is actually a cmge
3693     // with swapped operands. The table gives us the right intrinsic, but we
3694     // still need to do the swap.
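         // For example, vcled_s64(a, b) is emitted, roughly, as the cmge-style
         // intrinsic from the table called as cmge(b, a); the swap below is what
         // makes that happen.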
3695     std::swap(Ops[0], Ops[1]);
3696     break;
3697   }
3698
3699   assert(Int && "Generic code assumes a valid intrinsic");
3700
3701   // Determine the type(s) of this overloaded AArch64 intrinsic.
3702   const Expr *Arg = E->getArg(0);
3703   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3704   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3705
3706   int j = 0;
3707   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3708   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3709        ai != ae; ++ai, ++j) {
3710     llvm::Type *ArgTy = ai->getType();
3711     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3712              ArgTy->getPrimitiveSizeInBits())
3713       continue;
3714
3715     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3716     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3717     // it before inserting.
3718     Ops[j] =
3719         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3720     Ops[j] =
3721         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3722   }
3723
3724   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3725   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3726   if (ResultType->getPrimitiveSizeInBits() <
3727       Result->getType()->getPrimitiveSizeInBits())
3728     return CGF.Builder.CreateExtractElement(Result, C0);
3729
3730   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3731 }
3732
3733 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3734     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3735     const char *NameHint, unsigned Modifier, const CallExpr *E,
3736     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3737   // Get the last argument, which specifies the vector type.
3738   llvm::APSInt NeonTypeConst;
3739   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3740   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3741     return nullptr;
3742
3743   // Determine the type of this overloaded NEON intrinsic.
3744   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3745   bool Usgn = Type.isUnsigned();
3746   bool Quad = Type.isQuad();
3747
3748   llvm::VectorType *VTy = GetNeonType(this, Type);
3749   llvm::Type *Ty = VTy;
3750   if (!Ty)
3751     return nullptr;
3752
3753   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3754     return Builder.getInt32(addr.getAlignment().getQuantity());
3755   };
3756
3757   unsigned Int = LLVMIntrinsic;
3758   if ((Modifier & UnsignedAlts) && !Usgn)
3759     Int = AltLLVMIntrinsic;
3760
3761   switch (BuiltinID) {
3762   default: break;
3763   case NEON::BI__builtin_neon_vabs_v:
3764   case NEON::BI__builtin_neon_vabsq_v:
3765     if (VTy->getElementType()->isFloatingPointTy())
3766       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3767     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3768   case NEON::BI__builtin_neon_vaddhn_v: {
3769     llvm::VectorType *SrcTy =
3770         llvm::VectorType::getExtendedElementVectorType(VTy);
3771
3772     // %sum = add <4 x i32> %lhs, %rhs
3773     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3774     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3775     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3776
3777     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3778     Constant *ShiftAmt =
3779         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3780     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3781
3782     // %res = trunc <4 x i32> %high to <4 x i16>
3783     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3784   }
3785   case NEON::BI__builtin_neon_vcale_v:
3786   case NEON::BI__builtin_neon_vcaleq_v:
3787   case NEON::BI__builtin_neon_vcalt_v:
3788   case NEON::BI__builtin_neon_vcaltq_v:
3789     std::swap(Ops[0], Ops[1]);
3790   case NEON::BI__builtin_neon_vcage_v:
3791   case NEON::BI__builtin_neon_vcageq_v:
3792   case NEON::BI__builtin_neon_vcagt_v:
3793   case NEON::BI__builtin_neon_vcagtq_v: {
3794     llvm::Type *VecFlt = llvm::VectorType::get(
3795         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3796         VTy->getNumElements());
3797     llvm::Type *Tys[] = { VTy, VecFlt };
3798     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3799     return EmitNeonCall(F, Ops, NameHint);
3800   }
3801   case NEON::BI__builtin_neon_vclz_v:
3802   case NEON::BI__builtin_neon_vclzq_v:
3803     // We generate a target-independent intrinsic, which needs a second argument
3804     // indicating whether or not clz of zero is undefined; on ARM it isn't.
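         // For example, on ARM vclz_s32(x) becomes, roughly:
         //   %vclz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)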
3805     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3806     break;
3807   case NEON::BI__builtin_neon_vcvt_f32_v:
3808   case NEON::BI__builtin_neon_vcvtq_f32_v:
3809     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3810     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3811     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3812                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3813   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3814   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3815   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3816   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3817     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3818     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3819     Function *F = CGM.getIntrinsic(Int, Tys);
3820     return EmitNeonCall(F, Ops, "vcvt_n");
3821   }
3822   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3823   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3824   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3825   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3826   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3827   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3828   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3829   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3830     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3831     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3832     return EmitNeonCall(F, Ops, "vcvt_n");
3833   }
3834   case NEON::BI__builtin_neon_vcvt_s32_v:
3835   case NEON::BI__builtin_neon_vcvt_u32_v:
3836   case NEON::BI__builtin_neon_vcvt_s64_v:
3837   case NEON::BI__builtin_neon_vcvt_u64_v:
3838   case NEON::BI__builtin_neon_vcvtq_s32_v:
3839   case NEON::BI__builtin_neon_vcvtq_u32_v:
3840   case NEON::BI__builtin_neon_vcvtq_s64_v:
3841   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3842     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3843     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3844                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3845   }
3846   case NEON::BI__builtin_neon_vcvta_s32_v:
3847   case NEON::BI__builtin_neon_vcvta_s64_v:
3848   case NEON::BI__builtin_neon_vcvta_u32_v:
3849   case NEON::BI__builtin_neon_vcvta_u64_v:
3850   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3851   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3852   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3853   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3854   case NEON::BI__builtin_neon_vcvtn_s32_v:
3855   case NEON::BI__builtin_neon_vcvtn_s64_v:
3856   case NEON::BI__builtin_neon_vcvtn_u32_v:
3857   case NEON::BI__builtin_neon_vcvtn_u64_v:
3858   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3859   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3860   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3861   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3862   case NEON::BI__builtin_neon_vcvtp_s32_v:
3863   case NEON::BI__builtin_neon_vcvtp_s64_v:
3864   case NEON::BI__builtin_neon_vcvtp_u32_v:
3865   case NEON::BI__builtin_neon_vcvtp_u64_v:
3866   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3867   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3868   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3869   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3870   case NEON::BI__builtin_neon_vcvtm_s32_v:
3871   case NEON::BI__builtin_neon_vcvtm_s64_v:
3872   case NEON::BI__builtin_neon_vcvtm_u32_v:
3873   case NEON::BI__builtin_neon_vcvtm_u64_v:
3874   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3875   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3876   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3877   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3878     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3879     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3880   }
3881   case NEON::BI__builtin_neon_vext_v:
3882   case NEON::BI__builtin_neon_vextq_v: {
3883     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3884     SmallVector<uint32_t, 16> Indices;
3885     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3886       Indices.push_back(i+CV);
3887
3888     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3889     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3890     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3891   }
3892   case NEON::BI__builtin_neon_vfma_v:
3893   case NEON::BI__builtin_neon_vfmaq_v: {
3894     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3895     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3896     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3897     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3898
3899     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
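         // For example, vfma_f32(a, b, c) computes a + b * c, so it maps, roughly, to
         //   call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)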
3900     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3901   }
3902   case NEON::BI__builtin_neon_vld1_v:
3903   case NEON::BI__builtin_neon_vld1q_v: {
3904     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3905     Ops.push_back(getAlignmentValue32(PtrOp0));
3906     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3907   }
3908   case NEON::BI__builtin_neon_vld2_v:
3909   case NEON::BI__builtin_neon_vld2q_v:
3910   case NEON::BI__builtin_neon_vld3_v:
3911   case NEON::BI__builtin_neon_vld3q_v:
3912   case NEON::BI__builtin_neon_vld4_v:
3913   case NEON::BI__builtin_neon_vld4q_v: {
3914     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3915     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3916     Value *Align = getAlignmentValue32(PtrOp1);
3917     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3918     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3919     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3920     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3921   }
3922   case NEON::BI__builtin_neon_vld1_dup_v:
3923   case NEON::BI__builtin_neon_vld1q_dup_v: {
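         // A sketch of the lowering: load a single scalar through the pointer,
         // insert it into lane 0 of an undef vector, then splat that lane across
         // the whole vector, roughly:
         //   %s  = load i32, i32* %p
         //   %v0 = insertelement <2 x i32> undef, i32 %s, i32 0
         //   %v  = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> zeroinitializer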
3924     Value *V = UndefValue::get(Ty);
3925     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3926     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3927     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3928     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3929     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3930     return EmitNeonSplat(Ops[0], CI);
3931   }
3932   case NEON::BI__builtin_neon_vld2_lane_v:
3933   case NEON::BI__builtin_neon_vld2q_lane_v:
3934   case NEON::BI__builtin_neon_vld3_lane_v:
3935   case NEON::BI__builtin_neon_vld3q_lane_v:
3936   case NEON::BI__builtin_neon_vld4_lane_v:
3937   case NEON::BI__builtin_neon_vld4q_lane_v: {
3938     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3939     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3940     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3941       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3942     Ops.push_back(getAlignmentValue32(PtrOp1));
3943     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3944     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3945     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3946     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3947   }
3948   case NEON::BI__builtin_neon_vmovl_v: {
3949     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3950     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3951     if (Usgn)
3952       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3953     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3954   }
3955   case NEON::BI__builtin_neon_vmovn_v: {
3956     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3957     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3958     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3959   }
3960   case NEON::BI__builtin_neon_vmull_v:
3961     // FIXME: the integer vmull operations could be emitted in terms of pure
3962     // LLVM IR (two exts followed by a mul). Unfortunately, LLVM has a habit of
3963     // hoisting the exts outside loops; until global ISel comes along and can see
3964     // through such movement, this leads to bad CodeGen. So we need an
3965     // intrinsic for now.
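         // The pure-IR form would look roughly like this for vmull_s16(a, b):
         //   %l = sext <4 x i16> %a to <4 x i32>
         //   %r = sext <4 x i16> %b to <4 x i32>
         //   %p = mul <4 x i32> %l, %r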
3966     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3967     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3968     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3969   case NEON::BI__builtin_neon_vpadal_v:
3970   case NEON::BI__builtin_neon_vpadalq_v: {
3971     // The source operand type has twice as many elements of half the size.
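         // For example, for vpadal_s8 the result type is <4 x i16>, so EltTy is i8,
         // NarrowTy is <8 x i8>, and the intrinsic is overloaded on { <4 x i16>, <8 x i8> }.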
3972     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3973     llvm::Type *EltTy =
3974       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3975     llvm::Type *NarrowTy =
3976       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3977     llvm::Type *Tys[2] = { Ty, NarrowTy };
3978     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3979   }
3980   case NEON::BI__builtin_neon_vpaddl_v:
3981   case NEON::BI__builtin_neon_vpaddlq_v: {
3982     // The source operand type has twice as many elements of half the size.
3983     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3984     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3985     llvm::Type *NarrowTy =
3986       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3987     llvm::Type *Tys[2] = { Ty, NarrowTy };
3988     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3989   }
3990   case NEON::BI__builtin_neon_vqdmlal_v:
3991   case NEON::BI__builtin_neon_vqdmlsl_v: {
3992     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3993     Ops[1] =
3994         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3995     Ops.resize(2);
3996     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3997   }
3998   case NEON::BI__builtin_neon_vqshl_n_v:
3999   case NEON::BI__builtin_neon_vqshlq_n_v:
4000     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4001                         1, false);
4002   case NEON::BI__builtin_neon_vqshlu_n_v:
4003   case NEON::BI__builtin_neon_vqshluq_n_v:
4004     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4005                         1, false);
4006   case NEON::BI__builtin_neon_vrecpe_v:
4007   case NEON::BI__builtin_neon_vrecpeq_v:
4008   case NEON::BI__builtin_neon_vrsqrte_v:
4009   case NEON::BI__builtin_neon_vrsqrteq_v:
4010     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4011     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4012
4013   case NEON::BI__builtin_neon_vrshr_n_v:
4014   case NEON::BI__builtin_neon_vrshrq_n_v:
4015     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4016                         1, true);
4017   case NEON::BI__builtin_neon_vshl_n_v:
4018   case NEON::BI__builtin_neon_vshlq_n_v:
4019     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4020     return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
4021                              "vshl_n");
4022   case NEON::BI__builtin_neon_vshll_n_v: {
4023     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4024     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4025     if (Usgn)
4026       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4027     else
4028       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4029     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4030     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4031   }
4032   case NEON::BI__builtin_neon_vshrn_n_v: {
4033     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4034     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4035     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4036     if (Usgn)
4037       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4038     else
4039       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4040     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4041   }
4042   case NEON::BI__builtin_neon_vshr_n_v:
4043   case NEON::BI__builtin_neon_vshrq_n_v:
4044     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4045   case NEON::BI__builtin_neon_vst1_v:
4046   case NEON::BI__builtin_neon_vst1q_v:
4047   case NEON::BI__builtin_neon_vst2_v:
4048   case NEON::BI__builtin_neon_vst2q_v:
4049   case NEON::BI__builtin_neon_vst3_v:
4050   case NEON::BI__builtin_neon_vst3q_v:
4051   case NEON::BI__builtin_neon_vst4_v:
4052   case NEON::BI__builtin_neon_vst4q_v:
4053   case NEON::BI__builtin_neon_vst2_lane_v:
4054   case NEON::BI__builtin_neon_vst2q_lane_v:
4055   case NEON::BI__builtin_neon_vst3_lane_v:
4056   case NEON::BI__builtin_neon_vst3q_lane_v:
4057   case NEON::BI__builtin_neon_vst4_lane_v:
4058   case NEON::BI__builtin_neon_vst4q_lane_v: {
4059     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4060     Ops.push_back(getAlignmentValue32(PtrOp0));
4061     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4062   }
4063   case NEON::BI__builtin_neon_vsubhn_v: {
4064     llvm::VectorType *SrcTy =
4065         llvm::VectorType::getExtendedElementVectorType(VTy);
4066
4067     // %diff = sub <4 x i32> %lhs, %rhs
4068     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4069     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4070     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4071
4072     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4073     Constant *ShiftAmt =
4074         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4075     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4076
4077     // %res = trunc <4 x i32> %high to <4 x i16>
4078     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4079   }
4080   case NEON::BI__builtin_neon_vtrn_v:
4081   case NEON::BI__builtin_neon_vtrnq_v: {
4082     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4083     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4084     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4085     Value *SV = nullptr;
4086
4087     for (unsigned vi = 0; vi != 2; ++vi) {
4088       SmallVector<uint32_t, 16> Indices;
4089       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4090         Indices.push_back(i+vi);
4091         Indices.push_back(i+e+vi);
4092       }
4093       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4094       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4095       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4096     }
4097     return SV;
4098   }
4099   case NEON::BI__builtin_neon_vtst_v:
4100   case NEON::BI__builtin_neon_vtstq_v: {
4101     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4102     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4103     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4104     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4105                                 ConstantAggregateZero::get(Ty));
4106     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4107   }
4108   case NEON::BI__builtin_neon_vuzp_v:
4109   case NEON::BI__builtin_neon_vuzpq_v: {
4110     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4111     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4112     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4113     Value *SV = nullptr;
4114
4115     for (unsigned vi = 0; vi != 2; ++vi) {
4116       SmallVector<uint32_t, 16> Indices;
4117       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4118         Indices.push_back(2*i+vi);
4119
4120       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4121       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4122       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4123     }
4124     return SV;
4125   }
4126   case NEON::BI__builtin_neon_vzip_v:
4127   case NEON::BI__builtin_neon_vzipq_v: {
4128     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4129     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4130     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4131     Value *SV = nullptr;
4132
4133     for (unsigned vi = 0; vi != 2; ++vi) {
4134       SmallVector<uint32_t, 16> Indices;
4135       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4136         Indices.push_back((i + vi*e) >> 1);
4137         Indices.push_back(((i + vi*e) >> 1)+e);
4138       }
4139       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4140       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4141       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4142     }
4143     return SV;
4144   }
4145   }
4146
4147   assert(Int && "Expected valid intrinsic number");
4148
4149   // Determine the type(s) of this overloaded NEON intrinsic.
4150   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4151
4152   Value *Result = EmitNeonCall(F, Ops, NameHint);
4153   llvm::Type *ResultType = ConvertType(E->getType());
4154   // An AArch64 intrinsic may return a one-element vector; cast it back to the
4155   // scalar type expected by the builtin.
4156   return Builder.CreateBitCast(Result, ResultType, NameHint);
4157 }
4158
4159 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4160     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4161     const CmpInst::Predicate Ip, const Twine &Name) {
4162   llvm::Type *OTy = Op->getType();
4163
4164   // FIXME: this is utterly horrific. We should not be looking at previous
4165   // codegen context to find out what needs doing. Unfortunately TableGen
4166   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4167   // (etc).
4168   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4169     OTy = BI->getOperand(0)->getType();
4170
4171   Op = Builder.CreateBitCast(Op, OTy);
4172   if (OTy->getScalarType()->isFloatingPointTy()) {
4173     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4174   } else {
4175     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4176   }
4177   return Builder.CreateSExt(Op, Ty, Name);
4178 }
4179
4180 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4181                                  Value *ExtOp, Value *IndexOp,
4182                                  llvm::Type *ResTy, unsigned IntID,
4183                                  const char *Name) {
4184   SmallVector<Value *, 2> TblOps;
4185   if (ExtOp)
4186     TblOps.push_back(ExtOp);
4187
4188   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4189   SmallVector<uint32_t, 16> Indices;
4190   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4191   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4192     Indices.push_back(2*i);
4193     Indices.push_back(2*i+1);
4194   }
4195
4196   int PairPos = 0, End = Ops.size() - 1;
4197   while (PairPos < End) {
4198     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4199                                                      Ops[PairPos+1], Indices,
4200                                                      Name));
4201     PairPos += 2;
4202   }
4203
4204   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4205   // of the last 128-bit lookup table with zeros.
4206   if (PairPos == End) {
4207     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4208     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4209                                                      ZeroTbl, Indices, Name));
4210   }
4211
4212   Function *TblF;
4213   TblOps.push_back(IndexOp);
4214   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4215
4216   return CGF.EmitNeonCall(TblF, TblOps, Name);
4217 }
4218
4219 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4220   unsigned Value;
4221   switch (BuiltinID) {
4222   default:
4223     return nullptr;
4224   case ARM::BI__builtin_arm_nop:
4225     Value = 0;
4226     break;
4227   case ARM::BI__builtin_arm_yield:
4228   case ARM::BI__yield:
4229     Value = 1;
4230     break;
4231   case ARM::BI__builtin_arm_wfe:
4232   case ARM::BI__wfe:
4233     Value = 2;
4234     break;
4235   case ARM::BI__builtin_arm_wfi:
4236   case ARM::BI__wfi:
4237     Value = 3;
4238     break;
4239   case ARM::BI__builtin_arm_sev:
4240   case ARM::BI__sev:
4241     Value = 4;
4242     break;
4243   case ARM::BI__builtin_arm_sevl:
4244   case ARM::BI__sevl:
4245     Value = 5;
4246     break;
4247   }
4248
4249   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4250                             llvm::ConstantInt::get(Int32Ty, Value));
4251 }
4252
4253 // Generates the IR for the read/write special register builtin.
4254 // ValueType is the type of the value that is to be written or read;
4255 // RegisterType is the type of the register being written to or read from.
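     // For example, __builtin_arm_rsr64("some_reg") is emitted, roughly, as
     //   %v = call i64 @llvm.read_register.i64(metadata !{!"some_reg"})
     // and the matching wsr64 builtin as a call to @llvm.write_register.i64
     // (the register name here is purely illustrative).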
4256 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4257                                          const CallExpr *E,
4258                                          llvm::Type *RegisterType,
4259                                          llvm::Type *ValueType,
4260                                          bool IsRead,
4261                                          StringRef SysReg = "") {
4262   // The read and write register intrinsics only support 32- and 64-bit operations.
4263   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4264           && "Unsupported size for register.");
4265
4266   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4267   CodeGen::CodeGenModule &CGM = CGF.CGM;
4268   LLVMContext &Context = CGM.getLLVMContext();
4269
4270   if (SysReg.empty()) {
4271     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4272     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4273   }
4274
4275   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4276   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4277   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4278
4279   llvm::Type *Types[] = { RegisterType };
4280
4281   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4282   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4283             && "Can't fit 64-bit value in 32-bit register");
4284
4285   if (IsRead) {
4286     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4287     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4288
4289     if (MixedTypes)
4290       // Read into 64 bit register and then truncate result to 32 bit.
4291       return Builder.CreateTrunc(Call, ValueType);
4292
4293     if (ValueType->isPointerTy())
4294       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4295       return Builder.CreateIntToPtr(Call, ValueType);
4296
4297     return Call;
4298   }
4299
4300   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4301   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4302   if (MixedTypes) {
4303     // Extend 32 bit write value to 64 bit to pass to write.
4304     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4305     return Builder.CreateCall(F, { Metadata, ArgValue });
4306   }
4307
4308   if (ValueType->isPointerTy()) {
4309     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4310     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4311     return Builder.CreateCall(F, { Metadata, ArgValue });
4312   }
4313
4314   return Builder.CreateCall(F, { Metadata, ArgValue });
4315 }
4316
4317 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4318 /// argument that specifies the vector type.
4319 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4320   switch (BuiltinID) {
4321   default: break;
4322   case NEON::BI__builtin_neon_vget_lane_i8:
4323   case NEON::BI__builtin_neon_vget_lane_i16:
4324   case NEON::BI__builtin_neon_vget_lane_i32:
4325   case NEON::BI__builtin_neon_vget_lane_i64:
4326   case NEON::BI__builtin_neon_vget_lane_f32:
4327   case NEON::BI__builtin_neon_vgetq_lane_i8:
4328   case NEON::BI__builtin_neon_vgetq_lane_i16:
4329   case NEON::BI__builtin_neon_vgetq_lane_i32:
4330   case NEON::BI__builtin_neon_vgetq_lane_i64:
4331   case NEON::BI__builtin_neon_vgetq_lane_f32:
4332   case NEON::BI__builtin_neon_vset_lane_i8:
4333   case NEON::BI__builtin_neon_vset_lane_i16:
4334   case NEON::BI__builtin_neon_vset_lane_i32:
4335   case NEON::BI__builtin_neon_vset_lane_i64:
4336   case NEON::BI__builtin_neon_vset_lane_f32:
4337   case NEON::BI__builtin_neon_vsetq_lane_i8:
4338   case NEON::BI__builtin_neon_vsetq_lane_i16:
4339   case NEON::BI__builtin_neon_vsetq_lane_i32:
4340   case NEON::BI__builtin_neon_vsetq_lane_i64:
4341   case NEON::BI__builtin_neon_vsetq_lane_f32:
4342   case NEON::BI__builtin_neon_vsha1h_u32:
4343   case NEON::BI__builtin_neon_vsha1cq_u32:
4344   case NEON::BI__builtin_neon_vsha1pq_u32:
4345   case NEON::BI__builtin_neon_vsha1mq_u32:
4346   case ARM::BI_MoveToCoprocessor:
4347   case ARM::BI_MoveToCoprocessor2:
4348     return false;
4349   }
4350   return true;
4351 }
4352
4353 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4354                                            const CallExpr *E) {
4355   if (auto Hint = GetValueForARMHint(BuiltinID))
4356     return Hint;
4357
4358   if (BuiltinID == ARM::BI__emit) {
4359     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4360     llvm::FunctionType *FTy =
4361         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4362
4363     APSInt Value;
4364     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4365       llvm_unreachable("Sema will ensure that the parameter is constant");
4366
4367     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4368
4369     llvm::InlineAsm *Emit =
4370         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4371                                  /*SideEffects=*/true)
4372                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4373                                  /*SideEffects=*/true);
4374
4375     return Builder.CreateCall(Emit);
4376   }
4377
4378   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4379     Value *Option = EmitScalarExpr(E->getArg(0));
4380     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4381   }
4382
4383   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4384     Value *Address = EmitScalarExpr(E->getArg(0));
4385     Value *RW      = EmitScalarExpr(E->getArg(1));
4386     Value *IsData  = EmitScalarExpr(E->getArg(2));
4387
4388     // Locality is not supported on the ARM target.
4389     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4390
4391     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4392     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4393   }
4394
4395   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4396     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4397     return Builder.CreateCall(
4398         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4399   }
4400
4401   if (BuiltinID == ARM::BI__clear_cache) {
4402     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4403     const FunctionDecl *FD = E->getDirectCallee();
4404     Value *Ops[2];
4405     for (unsigned i = 0; i < 2; i++)
4406       Ops[i] = EmitScalarExpr(E->getArg(i));
4407     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4408     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4409     StringRef Name = FD->getName();
4410     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4411   }
4412
4413   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4414       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4415     Function *F;
4416
4417     switch (BuiltinID) {
4418     default: llvm_unreachable("unexpected builtin");
4419     case ARM::BI__builtin_arm_mcrr:
4420       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4421       break;
4422     case ARM::BI__builtin_arm_mcrr2:
4423       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4424       break;
4425     }
4426
4427     // The MCRR{2} instruction has 5 operands, but the
4428     // intrinsic has only 4 because Rt and Rt2 are
4429     // represented as a single unsigned 64-bit integer
4430     // in the intrinsic definition, while internally
4431     // they are represented as two 32-bit
4432     // integers.
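         // For example, if RtAndRt2 is 0xAAAAAAAABBBBBBBB, then Rt ends up holding
         // 0xBBBBBBBB (the low half) and Rt2 holds 0xAAAAAAAA (the high half).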
4433
4434     Value *Coproc = EmitScalarExpr(E->getArg(0));
4435     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4436     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4437     Value *CRm = EmitScalarExpr(E->getArg(3));
4438
4439     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4440     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4441     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4442     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4443
4444     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4445   }
4446
4447   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4448       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4449     Function *F;
4450
4451     switch (BuiltinID) {
4452     default: llvm_unreachable("unexpected builtin");
4453     case ARM::BI__builtin_arm_mrrc:
4454       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4455       break;
4456     case ARM::BI__builtin_arm_mrrc2:
4457       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4458       break;
4459     }
4460
4461     Value *Coproc = EmitScalarExpr(E->getArg(0));
4462     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4463     Value *CRm  = EmitScalarExpr(E->getArg(2));
4464     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4465
4466     // The intrinsic returns an unsigned 64-bit integer, represented
4467     // as two 32-bit integers.
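         // The reassembly below is, roughly, result = (Rt << 32) | Rt1, where Rt is
         // the value at index 1 of the result struct and Rt1 the value at index 0.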
4468
4469     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4470     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4471     Rt = Builder.CreateZExt(Rt, Int64Ty);
4472     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4473
4474     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4475     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4476     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4477
4478     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4479   }
4480
4481   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4482       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4483         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4484        getContext().getTypeSize(E->getType()) == 64) ||
4485       BuiltinID == ARM::BI__ldrexd) {
4486     Function *F;
4487
4488     switch (BuiltinID) {
4489     default: llvm_unreachable("unexpected builtin");
4490     case ARM::BI__builtin_arm_ldaex:
4491       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4492       break;
4493     case ARM::BI__builtin_arm_ldrexd:
4494     case ARM::BI__builtin_arm_ldrex:
4495     case ARM::BI__ldrexd:
4496       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4497       break;
4498     }
4499
4500     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4501     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4502                                     "ldrexd");
4503
4504     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4505     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4506     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4507     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4508
4509     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4510     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4511     Val = Builder.CreateOr(Val, Val1);
4512     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4513   }
4514
4515   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4516       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4517     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4518
4519     QualType Ty = E->getType();
4520     llvm::Type *RealResTy = ConvertType(Ty);
4521     llvm::Type *PtrTy = llvm::IntegerType::get(
4522         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4523     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4524
4525     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4526                                        ? Intrinsic::arm_ldaex
4527                                        : Intrinsic::arm_ldrex,
4528                                    PtrTy);
4529     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4530
4531     if (RealResTy->isPointerTy())
4532       return Builder.CreateIntToPtr(Val, RealResTy);
4533     else {
4534       llvm::Type *IntResTy = llvm::IntegerType::get(
4535           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4536       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4537       return Builder.CreateBitCast(Val, RealResTy);
4538     }
4539   }
4540
4541   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4542       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4543         BuiltinID == ARM::BI__builtin_arm_strex) &&
4544        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4545     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4546                                        ? Intrinsic::arm_stlexd
4547                                        : Intrinsic::arm_strexd);
4548     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4549
4550     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4551     Value *Val = EmitScalarExpr(E->getArg(0));
4552     Builder.CreateStore(Val, Tmp);
4553
4554     Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4555     Val = Builder.CreateLoad(LdPtr);
4556
4557     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4558     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4559     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4560     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4561   }
4562
4563   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4564       BuiltinID == ARM::BI__builtin_arm_stlex) {
4565     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4566     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4567
4568     QualType Ty = E->getArg(0)->getType();
4569     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4570                                                  getContext().getTypeSize(Ty));
4571     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4572
4573     if (StoreVal->getType()->isPointerTy())
4574       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4575     else {
4576       llvm::Type *IntTy = llvm::IntegerType::get(
4577           getLLVMContext(),
4578           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4579       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4580       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4581     }
4582
4583     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4584                                        ? Intrinsic::arm_stlex
4585                                        : Intrinsic::arm_strex,
4586                                    StoreAddr->getType());
4587     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4588   }
4589
4590   switch (BuiltinID) {
4591   case ARM::BI__iso_volatile_load8:
4592   case ARM::BI__iso_volatile_load16:
4593   case ARM::BI__iso_volatile_load32:
4594   case ARM::BI__iso_volatile_load64: {
4595     Value *Ptr = EmitScalarExpr(E->getArg(0));
4596     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4597     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4598     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4599                                              LoadSize.getQuantity() * 8);
4600     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4601     llvm::LoadInst *Load =
4602       Builder.CreateAlignedLoad(Ptr, LoadSize);
4603     Load->setVolatile(true);
4604     return Load;
4605   }
4606   case ARM::BI__iso_volatile_store8:
4607   case ARM::BI__iso_volatile_store16:
4608   case ARM::BI__iso_volatile_store32:
4609   case ARM::BI__iso_volatile_store64: {
4610     Value *Ptr = EmitScalarExpr(E->getArg(0));
4611     Value *Value = EmitScalarExpr(E->getArg(1));
4612     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4613     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4614     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4615                                              StoreSize.getQuantity() * 8);
4616     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4617     llvm::StoreInst *Store =
4618       Builder.CreateAlignedStore(Value, Ptr,
4619                                  StoreSize);
4620     Store->setVolatile(true);
4621     return Store;
4622   }
4623   }
4624
4625   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4626     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4627     return Builder.CreateCall(F);
4628   }
4629
4630   // CRC32
4631   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4632   switch (BuiltinID) {
4633   case ARM::BI__builtin_arm_crc32b:
4634     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4635   case ARM::BI__builtin_arm_crc32cb:
4636     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4637   case ARM::BI__builtin_arm_crc32h:
4638     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4639   case ARM::BI__builtin_arm_crc32ch:
4640     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4641   case ARM::BI__builtin_arm_crc32w:
4642   case ARM::BI__builtin_arm_crc32d:
4643     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4644   case ARM::BI__builtin_arm_crc32cw:
4645   case ARM::BI__builtin_arm_crc32cd:
4646     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4647   }
4648
4649   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4650     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4651     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4652
4653     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4654     // intrinsics, hence we need different codegen for these cases.
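         // For example, __builtin_arm_crc32d(crc, data) is emitted, roughly, as
         //   crc32w(crc32w(crc, trunc(data)), trunc(data >> 32))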
4655     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4656         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4657       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4658       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4659       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4660       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4661
4662       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4663       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4664       return Builder.CreateCall(F, {Res, Arg1b});
4665     } else {
4666       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4667
4668       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4669       return Builder.CreateCall(F, {Arg0, Arg1});
4670     }
4671   }
4672
4673   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4674       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4675       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4676       BuiltinID == ARM::BI__builtin_arm_wsr ||
4677       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4678       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4679
4680     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4681                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4682                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4683
4684     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4685                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4686
4687     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4688                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4689
4690     llvm::Type *ValueType;
4691     llvm::Type *RegisterType;
4692     if (IsPointerBuiltin) {
4693       ValueType = VoidPtrTy;
4694       RegisterType = Int32Ty;
4695     } else if (Is64Bit) {
4696       ValueType = RegisterType = Int64Ty;
4697     } else {
4698       ValueType = RegisterType = Int32Ty;
4699     }
4700
4701     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4702   }
4703
4704   // Find out if any arguments are required to be integer constant
4705   // expressions.
4706   unsigned ICEArguments = 0;
4707   ASTContext::GetBuiltinTypeError Error;
4708   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4709   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4710
4711   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4712     return Builder.getInt32(addr.getAlignment().getQuantity());
4713   };
4714
4715   Address PtrOp0 = Address::invalid();
4716   Address PtrOp1 = Address::invalid();
4717   SmallVector<Value*, 4> Ops;
4718   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4719   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4720   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4721     if (i == 0) {
4722       switch (BuiltinID) {
4723       case NEON::BI__builtin_neon_vld1_v:
4724       case NEON::BI__builtin_neon_vld1q_v:
4725       case NEON::BI__builtin_neon_vld1q_lane_v:
4726       case NEON::BI__builtin_neon_vld1_lane_v:
4727       case NEON::BI__builtin_neon_vld1_dup_v:
4728       case NEON::BI__builtin_neon_vld1q_dup_v:
4729       case NEON::BI__builtin_neon_vst1_v:
4730       case NEON::BI__builtin_neon_vst1q_v:
4731       case NEON::BI__builtin_neon_vst1q_lane_v:
4732       case NEON::BI__builtin_neon_vst1_lane_v:
4733       case NEON::BI__builtin_neon_vst2_v:
4734       case NEON::BI__builtin_neon_vst2q_v:
4735       case NEON::BI__builtin_neon_vst2_lane_v:
4736       case NEON::BI__builtin_neon_vst2q_lane_v:
4737       case NEON::BI__builtin_neon_vst3_v:
4738       case NEON::BI__builtin_neon_vst3q_v:
4739       case NEON::BI__builtin_neon_vst3_lane_v:
4740       case NEON::BI__builtin_neon_vst3q_lane_v:
4741       case NEON::BI__builtin_neon_vst4_v:
4742       case NEON::BI__builtin_neon_vst4q_v:
4743       case NEON::BI__builtin_neon_vst4_lane_v:
4744       case NEON::BI__builtin_neon_vst4q_lane_v:
4745         // Get the alignment for the argument in addition to the value;
4746         // we'll use it later.
4747         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4748         Ops.push_back(PtrOp0.getPointer());
4749         continue;
4750       }
4751     }
4752     if (i == 1) {
4753       switch (BuiltinID) {
4754       case NEON::BI__builtin_neon_vld2_v:
4755       case NEON::BI__builtin_neon_vld2q_v:
4756       case NEON::BI__builtin_neon_vld3_v:
4757       case NEON::BI__builtin_neon_vld3q_v:
4758       case NEON::BI__builtin_neon_vld4_v:
4759       case NEON::BI__builtin_neon_vld4q_v:
4760       case NEON::BI__builtin_neon_vld2_lane_v:
4761       case NEON::BI__builtin_neon_vld2q_lane_v:
4762       case NEON::BI__builtin_neon_vld3_lane_v:
4763       case NEON::BI__builtin_neon_vld3q_lane_v:
4764       case NEON::BI__builtin_neon_vld4_lane_v:
4765       case NEON::BI__builtin_neon_vld4q_lane_v:
4766       case NEON::BI__builtin_neon_vld2_dup_v:
4767       case NEON::BI__builtin_neon_vld3_dup_v:
4768       case NEON::BI__builtin_neon_vld4_dup_v:
4769         // Get the alignment for the argument in addition to the value;
4770         // we'll use it later.
4771         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4772         Ops.push_back(PtrOp1.getPointer());
4773         continue;
4774       }
4775     }
4776
4777     if ((ICEArguments & (1 << i)) == 0) {
4778       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4779     } else {
4780       // If this is required to be a constant, constant fold it so that we know
4781       // that the generated intrinsic gets a ConstantInt.
4782       llvm::APSInt Result;
4783       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4784       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4785       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4786     }
4787   }
4788
4789   switch (BuiltinID) {
4790   default: break;
4791
4792   case NEON::BI__builtin_neon_vget_lane_i8:
4793   case NEON::BI__builtin_neon_vget_lane_i16:
4794   case NEON::BI__builtin_neon_vget_lane_i32:
4795   case NEON::BI__builtin_neon_vget_lane_i64:
4796   case NEON::BI__builtin_neon_vget_lane_f32:
4797   case NEON::BI__builtin_neon_vgetq_lane_i8:
4798   case NEON::BI__builtin_neon_vgetq_lane_i16:
4799   case NEON::BI__builtin_neon_vgetq_lane_i32:
4800   case NEON::BI__builtin_neon_vgetq_lane_i64:
4801   case NEON::BI__builtin_neon_vgetq_lane_f32:
4802     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4803
4804   case NEON::BI__builtin_neon_vset_lane_i8:
4805   case NEON::BI__builtin_neon_vset_lane_i16:
4806   case NEON::BI__builtin_neon_vset_lane_i32:
4807   case NEON::BI__builtin_neon_vset_lane_i64:
4808   case NEON::BI__builtin_neon_vset_lane_f32:
4809   case NEON::BI__builtin_neon_vsetq_lane_i8:
4810   case NEON::BI__builtin_neon_vsetq_lane_i16:
4811   case NEON::BI__builtin_neon_vsetq_lane_i32:
4812   case NEON::BI__builtin_neon_vsetq_lane_i64:
4813   case NEON::BI__builtin_neon_vsetq_lane_f32:
4814     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4815
4816   case NEON::BI__builtin_neon_vsha1h_u32:
4817     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4818                         "vsha1h");
4819   case NEON::BI__builtin_neon_vsha1cq_u32:
4820     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4821                         "vsha1c");
4822   case NEON::BI__builtin_neon_vsha1pq_u32:
4823     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4824                         "vsha1p");
4825   case NEON::BI__builtin_neon_vsha1mq_u32:
4826     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4827                         "vsha1m");
4828
4829   // The ARM _MoveToCoprocessor builtins put the input register value as
4830   // the first argument, but the LLVM intrinsic expects it as the third one.
4831   case ARM::BI_MoveToCoprocessor:
4832   case ARM::BI_MoveToCoprocessor2: {
4833     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4834                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4835     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4836                                   Ops[3], Ops[4], Ops[5]});
4837   }
4838   case ARM::BI_BitScanForward:
4839   case ARM::BI_BitScanForward64:
4840     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4841   case ARM::BI_BitScanReverse:
4842   case ARM::BI_BitScanReverse64:
4843     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4844
4845   case ARM::BI_InterlockedAnd64:
4846     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4847   case ARM::BI_InterlockedExchange64:
4848     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4849   case ARM::BI_InterlockedExchangeAdd64:
4850     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4851   case ARM::BI_InterlockedExchangeSub64:
4852     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4853   case ARM::BI_InterlockedOr64:
4854     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4855   case ARM::BI_InterlockedXor64:
4856     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4857   case ARM::BI_InterlockedDecrement64:
4858     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4859   case ARM::BI_InterlockedIncrement64:
4860     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4861   }
4862
4863   // Get the last argument, which specifies the vector type.
4864   assert(HasExtraArg);
4865   llvm::APSInt Result;
4866   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4867   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4868     return nullptr;
4869
4870   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4871       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4872     // Determine the overloaded type of this builtin.
4873     llvm::Type *Ty;
4874     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4875       Ty = FloatTy;
4876     else
4877       Ty = DoubleTy;
4878
4879     // Determine whether this is an unsigned conversion or not.
4880     bool usgn = Result.getZExtValue() == 1;
4881     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4882
4883     // Call the appropriate intrinsic.
4884     Function *F = CGM.getIntrinsic(Int, Ty);
4885     return Builder.CreateCall(F, Ops, "vcvtr");
4886   }
4887
4888   // Determine the type of this overloaded NEON intrinsic.
4889   NeonTypeFlags Type(Result.getZExtValue());
4890   bool usgn = Type.isUnsigned();
4891   bool rightShift = false;
4892
4893   llvm::VectorType *VTy = GetNeonType(this, Type);
4894   llvm::Type *Ty = VTy;
4895   if (!Ty)
4896     return nullptr;
4897
4898   // Many NEON builtins have identical semantics and uses in ARM and
4899   // AArch64. Emit these in a single function.
4900   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4901   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4902       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4903   if (Builtin)
4904     return EmitCommonNeonBuiltinExpr(
4905         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4906         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4907
4908   unsigned Int;
4909   switch (BuiltinID) {
4910   default: return nullptr;
4911   case NEON::BI__builtin_neon_vld1q_lane_v:
4912     // Handle 64-bit integer elements as a special case.  Use shuffles of
4913     // one-element vectors to avoid poor code for i64 in the backend.
4914     if (VTy->getElementType()->isIntegerTy(64)) {
4915       // Extract the other lane.
4916       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4917       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4918       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4919       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4920       // Load the value as a one-element vector.
4921       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4922       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4923       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4924       Value *Align = getAlignmentValue32(PtrOp0);
4925       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4926       // Combine them.
4927       uint32_t Indices[] = {1 - Lane, Lane};
4928       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4929       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4930     }
4931     // fall through
4932   case NEON::BI__builtin_neon_vld1_lane_v: {
4933     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4934     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4935     Value *Ld = Builder.CreateLoad(PtrOp0);
4936     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4937   }
4938   case NEON::BI__builtin_neon_vld2_dup_v:
4939   case NEON::BI__builtin_neon_vld3_dup_v:
4940   case NEON::BI__builtin_neon_vld4_dup_v: {
4941     // Handle 64-bit elements as a special case; there is no "dup" needed.
4942     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4943       switch (BuiltinID) {
4944       case NEON::BI__builtin_neon_vld2_dup_v:
4945         Int = Intrinsic::arm_neon_vld2;
4946         break;
4947       case NEON::BI__builtin_neon_vld3_dup_v:
4948         Int = Intrinsic::arm_neon_vld3;
4949         break;
4950       case NEON::BI__builtin_neon_vld4_dup_v:
4951         Int = Intrinsic::arm_neon_vld4;
4952         break;
4953       default: llvm_unreachable("unknown vld_dup intrinsic?");
4954       }
4955       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4956       Function *F = CGM.getIntrinsic(Int, Tys);
4957       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4958       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4959       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4960       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4961       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4962     }
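    // For smaller element types there is no separate "dup" load; emit a lane
    // load of lane 0 into otherwise-undef vectors, then splat lane 0 across
    // each returned vector below.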
4963     switch (BuiltinID) {
4964     case NEON::BI__builtin_neon_vld2_dup_v:
4965       Int = Intrinsic::arm_neon_vld2lane;
4966       break;
4967     case NEON::BI__builtin_neon_vld3_dup_v:
4968       Int = Intrinsic::arm_neon_vld3lane;
4969       break;
4970     case NEON::BI__builtin_neon_vld4_dup_v:
4971       Int = Intrinsic::arm_neon_vld4lane;
4972       break;
4973     default: llvm_unreachable("unknown vld_dup intrinsic?");
4974     }
4975     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4976     Function *F = CGM.getIntrinsic(Int, Tys);
4977     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4978
4979     SmallVector<Value*, 6> Args;
4980     Args.push_back(Ops[1]);
4981     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4982
4983     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4984     Args.push_back(CI);
4985     Args.push_back(getAlignmentValue32(PtrOp1));
4986
4987     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4988     // Splat lane 0 to all elements in each vector of the result.
4989     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4990       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4991       Value *Elt = Builder.CreateBitCast(Val, Ty);
4992       Elt = EmitNeonSplat(Elt, CI);
4993       Elt = Builder.CreateBitCast(Elt, Val->getType());
4994       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4995     }
4996     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4997     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4998     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4999   }
5000   case NEON::BI__builtin_neon_vqrshrn_n_v:
5001     Int =
5002       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5003     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5004                         1, true);
5005   case NEON::BI__builtin_neon_vqrshrun_n_v:
5006     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5007                         Ops, "vqrshrun_n", 1, true);
5008   case NEON::BI__builtin_neon_vqshrn_n_v:
5009     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5010     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5011                         1, true);
5012   case NEON::BI__builtin_neon_vqshrun_n_v:
5013     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5014                         Ops, "vqshrun_n", 1, true);
5015   case NEON::BI__builtin_neon_vrecpe_v:
5016   case NEON::BI__builtin_neon_vrecpeq_v:
5017     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5018                         Ops, "vrecpe");
5019   case NEON::BI__builtin_neon_vrshrn_n_v:
5020     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5021                         Ops, "vrshrn_n", 1, true);
5022   case NEON::BI__builtin_neon_vrsra_n_v:
5023   case NEON::BI__builtin_neon_vrsraq_n_v:
5024     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5025     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5026     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5027     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5028     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5029     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5030   case NEON::BI__builtin_neon_vsri_n_v:
5031   case NEON::BI__builtin_neon_vsriq_n_v:
5032     rightShift = true;
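    // fall through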
5033   case NEON::BI__builtin_neon_vsli_n_v:
5034   case NEON::BI__builtin_neon_vsliq_n_v:
5035     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5036     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5037                         Ops, "vsli_n");
5038   case NEON::BI__builtin_neon_vsra_n_v:
5039   case NEON::BI__builtin_neon_vsraq_n_v:
5040     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5041     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5042     return Builder.CreateAdd(Ops[0], Ops[1]);
5043   case NEON::BI__builtin_neon_vst1q_lane_v:
5044     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5045     // a one-element vector and avoid poor code for i64 in the backend.
5046     if (VTy->getElementType()->isIntegerTy(64)) {
5047       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5048       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5049       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5050       Ops[2] = getAlignmentValue32(PtrOp0);
5051       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5052       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5053                                                  Tys), Ops);
5054     }
5055     // fall through
5056   case NEON::BI__builtin_neon_vst1_lane_v: {
5057     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5058     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5059     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5060     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5061     return St;
5062   }
5063   case NEON::BI__builtin_neon_vtbl1_v:
5064     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5065                         Ops, "vtbl1");
5066   case NEON::BI__builtin_neon_vtbl2_v:
5067     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5068                         Ops, "vtbl2");
5069   case NEON::BI__builtin_neon_vtbl3_v:
5070     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5071                         Ops, "vtbl3");
5072   case NEON::BI__builtin_neon_vtbl4_v:
5073     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5074                         Ops, "vtbl4");
5075   case NEON::BI__builtin_neon_vtbx1_v:
5076     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5077                         Ops, "vtbx1");
5078   case NEON::BI__builtin_neon_vtbx2_v:
5079     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5080                         Ops, "vtbx2");
5081   case NEON::BI__builtin_neon_vtbx3_v:
5082     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5083                         Ops, "vtbx3");
5084   case NEON::BI__builtin_neon_vtbx4_v:
5085     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5086                         Ops, "vtbx4");
5087   }
5088 }
5089
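/// Lower the AArch64 NEON vtbl/vtbx (and vqtbl/vqtbx) table-lookup builtins.
/// Returns nullptr for builtins this function does not handle so the caller
/// can fall back to the generic NEON handling.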
5090 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5091                                       const CallExpr *E,
5092                                       SmallVectorImpl<Value *> &Ops) {
5093   unsigned int Int = 0;
5094   const char *s = nullptr;
5095
5096   switch (BuiltinID) {
5097   default:
5098     return nullptr;
5099   case NEON::BI__builtin_neon_vtbl1_v:
5100   case NEON::BI__builtin_neon_vqtbl1_v:
5101   case NEON::BI__builtin_neon_vqtbl1q_v:
5102   case NEON::BI__builtin_neon_vtbl2_v:
5103   case NEON::BI__builtin_neon_vqtbl2_v:
5104   case NEON::BI__builtin_neon_vqtbl2q_v:
5105   case NEON::BI__builtin_neon_vtbl3_v:
5106   case NEON::BI__builtin_neon_vqtbl3_v:
5107   case NEON::BI__builtin_neon_vqtbl3q_v:
5108   case NEON::BI__builtin_neon_vtbl4_v:
5109   case NEON::BI__builtin_neon_vqtbl4_v:
5110   case NEON::BI__builtin_neon_vqtbl4q_v:
5111     break;
5112   case NEON::BI__builtin_neon_vtbx1_v:
5113   case NEON::BI__builtin_neon_vqtbx1_v:
5114   case NEON::BI__builtin_neon_vqtbx1q_v:
5115   case NEON::BI__builtin_neon_vtbx2_v:
5116   case NEON::BI__builtin_neon_vqtbx2_v:
5117   case NEON::BI__builtin_neon_vqtbx2q_v:
5118   case NEON::BI__builtin_neon_vtbx3_v:
5119   case NEON::BI__builtin_neon_vqtbx3_v:
5120   case NEON::BI__builtin_neon_vqtbx3q_v:
5121   case NEON::BI__builtin_neon_vtbx4_v:
5122   case NEON::BI__builtin_neon_vqtbx4_v:
5123   case NEON::BI__builtin_neon_vqtbx4q_v:
5124     break;
5125   }
5126
5127   assert(E->getNumArgs() >= 3);
5128
5129   // Get the last argument, which specifies the vector type.
5130   llvm::APSInt Result;
5131   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5132   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5133     return nullptr;
5134
5135   // Determine the type of this overloaded NEON intrinsic.
5136   NeonTypeFlags Type(Result.getZExtValue());
5137   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5138   if (!Ty)
5139     return nullptr;
5140
5141   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5142
5143   // AArch64 scalar builtins are not overloaded; they do not have an extra
5144   // argument that specifies the vector type, so we need to handle each case.
5145   switch (BuiltinID) {
5146   case NEON::BI__builtin_neon_vtbl1_v: {
5147     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5148                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5149                               "vtbl1");
5150   }
5151   case NEON::BI__builtin_neon_vtbl2_v: {
5152     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5153                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5154                               "vtbl1");
5155   }
5156   case NEON::BI__builtin_neon_vtbl3_v: {
5157     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5158                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5159                               "vtbl2");
5160   }
5161   case NEON::BI__builtin_neon_vtbl4_v: {
5162     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5163                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5164                               "vtbl2");
5165   }
5166   case NEON::BI__builtin_neon_vtbx1_v: {
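    // vtbx1 uses a single 64-bit table, but the AArch64 TBL/TBX instructions
    // index a full 128-bit table register, so emulate the TBX semantics:
    // indices >= 8 keep the corresponding destination element, while in-range
    // indices take the TBL1 result computed below.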
5167     Value *TblRes =
5168         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5169                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5170
5171     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5172     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5173     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5174
5175     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5176     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5177     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5178   }
5179   case NEON::BI__builtin_neon_vtbx2_v: {
5180     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5181                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5182                               "vtbx1");
5183   }
5184   case NEON::BI__builtin_neon_vtbx3_v: {
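    // Same emulation as vtbx1 above, but with three 64-bit tables packed for
    // TBL2: indices >= 24 keep the corresponding destination element.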
5185     Value *TblRes =
5186         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5187                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5188
5189     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5190     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5191                                            TwentyFourV);
5192     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5193
5194     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5195     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5196     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5197   }
5198   case NEON::BI__builtin_neon_vtbx4_v: {
5199     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5200                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5201                               "vtbx2");
5202   }
5203   case NEON::BI__builtin_neon_vqtbl1_v:
5204   case NEON::BI__builtin_neon_vqtbl1q_v:
5205     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5206   case NEON::BI__builtin_neon_vqtbl2_v:
5207   case NEON::BI__builtin_neon_vqtbl2q_v:
5208     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5209   case NEON::BI__builtin_neon_vqtbl3_v:
5210   case NEON::BI__builtin_neon_vqtbl3q_v:
5211     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5212   case NEON::BI__builtin_neon_vqtbl4_v:
5213   case NEON::BI__builtin_neon_vqtbl4q_v:
5214     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5215   case NEON::BI__builtin_neon_vqtbx1_v:
5216   case NEON::BI__builtin_neon_vqtbx1q_v:
5217     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5218   case NEON::BI__builtin_neon_vqtbx2_v:
5219   case NEON::BI__builtin_neon_vqtbx2q_v:
5220     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5221   case NEON::BI__builtin_neon_vqtbx3_v:
5222   case NEON::BI__builtin_neon_vqtbx3q_v:
5223     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5224   case NEON::BI__builtin_neon_vqtbx4_v:
5225   case NEON::BI__builtin_neon_vqtbx4q_v:
5226     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5228   }
5229
5230   if (!Int)
5231     return nullptr;
5232
5233   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5234   return CGF.EmitNeonCall(F, Ops, s);
5235 }
5236
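/// Place a scalar i16 into lane 0 of an otherwise-undef <4 x i16> vector.
/// This lets the scalar saturating-multiply builtins below reuse the vector
/// aarch64.neon.sqdmull intrinsic and then extract lane 0 of the result.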
5237 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5238   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5239   Op = Builder.CreateBitCast(Op, Int16Ty);
5240   Value *V = UndefValue::get(VTy);
5241   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5242   Op = Builder.CreateInsertElement(V, Op, CI);
5243   return Op;
5244 }
5245
5246 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5247                                                const CallExpr *E) {
5248   unsigned HintID = static_cast<unsigned>(-1);
5249   switch (BuiltinID) {
5250   default: break;
5251   case AArch64::BI__builtin_arm_nop:
5252     HintID = 0;
5253     break;
5254   case AArch64::BI__builtin_arm_yield:
5255     HintID = 1;
5256     break;
5257   case AArch64::BI__builtin_arm_wfe:
5258     HintID = 2;
5259     break;
5260   case AArch64::BI__builtin_arm_wfi:
5261     HintID = 3;
5262     break;
5263   case AArch64::BI__builtin_arm_sev:
5264     HintID = 4;
5265     break;
5266   case AArch64::BI__builtin_arm_sevl:
5267     HintID = 5;
5268     break;
5269   }
5270
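  // If one of the hint builtins matched, emit it through the aarch64.hint
  // intrinsic with the immediate selected above.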
5271   if (HintID != static_cast<unsigned>(-1)) {
5272     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5273     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5274   }
5275
5276   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5277     Value *Address         = EmitScalarExpr(E->getArg(0));
5278     Value *RW              = EmitScalarExpr(E->getArg(1));
5279     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5280     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5281     Value *IsData          = EmitScalarExpr(E->getArg(4));
5282
5283     Value *Locality = nullptr;
5284     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5285       // Temporal fetch; convert the cache level to an llvm.prefetch locality.
5286       Locality = llvm::ConstantInt::get(Int32Ty,
5287         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5288     } else {
5289       // Streaming fetch.
5290       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5291     }
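    // For example, a temporal fetch with CacheLevel 0 maps to llvm.prefetch
    // locality 3 (the highest temporal locality), and CacheLevel 2 maps to 1.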
5292
5293     // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
5294     // PLDL3STRM or PLDL2STRM.
5295     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5296     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5297   }
5298
5299   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5300     assert((getContext().getTypeSize(E->getType()) == 32) &&
5301            "rbit of unusual size!");
5302     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5303     return Builder.CreateCall(
5304         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5305   }
5306   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5307     assert((getContext().getTypeSize(E->getType()) == 64) &&
5308            "rbit of unusual size!");
5309     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5310     return Builder.CreateCall(
5311         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5312   }
5313
5314   if (BuiltinID == AArch64::BI__clear_cache) {
5315     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5316     const FunctionDecl *FD = E->getDirectCallee();
5317     Value *Ops[2];
5318     for (unsigned i = 0; i < 2; i++)
5319       Ops[i] = EmitScalarExpr(E->getArg(i));
5320     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5321     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5322     StringRef Name = FD->getName();
5323     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5324   }
5325
5326   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5327       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5328       getContext().getTypeSize(E->getType()) == 128) {
5329     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5330                                        ? Intrinsic::aarch64_ldaxp
5331                                        : Intrinsic::aarch64_ldxp);
5332
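    // ldaxp/ldxp return the 128-bit value as a pair of i64 halves; zero-extend
    // both halves to i128 and reassemble them below before bitcasting to the
    // declared result type.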
5333     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5334     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5335                                     "ldxp");
5336
5337     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5338     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5339     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5340     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5341     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5342
5343     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5344     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5345     Val = Builder.CreateOr(Val, Val1);
5346     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5347   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5348              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5349     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5350
5351     QualType Ty = E->getType();
5352     llvm::Type *RealResTy = ConvertType(Ty);
5353     llvm::Type *PtrTy = llvm::IntegerType::get(
5354         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5355     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5356
5357     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5358                                        ? Intrinsic::aarch64_ldaxr
5359                                        : Intrinsic::aarch64_ldxr,
5360                                    PtrTy);
5361     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5362
5363     if (RealResTy->isPointerTy())
5364       return Builder.CreateIntToPtr(Val, RealResTy);
5365
5366     llvm::Type *IntResTy = llvm::IntegerType::get(
5367         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5368     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5369     return Builder.CreateBitCast(Val, RealResTy);
5370   }
5371
5372   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5373        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5374       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5375     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5376                                        ? Intrinsic::aarch64_stlxp
5377                                        : Intrinsic::aarch64_stxp);
5378     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
5379
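    // Spill the 128-bit store value to a stack temporary and reload it as two
    // i64 halves, which is the form stlxp/stxp expect.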
5380     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5381     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5382
5383     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5384     llvm::Value *Val = Builder.CreateLoad(Tmp);
5385
5386     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5387     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5388     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5389                                          Int8PtrTy);
5390     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5391   }
5392
5393   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5394       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5395     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5396     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5397
5398     QualType Ty = E->getArg(0)->getType();
5399     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5400                                                  getContext().getTypeSize(Ty));
5401     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5402
5403     if (StoreVal->getType()->isPointerTy())
5404       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5405     else {
5406       llvm::Type *IntTy = llvm::IntegerType::get(
5407           getLLVMContext(),
5408           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5409       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5410       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5411     }
5412
5413     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5414                                        ? Intrinsic::aarch64_stlxr
5415                                        : Intrinsic::aarch64_stxr,
5416                                    StoreAddr->getType());
5417     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5418   }
5419
5420   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5421     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5422     return Builder.CreateCall(F);
5423   }
5424
5425   // CRC32
5426   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5427   switch (BuiltinID) {
5428   case AArch64::BI__builtin_arm_crc32b:
5429     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5430   case AArch64::BI__builtin_arm_crc32cb:
5431     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5432   case AArch64::BI__builtin_arm_crc32h:
5433     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5434   case AArch64::BI__builtin_arm_crc32ch:
5435     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5436   case AArch64::BI__builtin_arm_crc32w:
5437     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5438   case AArch64::BI__builtin_arm_crc32cw:
5439     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5440   case AArch64::BI__builtin_arm_crc32d:
5441     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5442   case AArch64::BI__builtin_arm_crc32cd:
5443     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5444   }
5445
5446   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5447     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5448     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5449     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5450
5451     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5452     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5453
5454     return Builder.CreateCall(F, {Arg0, Arg1});
5455   }
5456
5457   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5458       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5459       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5460       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5461       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5462       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5463
5464     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5465                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5466                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5467
5468     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5469                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5470
5471     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5472                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5473
5474     llvm::Type *ValueType;
5475     llvm::Type *RegisterType = Int64Ty;
5476     if (IsPointerBuiltin) {
5477       ValueType = VoidPtrTy;
5478     } else if (Is64Bit) {
5479       ValueType = Int64Ty;
5480     } else {
5481       ValueType = Int32Ty;
5482     }
5483
5484     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5485   }
5486
5487   // Find out if any arguments are required to be integer constant
5488   // expressions.
5489   unsigned ICEArguments = 0;
5490   ASTContext::GetBuiltinTypeError Error;
5491   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5492   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5493
5494   llvm::SmallVector<Value*, 4> Ops;
5495   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5496     if ((ICEArguments & (1 << i)) == 0) {
5497       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5498     } else {
5499       // If this is required to be a constant, constant fold it so that we know
5500       // that the generated intrinsic gets a ConstantInt.
5501       llvm::APSInt Result;
5502       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5503       assert(IsConst && "Constant arg isn't actually constant?");
5504       (void)IsConst;
5505       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5506     }
5507   }
5508
5509   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5510   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5511       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5512
5513   if (Builtin) {
5514     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5515     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5516     assert(Result && "SISD intrinsic should have been handled");
5517     return Result;
5518   }
5519
5520   llvm::APSInt Result;
5521   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5522   NeonTypeFlags Type(0);
5523   if (Arg->isIntegerConstantExpr(Result, getContext()))
5524     // Determine the type of this overloaded NEON intrinsic.
5525     Type = NeonTypeFlags(Result.getZExtValue());
5526
5527   bool usgn = Type.isUnsigned();
5528   bool quad = Type.isQuad();
5529
5530   // Handle non-overloaded intrinsics first.
5531   switch (BuiltinID) {
5532   default: break;
5533   case NEON::BI__builtin_neon_vldrq_p128: {
5534     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5535     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5536     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5537     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5538                                      CharUnits::fromQuantity(16));
5539   }
5540   case NEON::BI__builtin_neon_vstrq_p128: {
5541     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5542     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5543     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5544   }
5545   case NEON::BI__builtin_neon_vcvts_u32_f32:
5546   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5547     usgn = true;
5548     // FALL THROUGH
5549   case NEON::BI__builtin_neon_vcvts_s32_f32:
5550   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5551     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5552     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5553     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5554     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5555     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5556     if (usgn)
5557       return Builder.CreateFPToUI(Ops[0], InTy);
5558     return Builder.CreateFPToSI(Ops[0], InTy);
5559   }
5560   case NEON::BI__builtin_neon_vcvts_f32_u32:
5561   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5562     usgn = true;
5563     // FALL THROUGH
5564   case NEON::BI__builtin_neon_vcvts_f32_s32:
5565   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5566     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5567     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5568     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5569     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5570     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5571     if (usgn)
5572       return Builder.CreateUIToFP(Ops[0], FTy);
5573     return Builder.CreateSIToFP(Ops[0], FTy);
5574   }
5575   case NEON::BI__builtin_neon_vpaddd_s64: {
5576     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5577     Value *Vec = EmitScalarExpr(E->getArg(0));
5578     // The vector is v2i64, so make sure it's bitcast to that.
5579     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5580     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5581     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5582     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5583     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5584     // Pairwise addition of a v2i64 into a scalar i64.
5585     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5586   }
5587   case NEON::BI__builtin_neon_vpaddd_f64: {
5588     llvm::Type *Ty =
5589       llvm::VectorType::get(DoubleTy, 2);
5590     Value *Vec = EmitScalarExpr(E->getArg(0));
5591     // The vector is v2f64, so make sure it's bitcast to that.
5592     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5593     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5594     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5595     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5596     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5597     // Pairwise addition of a v2f64 into a scalar f64.
5598     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5599   }
5600   case NEON::BI__builtin_neon_vpadds_f32: {
5601     llvm::Type *Ty =
5602       llvm::VectorType::get(FloatTy, 2);
5603     Value *Vec = EmitScalarExpr(E->getArg(0));
5604     // The vector is v2f32, so make sure it's bitcast to that.
5605     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5606     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5607     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5608     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5609     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5610     // Pairwise addition of a v2f32 into a scalar f32.
5611     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5612   }
5613   case NEON::BI__builtin_neon_vceqzd_s64:
5614   case NEON::BI__builtin_neon_vceqzd_f64:
5615   case NEON::BI__builtin_neon_vceqzs_f32:
5616     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5617     return EmitAArch64CompareBuiltinExpr(
5618         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5619         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5620   case NEON::BI__builtin_neon_vcgezd_s64:
5621   case NEON::BI__builtin_neon_vcgezd_f64:
5622   case NEON::BI__builtin_neon_vcgezs_f32:
5623     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5624     return EmitAArch64CompareBuiltinExpr(
5625         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5626         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5627   case NEON::BI__builtin_neon_vclezd_s64:
5628   case NEON::BI__builtin_neon_vclezd_f64:
5629   case NEON::BI__builtin_neon_vclezs_f32:
5630     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5631     return EmitAArch64CompareBuiltinExpr(
5632         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5633         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5634   case NEON::BI__builtin_neon_vcgtzd_s64:
5635   case NEON::BI__builtin_neon_vcgtzd_f64:
5636   case NEON::BI__builtin_neon_vcgtzs_f32:
5637     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5638     return EmitAArch64CompareBuiltinExpr(
5639         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5640         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5641   case NEON::BI__builtin_neon_vcltzd_s64:
5642   case NEON::BI__builtin_neon_vcltzd_f64:
5643   case NEON::BI__builtin_neon_vcltzs_f32:
5644     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5645     return EmitAArch64CompareBuiltinExpr(
5646         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5647         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5648
5649   case NEON::BI__builtin_neon_vceqzd_u64: {
5650     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5651     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5652     Ops[0] =
5653         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5654     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5655   }
5656   case NEON::BI__builtin_neon_vceqd_f64:
5657   case NEON::BI__builtin_neon_vcled_f64:
5658   case NEON::BI__builtin_neon_vcltd_f64:
5659   case NEON::BI__builtin_neon_vcged_f64:
5660   case NEON::BI__builtin_neon_vcgtd_f64: {
5661     llvm::CmpInst::Predicate P;
5662     switch (BuiltinID) {
5663     default: llvm_unreachable("missing builtin ID in switch!");
5664     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5665     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5666     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5667     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5668     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5669     }
5670     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5671     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5672     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5673     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5674     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5675   }
5676   case NEON::BI__builtin_neon_vceqs_f32:
5677   case NEON::BI__builtin_neon_vcles_f32:
5678   case NEON::BI__builtin_neon_vclts_f32:
5679   case NEON::BI__builtin_neon_vcges_f32:
5680   case NEON::BI__builtin_neon_vcgts_f32: {
5681     llvm::CmpInst::Predicate P;
5682     switch (BuiltinID) {
5683     default: llvm_unreachable("missing builtin ID in switch!");
5684     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5685     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5686     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5687     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5688     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5689     }
5690     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5691     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5692     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5693     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5694     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5695   }
5696   case NEON::BI__builtin_neon_vceqd_s64:
5697   case NEON::BI__builtin_neon_vceqd_u64:
5698   case NEON::BI__builtin_neon_vcgtd_s64:
5699   case NEON::BI__builtin_neon_vcgtd_u64:
5700   case NEON::BI__builtin_neon_vcltd_s64:
5701   case NEON::BI__builtin_neon_vcltd_u64:
5702   case NEON::BI__builtin_neon_vcged_u64:
5703   case NEON::BI__builtin_neon_vcged_s64:
5704   case NEON::BI__builtin_neon_vcled_u64:
5705   case NEON::BI__builtin_neon_vcled_s64: {
5706     llvm::CmpInst::Predicate P;
5707     switch (BuiltinID) {
5708     default: llvm_unreachable("missing builtin ID in switch!");
5709     case NEON::BI__builtin_neon_vceqd_s64:
5710     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5711     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5712     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5713     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5714     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5715     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5716     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5717     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5718     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5719     }
5720     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5721     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5722     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5723     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5724     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5725   }
5726   case NEON::BI__builtin_neon_vtstd_s64:
5727   case NEON::BI__builtin_neon_vtstd_u64: {
5728     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5729     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5730     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5731     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5732     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5733                                 llvm::Constant::getNullValue(Int64Ty));
5734     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5735   }
5736   case NEON::BI__builtin_neon_vset_lane_i8:
5737   case NEON::BI__builtin_neon_vset_lane_i16:
5738   case NEON::BI__builtin_neon_vset_lane_i32:
5739   case NEON::BI__builtin_neon_vset_lane_i64:
5740   case NEON::BI__builtin_neon_vset_lane_f32:
5741   case NEON::BI__builtin_neon_vsetq_lane_i8:
5742   case NEON::BI__builtin_neon_vsetq_lane_i16:
5743   case NEON::BI__builtin_neon_vsetq_lane_i32:
5744   case NEON::BI__builtin_neon_vsetq_lane_i64:
5745   case NEON::BI__builtin_neon_vsetq_lane_f32:
5746     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5747     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5748   case NEON::BI__builtin_neon_vset_lane_f64:
5749     // The vector type needs a cast for the v1f64 variant.
5750     Ops[1] = Builder.CreateBitCast(Ops[1],
5751                                    llvm::VectorType::get(DoubleTy, 1));
5752     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5753     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5754   case NEON::BI__builtin_neon_vsetq_lane_f64:
5755     // The vector type needs a cast for the v2f64 variant.
5756     Ops[1] = Builder.CreateBitCast(Ops[1],
5757         llvm::VectorType::get(DoubleTy, 2));
5758     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5759     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5760
5761   case NEON::BI__builtin_neon_vget_lane_i8:
5762   case NEON::BI__builtin_neon_vdupb_lane_i8:
5763     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5764     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5765                                         "vget_lane");
5766   case NEON::BI__builtin_neon_vgetq_lane_i8:
5767   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5768     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5769     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5770                                         "vgetq_lane");
5771   case NEON::BI__builtin_neon_vget_lane_i16:
5772   case NEON::BI__builtin_neon_vduph_lane_i16:
5773     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5774     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5775                                         "vget_lane");
5776   case NEON::BI__builtin_neon_vgetq_lane_i16:
5777   case NEON::BI__builtin_neon_vduph_laneq_i16:
5778     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5779     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5780                                         "vgetq_lane");
5781   case NEON::BI__builtin_neon_vget_lane_i32:
5782   case NEON::BI__builtin_neon_vdups_lane_i32:
5783     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5784     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5785                                         "vget_lane");
5786   case NEON::BI__builtin_neon_vdups_lane_f32:
5787     Ops[0] = Builder.CreateBitCast(Ops[0],
5788         llvm::VectorType::get(FloatTy, 2));
5789     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5790                                         "vdups_lane");
5791   case NEON::BI__builtin_neon_vgetq_lane_i32:
5792   case NEON::BI__builtin_neon_vdups_laneq_i32:
5793     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5794     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5795                                         "vgetq_lane");
5796   case NEON::BI__builtin_neon_vget_lane_i64:
5797   case NEON::BI__builtin_neon_vdupd_lane_i64:
5798     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5799     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5800                                         "vget_lane");
5801   case NEON::BI__builtin_neon_vdupd_lane_f64:
5802     Ops[0] = Builder.CreateBitCast(Ops[0],
5803         llvm::VectorType::get(DoubleTy, 1));
5804     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5805                                         "vdupd_lane");
5806   case NEON::BI__builtin_neon_vgetq_lane_i64:
5807   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5808     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5809     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5810                                         "vgetq_lane");
5811   case NEON::BI__builtin_neon_vget_lane_f32:
5812     Ops[0] = Builder.CreateBitCast(Ops[0],
5813         llvm::VectorType::get(FloatTy, 2));
5814     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5815                                         "vget_lane");
5816   case NEON::BI__builtin_neon_vget_lane_f64:
5817     Ops[0] = Builder.CreateBitCast(Ops[0],
5818         llvm::VectorType::get(DoubleTy, 1));
5819     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5820                                         "vget_lane");
5821   case NEON::BI__builtin_neon_vgetq_lane_f32:
5822   case NEON::BI__builtin_neon_vdups_laneq_f32:
5823     Ops[0] = Builder.CreateBitCast(Ops[0],
5824         llvm::VectorType::get(FloatTy, 4));
5825     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5826                                         "vgetq_lane");
5827   case NEON::BI__builtin_neon_vgetq_lane_f64:
5828   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5829     Ops[0] = Builder.CreateBitCast(Ops[0],
5830         llvm::VectorType::get(DoubleTy, 2));
5831     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5832                                         "vgetq_lane");
5833   case NEON::BI__builtin_neon_vaddd_s64:
5834   case NEON::BI__builtin_neon_vaddd_u64:
5835     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5836   case NEON::BI__builtin_neon_vsubd_s64:
5837   case NEON::BI__builtin_neon_vsubd_u64:
5838     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5839   case NEON::BI__builtin_neon_vqdmlalh_s16:
5840   case NEON::BI__builtin_neon_vqdmlslh_s16: {
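    // Widen both i16 operands into <4 x i16> vectors so the vector sqdmull
    // intrinsic can be used; lane 0 of the product is then extracted for the
    // scalar saturating accumulate.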
5841     SmallVector<Value *, 2> ProductOps;
5842     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5843     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5844     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5845     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5846                           ProductOps, "vqdmlXl");
5847     Constant *CI = ConstantInt::get(SizeTy, 0);
5848     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5849
5850     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5851                                         ? Intrinsic::aarch64_neon_sqadd
5852                                         : Intrinsic::aarch64_neon_sqsub;
5853     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5854   }
5855   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5856     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5857     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5858     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5859                         Ops, "vqshlu_n");
5860   }
5861   case NEON::BI__builtin_neon_vqshld_n_u64:
5862   case NEON::BI__builtin_neon_vqshld_n_s64: {
5863     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5864                                    ? Intrinsic::aarch64_neon_uqshl
5865                                    : Intrinsic::aarch64_neon_sqshl;
5866     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5867     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5868     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5869   }
5870   case NEON::BI__builtin_neon_vrshrd_n_u64:
5871   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5872     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5873                                    ? Intrinsic::aarch64_neon_urshl
5874                                    : Intrinsic::aarch64_neon_srshl;
5875     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5876     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5877     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5878     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5879   }
5880   case NEON::BI__builtin_neon_vrsrad_n_u64:
5881   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5882     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5883                                    ? Intrinsic::aarch64_neon_urshl
5884                                    : Intrinsic::aarch64_neon_srshl;
5885     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5886     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5887     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5888                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5889     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5890   }
5891   case NEON::BI__builtin_neon_vshld_n_s64:
5892   case NEON::BI__builtin_neon_vshld_n_u64: {
5893     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5894     return Builder.CreateShl(
5895         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5896   }
5897   case NEON::BI__builtin_neon_vshrd_n_s64: {
5898     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5899     return Builder.CreateAShr(
5900         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5901                                                    Amt->getZExtValue())),
5902         "shrd_n");
5903   }
5904   case NEON::BI__builtin_neon_vshrd_n_u64: {
5905     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5906     uint64_t ShiftAmt = Amt->getZExtValue();
5907     // Right-shifting an unsigned value by its size yields 0.
5908     if (ShiftAmt == 64)
5909       return ConstantInt::get(Int64Ty, 0);
5910     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5911                               "shrd_n");
5912   }
5913   case NEON::BI__builtin_neon_vsrad_n_s64: {
5914     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5915     Ops[1] = Builder.CreateAShr(
5916         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5917                                                    Amt->getZExtValue())),
5918         "shrd_n");
5919     return Builder.CreateAdd(Ops[0], Ops[1]);
5920   }
5921   case NEON::BI__builtin_neon_vsrad_n_u64: {
5922     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5923     uint64_t ShiftAmt = Amt->getZExtValue();
5924     // Right-shifting an unsigned value by its size yields 0.
5925     // As Op + 0 = Op, return Ops[0] directly.
5926     if (ShiftAmt == 64)
5927       return Ops[0];
5928     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5929                                 "shrd_n");
5930     return Builder.CreateAdd(Ops[0], Ops[1]);
5931   }
5932   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5933   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5934   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5935   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5936     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5937                                           "lane");
5938     SmallVector<Value *, 2> ProductOps;
5939     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5940     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5941     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5942     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5943                           ProductOps, "vqdmlXl");
5944     Constant *CI = ConstantInt::get(SizeTy, 0);
5945     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5946     Ops.pop_back();
5947
5948     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5949                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5950                           ? Intrinsic::aarch64_neon_sqadd
5951                           : Intrinsic::aarch64_neon_sqsub;
5952     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5953   }
5954   case NEON::BI__builtin_neon_vqdmlals_s32:
5955   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5956     SmallVector<Value *, 2> ProductOps;
5957     ProductOps.push_back(Ops[1]);
5958     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5959     Ops[1] =
5960         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5961                      ProductOps, "vqdmlXl");
5962
5963     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5964                                         ? Intrinsic::aarch64_neon_sqadd
5965                                         : Intrinsic::aarch64_neon_sqsub;
5966     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5967   }
5968   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5969   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5970   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5971   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5972     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5973                                           "lane");
5974     SmallVector<Value *, 2> ProductOps;
5975     ProductOps.push_back(Ops[1]);
5976     ProductOps.push_back(Ops[2]);
5977     Ops[1] =
5978         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5979                      ProductOps, "vqdmlXl");
5980     Ops.pop_back();
5981
5982     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5983                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5984                           ? Intrinsic::aarch64_neon_sqadd
5985                           : Intrinsic::aarch64_neon_sqsub;
5986     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5987   }
5988   }
5989
5990   llvm::VectorType *VTy = GetNeonType(this, Type);
5991   llvm::Type *Ty = VTy;
5992   if (!Ty)
5993     return nullptr;
5994
5995   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5996   // defer to common code if it's been added to our special map.
5997   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5998                                    AArch64SIMDIntrinsicsProvenSorted);
5999
6000   if (Builtin)
6001     return EmitCommonNeonBuiltinExpr(
6002         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6003         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6004         /*never use addresses*/ Address::invalid(), Address::invalid());
6005
6006   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6007     return V;
6008
6009   unsigned Int;
6010   switch (BuiltinID) {
6011   default: return nullptr;
6012   case NEON::BI__builtin_neon_vbsl_v:
6013   case NEON::BI__builtin_neon_vbslq_v: {
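         // Expand the bitwise select directly on the integer form of the
         // vectors: (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]).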
6014     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6015     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6016     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6017     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6018
6019     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6020     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6021     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6022     return Builder.CreateBitCast(Ops[0], Ty);
6023   }
6024   case NEON::BI__builtin_neon_vfma_lane_v:
6025   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6026     // The ARM builtins (and instructions) have the addend as the first
6027     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6028     Value *Addend = Ops[0];
6029     Value *Multiplicand = Ops[1];
6030     Value *LaneSource = Ops[2];
6031     Ops[0] = Multiplicand;
6032     Ops[1] = LaneSource;
6033     Ops[2] = Addend;
6034
6035     // Now adjust things to handle the lane access.
6036     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6037       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6038       VTy;
6039     llvm::Constant *cst = cast<Constant>(Ops[3]);
6040     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6041     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6042     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6043
6044     Ops.pop_back();
6045     Int = Intrinsic::fma;
6046     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6047   }
6048   case NEON::BI__builtin_neon_vfma_laneq_v: {
6049     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6050     // v1f64 fma should be mapped to Neon scalar f64 fma
6051     if (VTy && VTy->getElementType() == DoubleTy) {
6052       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6053       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6054       llvm::Type *VTy = GetNeonType(this,
6055         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6056       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6057       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6058       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6059       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6060       return Builder.CreateBitCast(Result, Ty);
6061     }
6062     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6063     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6064     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6065
6066     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6067                                             VTy->getNumElements() * 2);
6068     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6069     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6070                                                cast<ConstantInt>(Ops[3]));
6071     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6072
6073     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6074   }
6075   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6076     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6077     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6078     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6079
6080     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6081     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6082     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6083   }
6084   case NEON::BI__builtin_neon_vfmas_lane_f32:
6085   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6086   case NEON::BI__builtin_neon_vfmad_lane_f64:
6087   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6088     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6089     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6090     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6091     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6092     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6093   }
6094   case NEON::BI__builtin_neon_vmull_v:
6095     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6096     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6097     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6098     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6099   case NEON::BI__builtin_neon_vmax_v:
6100   case NEON::BI__builtin_neon_vmaxq_v:
6101     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6102     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6103     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6104     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6105   case NEON::BI__builtin_neon_vmin_v:
6106   case NEON::BI__builtin_neon_vminq_v:
6107     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6108     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6109     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6110     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6111   case NEON::BI__builtin_neon_vabd_v:
6112   case NEON::BI__builtin_neon_vabdq_v:
6113     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6114     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6115     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6116     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6117   case NEON::BI__builtin_neon_vpadal_v:
6118   case NEON::BI__builtin_neon_vpadalq_v: {
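         // Lower vpadal as a pairwise long add (uaddlp/saddlp) of Ops[1],
         // followed by an explicit add of the accumulator Ops[0].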
6119     unsigned ArgElts = VTy->getNumElements();
6120     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6121     unsigned BitWidth = EltTy->getBitWidth();
6122     llvm::Type *ArgTy = llvm::VectorType::get(
6123         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6124     llvm::Type* Tys[2] = { VTy, ArgTy };
6125     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6126     SmallVector<llvm::Value*, 1> TmpOps;
6127     TmpOps.push_back(Ops[1]);
6128     Function *F = CGM.getIntrinsic(Int, Tys);
6129     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6130     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6131     return Builder.CreateAdd(tmp, addend);
6132   }
6133   case NEON::BI__builtin_neon_vpmin_v:
6134   case NEON::BI__builtin_neon_vpminq_v:
6135     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6136     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6137     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6138     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6139   case NEON::BI__builtin_neon_vpmax_v:
6140   case NEON::BI__builtin_neon_vpmaxq_v:
6141     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6142     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6143     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6144     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6145   case NEON::BI__builtin_neon_vminnm_v:
6146   case NEON::BI__builtin_neon_vminnmq_v:
6147     Int = Intrinsic::aarch64_neon_fminnm;
6148     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6149   case NEON::BI__builtin_neon_vmaxnm_v:
6150   case NEON::BI__builtin_neon_vmaxnmq_v:
6151     Int = Intrinsic::aarch64_neon_fmaxnm;
6152     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6153   case NEON::BI__builtin_neon_vrecpss_f32: {
6154     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6155     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6156                         Ops, "vrecps");
6157   }
6158   case NEON::BI__builtin_neon_vrecpsd_f64: {
6159     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6160     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6161                         Ops, "vrecps");
6162   }
6163   case NEON::BI__builtin_neon_vqshrun_n_v:
6164     Int = Intrinsic::aarch64_neon_sqshrun;
6165     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6166   case NEON::BI__builtin_neon_vqrshrun_n_v:
6167     Int = Intrinsic::aarch64_neon_sqrshrun;
6168     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6169   case NEON::BI__builtin_neon_vqshrn_n_v:
6170     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6171     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6172   case NEON::BI__builtin_neon_vrshrn_n_v:
6173     Int = Intrinsic::aarch64_neon_rshrn;
6174     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6175   case NEON::BI__builtin_neon_vqrshrn_n_v:
6176     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6177     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6178   case NEON::BI__builtin_neon_vrnda_v:
6179   case NEON::BI__builtin_neon_vrndaq_v: {
6180     Int = Intrinsic::round;
6181     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6182   }
6183   case NEON::BI__builtin_neon_vrndi_v:
6184   case NEON::BI__builtin_neon_vrndiq_v: {
6185     Int = Intrinsic::nearbyint;
6186     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6187   }
6188   case NEON::BI__builtin_neon_vrndm_v:
6189   case NEON::BI__builtin_neon_vrndmq_v: {
6190     Int = Intrinsic::floor;
6191     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6192   }
6193   case NEON::BI__builtin_neon_vrndn_v:
6194   case NEON::BI__builtin_neon_vrndnq_v: {
6195     Int = Intrinsic::aarch64_neon_frintn;
6196     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6197   }
6198   case NEON::BI__builtin_neon_vrndp_v:
6199   case NEON::BI__builtin_neon_vrndpq_v: {
6200     Int = Intrinsic::ceil;
6201     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6202   }
6203   case NEON::BI__builtin_neon_vrndx_v:
6204   case NEON::BI__builtin_neon_vrndxq_v: {
6205     Int = Intrinsic::rint;
6206     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6207   }
6208   case NEON::BI__builtin_neon_vrnd_v:
6209   case NEON::BI__builtin_neon_vrndq_v: {
6210     Int = Intrinsic::trunc;
6211     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6212   }
6213   case NEON::BI__builtin_neon_vceqz_v:
6214   case NEON::BI__builtin_neon_vceqzq_v:
6215     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6216                                          ICmpInst::ICMP_EQ, "vceqz");
6217   case NEON::BI__builtin_neon_vcgez_v:
6218   case NEON::BI__builtin_neon_vcgezq_v:
6219     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6220                                          ICmpInst::ICMP_SGE, "vcgez");
6221   case NEON::BI__builtin_neon_vclez_v:
6222   case NEON::BI__builtin_neon_vclezq_v:
6223     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6224                                          ICmpInst::ICMP_SLE, "vclez");
6225   case NEON::BI__builtin_neon_vcgtz_v:
6226   case NEON::BI__builtin_neon_vcgtzq_v:
6227     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6228                                          ICmpInst::ICMP_SGT, "vcgtz");
6229   case NEON::BI__builtin_neon_vcltz_v:
6230   case NEON::BI__builtin_neon_vcltzq_v:
6231     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6232                                          ICmpInst::ICMP_SLT, "vcltz");
6233   case NEON::BI__builtin_neon_vcvt_f64_v:
6234   case NEON::BI__builtin_neon_vcvtq_f64_v:
6235     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6236     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6237     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6238                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6239   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6240     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6241            "unexpected vcvt_f64_f32 builtin");
6242     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6243     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6244
6245     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6246   }
6247   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6248     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6249            "unexpected vcvt_f32_f64 builtin");
6250     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6251     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6252
6253     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6254   }
6255   case NEON::BI__builtin_neon_vcvt_s32_v:
6256   case NEON::BI__builtin_neon_vcvt_u32_v:
6257   case NEON::BI__builtin_neon_vcvt_s64_v:
6258   case NEON::BI__builtin_neon_vcvt_u64_v:
6259   case NEON::BI__builtin_neon_vcvtq_s32_v:
6260   case NEON::BI__builtin_neon_vcvtq_u32_v:
6261   case NEON::BI__builtin_neon_vcvtq_s64_v:
6262   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6263     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6264     if (usgn)
6265       return Builder.CreateFPToUI(Ops[0], Ty);
6266     return Builder.CreateFPToSI(Ops[0], Ty);
6267   }
6268   case NEON::BI__builtin_neon_vcvta_s32_v:
6269   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6270   case NEON::BI__builtin_neon_vcvta_u32_v:
6271   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6272   case NEON::BI__builtin_neon_vcvta_s64_v:
6273   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6274   case NEON::BI__builtin_neon_vcvta_u64_v:
6275   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6276     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6277     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6278     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6279   }
6280   case NEON::BI__builtin_neon_vcvtm_s32_v:
6281   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6282   case NEON::BI__builtin_neon_vcvtm_u32_v:
6283   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6284   case NEON::BI__builtin_neon_vcvtm_s64_v:
6285   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6286   case NEON::BI__builtin_neon_vcvtm_u64_v:
6287   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6288     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6289     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6290     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6291   }
6292   case NEON::BI__builtin_neon_vcvtn_s32_v:
6293   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6294   case NEON::BI__builtin_neon_vcvtn_u32_v:
6295   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6296   case NEON::BI__builtin_neon_vcvtn_s64_v:
6297   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6298   case NEON::BI__builtin_neon_vcvtn_u64_v:
6299   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6300     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6301     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6302     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6303   }
6304   case NEON::BI__builtin_neon_vcvtp_s32_v:
6305   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6306   case NEON::BI__builtin_neon_vcvtp_u32_v:
6307   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6308   case NEON::BI__builtin_neon_vcvtp_s64_v:
6309   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6310   case NEON::BI__builtin_neon_vcvtp_u64_v:
6311   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6312     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6313     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6314     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6315   }
6316   case NEON::BI__builtin_neon_vmulx_v:
6317   case NEON::BI__builtin_neon_vmulxq_v: {
6318     Int = Intrinsic::aarch64_neon_fmulx;
6319     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6320   }
6321   case NEON::BI__builtin_neon_vmul_lane_v:
6322   case NEON::BI__builtin_neon_vmul_laneq_v: {
6323     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6324     bool Quad = false;
6325     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6326       Quad = true;
6327     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6328     llvm::Type *VTy = GetNeonType(this,
6329       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6330     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6331     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6332     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6333     return Builder.CreateBitCast(Result, Ty);
6334   }
6335   case NEON::BI__builtin_neon_vnegd_s64:
6336     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6337   case NEON::BI__builtin_neon_vpmaxnm_v:
6338   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6339     Int = Intrinsic::aarch64_neon_fmaxnmp;
6340     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6341   }
6342   case NEON::BI__builtin_neon_vpminnm_v:
6343   case NEON::BI__builtin_neon_vpminnmq_v: {
6344     Int = Intrinsic::aarch64_neon_fminnmp;
6345     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6346   }
6347   case NEON::BI__builtin_neon_vsqrt_v:
6348   case NEON::BI__builtin_neon_vsqrtq_v: {
6349     Int = Intrinsic::sqrt;
6350     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6351     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6352   }
6353   case NEON::BI__builtin_neon_vrbit_v:
6354   case NEON::BI__builtin_neon_vrbitq_v: {
6355     Int = Intrinsic::aarch64_neon_rbit;
6356     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6357   }
6358   case NEON::BI__builtin_neon_vaddv_u8:
6359     // FIXME: These are handled by the AArch64 scalar code.
6360     usgn = true;
6361     // FALLTHROUGH
6362   case NEON::BI__builtin_neon_vaddv_s8: {
6363     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6364     Ty = Int32Ty;
6365     VTy = llvm::VectorType::get(Int8Ty, 8);
6366     llvm::Type *Tys[2] = { Ty, VTy };
6367     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6368     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6369     return Builder.CreateTrunc(Ops[0], Int8Ty);
6370   }
6371   case NEON::BI__builtin_neon_vaddv_u16:
6372     usgn = true;
6373     // FALLTHROUGH
6374   case NEON::BI__builtin_neon_vaddv_s16: {
6375     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6376     Ty = Int32Ty;
6377     VTy = llvm::VectorType::get(Int16Ty, 4);
6378     llvm::Type *Tys[2] = { Ty, VTy };
6379     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6380     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6381     return Builder.CreateTrunc(Ops[0], Int16Ty);
6382   }
6383   case NEON::BI__builtin_neon_vaddvq_u8:
6384     usgn = true;
6385     // FALLTHROUGH
6386   case NEON::BI__builtin_neon_vaddvq_s8: {
6387     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6388     Ty = Int32Ty;
6389     VTy = llvm::VectorType::get(Int8Ty, 16);
6390     llvm::Type *Tys[2] = { Ty, VTy };
6391     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6392     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6393     return Builder.CreateTrunc(Ops[0], Int8Ty);
6394   }
6395   case NEON::BI__builtin_neon_vaddvq_u16:
6396     usgn = true;
6397     // FALLTHROUGH
6398   case NEON::BI__builtin_neon_vaddvq_s16: {
6399     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6400     Ty = Int32Ty;
6401     VTy = llvm::VectorType::get(Int16Ty, 8);
6402     llvm::Type *Tys[2] = { Ty, VTy };
6403     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6404     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6405     return Builder.CreateTrunc(Ops[0], Int16Ty);
6406   }
6407   case NEON::BI__builtin_neon_vmaxv_u8: {
6408     Int = Intrinsic::aarch64_neon_umaxv;
6409     Ty = Int32Ty;
6410     VTy = llvm::VectorType::get(Int8Ty, 8);
6411     llvm::Type *Tys[2] = { Ty, VTy };
6412     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6413     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6414     return Builder.CreateTrunc(Ops[0], Int8Ty);
6415   }
6416   case NEON::BI__builtin_neon_vmaxv_u16: {
6417     Int = Intrinsic::aarch64_neon_umaxv;
6418     Ty = Int32Ty;
6419     VTy = llvm::VectorType::get(Int16Ty, 4);
6420     llvm::Type *Tys[2] = { Ty, VTy };
6421     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6422     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6423     return Builder.CreateTrunc(Ops[0], Int16Ty);
6424   }
6425   case NEON::BI__builtin_neon_vmaxvq_u8: {
6426     Int = Intrinsic::aarch64_neon_umaxv;
6427     Ty = Int32Ty;
6428     VTy = llvm::VectorType::get(Int8Ty, 16);
6429     llvm::Type *Tys[2] = { Ty, VTy };
6430     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6431     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6432     return Builder.CreateTrunc(Ops[0], Int8Ty);
6433   }
6434   case NEON::BI__builtin_neon_vmaxvq_u16: {
6435     Int = Intrinsic::aarch64_neon_umaxv;
6436     Ty = Int32Ty;
6437     VTy = llvm::VectorType::get(Int16Ty, 8);
6438     llvm::Type *Tys[2] = { Ty, VTy };
6439     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6440     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6441     return Builder.CreateTrunc(Ops[0], Int16Ty);
6442   }
6443   case NEON::BI__builtin_neon_vmaxv_s8: {
6444     Int = Intrinsic::aarch64_neon_smaxv;
6445     Ty = Int32Ty;
6446     VTy = llvm::VectorType::get(Int8Ty, 8);
6447     llvm::Type *Tys[2] = { Ty, VTy };
6448     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6449     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6450     return Builder.CreateTrunc(Ops[0], Int8Ty);
6451   }
6452   case NEON::BI__builtin_neon_vmaxv_s16: {
6453     Int = Intrinsic::aarch64_neon_smaxv;
6454     Ty = Int32Ty;
6455     VTy = llvm::VectorType::get(Int16Ty, 4);
6456     llvm::Type *Tys[2] = { Ty, VTy };
6457     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6458     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6459     return Builder.CreateTrunc(Ops[0], Int16Ty);
6460   }
6461   case NEON::BI__builtin_neon_vmaxvq_s8: {
6462     Int = Intrinsic::aarch64_neon_smaxv;
6463     Ty = Int32Ty;
6464     VTy = llvm::VectorType::get(Int8Ty, 16);
6465     llvm::Type *Tys[2] = { Ty, VTy };
6466     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6467     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6468     return Builder.CreateTrunc(Ops[0], Int8Ty);
6469   }
6470   case NEON::BI__builtin_neon_vmaxvq_s16: {
6471     Int = Intrinsic::aarch64_neon_smaxv;
6472     Ty = Int32Ty;
6473     VTy = llvm::VectorType::get(Int16Ty, 8);
6474     llvm::Type *Tys[2] = { Ty, VTy };
6475     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6476     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6477     return Builder.CreateTrunc(Ops[0], Int16Ty);
6478   }
6479   case NEON::BI__builtin_neon_vminv_u8: {
6480     Int = Intrinsic::aarch64_neon_uminv;
6481     Ty = Int32Ty;
6482     VTy = llvm::VectorType::get(Int8Ty, 8);
6483     llvm::Type *Tys[2] = { Ty, VTy };
6484     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6485     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6486     return Builder.CreateTrunc(Ops[0], Int8Ty);
6487   }
6488   case NEON::BI__builtin_neon_vminv_u16: {
6489     Int = Intrinsic::aarch64_neon_uminv;
6490     Ty = Int32Ty;
6491     VTy = llvm::VectorType::get(Int16Ty, 4);
6492     llvm::Type *Tys[2] = { Ty, VTy };
6493     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6494     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6495     return Builder.CreateTrunc(Ops[0], Int16Ty);
6496   }
6497   case NEON::BI__builtin_neon_vminvq_u8: {
6498     Int = Intrinsic::aarch64_neon_uminv;
6499     Ty = Int32Ty;
6500     VTy = llvm::VectorType::get(Int8Ty, 16);
6501     llvm::Type *Tys[2] = { Ty, VTy };
6502     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6503     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6504     return Builder.CreateTrunc(Ops[0], Int8Ty);
6505   }
6506   case NEON::BI__builtin_neon_vminvq_u16: {
6507     Int = Intrinsic::aarch64_neon_uminv;
6508     Ty = Int32Ty;
6509     VTy = llvm::VectorType::get(Int16Ty, 8);
6510     llvm::Type *Tys[2] = { Ty, VTy };
6511     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6512     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6513     return Builder.CreateTrunc(Ops[0], Int16Ty);
6514   }
6515   case NEON::BI__builtin_neon_vminv_s8: {
6516     Int = Intrinsic::aarch64_neon_sminv;
6517     Ty = Int32Ty;
6518     VTy = llvm::VectorType::get(Int8Ty, 8);
6519     llvm::Type *Tys[2] = { Ty, VTy };
6520     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6521     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6522     return Builder.CreateTrunc(Ops[0], Int8Ty);
6523   }
6524   case NEON::BI__builtin_neon_vminv_s16: {
6525     Int = Intrinsic::aarch64_neon_sminv;
6526     Ty = Int32Ty;
6527     VTy = llvm::VectorType::get(Int16Ty, 4);
6528     llvm::Type *Tys[2] = { Ty, VTy };
6529     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6530     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6531     return Builder.CreateTrunc(Ops[0], Int16Ty);
6532   }
6533   case NEON::BI__builtin_neon_vminvq_s8: {
6534     Int = Intrinsic::aarch64_neon_sminv;
6535     Ty = Int32Ty;
6536     VTy = llvm::VectorType::get(Int8Ty, 16);
6537     llvm::Type *Tys[2] = { Ty, VTy };
6538     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6539     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6540     return Builder.CreateTrunc(Ops[0], Int8Ty);
6541   }
6542   case NEON::BI__builtin_neon_vminvq_s16: {
6543     Int = Intrinsic::aarch64_neon_sminv;
6544     Ty = Int32Ty;
6545     VTy = llvm::VectorType::get(Int16Ty, 8);
6546     llvm::Type *Tys[2] = { Ty, VTy };
6547     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6548     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6549     return Builder.CreateTrunc(Ops[0], Int16Ty);
6550   }
6551   case NEON::BI__builtin_neon_vmul_n_f64: {
6552     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6553     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6554     return Builder.CreateFMul(Ops[0], RHS);
6555   }
6556   case NEON::BI__builtin_neon_vaddlv_u8: {
6557     Int = Intrinsic::aarch64_neon_uaddlv;
6558     Ty = Int32Ty;
6559     VTy = llvm::VectorType::get(Int8Ty, 8);
6560     llvm::Type *Tys[2] = { Ty, VTy };
6561     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6562     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6563     return Builder.CreateTrunc(Ops[0], Int16Ty);
6564   }
6565   case NEON::BI__builtin_neon_vaddlv_u16: {
6566     Int = Intrinsic::aarch64_neon_uaddlv;
6567     Ty = Int32Ty;
6568     VTy = llvm::VectorType::get(Int16Ty, 4);
6569     llvm::Type *Tys[2] = { Ty, VTy };
6570     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6571     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6572   }
6573   case NEON::BI__builtin_neon_vaddlvq_u8: {
6574     Int = Intrinsic::aarch64_neon_uaddlv;
6575     Ty = Int32Ty;
6576     VTy = llvm::VectorType::get(Int8Ty, 16);
6577     llvm::Type *Tys[2] = { Ty, VTy };
6578     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6579     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6580     return Builder.CreateTrunc(Ops[0], Int16Ty);
6581   }
6582   case NEON::BI__builtin_neon_vaddlvq_u16: {
6583     Int = Intrinsic::aarch64_neon_uaddlv;
6584     Ty = Int32Ty;
6585     VTy = llvm::VectorType::get(Int16Ty, 8);
6586     llvm::Type *Tys[2] = { Ty, VTy };
6587     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6588     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6589   }
6590   case NEON::BI__builtin_neon_vaddlv_s8: {
6591     Int = Intrinsic::aarch64_neon_saddlv;
6592     Ty = Int32Ty;
6593     VTy = llvm::VectorType::get(Int8Ty, 8);
6594     llvm::Type *Tys[2] = { Ty, VTy };
6595     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6596     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6597     return Builder.CreateTrunc(Ops[0], Int16Ty);
6598   }
6599   case NEON::BI__builtin_neon_vaddlv_s16: {
6600     Int = Intrinsic::aarch64_neon_saddlv;
6601     Ty = Int32Ty;
6602     VTy = llvm::VectorType::get(Int16Ty, 4);
6603     llvm::Type *Tys[2] = { Ty, VTy };
6604     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6605     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6606   }
6607   case NEON::BI__builtin_neon_vaddlvq_s8: {
6608     Int = Intrinsic::aarch64_neon_saddlv;
6609     Ty = Int32Ty;
6610     VTy = llvm::VectorType::get(Int8Ty, 16);
6611     llvm::Type *Tys[2] = { Ty, VTy };
6612     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6613     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6614     return Builder.CreateTrunc(Ops[0], Int16Ty);
6615   }
6616   case NEON::BI__builtin_neon_vaddlvq_s16: {
6617     Int = Intrinsic::aarch64_neon_saddlv;
6618     Ty = Int32Ty;
6619     VTy = llvm::VectorType::get(Int16Ty, 8);
6620     llvm::Type *Tys[2] = { Ty, VTy };
6621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6622     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6623   }
6624   case NEON::BI__builtin_neon_vsri_n_v:
6625   case NEON::BI__builtin_neon_vsriq_n_v: {
6626     Int = Intrinsic::aarch64_neon_vsri;
6627     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6628     return EmitNeonCall(Intrin, Ops, "vsri_n");
6629   }
6630   case NEON::BI__builtin_neon_vsli_n_v:
6631   case NEON::BI__builtin_neon_vsliq_n_v: {
6632     Int = Intrinsic::aarch64_neon_vsli;
6633     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6634     return EmitNeonCall(Intrin, Ops, "vsli_n");
6635   }
6636   case NEON::BI__builtin_neon_vsra_n_v:
6637   case NEON::BI__builtin_neon_vsraq_n_v:
6638     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6639     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6640     return Builder.CreateAdd(Ops[0], Ops[1]);
6641   case NEON::BI__builtin_neon_vrsra_n_v:
6642   case NEON::BI__builtin_neon_vrsraq_n_v: {
6643     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6644     SmallVector<llvm::Value*,2> TmpOps;
6645     TmpOps.push_back(Ops[1]);
6646     TmpOps.push_back(Ops[2]);
6647     Function* F = CGM.getIntrinsic(Int, Ty);
6648     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6649     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6650     return Builder.CreateAdd(Ops[0], tmp);
6651   }
6652     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6653     // of an Align parameter here.
6654   case NEON::BI__builtin_neon_vld1_x2_v:
6655   case NEON::BI__builtin_neon_vld1q_x2_v:
6656   case NEON::BI__builtin_neon_vld1_x3_v:
6657   case NEON::BI__builtin_neon_vld1q_x3_v:
6658   case NEON::BI__builtin_neon_vld1_x4_v:
6659   case NEON::BI__builtin_neon_vld1q_x4_v: {
6660     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6661     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6662     llvm::Type *Tys[2] = { VTy, PTy };
6663     unsigned Int;
6664     switch (BuiltinID) {
6665     case NEON::BI__builtin_neon_vld1_x2_v:
6666     case NEON::BI__builtin_neon_vld1q_x2_v:
6667       Int = Intrinsic::aarch64_neon_ld1x2;
6668       break;
6669     case NEON::BI__builtin_neon_vld1_x3_v:
6670     case NEON::BI__builtin_neon_vld1q_x3_v:
6671       Int = Intrinsic::aarch64_neon_ld1x3;
6672       break;
6673     case NEON::BI__builtin_neon_vld1_x4_v:
6674     case NEON::BI__builtin_neon_vld1q_x4_v:
6675       Int = Intrinsic::aarch64_neon_ld1x4;
6676       break;
6677     }
6678     Function *F = CGM.getIntrinsic(Int, Tys);
6679     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6680     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6681     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6682     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6683   }
6684   case NEON::BI__builtin_neon_vst1_x2_v:
6685   case NEON::BI__builtin_neon_vst1q_x2_v:
6686   case NEON::BI__builtin_neon_vst1_x3_v:
6687   case NEON::BI__builtin_neon_vst1q_x3_v:
6688   case NEON::BI__builtin_neon_vst1_x4_v:
6689   case NEON::BI__builtin_neon_vst1q_x4_v: {
6690     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6691     llvm::Type *Tys[2] = { VTy, PTy };
6692     unsigned Int;
6693     switch (BuiltinID) {
6694     case NEON::BI__builtin_neon_vst1_x2_v:
6695     case NEON::BI__builtin_neon_vst1q_x2_v:
6696       Int = Intrinsic::aarch64_neon_st1x2;
6697       break;
6698     case NEON::BI__builtin_neon_vst1_x3_v:
6699     case NEON::BI__builtin_neon_vst1q_x3_v:
6700       Int = Intrinsic::aarch64_neon_st1x3;
6701       break;
6702     case NEON::BI__builtin_neon_vst1_x4_v:
6703     case NEON::BI__builtin_neon_vst1q_x4_v:
6704       Int = Intrinsic::aarch64_neon_st1x4;
6705       break;
6706     }
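         // The builtin takes the base pointer first, but the st1xN intrinsics
         // expect it as the last operand; rotate it to the end.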
6707     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6708     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6709   }
6710   case NEON::BI__builtin_neon_vld1_v:
6711   case NEON::BI__builtin_neon_vld1q_v: {
6712     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6713     auto Alignment = CharUnits::fromQuantity(
6714         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6715     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6716   }
6717   case NEON::BI__builtin_neon_vst1_v:
6718   case NEON::BI__builtin_neon_vst1q_v:
6719     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6720     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6721     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6722   case NEON::BI__builtin_neon_vld1_lane_v:
6723   case NEON::BI__builtin_neon_vld1q_lane_v: {
6724     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6725     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6726     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6727     auto Alignment = CharUnits::fromQuantity(
6728         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6729     Ops[0] =
6730         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6731     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6732   }
6733   case NEON::BI__builtin_neon_vld1_dup_v:
6734   case NEON::BI__builtin_neon_vld1q_dup_v: {
6735     Value *V = UndefValue::get(Ty);
6736     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6737     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6738     auto Alignment = CharUnits::fromQuantity(
6739         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6740     Ops[0] =
6741         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6742     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6743     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6744     return EmitNeonSplat(Ops[0], CI);
6745   }
6746   case NEON::BI__builtin_neon_vst1_lane_v:
6747   case NEON::BI__builtin_neon_vst1q_lane_v:
6748     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6749     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6750     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6751     return Builder.CreateDefaultAlignedStore(Ops[1],
6752                                              Builder.CreateBitCast(Ops[0], Ty));
6753   case NEON::BI__builtin_neon_vld2_v:
6754   case NEON::BI__builtin_neon_vld2q_v: {
6755     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6756     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6757     llvm::Type *Tys[2] = { VTy, PTy };
6758     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6759     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6760     Ops[0] = Builder.CreateBitCast(Ops[0],
6761                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6762     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6763   }
6764   case NEON::BI__builtin_neon_vld3_v:
6765   case NEON::BI__builtin_neon_vld3q_v: {
6766     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6767     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6768     llvm::Type *Tys[2] = { VTy, PTy };
6769     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6770     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6771     Ops[0] = Builder.CreateBitCast(Ops[0],
6772                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6773     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6774   }
6775   case NEON::BI__builtin_neon_vld4_v:
6776   case NEON::BI__builtin_neon_vld4q_v: {
6777     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6778     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6779     llvm::Type *Tys[2] = { VTy, PTy };
6780     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6781     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6782     Ops[0] = Builder.CreateBitCast(Ops[0],
6783                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6784     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6785   }
6786   case NEON::BI__builtin_neon_vld2_dup_v:
6787   case NEON::BI__builtin_neon_vld2q_dup_v: {
6788     llvm::Type *PTy =
6789       llvm::PointerType::getUnqual(VTy->getElementType());
6790     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6791     llvm::Type *Tys[2] = { VTy, PTy };
6792     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6793     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6794     Ops[0] = Builder.CreateBitCast(Ops[0],
6795                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6796     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6797   }
6798   case NEON::BI__builtin_neon_vld3_dup_v:
6799   case NEON::BI__builtin_neon_vld3q_dup_v: {
6800     llvm::Type *PTy =
6801       llvm::PointerType::getUnqual(VTy->getElementType());
6802     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6803     llvm::Type *Tys[2] = { VTy, PTy };
6804     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6805     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6806     Ops[0] = Builder.CreateBitCast(Ops[0],
6807                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6808     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6809   }
6810   case NEON::BI__builtin_neon_vld4_dup_v:
6811   case NEON::BI__builtin_neon_vld4q_dup_v: {
6812     llvm::Type *PTy =
6813       llvm::PointerType::getUnqual(VTy->getElementType());
6814     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6815     llvm::Type *Tys[2] = { VTy, PTy };
6816     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6817     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6818     Ops[0] = Builder.CreateBitCast(Ops[0],
6819                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6820     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6821   }
6822   case NEON::BI__builtin_neon_vld2_lane_v:
6823   case NEON::BI__builtin_neon_vld2q_lane_v: {
6824     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6825     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
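         // Move the source pointer (Ops[1]) to the end of the operand list:
         // ld2lane takes the input vectors and the lane index first and the
         // pointer last.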
6826     Ops.push_back(Ops[1]);
6827     Ops.erase(Ops.begin()+1);
6828     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6829     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6830     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6831     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6832     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6833     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6834     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6835   }
6836   case NEON::BI__builtin_neon_vld3_lane_v:
6837   case NEON::BI__builtin_neon_vld3q_lane_v: {
6838     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6839     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6840     Ops.push_back(Ops[1]);
6841     Ops.erase(Ops.begin()+1);
6842     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6843     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6844     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6845     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6846     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6847     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6848     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6849     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6850   }
6851   case NEON::BI__builtin_neon_vld4_lane_v:
6852   case NEON::BI__builtin_neon_vld4q_lane_v: {
6853     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6854     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6855     Ops.push_back(Ops[1]);
6856     Ops.erase(Ops.begin()+1);
6857     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6858     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6859     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6860     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6861     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6862     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6863     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6864     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6865     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6866   }
6867   case NEON::BI__builtin_neon_vst2_v:
6868   case NEON::BI__builtin_neon_vst2q_v: {
6869     Ops.push_back(Ops[0]);
6870     Ops.erase(Ops.begin());
6871     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6872     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6873                         Ops, "");
6874   }
6875   case NEON::BI__builtin_neon_vst2_lane_v:
6876   case NEON::BI__builtin_neon_vst2q_lane_v: {
6877     Ops.push_back(Ops[0]);
6878     Ops.erase(Ops.begin());
6879     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6880     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6881     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6882                         Ops, "");
6883   }
6884   case NEON::BI__builtin_neon_vst3_v:
6885   case NEON::BI__builtin_neon_vst3q_v: {
6886     Ops.push_back(Ops[0]);
6887     Ops.erase(Ops.begin());
6888     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6889     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6890                         Ops, "");
6891   }
6892   case NEON::BI__builtin_neon_vst3_lane_v:
6893   case NEON::BI__builtin_neon_vst3q_lane_v: {
6894     Ops.push_back(Ops[0]);
6895     Ops.erase(Ops.begin());
6896     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6897     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6898     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6899                         Ops, "");
6900   }
6901   case NEON::BI__builtin_neon_vst4_v:
6902   case NEON::BI__builtin_neon_vst4q_v: {
6903     Ops.push_back(Ops[0]);
6904     Ops.erase(Ops.begin());
6905     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6906     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6907                         Ops, "");
6908   }
6909   case NEON::BI__builtin_neon_vst4_lane_v:
6910   case NEON::BI__builtin_neon_vst4q_lane_v: {
6911     Ops.push_back(Ops[0]);
6912     Ops.erase(Ops.begin());
6913     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6914     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6916                         Ops, "");
6917   }
6918   case NEON::BI__builtin_neon_vtrn_v:
6919   case NEON::BI__builtin_neon_vtrnq_v: {
6920     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6921     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6922     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6923     Value *SV = nullptr;
6924
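         // Each pass builds one transpose result (TRN1, then TRN2) by taking the
         // even (vi == 0) or odd (vi == 1) lanes of both inputs, interleaved, and
         // stores it to the vi'th vector of the result pointed to by Ops[0].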
6925     for (unsigned vi = 0; vi != 2; ++vi) {
6926       SmallVector<uint32_t, 16> Indices;
6927       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6928         Indices.push_back(i+vi);
6929         Indices.push_back(i+e+vi);
6930       }
6931       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6932       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6933       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6934     }
6935     return SV;
6936   }
6937   case NEON::BI__builtin_neon_vuzp_v:
6938   case NEON::BI__builtin_neon_vuzpq_v: {
6939     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6940     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6941     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6942     Value *SV = nullptr;
6943
6944     for (unsigned vi = 0; vi != 2; ++vi) {
6945       SmallVector<uint32_t, 16> Indices;
6946       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6947         Indices.push_back(2*i+vi);
6948
6949       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6950       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6951       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6952     }
6953     return SV;
6954   }
6955   case NEON::BI__builtin_neon_vzip_v:
6956   case NEON::BI__builtin_neon_vzipq_v: {
6957     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6958     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6959     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6960     Value *SV = nullptr;
6961
6962     for (unsigned vi = 0; vi != 2; ++vi) {
6963       SmallVector<uint32_t, 16> Indices;
6964       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6965         Indices.push_back((i + vi*e) >> 1);
6966         Indices.push_back(((i + vi*e) >> 1)+e);
6967       }
6968       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6969       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6970       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6971     }
6972     return SV;
6973   }
6974   case NEON::BI__builtin_neon_vqtbl1q_v: {
6975     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6976                         Ops, "vtbl1");
6977   }
6978   case NEON::BI__builtin_neon_vqtbl2q_v: {
6979     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6980                         Ops, "vtbl2");
6981   }
6982   case NEON::BI__builtin_neon_vqtbl3q_v: {
6983     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6984                         Ops, "vtbl3");
6985   }
6986   case NEON::BI__builtin_neon_vqtbl4q_v: {
6987     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6988                         Ops, "vtbl4");
6989   }
6990   case NEON::BI__builtin_neon_vqtbx1q_v: {
6991     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6992                         Ops, "vtbx1");
6993   }
6994   case NEON::BI__builtin_neon_vqtbx2q_v: {
6995     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6996                         Ops, "vtbx2");
6997   }
6998   case NEON::BI__builtin_neon_vqtbx3q_v: {
6999     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7000                         Ops, "vtbx3");
7001   }
7002   case NEON::BI__builtin_neon_vqtbx4q_v: {
7003     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7004                         Ops, "vtbx4");
7005   }
7006   case NEON::BI__builtin_neon_vsqadd_v:
7007   case NEON::BI__builtin_neon_vsqaddq_v: {
7008     Int = Intrinsic::aarch64_neon_usqadd;
7009     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7010   }
7011   case NEON::BI__builtin_neon_vuqadd_v:
7012   case NEON::BI__builtin_neon_vuqaddq_v: {
7013     Int = Intrinsic::aarch64_neon_suqadd;
7014     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7015   }
7016   }
7017 }
7018
7019 llvm::Value *CodeGenFunction::
7020 BuildVector(ArrayRef<llvm::Value*> Ops) {
7021   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7022          "Not a power-of-two sized vector!");
7023   bool AllConstants = true;
7024   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7025     AllConstants &= isa<Constant>(Ops[i]);
7026
7027   // If this is a constant vector, create a ConstantVector.
7028   if (AllConstants) {
7029     SmallVector<llvm::Constant*, 16> CstOps;
7030     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7031       CstOps.push_back(cast<Constant>(Ops[i]));
7032     return llvm::ConstantVector::get(CstOps);
7033   }
7034
7035   // Otherwise, insertelement the values to build the vector.
7036   Value *Result =
7037     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7038
7039   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7040     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7041
7042   return Result;
7043 }
7044
7045 // Convert the mask from an integer type to a vector of i1.
7046 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7047                               unsigned NumElts) {
7048
7049   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7050                          cast<IntegerType>(Mask->getType())->getBitWidth());
7051   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
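       // e.g. an i8 mask paired with a 4-element vector becomes <8 x i1> here
       // and is shuffled down to <4 x i1> below.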
7052
7053   // If we have fewer than 8 elements, the starting mask was an i8 and we
7054   // need to extract down to the right number of elements.
7055   if (NumElts < 8) {
7056     uint32_t Indices[4];
7057     for (unsigned i = 0; i != NumElts; ++i)
7058       Indices[i] = i;
7059     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7060                                              makeArrayRef(Indices, NumElts),
7061                                              "extract");
7062   }
7063   return MaskVec;
7064 }
7065
7066 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7067                                  SmallVectorImpl<Value *> &Ops,
7068                                  unsigned Align) {
7069   // Cast the pointer to the right type.
7070   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7071                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7072
7073   // If the mask is all ones just emit a regular store.
7074   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7075     if (C->isAllOnesValue())
7076       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7077
7078   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7079                                    Ops[1]->getType()->getVectorNumElements());
7080
7081   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7082 }
7083
7084 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7085                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7086   // Cast the pointer to the right type.
7087   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7088                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7089
7090   // If the mask is all ones just emit a regular load.
7091   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7092     if (C->isAllOnesValue())
7093       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7094
7095   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7096                                    Ops[1]->getType()->getVectorNumElements());
7097
7098   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7099 }
7100
7101 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7102                                         SmallVectorImpl<Value *> &Ops,
7103                                         llvm::Type *DstTy,
7104                                         unsigned SrcSizeInBits,
7105                                         unsigned Align) {
7106   // Load the subvector.
7107   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7108
7109   // Create broadcast mask.
7110   unsigned NumDstElts = DstTy->getVectorNumElements();
7111   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7112
7113   SmallVector<uint32_t, 8> Mask;
7114   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7115     for (unsigned j = 0; j != NumSrcElts; ++j)
7116       Mask.push_back(j);
7117
7118   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7119 }
7120
7121 static Value *EmitX86Select(CodeGenFunction &CGF,
7122                             Value *Mask, Value *Op0, Value *Op1) {
7123
7124   // If the mask is all ones just return the first argument.
7125   if (const auto *C = dyn_cast<Constant>(Mask))
7126     if (C->isAllOnesValue())
7127       return Op0;
7128
7129   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7130
7131   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7132 }
7133
7134 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7135                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7136   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7137   Value *Cmp;
7138
7139   if (CC == 3) {
7140     Cmp = Constant::getNullValue(
7141                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7142   } else if (CC == 7) {
7143     Cmp = Constant::getAllOnesValue(
7144                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7145   } else {
7146     ICmpInst::Predicate Pred;
7147     switch (CC) {
7148     default: llvm_unreachable("Unknown condition code");
7149     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7150     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7151     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7152     case 4: Pred = ICmpInst::ICMP_NE;  break;
7153     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7154     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7155     }
7156     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7157   }
7158
7159   const auto *C = dyn_cast<Constant>(Ops.back());
7160   if (!C || !C->isAllOnesValue())
7161     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7162
7163   if (NumElts < 8) {
7164     uint32_t Indices[8];
7165     for (unsigned i = 0; i != NumElts; ++i)
7166       Indices[i] = i;
7167     for (unsigned i = NumElts; i != 8; ++i)
7168       Indices[i] = i % NumElts + NumElts;
7169     Cmp = CGF.Builder.CreateShuffleVector(
7170         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7171   }
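  // Bitcast the <N x i1> result (padded out to at least 8 elements above) to
  // the i8/i16/i32/i64 value that the *_mask builtins return.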
7172   return CGF.Builder.CreateBitCast(Cmp,
7173                                    IntegerType::get(CGF.getLLVMContext(),
7174                                                     std::max(NumElts, 8U)));
7175 }
7176
7177 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7178                             ArrayRef<Value *> Ops) {
7179   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7180   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
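  // Vector min/max is expressed as an element-wise compare followed by a
  // select; e.g. with ICMP_SGT this computes a signed max of Ops[0] and Ops[1].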
7181
7182   if (Ops.size() == 2)
7183     return Res;
7184
7185   assert(Ops.size() == 4);
7186   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7187 }
7188
7189 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
7190                               llvm::Type *DstTy) {
7191   unsigned NumberOfElements = DstTy->getVectorNumElements();
7192   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7193   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7194 }
7195
7196 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7197                                            const CallExpr *E) {
7198   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7199       BuiltinID == X86::BI__builtin_ms_va_end)
7200     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7201                           BuiltinID == X86::BI__builtin_ms_va_start);
7202   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7203     // Lower this manually. We can't reliably determine whether or not any
7204     // given va_copy() is for a Win64 va_list from the calling convention
7205     // alone, because it's legal to do this from a System V ABI function.
7206     // With opaque pointer types, we won't have enough information in LLVM
7207     // IR to determine this from the argument types, either. Best to do it
7208     // now, while we have enough information.
7209     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7210     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7211
7212     llvm::Type *BPP = Int8PtrPtrTy;
7213
7214     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7215                        DestAddr.getAlignment());
7216     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7217                       SrcAddr.getAlignment());
7218
7219     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7220     return Builder.CreateStore(ArgPtr, DestAddr);
7221   }
7222
7223   SmallVector<Value*, 4> Ops;
7224
7225   // Find out if any arguments are required to be integer constant expressions.
7226   unsigned ICEArguments = 0;
7227   ASTContext::GetBuiltinTypeError Error;
7228   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7229   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7230
7231   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7232     // If this is a normal argument, just emit it as a scalar.
7233     if ((ICEArguments & (1 << i)) == 0) {
7234       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7235       continue;
7236     }
7237
7238     // If this is required to be a constant, constant fold it so that we know
7239     // that the generated intrinsic gets a ConstantInt.
7240     llvm::APSInt Result;
7241     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7242     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7243     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7244   }
7245
7246   // These exist so that the builtin that takes an immediate can be bounds
7247   // checked by clang to avoid passing bad immediates to the backend. Since
7248   // AVX has a larger immediate than SSE, we would need separate builtins to
7249   // do the different bounds checking. Rather than create a clang-specific
7250   // SSE-only builtin, this implements eight separate builtins to match the
7251   // gcc implementation.
7252   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7253     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7254     llvm::Function *F = CGM.getIntrinsic(ID);
7255     return Builder.CreateCall(F, Ops);
7256   };
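  // For example, __builtin_ia32_cmpeqss below becomes a call to
  // llvm.x86.sse.cmp.ss with immediate 0, and __builtin_ia32_cmpordsd a call
  // to llvm.x86.sse2.cmp.sd with immediate 7.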
7257
7258   // For the vector forms of FP comparisons, translate the builtins directly to
7259   // IR.
7260   // TODO: The builtins could be removed if the SSE header files used vector
7261   // extension comparisons directly (vector ordered/unordered may need
7262   // additional support via __builtin_isnan()).
7263   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7264     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7265     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7266     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7267     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7268     return Builder.CreateBitCast(Sext, FPVecTy);
7269   };
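  // e.g. __builtin_ia32_cmpeqps emits an OEQ fcmp on <4 x float>, sign-extends
  // the <4 x i1> result to <4 x i32>, and bitcasts it back to <4 x float>.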
7270
7271   switch (BuiltinID) {
7272   default: return nullptr;
7273   case X86::BI__builtin_cpu_supports: {
7274     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7275     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7276
7277     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7278     // based mapping.
7279     // Processor features and mapping to processor feature value.
7280     enum X86Features {
7281       CMOV = 0,
7282       MMX,
7283       POPCNT,
7284       SSE,
7285       SSE2,
7286       SSE3,
7287       SSSE3,
7288       SSE4_1,
7289       SSE4_2,
7290       AVX,
7291       AVX2,
7292       SSE4_A,
7293       FMA4,
7294       XOP,
7295       FMA,
7296       AVX512F,
7297       BMI,
7298       BMI2,
7299       AES,
7300       PCLMUL,
7301       AVX512VL,
7302       AVX512BW,
7303       AVX512DQ,
7304       AVX512CD,
7305       AVX512ER,
7306       AVX512PF,
7307       AVX512VBMI,
7308       AVX512IFMA,
7309       MAX
7310     };
7311
7312     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
7313                               .Case("cmov", X86Features::CMOV)
7314                               .Case("mmx", X86Features::MMX)
7315                               .Case("popcnt", X86Features::POPCNT)
7316                               .Case("sse", X86Features::SSE)
7317                               .Case("sse2", X86Features::SSE2)
7318                               .Case("sse3", X86Features::SSE3)
7319                               .Case("ssse3", X86Features::SSSE3)
7320                               .Case("sse4.1", X86Features::SSE4_1)
7321                               .Case("sse4.2", X86Features::SSE4_2)
7322                               .Case("avx", X86Features::AVX)
7323                               .Case("avx2", X86Features::AVX2)
7324                               .Case("sse4a", X86Features::SSE4_A)
7325                               .Case("fma4", X86Features::FMA4)
7326                               .Case("xop", X86Features::XOP)
7327                               .Case("fma", X86Features::FMA)
7328                               .Case("avx512f", X86Features::AVX512F)
7329                               .Case("bmi", X86Features::BMI)
7330                               .Case("bmi2", X86Features::BMI2)
7331                               .Case("aes", X86Features::AES)
7332                               .Case("pclmul", X86Features::PCLMUL)
7333                               .Case("avx512vl", X86Features::AVX512VL)
7334                               .Case("avx512bw", X86Features::AVX512BW)
7335                               .Case("avx512dq", X86Features::AVX512DQ)
7336                               .Case("avx512cd", X86Features::AVX512CD)
7337                               .Case("avx512er", X86Features::AVX512ER)
7338                               .Case("avx512pf", X86Features::AVX512PF)
7339                               .Case("avx512vbmi", X86Features::AVX512VBMI)
7340                               .Case("avx512ifma", X86Features::AVX512IFMA)
7341                               .Default(X86Features::MAX);
7342     assert(Feature != X86Features::MAX && "Invalid feature!");
7343
7344     // Matching the struct layout from the compiler-rt/libgcc structure that is
7345     // filled in:
7346     // unsigned int __cpu_vendor;
7347     // unsigned int __cpu_type;
7348     // unsigned int __cpu_subtype;
7349     // unsigned int __cpu_features[1];
7350     llvm::Type *STy = llvm::StructType::get(
7351         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
7352
7353     // Grab the global __cpu_model.
7354     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7355
7356     // Grab the first (0th) element from the field __cpu_features off of the
7357     // global in the struct STy.
7358     Value *Idxs[] = {
7359       ConstantInt::get(Int32Ty, 0),
7360       ConstantInt::get(Int32Ty, 3),
7361       ConstantInt::get(Int32Ty, 0)
7362     };
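    // With these indices the GEP computes &__cpu_model.__cpu_features[0]
    // (struct field 3, array element 0).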
7363     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7364     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7365                                                 CharUnits::fromQuantity(4));
7366
7367     // Check the value of the bit corresponding to the feature requested.
7368     Value *Bitset = Builder.CreateAnd(
7369         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7370     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7371   }
7372   case X86::BI_mm_prefetch: {
7373     Value *Address = Ops[0];
7374     Value *RW = ConstantInt::get(Int32Ty, 0);
7375     Value *Locality = Ops[1];
7376     Value *Data = ConstantInt::get(Int32Ty, 1);
7377     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7378     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7379   }
7380   case X86::BI_mm_clflush: {
7381     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7382                               Ops[0]);
7383   }
7384   case X86::BI_mm_lfence: {
7385     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7386   }
7387   case X86::BI_mm_mfence: {
7388     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7389   }
7390   case X86::BI_mm_sfence: {
7391     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7392   }
7393   case X86::BI_mm_pause: {
7394     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7395   }
7396   case X86::BI__rdtsc: {
7397     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7398   }
7399   case X86::BI__builtin_ia32_undef128:
7400   case X86::BI__builtin_ia32_undef256:
7401   case X86::BI__builtin_ia32_undef512:
7402     // The x86 definition of "undef" is not the same as the LLVM definition
7403     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7404     // IR optimizer and backend.
7405     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7406     // value, we should use that here instead of a zero.
7407     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7408   case X86::BI__builtin_ia32_vec_init_v8qi:
7409   case X86::BI__builtin_ia32_vec_init_v4hi:
7410   case X86::BI__builtin_ia32_vec_init_v2si:
7411     return Builder.CreateBitCast(BuildVector(Ops),
7412                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7413   case X86::BI__builtin_ia32_vec_ext_v2si:
7414     return Builder.CreateExtractElement(Ops[0],
7415                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7416   case X86::BI_mm_setcsr:
7417   case X86::BI__builtin_ia32_ldmxcsr: {
7418     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7419     Builder.CreateStore(Ops[0], Tmp);
7420     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7421                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7422   }
7423   case X86::BI_mm_getcsr:
7424   case X86::BI__builtin_ia32_stmxcsr: {
7425     Address Tmp = CreateMemTemp(E->getType());
7426     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7427                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7428     return Builder.CreateLoad(Tmp, "stmxcsr");
7429   }
7430   case X86::BI__builtin_ia32_xsave:
7431   case X86::BI__builtin_ia32_xsave64:
7432   case X86::BI__builtin_ia32_xrstor:
7433   case X86::BI__builtin_ia32_xrstor64:
7434   case X86::BI__builtin_ia32_xsaveopt:
7435   case X86::BI__builtin_ia32_xsaveopt64:
7436   case X86::BI__builtin_ia32_xrstors:
7437   case X86::BI__builtin_ia32_xrstors64:
7438   case X86::BI__builtin_ia32_xsavec:
7439   case X86::BI__builtin_ia32_xsavec64:
7440   case X86::BI__builtin_ia32_xsaves:
7441   case X86::BI__builtin_ia32_xsaves64: {
7442     Intrinsic::ID ID;
7443 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7444     case X86::BI__builtin_ia32_##NAME: \
7445       ID = Intrinsic::x86_##NAME; \
7446       break
7447     switch (BuiltinID) {
7448     default: llvm_unreachable("Unsupported intrinsic!");
7449     INTRINSIC_X86_XSAVE_ID(xsave);
7450     INTRINSIC_X86_XSAVE_ID(xsave64);
7451     INTRINSIC_X86_XSAVE_ID(xrstor);
7452     INTRINSIC_X86_XSAVE_ID(xrstor64);
7453     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7454     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7455     INTRINSIC_X86_XSAVE_ID(xrstors);
7456     INTRINSIC_X86_XSAVE_ID(xrstors64);
7457     INTRINSIC_X86_XSAVE_ID(xsavec);
7458     INTRINSIC_X86_XSAVE_ID(xsavec64);
7459     INTRINSIC_X86_XSAVE_ID(xsaves);
7460     INTRINSIC_X86_XSAVE_ID(xsaves64);
7461     }
7462 #undef INTRINSIC_X86_XSAVE_ID
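    // The xsave-family intrinsics take the 64-bit feature mask split into two
    // i32 halves (hi then lo), matching the EDX:EAX pair the instructions
    // consume, so split Ops[1] accordingly.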
7463     Value *Mhi = Builder.CreateTrunc(
7464       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7465     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7466     Ops[1] = Mhi;
7467     Ops.push_back(Mlo);
7468     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7469   }
7470   case X86::BI__builtin_ia32_storedqudi128_mask:
7471   case X86::BI__builtin_ia32_storedqusi128_mask:
7472   case X86::BI__builtin_ia32_storedquhi128_mask:
7473   case X86::BI__builtin_ia32_storedquqi128_mask:
7474   case X86::BI__builtin_ia32_storeupd128_mask:
7475   case X86::BI__builtin_ia32_storeups128_mask:
7476   case X86::BI__builtin_ia32_storedqudi256_mask:
7477   case X86::BI__builtin_ia32_storedqusi256_mask:
7478   case X86::BI__builtin_ia32_storedquhi256_mask:
7479   case X86::BI__builtin_ia32_storedquqi256_mask:
7480   case X86::BI__builtin_ia32_storeupd256_mask:
7481   case X86::BI__builtin_ia32_storeups256_mask:
7482   case X86::BI__builtin_ia32_storedqudi512_mask:
7483   case X86::BI__builtin_ia32_storedqusi512_mask:
7484   case X86::BI__builtin_ia32_storedquhi512_mask:
7485   case X86::BI__builtin_ia32_storedquqi512_mask:
7486   case X86::BI__builtin_ia32_storeupd512_mask:
7487   case X86::BI__builtin_ia32_storeups512_mask:
7488     return EmitX86MaskedStore(*this, Ops, 1);
7489
7490   case X86::BI__builtin_ia32_storess128_mask:
7491   case X86::BI__builtin_ia32_storesd128_mask: {
7492     return EmitX86MaskedStore(*this, Ops, 16);
7493   }
7494
7495   case X86::BI__builtin_ia32_cvtmask2b128:
7496   case X86::BI__builtin_ia32_cvtmask2b256:
7497   case X86::BI__builtin_ia32_cvtmask2b512:
7498   case X86::BI__builtin_ia32_cvtmask2w128:
7499   case X86::BI__builtin_ia32_cvtmask2w256:
7500   case X86::BI__builtin_ia32_cvtmask2w512:
7501   case X86::BI__builtin_ia32_cvtmask2d128:
7502   case X86::BI__builtin_ia32_cvtmask2d256:
7503   case X86::BI__builtin_ia32_cvtmask2d512:
7504   case X86::BI__builtin_ia32_cvtmask2q128:
7505   case X86::BI__builtin_ia32_cvtmask2q256:
7506   case X86::BI__builtin_ia32_cvtmask2q512:
7507     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7508
7509   case X86::BI__builtin_ia32_movdqa32store128_mask:
7510   case X86::BI__builtin_ia32_movdqa64store128_mask:
7511   case X86::BI__builtin_ia32_storeaps128_mask:
7512   case X86::BI__builtin_ia32_storeapd128_mask:
7513   case X86::BI__builtin_ia32_movdqa32store256_mask:
7514   case X86::BI__builtin_ia32_movdqa64store256_mask:
7515   case X86::BI__builtin_ia32_storeaps256_mask:
7516   case X86::BI__builtin_ia32_storeapd256_mask:
7517   case X86::BI__builtin_ia32_movdqa32store512_mask:
7518   case X86::BI__builtin_ia32_movdqa64store512_mask:
7519   case X86::BI__builtin_ia32_storeaps512_mask:
7520   case X86::BI__builtin_ia32_storeapd512_mask: {
7521     unsigned Align =
7522       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7523     return EmitX86MaskedStore(*this, Ops, Align);
7524   }
7525   case X86::BI__builtin_ia32_loadups128_mask:
7526   case X86::BI__builtin_ia32_loadups256_mask:
7527   case X86::BI__builtin_ia32_loadups512_mask:
7528   case X86::BI__builtin_ia32_loadupd128_mask:
7529   case X86::BI__builtin_ia32_loadupd256_mask:
7530   case X86::BI__builtin_ia32_loadupd512_mask:
7531   case X86::BI__builtin_ia32_loaddquqi128_mask:
7532   case X86::BI__builtin_ia32_loaddquqi256_mask:
7533   case X86::BI__builtin_ia32_loaddquqi512_mask:
7534   case X86::BI__builtin_ia32_loaddquhi128_mask:
7535   case X86::BI__builtin_ia32_loaddquhi256_mask:
7536   case X86::BI__builtin_ia32_loaddquhi512_mask:
7537   case X86::BI__builtin_ia32_loaddqusi128_mask:
7538   case X86::BI__builtin_ia32_loaddqusi256_mask:
7539   case X86::BI__builtin_ia32_loaddqusi512_mask:
7540   case X86::BI__builtin_ia32_loaddqudi128_mask:
7541   case X86::BI__builtin_ia32_loaddqudi256_mask:
7542   case X86::BI__builtin_ia32_loaddqudi512_mask:
7543     return EmitX86MaskedLoad(*this, Ops, 1);
7544
7545   case X86::BI__builtin_ia32_loadss128_mask:
7546   case X86::BI__builtin_ia32_loadsd128_mask:
7547     return EmitX86MaskedLoad(*this, Ops, 16);
7548
7549   case X86::BI__builtin_ia32_loadaps128_mask:
7550   case X86::BI__builtin_ia32_loadaps256_mask:
7551   case X86::BI__builtin_ia32_loadaps512_mask:
7552   case X86::BI__builtin_ia32_loadapd128_mask:
7553   case X86::BI__builtin_ia32_loadapd256_mask:
7554   case X86::BI__builtin_ia32_loadapd512_mask:
7555   case X86::BI__builtin_ia32_movdqa32load128_mask:
7556   case X86::BI__builtin_ia32_movdqa32load256_mask:
7557   case X86::BI__builtin_ia32_movdqa32load512_mask:
7558   case X86::BI__builtin_ia32_movdqa64load128_mask:
7559   case X86::BI__builtin_ia32_movdqa64load256_mask:
7560   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7561     unsigned Align =
7562       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7563     return EmitX86MaskedLoad(*this, Ops, Align);
7564   }
7565
7566   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7567   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7568     llvm::Type *DstTy = ConvertType(E->getType());
7569     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7570   }
7571
7572   case X86::BI__builtin_ia32_storehps:
7573   case X86::BI__builtin_ia32_storelps: {
7574     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7575     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7576
7577     // cast val to v2i64
7578     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7579
7580     // extract (0, 1)
7581     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7582     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7583     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7584
7585     // cast pointer to i64 & store
7586     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7587     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7588   }
7589   case X86::BI__builtin_ia32_palignr128:
7590   case X86::BI__builtin_ia32_palignr256:
7591   case X86::BI__builtin_ia32_palignr512_mask: {
7592     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7593
7594     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7595     assert(NumElts % 16 == 0);
7596
7597     // If palignr is shifting the pair of vectors more than the size of two
7598     // lanes, emit zero.
7599     if (ShiftVal >= 32)
7600       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7601
7602     // If palignr is shifting the pair of input vectors more than one lane,
7603     // but less than two lanes, convert to shifting in zeroes.
7604     if (ShiftVal > 16) {
7605       ShiftVal -= 16;
7606       Ops[1] = Ops[0];
7607       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7608     }
7609
7610     uint32_t Indices[64];
7611     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7612     for (unsigned l = 0; l != NumElts; l += 16) {
7613       for (unsigned i = 0; i != 16; ++i) {
7614         unsigned Idx = ShiftVal + i;
7615         if (Idx >= 16)
7616           Idx += NumElts - 16; // End of lane, switch operand.
7617         Indices[l + i] = Idx + l;
7618       }
7619     }
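    // e.g. for 128-bit palignr with ShiftVal == 4 this produces indices
    // <4..15, 16..19>: bytes 4..15 of Ops[1] followed by bytes 0..3 of Ops[0].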
7620
7621     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7622                                                makeArrayRef(Indices, NumElts),
7623                                                "palignr");
7624
7625     // If this isn't a masked builtin, just return the align operation.
7626     if (Ops.size() == 3)
7627       return Align;
7628
7629     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7630   }
7631
7632   case X86::BI__builtin_ia32_movnti:
7633   case X86::BI__builtin_ia32_movnti64:
7634   case X86::BI__builtin_ia32_movntsd:
7635   case X86::BI__builtin_ia32_movntss: {
7636     llvm::MDNode *Node = llvm::MDNode::get(
7637         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7638
7639     Value *Ptr = Ops[0];
7640     Value *Src = Ops[1];
7641
7642     // Extract the 0'th element of the source vector.
7643     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7644         BuiltinID == X86::BI__builtin_ia32_movntss)
7645       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7646
7647     // Convert the type of the pointer to a pointer to the stored type.
7648     Value *BC = Builder.CreateBitCast(
7649         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7650
7651     // Unaligned nontemporal store of the scalar value.
7652     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7653     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7654     SI->setAlignment(1);
7655     return SI;
7656   }
7657
7658   case X86::BI__builtin_ia32_selectb_128:
7659   case X86::BI__builtin_ia32_selectb_256:
7660   case X86::BI__builtin_ia32_selectb_512:
7661   case X86::BI__builtin_ia32_selectw_128:
7662   case X86::BI__builtin_ia32_selectw_256:
7663   case X86::BI__builtin_ia32_selectw_512:
7664   case X86::BI__builtin_ia32_selectd_128:
7665   case X86::BI__builtin_ia32_selectd_256:
7666   case X86::BI__builtin_ia32_selectd_512:
7667   case X86::BI__builtin_ia32_selectq_128:
7668   case X86::BI__builtin_ia32_selectq_256:
7669   case X86::BI__builtin_ia32_selectq_512:
7670   case X86::BI__builtin_ia32_selectps_128:
7671   case X86::BI__builtin_ia32_selectps_256:
7672   case X86::BI__builtin_ia32_selectps_512:
7673   case X86::BI__builtin_ia32_selectpd_128:
7674   case X86::BI__builtin_ia32_selectpd_256:
7675   case X86::BI__builtin_ia32_selectpd_512:
7676     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7677   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7678   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7679   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7680   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7681   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7682   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7683   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7684   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7685   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7686   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7687   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7688   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7689     return EmitX86MaskedCompare(*this, 0, false, Ops);
7690   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7691   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7692   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7693   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7694   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7695   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7696   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7697   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7698   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7699   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7700   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7701   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7702     return EmitX86MaskedCompare(*this, 6, true, Ops);
7703   case X86::BI__builtin_ia32_cmpb128_mask:
7704   case X86::BI__builtin_ia32_cmpb256_mask:
7705   case X86::BI__builtin_ia32_cmpb512_mask:
7706   case X86::BI__builtin_ia32_cmpw128_mask:
7707   case X86::BI__builtin_ia32_cmpw256_mask:
7708   case X86::BI__builtin_ia32_cmpw512_mask:
7709   case X86::BI__builtin_ia32_cmpd128_mask:
7710   case X86::BI__builtin_ia32_cmpd256_mask:
7711   case X86::BI__builtin_ia32_cmpd512_mask:
7712   case X86::BI__builtin_ia32_cmpq128_mask:
7713   case X86::BI__builtin_ia32_cmpq256_mask:
7714   case X86::BI__builtin_ia32_cmpq512_mask: {
7715     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7716     return EmitX86MaskedCompare(*this, CC, true, Ops);
7717   }
7718   case X86::BI__builtin_ia32_ucmpb128_mask:
7719   case X86::BI__builtin_ia32_ucmpb256_mask:
7720   case X86::BI__builtin_ia32_ucmpb512_mask:
7721   case X86::BI__builtin_ia32_ucmpw128_mask:
7722   case X86::BI__builtin_ia32_ucmpw256_mask:
7723   case X86::BI__builtin_ia32_ucmpw512_mask:
7724   case X86::BI__builtin_ia32_ucmpd128_mask:
7725   case X86::BI__builtin_ia32_ucmpd256_mask:
7726   case X86::BI__builtin_ia32_ucmpd512_mask:
7727   case X86::BI__builtin_ia32_ucmpq128_mask:
7728   case X86::BI__builtin_ia32_ucmpq256_mask:
7729   case X86::BI__builtin_ia32_ucmpq512_mask: {
7730     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7731     return EmitX86MaskedCompare(*this, CC, false, Ops);
7732   }
7733
7734   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7735   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7736   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7737   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7738   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7739   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7740     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7741     return EmitX86Select(*this, Ops[2],
7742                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7743                          Ops[1]);
7744   }
7745
7746   case X86::BI__builtin_ia32_pmaxsb128:
7747   case X86::BI__builtin_ia32_pmaxsw128:
7748   case X86::BI__builtin_ia32_pmaxsd128:
7749   case X86::BI__builtin_ia32_pmaxsq128_mask:
7750   case X86::BI__builtin_ia32_pmaxsb256:
7751   case X86::BI__builtin_ia32_pmaxsw256:
7752   case X86::BI__builtin_ia32_pmaxsd256:
7753   case X86::BI__builtin_ia32_pmaxsq256_mask:
7754   case X86::BI__builtin_ia32_pmaxsb512_mask:
7755   case X86::BI__builtin_ia32_pmaxsw512_mask:
7756   case X86::BI__builtin_ia32_pmaxsd512_mask:
7757   case X86::BI__builtin_ia32_pmaxsq512_mask:
7758     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7759   case X86::BI__builtin_ia32_pmaxub128:
7760   case X86::BI__builtin_ia32_pmaxuw128:
7761   case X86::BI__builtin_ia32_pmaxud128:
7762   case X86::BI__builtin_ia32_pmaxuq128_mask:
7763   case X86::BI__builtin_ia32_pmaxub256:
7764   case X86::BI__builtin_ia32_pmaxuw256:
7765   case X86::BI__builtin_ia32_pmaxud256:
7766   case X86::BI__builtin_ia32_pmaxuq256_mask:
7767   case X86::BI__builtin_ia32_pmaxub512_mask:
7768   case X86::BI__builtin_ia32_pmaxuw512_mask:
7769   case X86::BI__builtin_ia32_pmaxud512_mask:
7770   case X86::BI__builtin_ia32_pmaxuq512_mask:
7771     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7772   case X86::BI__builtin_ia32_pminsb128:
7773   case X86::BI__builtin_ia32_pminsw128:
7774   case X86::BI__builtin_ia32_pminsd128:
7775   case X86::BI__builtin_ia32_pminsq128_mask:
7776   case X86::BI__builtin_ia32_pminsb256:
7777   case X86::BI__builtin_ia32_pminsw256:
7778   case X86::BI__builtin_ia32_pminsd256:
7779   case X86::BI__builtin_ia32_pminsq256_mask:
7780   case X86::BI__builtin_ia32_pminsb512_mask:
7781   case X86::BI__builtin_ia32_pminsw512_mask:
7782   case X86::BI__builtin_ia32_pminsd512_mask:
7783   case X86::BI__builtin_ia32_pminsq512_mask:
7784     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7785   case X86::BI__builtin_ia32_pminub128:
7786   case X86::BI__builtin_ia32_pminuw128:
7787   case X86::BI__builtin_ia32_pminud128:
7788   case X86::BI__builtin_ia32_pminuq128_mask:
7789   case X86::BI__builtin_ia32_pminub256:
7790   case X86::BI__builtin_ia32_pminuw256:
7791   case X86::BI__builtin_ia32_pminud256:
7792   case X86::BI__builtin_ia32_pminuq256_mask:
7793   case X86::BI__builtin_ia32_pminub512_mask:
7794   case X86::BI__builtin_ia32_pminuw512_mask:
7795   case X86::BI__builtin_ia32_pminud512_mask:
7796   case X86::BI__builtin_ia32_pminuq512_mask:
7797     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7798
7799   // 3DNow!
7800   case X86::BI__builtin_ia32_pswapdsf:
7801   case X86::BI__builtin_ia32_pswapdsi: {
7802     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7803     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7804     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7805     return Builder.CreateCall(F, Ops, "pswapd");
7806   }
7807   case X86::BI__builtin_ia32_rdrand16_step:
7808   case X86::BI__builtin_ia32_rdrand32_step:
7809   case X86::BI__builtin_ia32_rdrand64_step:
7810   case X86::BI__builtin_ia32_rdseed16_step:
7811   case X86::BI__builtin_ia32_rdseed32_step:
7812   case X86::BI__builtin_ia32_rdseed64_step: {
7813     Intrinsic::ID ID;
7814     switch (BuiltinID) {
7815     default: llvm_unreachable("Unsupported intrinsic!");
7816     case X86::BI__builtin_ia32_rdrand16_step:
7817       ID = Intrinsic::x86_rdrand_16;
7818       break;
7819     case X86::BI__builtin_ia32_rdrand32_step:
7820       ID = Intrinsic::x86_rdrand_32;
7821       break;
7822     case X86::BI__builtin_ia32_rdrand64_step:
7823       ID = Intrinsic::x86_rdrand_64;
7824       break;
7825     case X86::BI__builtin_ia32_rdseed16_step:
7826       ID = Intrinsic::x86_rdseed_16;
7827       break;
7828     case X86::BI__builtin_ia32_rdseed32_step:
7829       ID = Intrinsic::x86_rdseed_32;
7830       break;
7831     case X86::BI__builtin_ia32_rdseed64_step:
7832       ID = Intrinsic::x86_rdseed_64;
7833       break;
7834     }
7835
7836     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7837     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7838                                       Ops[0]);
7839     return Builder.CreateExtractValue(Call, 1);
7840   }
7841
7842   // SSE packed comparison intrinsics
7843   case X86::BI__builtin_ia32_cmpeqps:
7844   case X86::BI__builtin_ia32_cmpeqpd:
7845     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7846   case X86::BI__builtin_ia32_cmpltps:
7847   case X86::BI__builtin_ia32_cmpltpd:
7848     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7849   case X86::BI__builtin_ia32_cmpleps:
7850   case X86::BI__builtin_ia32_cmplepd:
7851     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7852   case X86::BI__builtin_ia32_cmpunordps:
7853   case X86::BI__builtin_ia32_cmpunordpd:
7854     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7855   case X86::BI__builtin_ia32_cmpneqps:
7856   case X86::BI__builtin_ia32_cmpneqpd:
7857     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7858   case X86::BI__builtin_ia32_cmpnltps:
7859   case X86::BI__builtin_ia32_cmpnltpd:
7860     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7861   case X86::BI__builtin_ia32_cmpnleps:
7862   case X86::BI__builtin_ia32_cmpnlepd:
7863     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7864   case X86::BI__builtin_ia32_cmpordps:
7865   case X86::BI__builtin_ia32_cmpordpd:
7866     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7867   case X86::BI__builtin_ia32_cmpps:
7868   case X86::BI__builtin_ia32_cmpps256:
7869   case X86::BI__builtin_ia32_cmppd:
7870   case X86::BI__builtin_ia32_cmppd256: {
7871     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7872     // If this is one of the SSE immediates, we can use native IR.
7873     if (CC < 8) {
7874       FCmpInst::Predicate Pred;
7875       switch (CC) {
7876       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7877       case 1: Pred = FCmpInst::FCMP_OLT; break;
7878       case 2: Pred = FCmpInst::FCMP_OLE; break;
7879       case 3: Pred = FCmpInst::FCMP_UNO; break;
7880       case 4: Pred = FCmpInst::FCMP_UNE; break;
7881       case 5: Pred = FCmpInst::FCMP_UGE; break;
7882       case 6: Pred = FCmpInst::FCMP_UGT; break;
7883       case 7: Pred = FCmpInst::FCMP_ORD; break;
7884       }
7885       return getVectorFCmpIR(Pred);
7886     }
7887
7888     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7889     Intrinsic::ID ID;
7890     switch (BuiltinID) {
7891     default: llvm_unreachable("Unsupported intrinsic!");
7892     case X86::BI__builtin_ia32_cmpps:
7893       ID = Intrinsic::x86_sse_cmp_ps;
7894       break;
7895     case X86::BI__builtin_ia32_cmpps256:
7896       ID = Intrinsic::x86_avx_cmp_ps_256;
7897       break;
7898     case X86::BI__builtin_ia32_cmppd:
7899       ID = Intrinsic::x86_sse2_cmp_pd;
7900       break;
7901     case X86::BI__builtin_ia32_cmppd256:
7902       ID = Intrinsic::x86_avx_cmp_pd_256;
7903       break;
7904     }
7905
7906     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7907   }
7908
7909   // SSE scalar comparison intrinsics
7910   case X86::BI__builtin_ia32_cmpeqss:
7911     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7912   case X86::BI__builtin_ia32_cmpltss:
7913     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7914   case X86::BI__builtin_ia32_cmpless:
7915     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7916   case X86::BI__builtin_ia32_cmpunordss:
7917     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7918   case X86::BI__builtin_ia32_cmpneqss:
7919     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7920   case X86::BI__builtin_ia32_cmpnltss:
7921     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7922   case X86::BI__builtin_ia32_cmpnless:
7923     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7924   case X86::BI__builtin_ia32_cmpordss:
7925     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7926   case X86::BI__builtin_ia32_cmpeqsd:
7927     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7928   case X86::BI__builtin_ia32_cmpltsd:
7929     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7930   case X86::BI__builtin_ia32_cmplesd:
7931     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7932   case X86::BI__builtin_ia32_cmpunordsd:
7933     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7934   case X86::BI__builtin_ia32_cmpneqsd:
7935     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7936   case X86::BI__builtin_ia32_cmpnltsd:
7937     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7938   case X86::BI__builtin_ia32_cmpnlesd:
7939     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7940   case X86::BI__builtin_ia32_cmpordsd:
7941     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7942
7943   case X86::BI__emul:
7944   case X86::BI__emulu: {
7945     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7946     bool isSigned = (BuiltinID == X86::BI__emul);
7947     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7948     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7949     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7950   }
7951   case X86::BI__mulh:
7952   case X86::BI__umulh:
7953   case X86::BI_mul128:
7954   case X86::BI_umul128: {
7955     llvm::Type *ResType = ConvertType(E->getType());
7956     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7957
7958     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7959     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7960     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7961
7962     Value *MulResult, *HigherBits;
7963     if (IsSigned) {
7964       MulResult = Builder.CreateNSWMul(LHS, RHS);
7965       HigherBits = Builder.CreateAShr(MulResult, 64);
7966     } else {
7967       MulResult = Builder.CreateNUWMul(LHS, RHS);
7968       HigherBits = Builder.CreateLShr(MulResult, 64);
7969     }
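    // e.g. __umulh zero-extends both operands to i128, multiplies, and returns
    // the top 64 bits; _umul128 additionally stores those bits through the
    // third argument and returns the low 64 bits.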
7970     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
7971
7972     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
7973       return HigherBits;
7974
7975     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
7976     Builder.CreateStore(HigherBits, HighBitsAddress);
7977     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
7978   }
7979
7980   case X86::BI__faststorefence: {
7981     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7982                                llvm::CrossThread);
7983   }
7984   case X86::BI_ReadWriteBarrier:
7985   case X86::BI_ReadBarrier:
7986   case X86::BI_WriteBarrier: {
7987     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7988                                llvm::SingleThread);
7989   }
7990   case X86::BI_BitScanForward:
7991   case X86::BI_BitScanForward64:
7992     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
7993   case X86::BI_BitScanReverse:
7994   case X86::BI_BitScanReverse64:
7995     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
7996
7997   case X86::BI_InterlockedAnd64:
7998     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
7999   case X86::BI_InterlockedExchange64:
8000     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8001   case X86::BI_InterlockedExchangeAdd64:
8002     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8003   case X86::BI_InterlockedExchangeSub64:
8004     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8005   case X86::BI_InterlockedOr64:
8006     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8007   case X86::BI_InterlockedXor64:
8008     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8009   case X86::BI_InterlockedDecrement64:
8010     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8011   case X86::BI_InterlockedIncrement64:
8012     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8013
8014   case X86::BI_AddressOfReturnAddress: {
8015     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8016     return Builder.CreateCall(F);
8017   }
8018   case X86::BI__stosb: {
8019     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8020     // instruction, but it will create a memset that won't be optimized away.
8021     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8022   }
8023   case X86::BI__ud2:
8024     // llvm.trap makes a ud2a instruction on x86.
8025     return EmitTrapCall(Intrinsic::trap);
8026   case X86::BI__int2c: {
8027     // This syscall signals a driver assertion failure in x86 NT kernels.
8028     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8029     llvm::InlineAsm *IA =
8030         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8031     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8032         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8033         llvm::Attribute::NoReturn);
8034     CallSite CS = Builder.CreateCall(IA);
8035     CS.setAttributes(NoReturnAttr);
8036     return CS.getInstruction();
8037   }
8038   case X86::BI__readfsbyte:
8039   case X86::BI__readfsword:
8040   case X86::BI__readfsdword:
8041   case X86::BI__readfsqword: {
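    // Address space 257 is FS-relative addressing on x86; the __readgs*
    // variants below use address space 256 for GS.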
8042     llvm::Type *IntTy = ConvertType(E->getType());
8043     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8044                                         llvm::PointerType::get(IntTy, 257));
8045     LoadInst *Load = Builder.CreateAlignedLoad(
8046         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8047     Load->setVolatile(true);
8048     return Load;
8049   }
8050   case X86::BI__readgsbyte:
8051   case X86::BI__readgsword:
8052   case X86::BI__readgsdword:
8053   case X86::BI__readgsqword: {
8054     llvm::Type *IntTy = ConvertType(E->getType());
8055     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8056                                         llvm::PointerType::get(IntTy, 256));
8057     LoadInst *Load = Builder.CreateAlignedLoad(
8058         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8059     Load->setVolatile(true);
8060     return Load;
8061   }
8062   }
8063 }
8064
8065
8066 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8067                                            const CallExpr *E) {
8068   SmallVector<Value*, 4> Ops;
8069
8070   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8071     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8072
8073   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8074
8075   switch (BuiltinID) {
8076   default: return nullptr;
8077
8078   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8079   // call __builtin_readcyclecounter.
8080   case PPC::BI__builtin_ppc_get_timebase:
8081     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8082
8083   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8084   case PPC::BI__builtin_altivec_lvx:
8085   case PPC::BI__builtin_altivec_lvxl:
8086   case PPC::BI__builtin_altivec_lvebx:
8087   case PPC::BI__builtin_altivec_lvehx:
8088   case PPC::BI__builtin_altivec_lvewx:
8089   case PPC::BI__builtin_altivec_lvsl:
8090   case PPC::BI__builtin_altivec_lvsr:
8091   case PPC::BI__builtin_vsx_lxvd2x:
8092   case PPC::BI__builtin_vsx_lxvw4x:
8093   case PPC::BI__builtin_vsx_lxvd2x_be:
8094   case PPC::BI__builtin_vsx_lxvw4x_be:
8095   case PPC::BI__builtin_vsx_lxvl:
8096   case PPC::BI__builtin_vsx_lxvll:
8097   {
8098     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8099         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
8100       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8101     } else {
8102       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8103       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8104       Ops.pop_back();
8105     }
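    // The non-lxvl forms take (offset, pointer), which were folded above into
    // a single i8* address so the target intrinsic sees one pointer operand.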
8106
8107     switch (BuiltinID) {
8108     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8109     case PPC::BI__builtin_altivec_lvx:
8110       ID = Intrinsic::ppc_altivec_lvx;
8111       break;
8112     case PPC::BI__builtin_altivec_lvxl:
8113       ID = Intrinsic::ppc_altivec_lvxl;
8114       break;
8115     case PPC::BI__builtin_altivec_lvebx:
8116       ID = Intrinsic::ppc_altivec_lvebx;
8117       break;
8118     case PPC::BI__builtin_altivec_lvehx:
8119       ID = Intrinsic::ppc_altivec_lvehx;
8120       break;
8121     case PPC::BI__builtin_altivec_lvewx:
8122       ID = Intrinsic::ppc_altivec_lvewx;
8123       break;
8124     case PPC::BI__builtin_altivec_lvsl:
8125       ID = Intrinsic::ppc_altivec_lvsl;
8126       break;
8127     case PPC::BI__builtin_altivec_lvsr:
8128       ID = Intrinsic::ppc_altivec_lvsr;
8129       break;
8130     case PPC::BI__builtin_vsx_lxvd2x:
8131       ID = Intrinsic::ppc_vsx_lxvd2x;
8132       break;
8133     case PPC::BI__builtin_vsx_lxvw4x:
8134       ID = Intrinsic::ppc_vsx_lxvw4x;
8135       break;
8136     case PPC::BI__builtin_vsx_lxvd2x_be:
8137       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8138       break;
8139     case PPC::BI__builtin_vsx_lxvw4x_be:
8140       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8141       break;
8142     case PPC::BI__builtin_vsx_lxvl:
8143       ID = Intrinsic::ppc_vsx_lxvl;
8144       break;
8145     case PPC::BI__builtin_vsx_lxvll:
8146       ID = Intrinsic::ppc_vsx_lxvll;
8147       break;
8148     }
8149     llvm::Function *F = CGM.getIntrinsic(ID);
8150     return Builder.CreateCall(F, Ops, "");
8151   }
8152
8153   // vec_st, vec_xst_be
8154   case PPC::BI__builtin_altivec_stvx:
8155   case PPC::BI__builtin_altivec_stvxl:
8156   case PPC::BI__builtin_altivec_stvebx:
8157   case PPC::BI__builtin_altivec_stvehx:
8158   case PPC::BI__builtin_altivec_stvewx:
8159   case PPC::BI__builtin_vsx_stxvd2x:
8160   case PPC::BI__builtin_vsx_stxvw4x:
8161   case PPC::BI__builtin_vsx_stxvd2x_be:
8162   case PPC::BI__builtin_vsx_stxvw4x_be:
8163   case PPC::BI__builtin_vsx_stxvl:
8164   case PPC::BI__builtin_vsx_stxvll:
8165   {
8166     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8167         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8168       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8169     } else {
8170       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8171       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8172       Ops.pop_back();
8173     }
8174
8175     switch (BuiltinID) {
8176     default: llvm_unreachable("Unsupported st intrinsic!");
8177     case PPC::BI__builtin_altivec_stvx:
8178       ID = Intrinsic::ppc_altivec_stvx;
8179       break;
8180     case PPC::BI__builtin_altivec_stvxl:
8181       ID = Intrinsic::ppc_altivec_stvxl;
8182       break;
8183     case PPC::BI__builtin_altivec_stvebx:
8184       ID = Intrinsic::ppc_altivec_stvebx;
8185       break;
8186     case PPC::BI__builtin_altivec_stvehx:
8187       ID = Intrinsic::ppc_altivec_stvehx;
8188       break;
8189     case PPC::BI__builtin_altivec_stvewx:
8190       ID = Intrinsic::ppc_altivec_stvewx;
8191       break;
8192     case PPC::BI__builtin_vsx_stxvd2x:
8193       ID = Intrinsic::ppc_vsx_stxvd2x;
8194       break;
8195     case PPC::BI__builtin_vsx_stxvw4x:
8196       ID = Intrinsic::ppc_vsx_stxvw4x;
8197       break;
8198     case PPC::BI__builtin_vsx_stxvd2x_be:
8199       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8200       break;
8201     case PPC::BI__builtin_vsx_stxvw4x_be:
8202       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8203       break;
8204     case PPC::BI__builtin_vsx_stxvl:
8205       ID = Intrinsic::ppc_vsx_stxvl;
8206       break;
8207     case PPC::BI__builtin_vsx_stxvll:
8208       ID = Intrinsic::ppc_vsx_stxvll;
8209       break;
8210     }
8211     llvm::Function *F = CGM.getIntrinsic(ID);
8212     return Builder.CreateCall(F, Ops, "");
8213   }
8214   // Square root
8215   case PPC::BI__builtin_vsx_xvsqrtsp:
8216   case PPC::BI__builtin_vsx_xvsqrtdp: {
8217     llvm::Type *ResultType = ConvertType(E->getType());
8218     Value *X = EmitScalarExpr(E->getArg(0));
8219     ID = Intrinsic::sqrt;
8220     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8221     return Builder.CreateCall(F, X);
8222   }
8223   // Count leading zeros
8224   case PPC::BI__builtin_altivec_vclzb:
8225   case PPC::BI__builtin_altivec_vclzh:
8226   case PPC::BI__builtin_altivec_vclzw:
8227   case PPC::BI__builtin_altivec_vclzd: {
8228     llvm::Type *ResultType = ConvertType(E->getType());
8229     Value *X = EmitScalarExpr(E->getArg(0));
8230     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8231     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8232     return Builder.CreateCall(F, {X, Undef});
8233   }
8234   case PPC::BI__builtin_altivec_vctzb:
8235   case PPC::BI__builtin_altivec_vctzh:
8236   case PPC::BI__builtin_altivec_vctzw:
8237   case PPC::BI__builtin_altivec_vctzd: {
8238     llvm::Type *ResultType = ConvertType(E->getType());
8239     Value *X = EmitScalarExpr(E->getArg(0));
8240     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8241     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8242     return Builder.CreateCall(F, {X, Undef});
8243   }
8244   case PPC::BI__builtin_altivec_vpopcntb:
8245   case PPC::BI__builtin_altivec_vpopcnth:
8246   case PPC::BI__builtin_altivec_vpopcntw:
8247   case PPC::BI__builtin_altivec_vpopcntd: {
8248     llvm::Type *ResultType = ConvertType(E->getType());
8249     Value *X = EmitScalarExpr(E->getArg(0));
8250     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8251     return Builder.CreateCall(F, X);
8252   }
8253   // Copy sign
8254   case PPC::BI__builtin_vsx_xvcpsgnsp:
8255   case PPC::BI__builtin_vsx_xvcpsgndp: {
8256     llvm::Type *ResultType = ConvertType(E->getType());
8257     Value *X = EmitScalarExpr(E->getArg(0));
8258     Value *Y = EmitScalarExpr(E->getArg(1));
8259     ID = Intrinsic::copysign;
8260     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8261     return Builder.CreateCall(F, {X, Y});
8262   }
8263   // Rounding/truncation
8264   case PPC::BI__builtin_vsx_xvrspip:
8265   case PPC::BI__builtin_vsx_xvrdpip:
8266   case PPC::BI__builtin_vsx_xvrdpim:
8267   case PPC::BI__builtin_vsx_xvrspim:
8268   case PPC::BI__builtin_vsx_xvrdpi:
8269   case PPC::BI__builtin_vsx_xvrspi:
8270   case PPC::BI__builtin_vsx_xvrdpic:
8271   case PPC::BI__builtin_vsx_xvrspic:
8272   case PPC::BI__builtin_vsx_xvrdpiz:
8273   case PPC::BI__builtin_vsx_xvrspiz: {
8274     llvm::Type *ResultType = ConvertType(E->getType());
8275     Value *X = EmitScalarExpr(E->getArg(0));
8276     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8277         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8278       ID = Intrinsic::floor;
8279     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8280              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8281       ID = Intrinsic::round;
8282     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8283              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8284       ID = Intrinsic::nearbyint;
8285     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8286              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8287       ID = Intrinsic::ceil;
8288     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8289              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8290       ID = Intrinsic::trunc;
8291     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8292     return Builder.CreateCall(F, X);
8293   }
8294
8295   // Absolute value
8296   case PPC::BI__builtin_vsx_xvabsdp:
8297   case PPC::BI__builtin_vsx_xvabssp: {
8298     llvm::Type *ResultType = ConvertType(E->getType());
8299     Value *X = EmitScalarExpr(E->getArg(0));
8300     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8301     return Builder.CreateCall(F, X);
8302   }
8303
8304   // FMA variations
8305   case PPC::BI__builtin_vsx_xvmaddadp:
8306   case PPC::BI__builtin_vsx_xvmaddasp:
8307   case PPC::BI__builtin_vsx_xvnmaddadp:
8308   case PPC::BI__builtin_vsx_xvnmaddasp:
8309   case PPC::BI__builtin_vsx_xvmsubadp:
8310   case PPC::BI__builtin_vsx_xvmsubasp:
8311   case PPC::BI__builtin_vsx_xvnmsubadp:
8312   case PPC::BI__builtin_vsx_xvnmsubasp: {
8313     llvm::Type *ResultType = ConvertType(E->getType());
8314     Value *X = EmitScalarExpr(E->getArg(0));
8315     Value *Y = EmitScalarExpr(E->getArg(1));
8316     Value *Z = EmitScalarExpr(E->getArg(2));
8317     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8318     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
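    // nmadd, msub and nmsub are all built from llvm.fma by negating the addend
    // (Z) and/or the final result via an fsub from Zero.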
8319     switch (BuiltinID) {
8320       case PPC::BI__builtin_vsx_xvmaddadp:
8321       case PPC::BI__builtin_vsx_xvmaddasp:
8322         return Builder.CreateCall(F, {X, Y, Z});
8323       case PPC::BI__builtin_vsx_xvnmaddadp:
8324       case PPC::BI__builtin_vsx_xvnmaddasp:
8325         return Builder.CreateFSub(Zero,
8326                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8327       case PPC::BI__builtin_vsx_xvmsubadp:
8328       case PPC::BI__builtin_vsx_xvmsubasp:
8329         return Builder.CreateCall(F,
8330                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8331       case PPC::BI__builtin_vsx_xvnmsubadp:
8332       case PPC::BI__builtin_vsx_xvnmsubasp:
8333         Value *FsubRes =
8334           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8335         return Builder.CreateFSub(Zero, FsubRes, "sub");
8336     }
8337     llvm_unreachable("Unknown FMA operation");
8338     return nullptr; // Suppress no-return warning
8339   }
8340
8341   case PPC::BI__builtin_vsx_insertword: {
8342     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8343
8344     // Third argument is a compile time constant int. It must be clamped to
8345     // the range [0, 12].
8346     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8347     assert(ArgCI &&
8348            "Third arg to xxinsertw intrinsic must be constant integer");
8349     const int64_t MaxIndex = 12;
8350     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8351
8352     // The builtin semantics don't exactly match the xxinsertw instruction's
8353     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8354     // word from the first argument, and inserts it in the second argument. The
8355     // instruction extracts the word from its second input register and inserts
8356     // it into its first input register, so swap the first and second arguments.
8357     std::swap(Ops[0], Ops[1]);
8358
8359     // Need to cast the second argument from a vector of unsigned int to a
8360     // vector of long long.
8361     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8362
8363     if (getTarget().isLittleEndian()) {
8364       // Create a shuffle mask of (1, 0)
8365       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8366                                    ConstantInt::get(Int32Ty, 0)
8367                                  };
8368       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8369
8370       // Reverse the double words in the vector we will extract from.
8371       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8372       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8373
8374       // Reverse the index.
8375       Index = MaxIndex - Index;
8376     }
8377
8378     // Intrinsic expects the first arg to be a vector of int.
8379     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8380     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8381     return Builder.CreateCall(F, Ops);
8382   }
8383
8384   case PPC::BI__builtin_vsx_extractuword: {
8385     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8386
8387     // Intrinsic expects the first argument to be a vector of doublewords.
8388     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8389
8390     // The second argument is a compile-time constant int that needs to
8391     // be clamped to the range [0, 12].
8392     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8393     assert(ArgCI &&
8394            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8395     const int64_t MaxIndex = 12;
8396     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8397
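         // On little-endian targets, mirror the index before the call and swap the
         // doublewords of the result afterwards, undoing the element reversal.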
8398     if (getTarget().isLittleEndian()) {
8399       // Reverse the index.
8400       Index = MaxIndex - Index;
8401       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8402
8403       // Emit the call, then reverse the double words of the result vector.
8404       Value *Call = Builder.CreateCall(F, Ops);
8405
8406       // Create a shuffle mask of (1, 0)
8407       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8408                                    ConstantInt::get(Int32Ty, 0)
8409                                  };
8410       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8411
8412       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8413       return ShuffleCall;
8414     } else {
8415       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8416       return Builder.CreateCall(F, Ops);
8417     }
8418   }
8419   }
8420 }
8421
8422 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8423                                               const CallExpr *E) {
8424   switch (BuiltinID) {
8425   case AMDGPU::BI__builtin_amdgcn_div_scale:
8426   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8427     // Translate from the intrinsic's struct return to the builtin's out
8428     // argument.
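         // The intrinsic returns a {value, flag} pair; extract both members, store
         // the zero-extended flag through the out pointer, and return the value.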
8429
8430     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8431
8432     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8433     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8434     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8435
8436     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8437                                            X->getType());
8438
8439     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8440
8441     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8442     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8443
8444     llvm::Type *RealFlagType
8445       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8446
8447     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8448     Builder.CreateStore(FlagExt, FlagOutPtr);
8449     return Result;
8450   }
8451   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8452   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8453     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8454     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8455     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8456     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8457
8458     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8459                                       Src0->getType());
8460     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8461     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8462   }
8463
8464   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8465     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8466   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8467     llvm::SmallVector<llvm::Value *, 5> Args;
8468     for (unsigned I = 0; I != 5; ++I)
8469       Args.push_back(EmitScalarExpr(E->getArg(I)));
8470     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8471                                     Args[0]->getType());
8472     return Builder.CreateCall(F, Args);
8473   }
8474   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8475   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8476   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8477     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8478   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8479   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8480     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8481   case AMDGPU::BI__builtin_amdgcn_rcp:
8482   case AMDGPU::BI__builtin_amdgcn_rcpf:
8483   case AMDGPU::BI__builtin_amdgcn_rcph:
8484     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8485   case AMDGPU::BI__builtin_amdgcn_rsq:
8486   case AMDGPU::BI__builtin_amdgcn_rsqf:
8487   case AMDGPU::BI__builtin_amdgcn_rsqh:
8488     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8489   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8490   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8491     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8492   case AMDGPU::BI__builtin_amdgcn_sinf:
8493   case AMDGPU::BI__builtin_amdgcn_sinh:
8494     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8495   case AMDGPU::BI__builtin_amdgcn_cosf:
8496   case AMDGPU::BI__builtin_amdgcn_cosh:
8497     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8498   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8499     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8500   case AMDGPU::BI__builtin_amdgcn_ldexp:
8501   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8502   case AMDGPU::BI__builtin_amdgcn_ldexph:
8503     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8504   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8505   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8506   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8507     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8508   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8509   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8510     Value *Src0 = EmitScalarExpr(E->getArg(0));
8511     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8512                                 { Builder.getInt32Ty(), Src0->getType() });
8513     return Builder.CreateCall(F, Src0);
8514   }
8515   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8516     Value *Src0 = EmitScalarExpr(E->getArg(0));
8517     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8518                                 { Builder.getInt16Ty(), Src0->getType() });
8519     return Builder.CreateCall(F, Src0);
8520   }
8521   case AMDGPU::BI__builtin_amdgcn_fract:
8522   case AMDGPU::BI__builtin_amdgcn_fractf:
8523   case AMDGPU::BI__builtin_amdgcn_fracth:
8524     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8525   case AMDGPU::BI__builtin_amdgcn_lerp:
8526     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8527   case AMDGPU::BI__builtin_amdgcn_uicmp:
8528   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8529   case AMDGPU::BI__builtin_amdgcn_sicmp:
8530   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8531     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8532   case AMDGPU::BI__builtin_amdgcn_fcmp:
8533   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8534     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8535   case AMDGPU::BI__builtin_amdgcn_class:
8536   case AMDGPU::BI__builtin_amdgcn_classf:
8537   case AMDGPU::BI__builtin_amdgcn_classh:
8538     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8539   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8540   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8541     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8542   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8543     CallInst *CI = cast<CallInst>(
8544       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8545     CI->setConvergent();
8546     return CI;
8547   }
8548
8549   // amdgcn workitem
8550   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8551     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8552   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8553     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8554   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8555     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8556
8557   // r600 intrinsics
8558   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8559   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8560     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8561   case AMDGPU::BI__builtin_r600_read_tidig_x:
8562     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8563   case AMDGPU::BI__builtin_r600_read_tidig_y:
8564     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8565   case AMDGPU::BI__builtin_r600_read_tidig_z:
8566     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8567   default:
8568     return nullptr;
8569   }
8570 }
8571
8572 /// Handle a SystemZ function in which the final argument is a pointer
8573 /// to an int that receives the post-instruction CC value.  At the LLVM level
8574 /// this is represented as a function that returns a {result, cc} pair.
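     /// For illustration (assuming the usual source-level shape of these builtins),
     /// a call such as __builtin_s390_vceqbs(a, b, &cc) maps to llvm.s390.vceqbs:
     /// the first element of the returned pair becomes the builtin's result and
     /// the second is stored through the cc pointer.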
8575 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8576                                          unsigned IntrinsicID,
8577                                          const CallExpr *E) {
8578   unsigned NumArgs = E->getNumArgs() - 1;
8579   SmallVector<Value *, 8> Args(NumArgs);
8580   for (unsigned I = 0; I < NumArgs; ++I)
8581     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8582   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8583   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8584   Value *Call = CGF.Builder.CreateCall(F, Args);
8585   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8586   CGF.Builder.CreateStore(CC, CCPtr);
8587   return CGF.Builder.CreateExtractValue(Call, 0);
8588 }
8589
8590 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8591                                                const CallExpr *E) {
8592   switch (BuiltinID) {
8593   case SystemZ::BI__builtin_tbegin: {
8594     Value *TDB = EmitScalarExpr(E->getArg(0));
8595     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8596     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8597     return Builder.CreateCall(F, {TDB, Control});
8598   }
8599   case SystemZ::BI__builtin_tbegin_nofloat: {
8600     Value *TDB = EmitScalarExpr(E->getArg(0));
8601     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8602     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8603     return Builder.CreateCall(F, {TDB, Control});
8604   }
8605   case SystemZ::BI__builtin_tbeginc: {
8606     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8607     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8608     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8609     return Builder.CreateCall(F, {TDB, Control});
8610   }
8611   case SystemZ::BI__builtin_tabort: {
8612     Value *Data = EmitScalarExpr(E->getArg(0));
8613     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8614     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8615   }
8616   case SystemZ::BI__builtin_non_tx_store: {
8617     Value *Address = EmitScalarExpr(E->getArg(0));
8618     Value *Data = EmitScalarExpr(E->getArg(1));
8619     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8620     return Builder.CreateCall(F, {Data, Address});
8621   }
8622
8623   // Vector builtins.  Note that most vector builtins are mapped automatically
8624   // to target-specific LLVM intrinsics.  The ones handled specially here can
8625   // be represented via standard LLVM IR, which is preferable since it enables
8626   // common LLVM optimizations.
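       // For example, vpopct* below becomes a plain llvm.ctpop call on the vector
       // type, and vclz*/vctz* become llvm.ctlz/llvm.cttz with the is-zero-undef
       // flag set to false.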
8627
8628   case SystemZ::BI__builtin_s390_vpopctb:
8629   case SystemZ::BI__builtin_s390_vpopcth:
8630   case SystemZ::BI__builtin_s390_vpopctf:
8631   case SystemZ::BI__builtin_s390_vpopctg: {
8632     llvm::Type *ResultType = ConvertType(E->getType());
8633     Value *X = EmitScalarExpr(E->getArg(0));
8634     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8635     return Builder.CreateCall(F, X);
8636   }
8637
8638   case SystemZ::BI__builtin_s390_vclzb:
8639   case SystemZ::BI__builtin_s390_vclzh:
8640   case SystemZ::BI__builtin_s390_vclzf:
8641   case SystemZ::BI__builtin_s390_vclzg: {
8642     llvm::Type *ResultType = ConvertType(E->getType());
8643     Value *X = EmitScalarExpr(E->getArg(0));
8644     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8645     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8646     return Builder.CreateCall(F, {X, Undef});
8647   }
8648
8649   case SystemZ::BI__builtin_s390_vctzb:
8650   case SystemZ::BI__builtin_s390_vctzh:
8651   case SystemZ::BI__builtin_s390_vctzf:
8652   case SystemZ::BI__builtin_s390_vctzg: {
8653     llvm::Type *ResultType = ConvertType(E->getType());
8654     Value *X = EmitScalarExpr(E->getArg(0));
8655     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8656     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8657     return Builder.CreateCall(F, {X, Undef});
8658   }
8659
8660   case SystemZ::BI__builtin_s390_vfsqdb: {
8661     llvm::Type *ResultType = ConvertType(E->getType());
8662     Value *X = EmitScalarExpr(E->getArg(0));
8663     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8664     return Builder.CreateCall(F, X);
8665   }
8666   case SystemZ::BI__builtin_s390_vfmadb: {
8667     llvm::Type *ResultType = ConvertType(E->getType());
8668     Value *X = EmitScalarExpr(E->getArg(0));
8669     Value *Y = EmitScalarExpr(E->getArg(1));
8670     Value *Z = EmitScalarExpr(E->getArg(2));
8671     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8672     return Builder.CreateCall(F, {X, Y, Z});
8673   }
8674   case SystemZ::BI__builtin_s390_vfmsdb: {
8675     llvm::Type *ResultType = ConvertType(E->getType());
8676     Value *X = EmitScalarExpr(E->getArg(0));
8677     Value *Y = EmitScalarExpr(E->getArg(1));
8678     Value *Z = EmitScalarExpr(E->getArg(2));
8679     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8680     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8681     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8682   }
8683   case SystemZ::BI__builtin_s390_vflpdb: {
8684     llvm::Type *ResultType = ConvertType(E->getType());
8685     Value *X = EmitScalarExpr(E->getArg(0));
8686     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8687     return Builder.CreateCall(F, X);
8688   }
8689   case SystemZ::BI__builtin_s390_vflndb: {
8690     llvm::Type *ResultType = ConvertType(E->getType());
8691     Value *X = EmitScalarExpr(E->getArg(0));
8692     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8693     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8694     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8695   }
8696   case SystemZ::BI__builtin_s390_vfidb: {
8697     llvm::Type *ResultType = ConvertType(E->getType());
8698     Value *X = EmitScalarExpr(E->getArg(0));
8699     // Constant-fold the M4 and M5 mask arguments.
8700     llvm::APSInt M4, M5;
8701     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8702     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8703     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8704     (void)IsConstM4; (void)IsConstM5;
8705     // Check whether this instance of vfidb can be represented via an LLVM
8706     // standard intrinsic.  We only support some combinations of M4 and M5.
8707     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8708     switch (M4.getZExtValue()) {
8709     default: break;
8710     case 0:  // IEEE-inexact exception allowed
8711       switch (M5.getZExtValue()) {
8712       default: break;
8713       case 0: ID = Intrinsic::rint; break;
8714       }
8715       break;
8716     case 4:  // IEEE-inexact exception suppressed
8717       switch (M5.getZExtValue()) {
8718       default: break;
8719       case 0: ID = Intrinsic::nearbyint; break;
8720       case 1: ID = Intrinsic::round; break;
8721       case 5: ID = Intrinsic::trunc; break;
8722       case 6: ID = Intrinsic::ceil; break;
8723       case 7: ID = Intrinsic::floor; break;
8724       }
8725       break;
8726     }
8727     if (ID != Intrinsic::not_intrinsic) {
8728       Function *F = CGM.getIntrinsic(ID, ResultType);
8729       return Builder.CreateCall(F, X);
8730     }
8731     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8732     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8733     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8734     return Builder.CreateCall(F, {X, M4Value, M5Value});
8735   }
8736
8737   // Vector intrinsics that output the post-instruction CC value.
8738
8739 #define INTRINSIC_WITH_CC(NAME) \
8740     case SystemZ::BI__builtin_##NAME: \
8741       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8742
8743   INTRINSIC_WITH_CC(s390_vpkshs);
8744   INTRINSIC_WITH_CC(s390_vpksfs);
8745   INTRINSIC_WITH_CC(s390_vpksgs);
8746
8747   INTRINSIC_WITH_CC(s390_vpklshs);
8748   INTRINSIC_WITH_CC(s390_vpklsfs);
8749   INTRINSIC_WITH_CC(s390_vpklsgs);
8750
8751   INTRINSIC_WITH_CC(s390_vceqbs);
8752   INTRINSIC_WITH_CC(s390_vceqhs);
8753   INTRINSIC_WITH_CC(s390_vceqfs);
8754   INTRINSIC_WITH_CC(s390_vceqgs);
8755
8756   INTRINSIC_WITH_CC(s390_vchbs);
8757   INTRINSIC_WITH_CC(s390_vchhs);
8758   INTRINSIC_WITH_CC(s390_vchfs);
8759   INTRINSIC_WITH_CC(s390_vchgs);
8760
8761   INTRINSIC_WITH_CC(s390_vchlbs);
8762   INTRINSIC_WITH_CC(s390_vchlhs);
8763   INTRINSIC_WITH_CC(s390_vchlfs);
8764   INTRINSIC_WITH_CC(s390_vchlgs);
8765
8766   INTRINSIC_WITH_CC(s390_vfaebs);
8767   INTRINSIC_WITH_CC(s390_vfaehs);
8768   INTRINSIC_WITH_CC(s390_vfaefs);
8769
8770   INTRINSIC_WITH_CC(s390_vfaezbs);
8771   INTRINSIC_WITH_CC(s390_vfaezhs);
8772   INTRINSIC_WITH_CC(s390_vfaezfs);
8773
8774   INTRINSIC_WITH_CC(s390_vfeebs);
8775   INTRINSIC_WITH_CC(s390_vfeehs);
8776   INTRINSIC_WITH_CC(s390_vfeefs);
8777
8778   INTRINSIC_WITH_CC(s390_vfeezbs);
8779   INTRINSIC_WITH_CC(s390_vfeezhs);
8780   INTRINSIC_WITH_CC(s390_vfeezfs);
8781
8782   INTRINSIC_WITH_CC(s390_vfenebs);
8783   INTRINSIC_WITH_CC(s390_vfenehs);
8784   INTRINSIC_WITH_CC(s390_vfenefs);
8785
8786   INTRINSIC_WITH_CC(s390_vfenezbs);
8787   INTRINSIC_WITH_CC(s390_vfenezhs);
8788   INTRINSIC_WITH_CC(s390_vfenezfs);
8789
8790   INTRINSIC_WITH_CC(s390_vistrbs);
8791   INTRINSIC_WITH_CC(s390_vistrhs);
8792   INTRINSIC_WITH_CC(s390_vistrfs);
8793
8794   INTRINSIC_WITH_CC(s390_vstrcbs);
8795   INTRINSIC_WITH_CC(s390_vstrchs);
8796   INTRINSIC_WITH_CC(s390_vstrcfs);
8797
8798   INTRINSIC_WITH_CC(s390_vstrczbs);
8799   INTRINSIC_WITH_CC(s390_vstrczhs);
8800   INTRINSIC_WITH_CC(s390_vstrczfs);
8801
8802   INTRINSIC_WITH_CC(s390_vfcedbs);
8803   INTRINSIC_WITH_CC(s390_vfchdbs);
8804   INTRINSIC_WITH_CC(s390_vfchedbs);
8805
8806   INTRINSIC_WITH_CC(s390_vftcidb);
8807
8808 #undef INTRINSIC_WITH_CC
8809
8810   default:
8811     return nullptr;
8812   }
8813 }
8814
8815 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8816                                              const CallExpr *E) {
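       // MakeLdg emits a call to the given ldg intrinsic, overloaded on the pointee
       // and pointer types, passing the pointee's natural alignment as the second
       // operand.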
8817   auto MakeLdg = [&](unsigned IntrinsicID) {
8818     Value *Ptr = EmitScalarExpr(E->getArg(0));
8819     AlignmentSource AlignSource;
8820     clang::CharUnits Align =
8821         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
8822     return Builder.CreateCall(
8823         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8824                                        Ptr->getType()}),
8825         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8826   };
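       // MakeScopedAtomic emits a call to the given scoped (cta/sys) atomic
       // intrinsic, overloaded on the pointee and pointer types, with the pointer
       // and value operands.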
8827   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8828     Value *Ptr = EmitScalarExpr(E->getArg(0));
8829     return Builder.CreateCall(
8830         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8831                                        Ptr->getType()}),
8832         {Ptr, EmitScalarExpr(E->getArg(1))});
8833   };
8834   switch (BuiltinID) {
8835   case NVPTX::BI__nvvm_atom_add_gen_i:
8836   case NVPTX::BI__nvvm_atom_add_gen_l:
8837   case NVPTX::BI__nvvm_atom_add_gen_ll:
8838     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8839
8840   case NVPTX::BI__nvvm_atom_sub_gen_i:
8841   case NVPTX::BI__nvvm_atom_sub_gen_l:
8842   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8843     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8844
8845   case NVPTX::BI__nvvm_atom_and_gen_i:
8846   case NVPTX::BI__nvvm_atom_and_gen_l:
8847   case NVPTX::BI__nvvm_atom_and_gen_ll:
8848     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8849
8850   case NVPTX::BI__nvvm_atom_or_gen_i:
8851   case NVPTX::BI__nvvm_atom_or_gen_l:
8852   case NVPTX::BI__nvvm_atom_or_gen_ll:
8853     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8854
8855   case NVPTX::BI__nvvm_atom_xor_gen_i:
8856   case NVPTX::BI__nvvm_atom_xor_gen_l:
8857   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8858     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8859
8860   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8861   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8862   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8863     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8864
8865   case NVPTX::BI__nvvm_atom_max_gen_i:
8866   case NVPTX::BI__nvvm_atom_max_gen_l:
8867   case NVPTX::BI__nvvm_atom_max_gen_ll:
8868     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8869
8870   case NVPTX::BI__nvvm_atom_max_gen_ui:
8871   case NVPTX::BI__nvvm_atom_max_gen_ul:
8872   case NVPTX::BI__nvvm_atom_max_gen_ull:
8873     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8874
8875   case NVPTX::BI__nvvm_atom_min_gen_i:
8876   case NVPTX::BI__nvvm_atom_min_gen_l:
8877   case NVPTX::BI__nvvm_atom_min_gen_ll:
8878     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8879
8880   case NVPTX::BI__nvvm_atom_min_gen_ui:
8881   case NVPTX::BI__nvvm_atom_min_gen_ul:
8882   case NVPTX::BI__nvvm_atom_min_gen_ull:
8883     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8884
8885   case NVPTX::BI__nvvm_atom_cas_gen_i:
8886   case NVPTX::BI__nvvm_atom_cas_gen_l:
8887   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8888     // __nvvm_atom_cas_gen_* should return the old value rather than the
8889     // success flag.
8890     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8891
8892   case NVPTX::BI__nvvm_atom_add_gen_f: {
8893     Value *Ptr = EmitScalarExpr(E->getArg(0));
8894     Value *Val = EmitScalarExpr(E->getArg(1));
8895     // atomicrmw only deals with integer arguments, so we need to use
8896     // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
8897     Value *FnALAF32 =
8898         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8899     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8900   }
8901
8902   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8903     Value *Ptr = EmitScalarExpr(E->getArg(0));
8904     Value *Val = EmitScalarExpr(E->getArg(1));
8905     Value *FnALI32 =
8906         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8907     return Builder.CreateCall(FnALI32, {Ptr, Val});
8908   }
8909
8910   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8911     Value *Ptr = EmitScalarExpr(E->getArg(0));
8912     Value *Val = EmitScalarExpr(E->getArg(1));
8913     Value *FnALD32 =
8914         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8915     return Builder.CreateCall(FnALD32, {Ptr, Val});
8916   }
8917
8918   case NVPTX::BI__nvvm_ldg_c:
8919   case NVPTX::BI__nvvm_ldg_c2:
8920   case NVPTX::BI__nvvm_ldg_c4:
8921   case NVPTX::BI__nvvm_ldg_s:
8922   case NVPTX::BI__nvvm_ldg_s2:
8923   case NVPTX::BI__nvvm_ldg_s4:
8924   case NVPTX::BI__nvvm_ldg_i:
8925   case NVPTX::BI__nvvm_ldg_i2:
8926   case NVPTX::BI__nvvm_ldg_i4:
8927   case NVPTX::BI__nvvm_ldg_l:
8928   case NVPTX::BI__nvvm_ldg_ll:
8929   case NVPTX::BI__nvvm_ldg_ll2:
8930   case NVPTX::BI__nvvm_ldg_uc:
8931   case NVPTX::BI__nvvm_ldg_uc2:
8932   case NVPTX::BI__nvvm_ldg_uc4:
8933   case NVPTX::BI__nvvm_ldg_us:
8934   case NVPTX::BI__nvvm_ldg_us2:
8935   case NVPTX::BI__nvvm_ldg_us4:
8936   case NVPTX::BI__nvvm_ldg_ui:
8937   case NVPTX::BI__nvvm_ldg_ui2:
8938   case NVPTX::BI__nvvm_ldg_ui4:
8939   case NVPTX::BI__nvvm_ldg_ul:
8940   case NVPTX::BI__nvvm_ldg_ull:
8941   case NVPTX::BI__nvvm_ldg_ull2:
8942     // PTX Interoperability section 2.2: "For a vector with an even number of
8943     // elements, its alignment is set to number of elements times the alignment
8944     // of its member: n*alignof(t)."
8945     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8946   case NVPTX::BI__nvvm_ldg_f:
8947   case NVPTX::BI__nvvm_ldg_f2:
8948   case NVPTX::BI__nvvm_ldg_f4:
8949   case NVPTX::BI__nvvm_ldg_d:
8950   case NVPTX::BI__nvvm_ldg_d2:
8951     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8952
8953   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
8954   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
8955   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
8956     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
8957   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
8958   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
8959   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
8960     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
8961   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
8962   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
8963     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
8964   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
8965   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
8966     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
8967   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
8968   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
8969   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
8970     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
8971   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
8972   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
8973   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
8974     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
8975   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
8976   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
8977   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
8978   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
8979   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
8980   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
8981     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
8982   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
8983   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
8984   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
8985   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
8986   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
8987   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
8988     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
8989   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
8990   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
8991   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
8992   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
8993   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
8994   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
8995     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
8996   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
8997   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
8998   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
8999   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9000   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9001   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9002     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9003   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9004     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9005   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9006     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9007   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9008     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9009   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9010     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9011   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9012   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9013   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9014     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9015   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9016   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9017   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9018     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9019   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9020   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9021   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9022     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9023   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9024   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9025   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9026     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9027   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9028   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9029   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9030     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9031   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9032   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9033   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9034     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9035   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9036   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9037   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9038     Value *Ptr = EmitScalarExpr(E->getArg(0));
9039     return Builder.CreateCall(
9040         CGM.getIntrinsic(
9041             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9042             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9043         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9044   }
9045   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9046   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9047   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9048     Value *Ptr = EmitScalarExpr(E->getArg(0));
9049     return Builder.CreateCall(
9050         CGM.getIntrinsic(
9051             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9052             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9053         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9054   }
9055   default:
9056     return nullptr;
9057   }
9058 }
9059
9060 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9061                                                    const CallExpr *E) {
9062   switch (BuiltinID) {
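       // Note: both builtins operate in units of WebAssembly pages; current_memory
       // returns the current size and grow_memory grows it by the given delta.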
9063   case WebAssembly::BI__builtin_wasm_current_memory: {
9064     llvm::Type *ResultType = ConvertType(E->getType());
9065     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9066     return Builder.CreateCall(Callee);
9067   }
9068   case WebAssembly::BI__builtin_wasm_grow_memory: {
9069     Value *X = EmitScalarExpr(E->getArg(0));
9070     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9071     return Builder.CreateCall(Callee, X);
9072   }
9073
9074   default:
9075     return nullptr;
9076   }
9077 }