1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
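/// For example (an illustrative sketch, not specific to one builtin):
/// __sync_fetch_and_add(p, v) reaches this helper with Kind == Add and is
/// lowered to roughly
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
/// with the old value converted back to the source-level type.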
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
120
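// Helpers for __builtin_nontemporal_store/__builtin_nontemporal_load: the
// value is stored or loaded through an LValue flagged as nontemporal, which
// is what attaches !nontemporal metadata to the resulting memory instruction.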
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
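/// For example (illustrative), __sync_add_and_fetch(p, v) emits the atomicrmw
/// as above and then re-applies the operation to the returned old value:
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
///   %new = add i32 %old, %v
/// Invert is used for variants like __sync_nand_and_fetch, where the
/// recomputed value is additionally xor'ed with -1.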
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
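///
/// Illustrative lowering: __sync_val_compare_and_swap(p, old, new) becomes a
/// seq_cst cmpxchg whose first result (the previous value) is returned, while
/// __sync_bool_compare_and_swap returns the second result (the success bit),
/// zero-extended to the call's result type.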
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating the value in order to extract the higher-order
304     // double, which we will then use to extract the sign.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
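///
/// A rough usage sketch (variable names here are illustrative only):
///   llvm::Value *Carry;
///   llvm::Value *Sum = EmitOverflowIntrinsic(
///       CGF, llvm::Intrinsic::sadd_with_overflow, LHS, RHS, Carry);
///   // ... then branch or select on Carry to detect overflow.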
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
341
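// Emit a zero-argument intrinsic call and attach !range metadata describing
// the half-open interval [low, high) to the result (a descriptive note; the
// callers decide the actual bounds).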
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
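//
// Worked example (illustrative): for {signed 32-bit, unsigned 32-bit} the
// result must be signed, and the unsigned member then needs one extra bit, so
// the encompassing type is a signed 33-bit integer (widths are not rounded up
// to a power of two here).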
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383 // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
396
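// Shared lowering for va_start/va_end: the va_list pointer is cast to i8* if
// needed and passed to @llvm.va_start or @llvm.va_end. (Descriptive note; the
// builtins' argument handling lives in EmitBuiltinExpr below.)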
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct
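/// (For reference: Type follows the usual __builtin_object_size encoding,
/// where bit 1 selects a minimum rather than a maximum result and bit 0 asks
/// about the closest surrounding subobject rather than the whole object.)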
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType) {
424   uint64_t ObjectSize;
425   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
426     return emitBuiltinObjectSize(E, Type, ResType);
427   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
428 }
429
430 /// Returns a Value corresponding to the size of the given expression.
431 /// This Value may be either of the following:
432 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
433 ///     it)
434 ///   - A call to the @llvm.objectsize intrinsic
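///
/// An illustrative form of the intrinsic call this can produce (the exact
/// signature varies across LLVM versions):
///   %size = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %min)
/// where %min is true when a minimum (Type & 2) result was requested.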
435 llvm::Value *
436 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
437                                        llvm::IntegerType *ResType) {
438   // We need to reference an argument if the pointer is a parameter with the
439   // pass_object_size attribute.
440   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
441     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
442     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
443     if (Param != nullptr && PS != nullptr &&
444         areBOSTypesCompatible(PS->getType(), Type)) {
445       auto Iter = SizeArguments.find(Param);
446       assert(Iter != SizeArguments.end());
447
448       const ImplicitParamDecl *D = Iter->second;
449       auto DIter = LocalDeclMap.find(D);
450       assert(DIter != LocalDeclMap.end());
451
452       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
453                               getContext().getSizeType(), E->getLocStart());
454     }
455   }
456
457   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
458   // evaluate E for side-effects. In either case, we shouldn't lower to
459   // @llvm.objectsize.
460   if (Type == 3 || E->HasSideEffects(getContext()))
461     return getDefaultBuiltinObjectSizeResult(Type, ResType);
462
463   // LLVM only supports 0 and 2, so pass along whether a minimum result
464   // (Type & 2) was requested as a boolean.
465   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
466   // FIXME: Get right address space.
467   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
468   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
469   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
470 }
471
472 // Many of the MSVC builtins are available on both x64 and ARM; to avoid
473 // repeating code, we handle them here.
474 enum class CodeGenFunction::MSVCIntrin {
475   _BitScanForward,
476   _BitScanReverse,
477   _InterlockedAnd,
478   _InterlockedDecrement,
479   _InterlockedExchange,
480   _InterlockedExchangeAdd,
481   _InterlockedExchangeSub,
482   _InterlockedIncrement,
483   _InterlockedOr,
484   _InterlockedXor,
485 };
486
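// A note on the _BitScan* lowering below (mirroring the documented MSVC
// contract): the intrinsics return 0 and skip the index store when the mask
// is zero; otherwise they store the index of the lowest (_BitScanForward) or
// highest (_BitScanReverse) set bit and return 1, which is why an explicit
// zero-check CFG is built around cttz/ctlz.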
487 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
488   const CallExpr *E) {
489   switch (BuiltinID) {
490   case MSVCIntrin::_BitScanForward:
491   case MSVCIntrin::_BitScanReverse: {
492     Value *ArgValue = EmitScalarExpr(E->getArg(1));
493
494     llvm::Type *ArgType = ArgValue->getType();
495     llvm::Type *IndexType =
496       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
497     llvm::Type *ResultType = ConvertType(E->getType());
498
499     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
500     Value *ResZero = llvm::Constant::getNullValue(ResultType);
501     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
502
503     BasicBlock *Begin = Builder.GetInsertBlock();
504     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
505     Builder.SetInsertPoint(End);
506     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
507
508     Builder.SetInsertPoint(Begin);
509     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
510     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
511     Builder.CreateCondBr(IsZero, End, NotZero);
512     Result->addIncoming(ResZero, Begin);
513
514     Builder.SetInsertPoint(NotZero);
515     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
516
517     if (BuiltinID == MSVCIntrin::_BitScanForward) {
518       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
519       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
520       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
521       Builder.CreateStore(ZeroCount, IndexAddress, false);
522     } else {
523       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
524       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
525
526       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
527       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
528       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
529       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
530       Builder.CreateStore(Index, IndexAddress, false);
531     }
532     Builder.CreateBr(End);
533     Result->addIncoming(ResOne, NotZero);
534
535     Builder.SetInsertPoint(End);
536     return Result;
537   }
538   case MSVCIntrin::_InterlockedAnd:
539     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
540   case MSVCIntrin::_InterlockedExchange:
541     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
542   case MSVCIntrin::_InterlockedExchangeAdd:
543     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
544   case MSVCIntrin::_InterlockedExchangeSub:
545     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
546   case MSVCIntrin::_InterlockedOr:
547     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
548   case MSVCIntrin::_InterlockedXor:
549     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
550
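  // _InterlockedIncrement and _InterlockedDecrement return the *new* value,
  // so the atomicrmw result (which is the old value) is adjusted by one after
  // the fact.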
551   case MSVCIntrin::_InterlockedDecrement: {
552     llvm::Type *IntTy = ConvertType(E->getType());
553     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
554       AtomicRMWInst::Sub,
555       EmitScalarExpr(E->getArg(0)),
556       ConstantInt::get(IntTy, 1),
557       llvm::AtomicOrdering::SequentiallyConsistent);
558     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
559   }
560   case MSVCIntrin::_InterlockedIncrement: {
561     llvm::Type *IntTy = ConvertType(E->getType());
562     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
563       AtomicRMWInst::Add,
564       EmitScalarExpr(E->getArg(0)),
565       ConstantInt::get(IntTy, 1),
566       llvm::AtomicOrdering::SequentiallyConsistent);
567     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
568   }
569   }
570   llvm_unreachable("Incorrect MSVC intrinsic!");
571 }
572
573 namespace {
574 // ARC cleanup for __builtin_os_log_format
575 struct CallObjCArcUse final : EHScopeStack::Cleanup {
576   CallObjCArcUse(llvm::Value *object) : object(object) {}
577   llvm::Value *object;
578
579   void Emit(CodeGenFunction &CGF, Flags flags) override {
580     CGF.EmitARCIntrinsicUse(object);
581   }
582 };
583 }
584
585 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
586                                         unsigned BuiltinID, const CallExpr *E,
587                                         ReturnValueSlot ReturnValue) {
588   // See if we can constant fold this builtin.  If so, don't emit it at all.
589   Expr::EvalResult Result;
590   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
591       !Result.hasSideEffects()) {
592     if (Result.Val.isInt())
593       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
594                                                 Result.Val.getInt()));
595     if (Result.Val.isFloat())
596       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
597                                                Result.Val.getFloat()));
598   }
599
600   switch (BuiltinID) {
601   default: break;  // Handle intrinsics and libm functions below.
602   case Builtin::BI__builtin___CFStringMakeConstantString:
603   case Builtin::BI__builtin___NSStringMakeConstantString:
604     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
605   case Builtin::BI__builtin_stdarg_start:
606   case Builtin::BI__builtin_va_start:
607   case Builtin::BI__va_start:
608   case Builtin::BI__builtin_va_end:
609     return RValue::get(
610         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
611                            ? EmitScalarExpr(E->getArg(0))
612                            : EmitVAListRef(E->getArg(0)).getPointer(),
613                        BuiltinID != Builtin::BI__builtin_va_end));
614   case Builtin::BI__builtin_va_copy: {
615     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
616     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
617
618     llvm::Type *Type = Int8PtrTy;
619
620     DstPtr = Builder.CreateBitCast(DstPtr, Type);
621     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
622     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
623                                           {DstPtr, SrcPtr}));
624   }
625   case Builtin::BI__builtin_abs:
626   case Builtin::BI__builtin_labs:
627   case Builtin::BI__builtin_llabs: {
628     Value *ArgValue = EmitScalarExpr(E->getArg(0));
629
630     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
631     Value *CmpResult =
632         Builder.CreateICmpSGE(ArgValue,
633                               llvm::Constant::getNullValue(ArgValue->getType()),
634                               "abscond");
635     Value *Result =
636       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
637
638     return RValue::get(Result);
639   }
640   case Builtin::BI__builtin_fabs:
641   case Builtin::BI__builtin_fabsf:
642   case Builtin::BI__builtin_fabsl: {
643     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
644   }
645   case Builtin::BI__builtin_fmod:
646   case Builtin::BI__builtin_fmodf:
647   case Builtin::BI__builtin_fmodl: {
648     Value *Arg1 = EmitScalarExpr(E->getArg(0));
649     Value *Arg2 = EmitScalarExpr(E->getArg(1));
650     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
651     return RValue::get(Result);
652   }
653   case Builtin::BI__builtin_copysign:
654   case Builtin::BI__builtin_copysignf:
655   case Builtin::BI__builtin_copysignl: {
656     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
657   }
658   case Builtin::BI__builtin_ceil:
659   case Builtin::BI__builtin_ceilf:
660   case Builtin::BI__builtin_ceill: {
661     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
662   }
663   case Builtin::BI__builtin_floor:
664   case Builtin::BI__builtin_floorf:
665   case Builtin::BI__builtin_floorl: {
666     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
667   }
668   case Builtin::BI__builtin_trunc:
669   case Builtin::BI__builtin_truncf:
670   case Builtin::BI__builtin_truncl: {
671     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
672   }
673   case Builtin::BI__builtin_rint:
674   case Builtin::BI__builtin_rintf:
675   case Builtin::BI__builtin_rintl: {
676     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
677   }
678   case Builtin::BI__builtin_nearbyint:
679   case Builtin::BI__builtin_nearbyintf:
680   case Builtin::BI__builtin_nearbyintl: {
681     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
682   }
683   case Builtin::BI__builtin_round:
684   case Builtin::BI__builtin_roundf:
685   case Builtin::BI__builtin_roundl: {
686     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
687   }
688   case Builtin::BI__builtin_fmin:
689   case Builtin::BI__builtin_fminf:
690   case Builtin::BI__builtin_fminl: {
691     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
692   }
693   case Builtin::BI__builtin_fmax:
694   case Builtin::BI__builtin_fmaxf:
695   case Builtin::BI__builtin_fmaxl: {
696     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
697   }
698   case Builtin::BI__builtin_conj:
699   case Builtin::BI__builtin_conjf:
700   case Builtin::BI__builtin_conjl: {
701     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
702     Value *Real = ComplexVal.first;
703     Value *Imag = ComplexVal.second;
704     Value *Zero =
705       Imag->getType()->isFPOrFPVectorTy()
706         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
707         : llvm::Constant::getNullValue(Imag->getType());
708
709     Imag = Builder.CreateFSub(Zero, Imag, "sub");
710     return RValue::getComplex(std::make_pair(Real, Imag));
711   }
712   case Builtin::BI__builtin_creal:
713   case Builtin::BI__builtin_crealf:
714   case Builtin::BI__builtin_creall:
715   case Builtin::BIcreal:
716   case Builtin::BIcrealf:
717   case Builtin::BIcreall: {
718     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
719     return RValue::get(ComplexVal.first);
720   }
721
722   case Builtin::BI__builtin_cimag:
723   case Builtin::BI__builtin_cimagf:
724   case Builtin::BI__builtin_cimagl:
725   case Builtin::BIcimag:
726   case Builtin::BIcimagf:
727   case Builtin::BIcimagl: {
728     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
729     return RValue::get(ComplexVal.second);
730   }
731
732   case Builtin::BI__builtin_ctzs:
733   case Builtin::BI__builtin_ctz:
734   case Builtin::BI__builtin_ctzl:
735   case Builtin::BI__builtin_ctzll: {
736     Value *ArgValue = EmitScalarExpr(E->getArg(0));
737
738     llvm::Type *ArgType = ArgValue->getType();
739     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
740
741     llvm::Type *ResultType = ConvertType(E->getType());
742     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
743     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
744     if (Result->getType() != ResultType)
745       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
746                                      "cast");
747     return RValue::get(Result);
748   }
749   case Builtin::BI__builtin_clzs:
750   case Builtin::BI__builtin_clz:
751   case Builtin::BI__builtin_clzl:
752   case Builtin::BI__builtin_clzll: {
753     Value *ArgValue = EmitScalarExpr(E->getArg(0));
754
755     llvm::Type *ArgType = ArgValue->getType();
756     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
757
758     llvm::Type *ResultType = ConvertType(E->getType());
759     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
760     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
761     if (Result->getType() != ResultType)
762       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
763                                      "cast");
764     return RValue::get(Result);
765   }
766   case Builtin::BI__builtin_ffs:
767   case Builtin::BI__builtin_ffsl:
768   case Builtin::BI__builtin_ffsll: {
769     // ffs(x) -> x ? cttz(x) + 1 : 0
770     Value *ArgValue = EmitScalarExpr(E->getArg(0));
771
772     llvm::Type *ArgType = ArgValue->getType();
773     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
774
775     llvm::Type *ResultType = ConvertType(E->getType());
776     Value *Tmp =
777         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
778                           llvm::ConstantInt::get(ArgType, 1));
779     Value *Zero = llvm::Constant::getNullValue(ArgType);
780     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
781     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
782     if (Result->getType() != ResultType)
783       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
784                                      "cast");
785     return RValue::get(Result);
786   }
787   case Builtin::BI__builtin_parity:
788   case Builtin::BI__builtin_parityl:
789   case Builtin::BI__builtin_parityll: {
790     // parity(x) -> ctpop(x) & 1
791     Value *ArgValue = EmitScalarExpr(E->getArg(0));
792
793     llvm::Type *ArgType = ArgValue->getType();
794     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
795
796     llvm::Type *ResultType = ConvertType(E->getType());
797     Value *Tmp = Builder.CreateCall(F, ArgValue);
798     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
799     if (Result->getType() != ResultType)
800       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
801                                      "cast");
802     return RValue::get(Result);
803   }
804   case Builtin::BI__popcnt16:
805   case Builtin::BI__popcnt:
806   case Builtin::BI__popcnt64:
807   case Builtin::BI__builtin_popcount:
808   case Builtin::BI__builtin_popcountl:
809   case Builtin::BI__builtin_popcountll: {
810     Value *ArgValue = EmitScalarExpr(E->getArg(0));
811
812     llvm::Type *ArgType = ArgValue->getType();
813     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
814
815     llvm::Type *ResultType = ConvertType(E->getType());
816     Value *Result = Builder.CreateCall(F, ArgValue);
817     if (Result->getType() != ResultType)
818       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
819                                      "cast");
820     return RValue::get(Result);
821   }
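  // The rotate builtins below are open-coded rather than lowered to an
  // intrinsic: e.g. _rotr(x, s) is computed as
  //   (x >> (s & (W-1))) | (x << (W - (s & (W-1))))
  // with an explicit select returning x unchanged when the masked shift is 0,
  // since the left-shift amount would otherwise equal the bit width W.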
822   case Builtin::BI_rotr8:
823   case Builtin::BI_rotr16:
824   case Builtin::BI_rotr:
825   case Builtin::BI_lrotr:
826   case Builtin::BI_rotr64: {
827     Value *Val = EmitScalarExpr(E->getArg(0));
828     Value *Shift = EmitScalarExpr(E->getArg(1));
829
830     llvm::Type *ArgType = Val->getType();
831     Shift = Builder.CreateIntCast(Shift, ArgType, false);
832     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
833     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
834     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
835
836     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
837     Shift = Builder.CreateAnd(Shift, Mask);
838     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
839
840     Value *RightShifted = Builder.CreateLShr(Val, Shift);
841     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
842     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
843
844     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
845     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
846     return RValue::get(Result);
847   }
848   case Builtin::BI_rotl8:
849   case Builtin::BI_rotl16:
850   case Builtin::BI_rotl:
851   case Builtin::BI_lrotl:
852   case Builtin::BI_rotl64: {
853     Value *Val = EmitScalarExpr(E->getArg(0));
854     Value *Shift = EmitScalarExpr(E->getArg(1));
855
856     llvm::Type *ArgType = Val->getType();
857     Shift = Builder.CreateIntCast(Shift, ArgType, false);
858     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
859     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
860     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
861
862     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
863     Shift = Builder.CreateAnd(Shift, Mask);
864     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
865
866     Value *LeftShifted = Builder.CreateShl(Val, Shift);
867     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
868     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
869
870     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
871     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
872     return RValue::get(Result);
873   }
874   case Builtin::BI__builtin_unpredictable: {
875     // Always return the argument of __builtin_unpredictable. LLVM does not
876     // handle this builtin. Metadata for this builtin should be added directly
877     // to instructions such as branches or switches that use it.
878     return RValue::get(EmitScalarExpr(E->getArg(0)));
879   }
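  // For __builtin_expect(expr, expected), when not at -O0 the lowering below
  // produces, roughly,
  //   %expval = call T @llvm.expect.T(T %expr, T %expected)
  // where T is the argument's integer type, and %expval is returned.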
880   case Builtin::BI__builtin_expect: {
881     Value *ArgValue = EmitScalarExpr(E->getArg(0));
882     llvm::Type *ArgType = ArgValue->getType();
883
884     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
885     // Don't generate llvm.expect on -O0 as the backend won't use it for
886     // anything.
887     // Note, we still IRGen ExpectedValue because it could have side-effects.
888     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
889       return RValue::get(ArgValue);
890
891     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
892     Value *Result =
893         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
894     return RValue::get(Result);
895   }
896   case Builtin::BI__builtin_assume_aligned: {
897     Value *PtrValue = EmitScalarExpr(E->getArg(0));
898     Value *OffsetValue =
899       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
900
901     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
902     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
903     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
904
905     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
906     return RValue::get(PtrValue);
907   }
908   case Builtin::BI__assume:
909   case Builtin::BI__builtin_assume: {
910     if (E->getArg(0)->HasSideEffects(getContext()))
911       return RValue::get(nullptr);
912
913     Value *ArgValue = EmitScalarExpr(E->getArg(0));
914     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
915     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
916   }
917   case Builtin::BI__builtin_bswap16:
918   case Builtin::BI__builtin_bswap32:
919   case Builtin::BI__builtin_bswap64: {
920     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
921   }
922   case Builtin::BI__builtin_bitreverse8:
923   case Builtin::BI__builtin_bitreverse16:
924   case Builtin::BI__builtin_bitreverse32:
925   case Builtin::BI__builtin_bitreverse64: {
926     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
927   }
928   case Builtin::BI__builtin_object_size: {
929     unsigned Type =
930         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
931     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
932
933     // We pass this builtin onto the optimizer so that it can figure out the
934     // object size in more complex cases.
935     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
936   }
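  // For __builtin_prefetch the optional arguments default to rw=0 (read) and
  // locality=3 (maximum temporal locality); the trailing constant 1 marks a
  // data rather than instruction prefetch, matching @llvm.prefetch's operands.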
937   case Builtin::BI__builtin_prefetch: {
938     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
939     // FIXME: Technically these constants should be of type 'int', yes?
940     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
941       llvm::ConstantInt::get(Int32Ty, 0);
942     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
943       llvm::ConstantInt::get(Int32Ty, 3);
944     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
945     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
946     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
947   }
948   case Builtin::BI__builtin_readcyclecounter: {
949     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
950     return RValue::get(Builder.CreateCall(F));
951   }
952   case Builtin::BI__builtin___clear_cache: {
953     Value *Begin = EmitScalarExpr(E->getArg(0));
954     Value *End = EmitScalarExpr(E->getArg(1));
955     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
956     return RValue::get(Builder.CreateCall(F, {Begin, End}));
957   }
958   case Builtin::BI__builtin_trap:
959     return RValue::get(EmitTrapCall(Intrinsic::trap));
960   case Builtin::BI__debugbreak:
961     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
962   case Builtin::BI__builtin_unreachable: {
963     if (SanOpts.has(SanitizerKind::Unreachable)) {
964       SanitizerScope SanScope(this);
965       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
966                                SanitizerKind::Unreachable),
967                 SanitizerHandler::BuiltinUnreachable,
968                 EmitCheckSourceLocation(E->getExprLoc()), None);
969     } else
970       Builder.CreateUnreachable();
971
972     // We do need to preserve an insertion point.
973     EmitBlock(createBasicBlock("unreachable.cont"));
974
975     return RValue::get(nullptr);
976   }
977
978   case Builtin::BI__builtin_powi:
979   case Builtin::BI__builtin_powif:
980   case Builtin::BI__builtin_powil: {
981     Value *Base = EmitScalarExpr(E->getArg(0));
982     Value *Exponent = EmitScalarExpr(E->getArg(1));
983     llvm::Type *ArgType = Base->getType();
984     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
985     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
986   }
987
988   case Builtin::BI__builtin_isgreater:
989   case Builtin::BI__builtin_isgreaterequal:
990   case Builtin::BI__builtin_isless:
991   case Builtin::BI__builtin_islessequal:
992   case Builtin::BI__builtin_islessgreater:
993   case Builtin::BI__builtin_isunordered: {
994     // Ordered comparisons: we know the arguments to these are matching scalar
995     // floating point values.
996     Value *LHS = EmitScalarExpr(E->getArg(0));
997     Value *RHS = EmitScalarExpr(E->getArg(1));
998
999     switch (BuiltinID) {
1000     default: llvm_unreachable("Unknown ordered comparison");
1001     case Builtin::BI__builtin_isgreater:
1002       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1003       break;
1004     case Builtin::BI__builtin_isgreaterequal:
1005       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1006       break;
1007     case Builtin::BI__builtin_isless:
1008       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1009       break;
1010     case Builtin::BI__builtin_islessequal:
1011       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1012       break;
1013     case Builtin::BI__builtin_islessgreater:
1014       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1015       break;
1016     case Builtin::BI__builtin_isunordered:
1017       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1018       break;
1019     }
1020     // ZExt bool to int type.
1021     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1022   }
1023   case Builtin::BI__builtin_isnan: {
1024     Value *V = EmitScalarExpr(E->getArg(0));
1025     V = Builder.CreateFCmpUNO(V, V, "cmp");
1026     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1027   }
1028
1029   case Builtin::BIfinite:
1030   case Builtin::BI__finite:
1031   case Builtin::BIfinitef:
1032   case Builtin::BI__finitef:
1033   case Builtin::BIfinitel:
1034   case Builtin::BI__finitel:
1035   case Builtin::BI__builtin_isinf:
1036   case Builtin::BI__builtin_isfinite: {
1037     // isinf(x)    --> fabs(x) == infinity
1038     // isfinite(x) --> fabs(x) != infinity
1039     // x != NaN via the ordered compare in either case.
1040     Value *V = EmitScalarExpr(E->getArg(0));
1041     Value *Fabs = EmitFAbs(*this, V);
1042     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1043     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1044                                   ? CmpInst::FCMP_OEQ
1045                                   : CmpInst::FCMP_ONE;
1046     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1047     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1048   }
1049
1050   case Builtin::BI__builtin_isinf_sign: {
1051     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1052     Value *Arg = EmitScalarExpr(E->getArg(0));
1053     Value *AbsArg = EmitFAbs(*this, Arg);
1054     Value *IsInf = Builder.CreateFCmpOEQ(
1055         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1056     Value *IsNeg = EmitSignBit(*this, Arg);
1057
1058     llvm::Type *IntTy = ConvertType(E->getType());
1059     Value *Zero = Constant::getNullValue(IntTy);
1060     Value *One = ConstantInt::get(IntTy, 1);
1061     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1062     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1063     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1064     return RValue::get(Result);
1065   }
1066
1067   case Builtin::BI__builtin_isnormal: {
1068     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1069     Value *V = EmitScalarExpr(E->getArg(0));
1070     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1071
1072     Value *Abs = EmitFAbs(*this, V);
1073     Value *IsLessThanInf =
1074       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()), "isinf");
1075     APFloat Smallest = APFloat::getSmallestNormalized(
1076                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1077     Value *IsNormal =
1078       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1079                             "isnormal");
1080     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1081     V = Builder.CreateAnd(V, IsNormal, "and");
1082     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1083   }
1084
1085   case Builtin::BI__builtin_fpclassify: {
1086     Value *V = EmitScalarExpr(E->getArg(5));
1087     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1088
1089     // Create Result
1090     BasicBlock *Begin = Builder.GetInsertBlock();
1091     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1092     Builder.SetInsertPoint(End);
1093     PHINode *Result =
1094       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1095                         "fpclassify_result");
1096
1097     // if (V==0) return FP_ZERO
1098     Builder.SetInsertPoint(Begin);
1099     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1100                                           "iszero");
1101     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1102     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1103     Builder.CreateCondBr(IsZero, End, NotZero);
1104     Result->addIncoming(ZeroLiteral, Begin);
1105
1106     // if (V != V) return FP_NAN
1107     Builder.SetInsertPoint(NotZero);
1108     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1109     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1110     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1111     Builder.CreateCondBr(IsNan, End, NotNan);
1112     Result->addIncoming(NanLiteral, NotZero);
1113
1114     // if (fabs(V) == infinity) return FP_INFINITY
1115     Builder.SetInsertPoint(NotNan);
1116     Value *VAbs = EmitFAbs(*this, V);
1117     Value *IsInf =
1118       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1119                             "isinf");
1120     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1121     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1122     Builder.CreateCondBr(IsInf, End, NotInf);
1123     Result->addIncoming(InfLiteral, NotNan);
1124
1125     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1126     Builder.SetInsertPoint(NotInf);
1127     APFloat Smallest = APFloat::getSmallestNormalized(
1128         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1129     Value *IsNormal =
1130       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1131                             "isnormal");
1132     Value *NormalResult =
1133       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1134                            EmitScalarExpr(E->getArg(3)));
1135     Builder.CreateBr(End);
1136     Result->addIncoming(NormalResult, NotInf);
1137
1138     // return Result
1139     Builder.SetInsertPoint(End);
1140     return RValue::get(Result);
1141   }
1142
1143   case Builtin::BIalloca:
1144   case Builtin::BI_alloca:
1145   case Builtin::BI__builtin_alloca: {
1146     Value *Size = EmitScalarExpr(E->getArg(0));
1147     const TargetInfo &TI = getContext().getTargetInfo();
1148     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1149     unsigned SuitableAlignmentInBytes =
1150         CGM.getContext()
1151             .toCharUnitsFromBits(TI.getSuitableAlign())
1152             .getQuantity();
1153     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1154     AI->setAlignment(SuitableAlignmentInBytes);
1155     return RValue::get(AI);
1156   }
1157
1158   case Builtin::BI__builtin_alloca_with_align: {
1159     Value *Size = EmitScalarExpr(E->getArg(0));
1160     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1161     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1162     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1163     unsigned AlignmentInBytes =
1164         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1165     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1166     AI->setAlignment(AlignmentInBytes);
1167     return RValue::get(AI);
1168   }
1169
1170   case Builtin::BIbzero:
1171   case Builtin::BI__builtin_bzero: {
1172     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1173     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1174     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1175                         E->getArg(0)->getExprLoc(), FD, 0);
1176     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1177     return RValue::get(Dest.getPointer());
1178   }
1179   case Builtin::BImemcpy:
1180   case Builtin::BI__builtin_memcpy: {
1181     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1182     Address Src = EmitPointerWithAlignment(E->getArg(1));
1183     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1184     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1185                         E->getArg(0)->getExprLoc(), FD, 0);
1186     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1187                         E->getArg(1)->getExprLoc(), FD, 1);
1188     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1189     return RValue::get(Dest.getPointer());
1190   }
1191
1192   case Builtin::BI__builtin_char_memchr:
1193     BuiltinID = Builtin::BI__builtin_memchr;
1194     break;
1195
1196   case Builtin::BI__builtin___memcpy_chk: {
1197     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1198     llvm::APSInt Size, DstSize;
1199     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1200         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1201       break;
1202     if (Size.ugt(DstSize))
1203       break;
1204     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1205     Address Src = EmitPointerWithAlignment(E->getArg(1));
1206     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1207     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1208     return RValue::get(Dest.getPointer());
1209   }
1210
1211   case Builtin::BI__builtin_objc_memmove_collectable: {
1212     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1213     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1214     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1215     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1216                                                   DestAddr, SrcAddr, SizeVal);
1217     return RValue::get(DestAddr.getPointer());
1218   }
1219
1220   case Builtin::BI__builtin___memmove_chk: {
1221     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1222     llvm::APSInt Size, DstSize;
1223     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1224         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1225       break;
1226     if (Size.ugt(DstSize))
1227       break;
1228     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1229     Address Src = EmitPointerWithAlignment(E->getArg(1));
1230     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1231     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1232     return RValue::get(Dest.getPointer());
1233   }
1234
1235   case Builtin::BImemmove:
1236   case Builtin::BI__builtin_memmove: {
1237     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1238     Address Src = EmitPointerWithAlignment(E->getArg(1));
1239     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1240     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1241                         E->getArg(0)->getExprLoc(), FD, 0);
1242     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1243                         E->getArg(1)->getExprLoc(), FD, 1);
1244     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1245     return RValue::get(Dest.getPointer());
1246   }
1247   case Builtin::BImemset:
1248   case Builtin::BI__builtin_memset: {
1249     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1250     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1251                                          Builder.getInt8Ty());
1252     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1253     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1254                         E->getArg(0)->getExprLoc(), FD, 0);
1255     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1256     return RValue::get(Dest.getPointer());
1257   }
1258   case Builtin::BI__builtin___memset_chk: {
1259     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1260     llvm::APSInt Size, DstSize;
1261     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1262         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1263       break;
1264     if (Size.ugt(DstSize))
1265       break;
1266     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1267     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1268                                          Builder.getInt8Ty());
1269     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1270     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1271     return RValue::get(Dest.getPointer());
1272   }
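  // Illustrative example of the _chk folding above (a sketch, not emitted
  // code): with constant sizes,
  //
  //   char buf[16];
  //   __builtin___memset_chk(buf, 0, 8, 16);   // 8 <= 16: folded to a plain
  //                                            // 8-byte memset
  //   __builtin___memset_chk(buf, 0, 32, 16);  // 32 > 16: breaks out and is
  //                                            // emitted as a call to the
  //                                            // __memset_chk runtime check
  //
  // Non-constant sizes likewise fall through to the runtime call.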
1273   case Builtin::BI__builtin_dwarf_cfa: {
1274     // The offset in bytes from the first argument to the CFA.
1275     //
1276     // Why on earth is this in the frontend?  Is there any reason at
1277     // all that the backend can't reasonably determine this while
1278     // lowering llvm.eh.dwarf.cfa()?
1279     //
1280     // TODO: If there's a satisfactory reason, add a target hook for
1281     // this instead of hard-coding 0, which is correct for most targets.
1282     int32_t Offset = 0;
1283
1284     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1285     return RValue::get(Builder.CreateCall(F,
1286                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1287   }
1288   case Builtin::BI__builtin_return_address: {
1289     Value *Depth =
1290         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1291     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1292     return RValue::get(Builder.CreateCall(F, Depth));
1293   }
1294   case Builtin::BI_ReturnAddress: {
1295     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1296     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1297   }
1298   case Builtin::BI__builtin_frame_address: {
1299     Value *Depth =
1300         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1301     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1302     return RValue::get(Builder.CreateCall(F, Depth));
1303   }
1304   case Builtin::BI__builtin_extract_return_addr: {
1305     Value *Address = EmitScalarExpr(E->getArg(0));
1306     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1307     return RValue::get(Result);
1308   }
1309   case Builtin::BI__builtin_frob_return_addr: {
1310     Value *Address = EmitScalarExpr(E->getArg(0));
1311     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1312     return RValue::get(Result);
1313   }
1314   case Builtin::BI__builtin_dwarf_sp_column: {
1315     llvm::IntegerType *Ty
1316       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1317     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1318     if (Column == -1) {
1319       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1320       return RValue::get(llvm::UndefValue::get(Ty));
1321     }
1322     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1323   }
1324   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1325     Value *Address = EmitScalarExpr(E->getArg(0));
1326     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1327       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1328     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1329   }
1330   case Builtin::BI__builtin_eh_return: {
1331     Value *Int = EmitScalarExpr(E->getArg(0));
1332     Value *Ptr = EmitScalarExpr(E->getArg(1));
1333
1334     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1335     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1336            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1337     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1338                                   ? Intrinsic::eh_return_i32
1339                                   : Intrinsic::eh_return_i64);
1340     Builder.CreateCall(F, {Int, Ptr});
1341     Builder.CreateUnreachable();
1342
1343     // We do need to preserve an insertion point.
1344     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1345
1346     return RValue::get(nullptr);
1347   }
1348   case Builtin::BI__builtin_unwind_init: {
1349     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1350     return RValue::get(Builder.CreateCall(F));
1351   }
1352   case Builtin::BI__builtin_extend_pointer: {
1353     // Extends a pointer to the size of an _Unwind_Word, which is
1354     // uint64_t on all platforms.  Generally this gets poked into a
1355     // register and eventually used as an address, so if the
1356     // addressing registers are wider than pointers and the platform
1357     // doesn't implicitly ignore high-order bits when doing
1358     // addressing, we need to make sure we zext / sext based on
1359     // the platform's expectations.
1360     //
1361     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1362
1363     // Cast the pointer to intptr_t.
1364     Value *Ptr = EmitScalarExpr(E->getArg(0));
1365     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1366
1367     // If that's 64 bits, we're done.
1368     if (IntPtrTy->getBitWidth() == 64)
1369       return RValue::get(Result);
1370
1371     // Otherwise, ask the target hooks what to do.
1372     if (getTargetHooks().extendPointerWithSExt())
1373       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1374     else
1375       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1376   }
1377   case Builtin::BI__builtin_setjmp: {
1378     // Buffer is a void**.
1379     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1380
1381     // Store the frame pointer to the setjmp buffer.
1382     Value *FrameAddr =
1383       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1384                          ConstantInt::get(Int32Ty, 0));
1385     Builder.CreateStore(FrameAddr, Buf);
1386
1387     // Store the stack pointer to the setjmp buffer.
1388     Value *StackAddr =
1389         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1390     Address StackSaveSlot =
1391       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1392     Builder.CreateStore(StackAddr, StackSaveSlot);
1393
1394     // Call LLVM's EH setjmp, which is lightweight.
1395     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1396     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1397     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1398   }
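  // Sketch of the jmp_buf layout produced above (pointer-sized slots; the
  // untouched slot is left for the target's SjLj lowering to fill in):
  //
  //   buf[0] = frame pointer  (llvm.frameaddress)
  //   buf[1] = <reserved for the target, typically the resume address>
  //   buf[2] = stack pointer  (llvm.stacksave)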
1399   case Builtin::BI__builtin_longjmp: {
1400     Value *Buf = EmitScalarExpr(E->getArg(0));
1401     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1402
1403     // Call LLVM's EH longjmp, which is lightweight.
1404     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1405
1406     // longjmp doesn't return; mark this as unreachable.
1407     Builder.CreateUnreachable();
1408
1409     // We do need to preserve an insertion point.
1410     EmitBlock(createBasicBlock("longjmp.cont"));
1411
1412     return RValue::get(nullptr);
1413   }
1414   case Builtin::BI__sync_fetch_and_add:
1415   case Builtin::BI__sync_fetch_and_sub:
1416   case Builtin::BI__sync_fetch_and_or:
1417   case Builtin::BI__sync_fetch_and_and:
1418   case Builtin::BI__sync_fetch_and_xor:
1419   case Builtin::BI__sync_fetch_and_nand:
1420   case Builtin::BI__sync_add_and_fetch:
1421   case Builtin::BI__sync_sub_and_fetch:
1422   case Builtin::BI__sync_and_and_fetch:
1423   case Builtin::BI__sync_or_and_fetch:
1424   case Builtin::BI__sync_xor_and_fetch:
1425   case Builtin::BI__sync_nand_and_fetch:
1426   case Builtin::BI__sync_val_compare_and_swap:
1427   case Builtin::BI__sync_bool_compare_and_swap:
1428   case Builtin::BI__sync_lock_test_and_set:
1429   case Builtin::BI__sync_lock_release:
1430   case Builtin::BI__sync_swap:
1431     llvm_unreachable("Shouldn't make it through sema");
1432   case Builtin::BI__sync_fetch_and_add_1:
1433   case Builtin::BI__sync_fetch_and_add_2:
1434   case Builtin::BI__sync_fetch_and_add_4:
1435   case Builtin::BI__sync_fetch_and_add_8:
1436   case Builtin::BI__sync_fetch_and_add_16:
1437     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1438   case Builtin::BI__sync_fetch_and_sub_1:
1439   case Builtin::BI__sync_fetch_and_sub_2:
1440   case Builtin::BI__sync_fetch_and_sub_4:
1441   case Builtin::BI__sync_fetch_and_sub_8:
1442   case Builtin::BI__sync_fetch_and_sub_16:
1443     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1444   case Builtin::BI__sync_fetch_and_or_1:
1445   case Builtin::BI__sync_fetch_and_or_2:
1446   case Builtin::BI__sync_fetch_and_or_4:
1447   case Builtin::BI__sync_fetch_and_or_8:
1448   case Builtin::BI__sync_fetch_and_or_16:
1449     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1450   case Builtin::BI__sync_fetch_and_and_1:
1451   case Builtin::BI__sync_fetch_and_and_2:
1452   case Builtin::BI__sync_fetch_and_and_4:
1453   case Builtin::BI__sync_fetch_and_and_8:
1454   case Builtin::BI__sync_fetch_and_and_16:
1455     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1456   case Builtin::BI__sync_fetch_and_xor_1:
1457   case Builtin::BI__sync_fetch_and_xor_2:
1458   case Builtin::BI__sync_fetch_and_xor_4:
1459   case Builtin::BI__sync_fetch_and_xor_8:
1460   case Builtin::BI__sync_fetch_and_xor_16:
1461     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1462   case Builtin::BI__sync_fetch_and_nand_1:
1463   case Builtin::BI__sync_fetch_and_nand_2:
1464   case Builtin::BI__sync_fetch_and_nand_4:
1465   case Builtin::BI__sync_fetch_and_nand_8:
1466   case Builtin::BI__sync_fetch_and_nand_16:
1467     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
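  // As a sketch, the sized __sync_fetch_and_* forms above map one-to-one onto
  // a sequentially consistent atomicrmw, e.g. __sync_fetch_and_add(&x, 1) on
  // an int becomes:
  //
  //   %old = atomicrmw add i32* %x, i32 1 seq_cst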
1468
1469   // Clang extensions: not overloaded yet.
1470   case Builtin::BI__sync_fetch_and_min:
1471     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1472   case Builtin::BI__sync_fetch_and_max:
1473     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1474   case Builtin::BI__sync_fetch_and_umin:
1475     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1476   case Builtin::BI__sync_fetch_and_umax:
1477     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1478
1479   case Builtin::BI__sync_add_and_fetch_1:
1480   case Builtin::BI__sync_add_and_fetch_2:
1481   case Builtin::BI__sync_add_and_fetch_4:
1482   case Builtin::BI__sync_add_and_fetch_8:
1483   case Builtin::BI__sync_add_and_fetch_16:
1484     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1485                                 llvm::Instruction::Add);
1486   case Builtin::BI__sync_sub_and_fetch_1:
1487   case Builtin::BI__sync_sub_and_fetch_2:
1488   case Builtin::BI__sync_sub_and_fetch_4:
1489   case Builtin::BI__sync_sub_and_fetch_8:
1490   case Builtin::BI__sync_sub_and_fetch_16:
1491     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1492                                 llvm::Instruction::Sub);
1493   case Builtin::BI__sync_and_and_fetch_1:
1494   case Builtin::BI__sync_and_and_fetch_2:
1495   case Builtin::BI__sync_and_and_fetch_4:
1496   case Builtin::BI__sync_and_and_fetch_8:
1497   case Builtin::BI__sync_and_and_fetch_16:
1498     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1499                                 llvm::Instruction::And);
1500   case Builtin::BI__sync_or_and_fetch_1:
1501   case Builtin::BI__sync_or_and_fetch_2:
1502   case Builtin::BI__sync_or_and_fetch_4:
1503   case Builtin::BI__sync_or_and_fetch_8:
1504   case Builtin::BI__sync_or_and_fetch_16:
1505     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1506                                 llvm::Instruction::Or);
1507   case Builtin::BI__sync_xor_and_fetch_1:
1508   case Builtin::BI__sync_xor_and_fetch_2:
1509   case Builtin::BI__sync_xor_and_fetch_4:
1510   case Builtin::BI__sync_xor_and_fetch_8:
1511   case Builtin::BI__sync_xor_and_fetch_16:
1512     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1513                                 llvm::Instruction::Xor);
1514   case Builtin::BI__sync_nand_and_fetch_1:
1515   case Builtin::BI__sync_nand_and_fetch_2:
1516   case Builtin::BI__sync_nand_and_fetch_4:
1517   case Builtin::BI__sync_nand_and_fetch_8:
1518   case Builtin::BI__sync_nand_and_fetch_16:
1519     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1520                                 llvm::Instruction::And, true);
1521
1522   case Builtin::BI__sync_val_compare_and_swap_1:
1523   case Builtin::BI__sync_val_compare_and_swap_2:
1524   case Builtin::BI__sync_val_compare_and_swap_4:
1525   case Builtin::BI__sync_val_compare_and_swap_8:
1526   case Builtin::BI__sync_val_compare_and_swap_16:
1527     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1528
1529   case Builtin::BI__sync_bool_compare_and_swap_1:
1530   case Builtin::BI__sync_bool_compare_and_swap_2:
1531   case Builtin::BI__sync_bool_compare_and_swap_4:
1532   case Builtin::BI__sync_bool_compare_and_swap_8:
1533   case Builtin::BI__sync_bool_compare_and_swap_16:
1534     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1535
1536   case Builtin::BI__sync_swap_1:
1537   case Builtin::BI__sync_swap_2:
1538   case Builtin::BI__sync_swap_4:
1539   case Builtin::BI__sync_swap_8:
1540   case Builtin::BI__sync_swap_16:
1541     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1542
1543   case Builtin::BI__sync_lock_test_and_set_1:
1544   case Builtin::BI__sync_lock_test_and_set_2:
1545   case Builtin::BI__sync_lock_test_and_set_4:
1546   case Builtin::BI__sync_lock_test_and_set_8:
1547   case Builtin::BI__sync_lock_test_and_set_16:
1548     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1549
1550   case Builtin::BI__sync_lock_release_1:
1551   case Builtin::BI__sync_lock_release_2:
1552   case Builtin::BI__sync_lock_release_4:
1553   case Builtin::BI__sync_lock_release_8:
1554   case Builtin::BI__sync_lock_release_16: {
1555     Value *Ptr = EmitScalarExpr(E->getArg(0));
1556     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1557     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1558     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1559                                              StoreSize.getQuantity() * 8);
1560     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1561     llvm::StoreInst *Store =
1562       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1563                                  StoreSize);
1564     Store->setAtomic(llvm::AtomicOrdering::Release);
1565     return RValue::get(nullptr);
1566   }
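  // Illustratively, __sync_lock_release(&flag) on an int is just a
  // release-ordered store of zero:
  //
  //   store atomic i32 0, i32* %flag release, align 4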
1567
1568   case Builtin::BI__sync_synchronize: {
1569     // We assume this is supposed to correspond to a C++0x-style
1570     // sequentially-consistent fence (i.e. this is only usable for
1571     // synchronization, not device I/O or anything like that). This intrinsic
1572     // is really badly designed in the sense that in theory, there isn't
1573     // any way to safely use it... but in practice, it mostly works
1574     // to use it with non-atomic loads and stores to get acquire/release
1575     // semantics.
1576     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1577     return RValue::get(nullptr);
1578   }
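  // In other words, __sync_synchronize() is emitted as nothing more than:
  //
  //   fence seq_cst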
1579
1580   case Builtin::BI__builtin_nontemporal_load:
1581     return RValue::get(EmitNontemporalLoad(*this, E));
1582   case Builtin::BI__builtin_nontemporal_store:
1583     return RValue::get(EmitNontemporalStore(*this, E));
1584   case Builtin::BI__c11_atomic_is_lock_free:
1585   case Builtin::BI__atomic_is_lock_free: {
1586     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1587     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1588     // _Atomic(T) is always properly-aligned.
1589     const char *LibCallName = "__atomic_is_lock_free";
1590     CallArgList Args;
1591     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1592              getContext().getSizeType());
1593     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1594       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1595                getContext().VoidPtrTy);
1596     else
1597       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1598                getContext().VoidPtrTy);
1599     const CGFunctionInfo &FuncInfo =
1600         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1601     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1602     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1603     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1604                     ReturnValueSlot(), Args);
1605   }
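  // As a sketch, __c11_atomic_is_lock_free(sizeof(T)) becomes a call to the
  // runtime equivalent of
  //
  //   __atomic_is_lock_free(sizeof(T), (void *)0)
  //
  // while the __atomic_is_lock_free builtin forwards its pointer argument
  // instead of the null constant.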
1606
1607   case Builtin::BI__atomic_test_and_set: {
1608     // Look at the argument type to determine whether this is a volatile
1609     // operation. The parameter type is always volatile.
1610     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1611     bool Volatile =
1612         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1613
1614     Value *Ptr = EmitScalarExpr(E->getArg(0));
1615     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1616     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1617     Value *NewVal = Builder.getInt8(1);
1618     Value *Order = EmitScalarExpr(E->getArg(1));
1619     if (isa<llvm::ConstantInt>(Order)) {
1620       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1621       AtomicRMWInst *Result = nullptr;
1622       switch (ord) {
1623       case 0:  // memory_order_relaxed
1624       default: // invalid order
1625         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1626                                          llvm::AtomicOrdering::Monotonic);
1627         break;
1628       case 1: // memory_order_consume
1629       case 2: // memory_order_acquire
1630         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1631                                          llvm::AtomicOrdering::Acquire);
1632         break;
1633       case 3: // memory_order_release
1634         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1635                                          llvm::AtomicOrdering::Release);
1636         break;
1637       case 4: // memory_order_acq_rel
1638
1639         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1640                                          llvm::AtomicOrdering::AcquireRelease);
1641         break;
1642       case 5: // memory_order_seq_cst
1643         Result = Builder.CreateAtomicRMW(
1644             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1645             llvm::AtomicOrdering::SequentiallyConsistent);
1646         break;
1647       }
1648       Result->setVolatile(Volatile);
1649       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1650     }
1651
1652     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1653
1654     llvm::BasicBlock *BBs[5] = {
1655       createBasicBlock("monotonic", CurFn),
1656       createBasicBlock("acquire", CurFn),
1657       createBasicBlock("release", CurFn),
1658       createBasicBlock("acqrel", CurFn),
1659       createBasicBlock("seqcst", CurFn)
1660     };
1661     llvm::AtomicOrdering Orders[5] = {
1662         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1663         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1664         llvm::AtomicOrdering::SequentiallyConsistent};
1665
1666     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1667     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1668
1669     Builder.SetInsertPoint(ContBB);
1670     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1671
1672     for (unsigned i = 0; i < 5; ++i) {
1673       Builder.SetInsertPoint(BBs[i]);
1674       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1675                                                    Ptr, NewVal, Orders[i]);
1676       RMW->setVolatile(Volatile);
1677       Result->addIncoming(RMW, BBs[i]);
1678       Builder.CreateBr(ContBB);
1679     }
1680
1681     SI->addCase(Builder.getInt32(0), BBs[0]);
1682     SI->addCase(Builder.getInt32(1), BBs[1]);
1683     SI->addCase(Builder.getInt32(2), BBs[1]);
1684     SI->addCase(Builder.getInt32(3), BBs[2]);
1685     SI->addCase(Builder.getInt32(4), BBs[3]);
1686     SI->addCase(Builder.getInt32(5), BBs[4]);
1687
1688     Builder.SetInsertPoint(ContBB);
1689     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1690   }
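  // For a constant order, e.g. __atomic_test_and_set(p, __ATOMIC_ACQUIRE),
  // the whole builtin collapses to roughly:
  //
  //   %old    = atomicrmw xchg i8* %p, i8 1 acquire
  //   %tobool = icmp ne i8 %old, 0
  //
  // The switch/PHI form above is only needed when the order is a runtime
  // value.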
1691
1692   case Builtin::BI__atomic_clear: {
1693     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1694     bool Volatile =
1695         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1696
1697     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1698     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1699     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1700     Value *NewVal = Builder.getInt8(0);
1701     Value *Order = EmitScalarExpr(E->getArg(1));
1702     if (isa<llvm::ConstantInt>(Order)) {
1703       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1704       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1705       switch (ord) {
1706       case 0:  // memory_order_relaxed
1707       default: // invalid order
1708         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1709         break;
1710       case 3:  // memory_order_release
1711         Store->setOrdering(llvm::AtomicOrdering::Release);
1712         break;
1713       case 5:  // memory_order_seq_cst
1714         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1715         break;
1716       }
1717       return RValue::get(nullptr);
1718     }
1719
1720     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1721
1722     llvm::BasicBlock *BBs[3] = {
1723       createBasicBlock("monotonic", CurFn),
1724       createBasicBlock("release", CurFn),
1725       createBasicBlock("seqcst", CurFn)
1726     };
1727     llvm::AtomicOrdering Orders[3] = {
1728         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1729         llvm::AtomicOrdering::SequentiallyConsistent};
1730
1731     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1732     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1733
1734     for (unsigned i = 0; i < 3; ++i) {
1735       Builder.SetInsertPoint(BBs[i]);
1736       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1737       Store->setOrdering(Orders[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(3), BBs[1]);
1743     SI->addCase(Builder.getInt32(5), BBs[2]);
1744
1745     Builder.SetInsertPoint(ContBB);
1746     return RValue::get(nullptr);
1747   }
1748
1749   case Builtin::BI__atomic_thread_fence:
1750   case Builtin::BI__atomic_signal_fence:
1751   case Builtin::BI__c11_atomic_thread_fence:
1752   case Builtin::BI__c11_atomic_signal_fence: {
1753     llvm::SynchronizationScope Scope;
1754     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1755         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1756       Scope = llvm::SingleThread;
1757     else
1758       Scope = llvm::CrossThread;
1759     Value *Order = EmitScalarExpr(E->getArg(0));
1760     if (isa<llvm::ConstantInt>(Order)) {
1761       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1762       switch (ord) {
1763       case 0:  // memory_order_relaxed
1764       default: // invalid order
1765         break;
1766       case 1:  // memory_order_consume
1767       case 2:  // memory_order_acquire
1768         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1769         break;
1770       case 3:  // memory_order_release
1771         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1772         break;
1773       case 4:  // memory_order_acq_rel
1774         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1775         break;
1776       case 5:  // memory_order_seq_cst
1777         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1778                             Scope);
1779         break;
1780       }
1781       return RValue::get(nullptr);
1782     }
1783
1784     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1785     AcquireBB = createBasicBlock("acquire", CurFn);
1786     ReleaseBB = createBasicBlock("release", CurFn);
1787     AcqRelBB = createBasicBlock("acqrel", CurFn);
1788     SeqCstBB = createBasicBlock("seqcst", CurFn);
1789     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1790
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1793
1794     Builder.SetInsertPoint(AcquireBB);
1795     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1796     Builder.CreateBr(ContBB);
1797     SI->addCase(Builder.getInt32(1), AcquireBB);
1798     SI->addCase(Builder.getInt32(2), AcquireBB);
1799
1800     Builder.SetInsertPoint(ReleaseBB);
1801     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1802     Builder.CreateBr(ContBB);
1803     SI->addCase(Builder.getInt32(3), ReleaseBB);
1804
1805     Builder.SetInsertPoint(AcqRelBB);
1806     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1807     Builder.CreateBr(ContBB);
1808     SI->addCase(Builder.getInt32(4), AcqRelBB);
1809
1810     Builder.SetInsertPoint(SeqCstBB);
1811     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1812     Builder.CreateBr(ContBB);
1813     SI->addCase(Builder.getInt32(5), SeqCstBB);
1814
1815     Builder.SetInsertPoint(ContBB);
1816     return RValue::get(nullptr);
1817   }
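  // For instance, with a constant argument:
  //
  //   __c11_atomic_thread_fence(__ATOMIC_SEQ_CST)  ->  fence seq_cst
  //   __c11_atomic_signal_fence(__ATOMIC_SEQ_CST)  ->  fence singlethread seq_cst
  //
  // (sketch; a non-constant order goes through the switch above).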
1818
1819     // Library functions with special handling.
1820   case Builtin::BIsqrt:
1821   case Builtin::BIsqrtf:
1822   case Builtin::BIsqrtl: {
1823     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1824     // in finite- or unsafe-math mode (the intrinsic has different semantics
1825     // for handling negative numbers compared to the library function, so
1826     // -fmath-errno=0 is not enough).
1827     if (!FD->hasAttr<ConstAttr>())
1828       break;
1829     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1830           CGM.getCodeGenOpts().NoNaNsFPMath))
1831       break;
1832     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1833     llvm::Type *ArgType = Arg0->getType();
1834     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1835     return RValue::get(Builder.CreateCall(F, Arg0));
1836   }
1837
1838   case Builtin::BI__builtin_pow:
1839   case Builtin::BI__builtin_powf:
1840   case Builtin::BI__builtin_powl:
1841   case Builtin::BIpow:
1842   case Builtin::BIpowf:
1843   case Builtin::BIpowl: {
1844     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1845     if (!FD->hasAttr<ConstAttr>())
1846       break;
1847     Value *Base = EmitScalarExpr(E->getArg(0));
1848     Value *Exponent = EmitScalarExpr(E->getArg(1));
1849     llvm::Type *ArgType = Base->getType();
1850     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1851     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1852   }
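  // E.g. when the declaration carries the const attribute (typically with
  // math errno disabled), pow(x, y) on doubles is emitted as:
  //
  //   %r = call double @llvm.pow.f64(double %x, double %y)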
1853
1854   case Builtin::BIfma:
1855   case Builtin::BIfmaf:
1856   case Builtin::BIfmal:
1857   case Builtin::BI__builtin_fma:
1858   case Builtin::BI__builtin_fmaf:
1859   case Builtin::BI__builtin_fmal: {
1860     // Rewrite fma to intrinsic.
1861     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1862     llvm::Type *ArgType = FirstArg->getType();
1863     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1864     return RValue::get(
1865         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1866                                EmitScalarExpr(E->getArg(2))}));
1867   }
1868
1869   case Builtin::BI__builtin_signbit:
1870   case Builtin::BI__builtin_signbitf:
1871   case Builtin::BI__builtin_signbitl: {
1872     return RValue::get(
1873         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1874                            ConvertType(E->getType())));
1875   }
1876   case Builtin::BI__builtin_annotation: {
1877     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1878     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1879                                       AnnVal->getType());
1880
1881     // Get the annotation string, go through casts. Sema requires this to be a
1882     // non-wide string literal, potentially casted, so the cast<> is safe.
1883     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1884     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1885     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1886   }
1887   case Builtin::BI__builtin_addcb:
1888   case Builtin::BI__builtin_addcs:
1889   case Builtin::BI__builtin_addc:
1890   case Builtin::BI__builtin_addcl:
1891   case Builtin::BI__builtin_addcll:
1892   case Builtin::BI__builtin_subcb:
1893   case Builtin::BI__builtin_subcs:
1894   case Builtin::BI__builtin_subc:
1895   case Builtin::BI__builtin_subcl:
1896   case Builtin::BI__builtin_subcll: {
1897
1898     // We translate all of these builtins from expressions of the form:
1899     //   int x = ..., y = ..., carryin = ..., carryout, result;
1900     //   result = __builtin_addc(x, y, carryin, &carryout);
1901     //
1902     // to LLVM IR of the form:
1903     //
1904     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1905     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1906     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1907     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1908     //                                                       i32 %carryin)
1909     //   %result = extractvalue {i32, i1} %tmp2, 0
1910     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1911     //   %tmp3 = or i1 %carry1, %carry2
1912     //   %tmp4 = zext i1 %tmp3 to i32
1913     //   store i32 %tmp4, i32* %carryout
1914
1915     // Scalarize our inputs.
1916     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1917     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1918     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1919     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1920
1921     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1922     llvm::Intrinsic::ID IntrinsicId;
1923     switch (BuiltinID) {
1924     default: llvm_unreachable("Unknown multiprecision builtin id.");
1925     case Builtin::BI__builtin_addcb:
1926     case Builtin::BI__builtin_addcs:
1927     case Builtin::BI__builtin_addc:
1928     case Builtin::BI__builtin_addcl:
1929     case Builtin::BI__builtin_addcll:
1930       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1931       break;
1932     case Builtin::BI__builtin_subcb:
1933     case Builtin::BI__builtin_subcs:
1934     case Builtin::BI__builtin_subc:
1935     case Builtin::BI__builtin_subcl:
1936     case Builtin::BI__builtin_subcll:
1937       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1938       break;
1939     }
1940
1941     // Construct our resulting LLVM IR expression.
1942     llvm::Value *Carry1;
1943     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1944                                               X, Y, Carry1);
1945     llvm::Value *Carry2;
1946     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1947                                               Sum1, Carryin, Carry2);
1948     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1949                                                X->getType());
1950     Builder.CreateStore(CarryOut, CarryOutPtr);
1951     return RValue::get(Sum2);
1952   }
1953
1954   case Builtin::BI__builtin_add_overflow:
1955   case Builtin::BI__builtin_sub_overflow:
1956   case Builtin::BI__builtin_mul_overflow: {
1957     const clang::Expr *LeftArg = E->getArg(0);
1958     const clang::Expr *RightArg = E->getArg(1);
1959     const clang::Expr *ResultArg = E->getArg(2);
1960
1961     clang::QualType ResultQTy =
1962         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1963
1964     WidthAndSignedness LeftInfo =
1965         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1966     WidthAndSignedness RightInfo =
1967         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1968     WidthAndSignedness ResultInfo =
1969         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1970     WidthAndSignedness EncompassingInfo =
1971         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1972
1973     llvm::Type *EncompassingLLVMTy =
1974         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1975
1976     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1977
1978     llvm::Intrinsic::ID IntrinsicId;
1979     switch (BuiltinID) {
1980     default:
1981       llvm_unreachable("Unknown overflow builtin id.");
1982     case Builtin::BI__builtin_add_overflow:
1983       IntrinsicId = EncompassingInfo.Signed
1984                         ? llvm::Intrinsic::sadd_with_overflow
1985                         : llvm::Intrinsic::uadd_with_overflow;
1986       break;
1987     case Builtin::BI__builtin_sub_overflow:
1988       IntrinsicId = EncompassingInfo.Signed
1989                         ? llvm::Intrinsic::ssub_with_overflow
1990                         : llvm::Intrinsic::usub_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_mul_overflow:
1993       IntrinsicId = EncompassingInfo.Signed
1994                         ? llvm::Intrinsic::smul_with_overflow
1995                         : llvm::Intrinsic::umul_with_overflow;
1996       break;
1997     }
1998
1999     llvm::Value *Left = EmitScalarExpr(LeftArg);
2000     llvm::Value *Right = EmitScalarExpr(RightArg);
2001     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2002
2003     // Extend each operand to the encompassing type.
2004     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2005     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2006
2007     // Perform the operation on the extended values.
2008     llvm::Value *Overflow, *Result;
2009     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2010
2011     if (EncompassingInfo.Width > ResultInfo.Width) {
2012       // The encompassing type is wider than the result type, so we need to
2013       // truncate it.
2014       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2015
2016       // To see if the truncation caused an overflow, we will extend
2017       // the result and then compare it to the original result.
2018       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2019           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2020       llvm::Value *TruncationOverflow =
2021           Builder.CreateICmpNE(Result, ResultTruncExt);
2022
2023       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2024       Result = ResultTrunc;
2025     }
2026
2027     // Finally, store the result using the pointer.
2028     bool isVolatile =
2029       ResultArg->getType()->getPointeeType().isVolatileQualified();
2030     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2031
2032     return RValue::get(Overflow);
2033   }
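  // Worked example (a sketch): with int operands and an int result there is
  // no widening or truncation, and __builtin_add_overflow(a, b, &r) lowers to
  // roughly:
  //
  //   %pair     = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  //   %r        = extractvalue {i32, i1} %pair, 0
  //   %overflow = extractvalue {i32, i1} %pair, 1   ; the builtin's result
  //
  // Mixed widths or signedness instead compute in the wider encompassing type
  // and OR in the truncation check above.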
2034
2035   case Builtin::BI__builtin_uadd_overflow:
2036   case Builtin::BI__builtin_uaddl_overflow:
2037   case Builtin::BI__builtin_uaddll_overflow:
2038   case Builtin::BI__builtin_usub_overflow:
2039   case Builtin::BI__builtin_usubl_overflow:
2040   case Builtin::BI__builtin_usubll_overflow:
2041   case Builtin::BI__builtin_umul_overflow:
2042   case Builtin::BI__builtin_umull_overflow:
2043   case Builtin::BI__builtin_umulll_overflow:
2044   case Builtin::BI__builtin_sadd_overflow:
2045   case Builtin::BI__builtin_saddl_overflow:
2046   case Builtin::BI__builtin_saddll_overflow:
2047   case Builtin::BI__builtin_ssub_overflow:
2048   case Builtin::BI__builtin_ssubl_overflow:
2049   case Builtin::BI__builtin_ssubll_overflow:
2050   case Builtin::BI__builtin_smul_overflow:
2051   case Builtin::BI__builtin_smull_overflow:
2052   case Builtin::BI__builtin_smulll_overflow: {
2053
2054     // We translate all of these builtins directly to the relevant llvm IR node.
2055
2056     // Scalarize our inputs.
2057     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2058     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2059     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2060
2061     // Decide which of the overflow intrinsics we are lowering to:
2062     llvm::Intrinsic::ID IntrinsicId;
2063     switch (BuiltinID) {
2064     default: llvm_unreachable("Unknown overflow builtin id.");
2065     case Builtin::BI__builtin_uadd_overflow:
2066     case Builtin::BI__builtin_uaddl_overflow:
2067     case Builtin::BI__builtin_uaddll_overflow:
2068       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2069       break;
2070     case Builtin::BI__builtin_usub_overflow:
2071     case Builtin::BI__builtin_usubl_overflow:
2072     case Builtin::BI__builtin_usubll_overflow:
2073       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2074       break;
2075     case Builtin::BI__builtin_umul_overflow:
2076     case Builtin::BI__builtin_umull_overflow:
2077     case Builtin::BI__builtin_umulll_overflow:
2078       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2079       break;
2080     case Builtin::BI__builtin_sadd_overflow:
2081     case Builtin::BI__builtin_saddl_overflow:
2082     case Builtin::BI__builtin_saddll_overflow:
2083       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2084       break;
2085     case Builtin::BI__builtin_ssub_overflow:
2086     case Builtin::BI__builtin_ssubl_overflow:
2087     case Builtin::BI__builtin_ssubll_overflow:
2088       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2089       break;
2090     case Builtin::BI__builtin_smul_overflow:
2091     case Builtin::BI__builtin_smull_overflow:
2092     case Builtin::BI__builtin_smulll_overflow:
2093       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2094       break;
2095     }
2096
2098     llvm::Value *Carry;
2099     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2100     Builder.CreateStore(Sum, SumOutPtr);
2101
2102     return RValue::get(Carry);
2103   }
2104   case Builtin::BI__builtin_addressof:
2105     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2106   case Builtin::BI__builtin_operator_new:
2107     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2108                                     E->getArg(0), false);
2109   case Builtin::BI__builtin_operator_delete:
2110     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2111                                     E->getArg(0), true);
2112   case Builtin::BI__noop:
2113     // __noop always evaluates to an integer literal zero.
2114     return RValue::get(ConstantInt::get(IntTy, 0));
2115   case Builtin::BI__builtin_call_with_static_chain: {
2116     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2117     const Expr *Chain = E->getArg(1);
2118     return EmitCall(Call->getCallee()->getType(),
2119                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2120                     EmitScalarExpr(Chain));
2121   }
2122   case Builtin::BI_InterlockedExchange8:
2123   case Builtin::BI_InterlockedExchange16:
2124   case Builtin::BI_InterlockedExchange:
2125   case Builtin::BI_InterlockedExchangePointer:
2126     return RValue::get(
2127         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2128   case Builtin::BI_InterlockedCompareExchangePointer: {
2129     llvm::Type *RTy;
2130     llvm::IntegerType *IntType =
2131       IntegerType::get(getLLVMContext(),
2132                        getContext().getTypeSize(E->getType()));
2133     llvm::Type *IntPtrType = IntType->getPointerTo();
2134
2135     llvm::Value *Destination =
2136       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2137
2138     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2139     RTy = Exchange->getType();
2140     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2141
2142     llvm::Value *Comparand =
2143       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2144
2145     auto Result =
2146         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2147                                     AtomicOrdering::SequentiallyConsistent,
2148                                     AtomicOrdering::SequentiallyConsistent);
2149     Result->setVolatile(true);
2150
2151     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2152                                                                          0),
2153                                               RTy));
2154   }
2155   case Builtin::BI_InterlockedCompareExchange8:
2156   case Builtin::BI_InterlockedCompareExchange16:
2157   case Builtin::BI_InterlockedCompareExchange:
2158   case Builtin::BI_InterlockedCompareExchange64: {
2159     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2160         EmitScalarExpr(E->getArg(0)),
2161         EmitScalarExpr(E->getArg(2)),
2162         EmitScalarExpr(E->getArg(1)),
2163         AtomicOrdering::SequentiallyConsistent,
2164         AtomicOrdering::SequentiallyConsistent);
2165     CXI->setVolatile(true);
2166     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2167   }
2168   case Builtin::BI_InterlockedIncrement16:
2169   case Builtin::BI_InterlockedIncrement:
2170     return RValue::get(
2171         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2172   case Builtin::BI_InterlockedDecrement16:
2173   case Builtin::BI_InterlockedDecrement:
2174     return RValue::get(
2175         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2176   case Builtin::BI_InterlockedAnd8:
2177   case Builtin::BI_InterlockedAnd16:
2178   case Builtin::BI_InterlockedAnd:
2179     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2180   case Builtin::BI_InterlockedExchangeAdd8:
2181   case Builtin::BI_InterlockedExchangeAdd16:
2182   case Builtin::BI_InterlockedExchangeAdd:
2183     return RValue::get(
2184         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2185   case Builtin::BI_InterlockedExchangeSub8:
2186   case Builtin::BI_InterlockedExchangeSub16:
2187   case Builtin::BI_InterlockedExchangeSub:
2188     return RValue::get(
2189         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2190   case Builtin::BI_InterlockedOr8:
2191   case Builtin::BI_InterlockedOr16:
2192   case Builtin::BI_InterlockedOr:
2193     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2194   case Builtin::BI_InterlockedXor8:
2195   case Builtin::BI_InterlockedXor16:
2196   case Builtin::BI_InterlockedXor:
2197     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2198   case Builtin::BI__readfsdword: {
2199     llvm::Type *IntTy = ConvertType(E->getType());
2200     Value *IntToPtr =
2201       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
2202                              llvm::PointerType::get(IntTy, 257));
2203     LoadInst *Load = Builder.CreateAlignedLoad(
2204         IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType()));
2205     Load->setVolatile(true);
2206     return RValue::get(Load);
2207   }
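  // Note: address space 257 is how the x86 backend models the FS segment, so
  // the volatile load above reads the dword at the given FS-relative offset,
  // roughly:
  //
  //   %p = inttoptr i32 %off to i32 addrspace(257)*
  //   %v = load volatile i32, i32 addrspace(257)* %p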
2208
2209   case Builtin::BI__exception_code:
2210   case Builtin::BI_exception_code:
2211     return RValue::get(EmitSEHExceptionCode());
2212   case Builtin::BI__exception_info:
2213   case Builtin::BI_exception_info:
2214     return RValue::get(EmitSEHExceptionInfo());
2215   case Builtin::BI__abnormal_termination:
2216   case Builtin::BI_abnormal_termination:
2217     return RValue::get(EmitSEHAbnormalTermination());
2218   case Builtin::BI_setjmpex: {
2219     if (getTarget().getTriple().isOSMSVCRT()) {
2220       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2221       llvm::AttributeSet ReturnsTwiceAttr =
2222           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2223                             llvm::Attribute::ReturnsTwice);
2224       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2225           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2226           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2227       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2228           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2229       llvm::Value *FrameAddr =
2230           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2231                              ConstantInt::get(Int32Ty, 0));
2232       llvm::Value *Args[] = {Buf, FrameAddr};
2233       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2234       CS.setAttributes(ReturnsTwiceAttr);
2235       return RValue::get(CS.getInstruction());
2236     }
2237     break;
2238   }
2239   case Builtin::BI_setjmp: {
2240     if (getTarget().getTriple().isOSMSVCRT()) {
2241       llvm::AttributeSet ReturnsTwiceAttr =
2242           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2243                             llvm::Attribute::ReturnsTwice);
2244       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2245           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2246       llvm::CallSite CS;
2247       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2248         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2249         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2250             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2251             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2252         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2253         llvm::Value *Args[] = {Buf, Count};
2254         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2255       } else {
2256         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2257         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2258             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2259             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2260         llvm::Value *FrameAddr =
2261             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2262                                ConstantInt::get(Int32Ty, 0));
2263         llvm::Value *Args[] = {Buf, FrameAddr};
2264         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2265       }
2266       CS.setAttributes(ReturnsTwiceAttr);
2267       return RValue::get(CS.getInstruction());
2268     }
2269     break;
2270   }
2271
2272   case Builtin::BI__GetExceptionInfo: {
2273     if (llvm::GlobalVariable *GV =
2274             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2275       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2276     break;
2277   }
2278
2279   case Builtin::BI__builtin_coro_size: {
2280     auto &Context = getContext();
2281     auto SizeTy = Context.getSizeType();
2282     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2283     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2284     return RValue::get(Builder.CreateCall(F));
2285   }
2286
2287   case Builtin::BI__builtin_coro_id:
2288     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2289   case Builtin::BI__builtin_coro_promise:
2290     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2291   case Builtin::BI__builtin_coro_resume:
2292     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2293   case Builtin::BI__builtin_coro_frame:
2294     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2295   case Builtin::BI__builtin_coro_free:
2296     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2297   case Builtin::BI__builtin_coro_destroy:
2298     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2299   case Builtin::BI__builtin_coro_done:
2300     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2301   case Builtin::BI__builtin_coro_alloc:
2302     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2303   case Builtin::BI__builtin_coro_begin:
2304     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2305   case Builtin::BI__builtin_coro_end:
2306     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2307   case Builtin::BI__builtin_coro_suspend:
2308     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2309   case Builtin::BI__builtin_coro_param:
2310     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2311
2312   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2313   case Builtin::BIread_pipe:
2314   case Builtin::BIwrite_pipe: {
2315     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2316           *Arg1 = EmitScalarExpr(E->getArg(1));
2317     CGOpenCLRuntime OpenCLRT(CGM);
2318     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2319     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2320
2321     // Type of the generic packet parameter.
2322     unsigned GenericAS =
2323         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2324     llvm::Type *I8PTy = llvm::PointerType::get(
2325         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2326
2327     // Testing which overloaded version we should generate the call for.
2328     if (2U == E->getNumArgs()) {
2329       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2330                                                              : "__write_pipe_2";
2331       // Create a generic function type so the call works with any builtin
2332       // or user-defined type.
2333       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2334       llvm::FunctionType *FTy = llvm::FunctionType::get(
2335           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2336       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2337       return RValue::get(
2338           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2339                              {Arg0, BCast, PacketSize, PacketAlign}));
2340     } else {
2341       assert(4 == E->getNumArgs() &&
2342              "Illegal number of parameters to pipe function");
2343       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2344                                                              : "__write_pipe_4";
2345
2346       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2347                               Int32Ty, Int32Ty};
2348       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2349             *Arg3 = EmitScalarExpr(E->getArg(3));
2350       llvm::FunctionType *FTy = llvm::FunctionType::get(
2351           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2352       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2353       // We know the third argument is an integer type, but we may need to cast
2354       // it to i32.
2355       if (Arg2->getType() != Int32Ty)
2356         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2357       return RValue::get(Builder.CreateCall(
2358           CGM.CreateRuntimeFunction(FTy, Name),
2359           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2360     }
2361   }
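  // As a sketch, the two-argument form read_pipe(p, &val) ends up as a call
  // to the runtime helper
  //
  //   __read_pipe_2(p, (generic void *)&val, <packet size>, <packet align>)
  //
  // and the four-argument (reservation) form calls __read_pipe_4 with the
  // reservation id and index inserted before the packet pointer.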
2362   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2363   // functions
2364   case Builtin::BIreserve_read_pipe:
2365   case Builtin::BIreserve_write_pipe:
2366   case Builtin::BIwork_group_reserve_read_pipe:
2367   case Builtin::BIwork_group_reserve_write_pipe:
2368   case Builtin::BIsub_group_reserve_read_pipe:
2369   case Builtin::BIsub_group_reserve_write_pipe: {
2370     // Composing the mangled name for the function.
2371     const char *Name;
2372     if (BuiltinID == Builtin::BIreserve_read_pipe)
2373       Name = "__reserve_read_pipe";
2374     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2375       Name = "__reserve_write_pipe";
2376     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2377       Name = "__work_group_reserve_read_pipe";
2378     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2379       Name = "__work_group_reserve_write_pipe";
2380     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2381       Name = "__sub_group_reserve_read_pipe";
2382     else
2383       Name = "__sub_group_reserve_write_pipe";
2384
2385     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2386           *Arg1 = EmitScalarExpr(E->getArg(1));
2387     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2388     CGOpenCLRuntime OpenCLRT(CGM);
2389     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2390     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2391
2392     // Building the generic function prototype.
2393     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2394     llvm::FunctionType *FTy = llvm::FunctionType::get(
2395         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2396     // We know the second argument is an integer type, but we may need to cast
2397     // it to i32.
2398     if (Arg1->getType() != Int32Ty)
2399       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2400     return RValue::get(
2401         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2402                            {Arg0, Arg1, PacketSize, PacketAlign}));
2403   }
2404   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2405   // functions
2406   case Builtin::BIcommit_read_pipe:
2407   case Builtin::BIcommit_write_pipe:
2408   case Builtin::BIwork_group_commit_read_pipe:
2409   case Builtin::BIwork_group_commit_write_pipe:
2410   case Builtin::BIsub_group_commit_read_pipe:
2411   case Builtin::BIsub_group_commit_write_pipe: {
2412     const char *Name;
2413     if (BuiltinID == Builtin::BIcommit_read_pipe)
2414       Name = "__commit_read_pipe";
2415     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2416       Name = "__commit_write_pipe";
2417     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2418       Name = "__work_group_commit_read_pipe";
2419     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2420       Name = "__work_group_commit_write_pipe";
2421     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2422       Name = "__sub_group_commit_read_pipe";
2423     else
2424       Name = "__sub_group_commit_write_pipe";
2425
2426     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2427           *Arg1 = EmitScalarExpr(E->getArg(1));
2428     CGOpenCLRuntime OpenCLRT(CGM);
2429     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2430     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2431
2432     // Building the generic function prototype.
2433     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2434     llvm::FunctionType *FTy =
2435         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2436                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2437
2438     return RValue::get(
2439         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2440                            {Arg0, Arg1, PacketSize, PacketAlign}));
2441   }
2442   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2443   case Builtin::BIget_pipe_num_packets:
2444   case Builtin::BIget_pipe_max_packets: {
2445     const char *Name;
2446     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2447       Name = "__get_pipe_num_packets";
2448     else
2449       Name = "__get_pipe_max_packets";
2450
2451     // Building the generic function prototype.
2452     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2453     CGOpenCLRuntime OpenCLRT(CGM);
2454     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2455     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2456     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2457     llvm::FunctionType *FTy = llvm::FunctionType::get(
2458         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2459
2460     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2461                                           {Arg0, PacketSize, PacketAlign}));
2462   }
2463
2464   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2465   case Builtin::BIto_global:
2466   case Builtin::BIto_local:
2467   case Builtin::BIto_private: {
2468     auto Arg0 = EmitScalarExpr(E->getArg(0));
2469     auto NewArgT = llvm::PointerType::get(Int8Ty,
2470       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2471     auto NewRetT = llvm::PointerType::get(Int8Ty,
2472       CGM.getContext().getTargetAddressSpace(
2473         E->getType()->getPointeeType().getAddressSpace()));
2474     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2475     llvm::Value *NewArg;
2476     if (Arg0->getType()->getPointerAddressSpace() !=
2477         NewArgT->getPointerAddressSpace())
2478       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2479     else
2480       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2481     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2482     auto NewCall =
2483         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2484     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2485       ConvertType(E->getType())));
2486   }
2487
2488   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2489   // It contains four different overload formats specified in Table 6.13.17.1.
2490   case Builtin::BIenqueue_kernel: {
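    // Depending on the overload, the call is lowered to one of four runtime
    // entry points: __enqueue_kernel_basic (no events, no variadic sizes),
    // __enqueue_kernel_vaargs (variadic block-argument sizes only),
    // __enqueue_kernel_basic_events (events only), or
    // __enqueue_kernel_events_vaargs (events plus variadic sizes).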
2491     StringRef Name; // Generated function call name
2492     unsigned NumArgs = E->getNumArgs();
2493
2494     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2495     llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2496
2497     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2498     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2499     llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2500
2501     if (NumArgs == 4) {
2502       // The most basic form of the call with parameters:
2503       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2504       Name = "__enqueue_kernel_basic";
2505       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2506       llvm::FunctionType *FTy = llvm::FunctionType::get(
2507           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2508
2509       llvm::Value *Block =
2510           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2511
2512       return RValue::get(Builder.CreateCall(
2513           CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2514     }
2515     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2516
2517     // Could have events and/or vaargs.
2518     if (E->getArg(3)->getType()->isBlockPointerType()) {
2519       // No events passed, but has variadic arguments.
2520       Name = "__enqueue_kernel_vaargs";
2521       llvm::Value *Block =
2522           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2523       // Create a vector of the arguments, as well as a constant value to
2524       // express to the runtime the number of variadic arguments.
2525       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2526                                          ConstantInt::get(IntTy, NumArgs - 4)};
2527       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2528                                           IntTy};
2529
2530       // Each of the following arguments specifies the size of the corresponding
2531       // argument passed to the enqueued block.
2532       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2533         Args.push_back(
2534             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2535
2536       llvm::FunctionType *FTy = llvm::FunctionType::get(
2537           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2538       return RValue::get(
2539           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2540                              llvm::ArrayRef<llvm::Value *>(Args)));
2541     }
2542     // From this point on, the call passes event arguments.
2543     if (NumArgs >= 7) {
2544       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2545       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2546           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2547
2548       llvm::Value *NumEvents =
2549           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2550       llvm::Value *EventList =
2551           E->getArg(4)->getType()->isArrayType()
2552               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2553               : EmitScalarExpr(E->getArg(4));
2554       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2555       // Convert to generic address space.
2556       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2557       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2558       llvm::Value *Block =
2559           Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2560
2561       std::vector<llvm::Type *> ArgTys = {QueueTy,  Int32Ty,    RangeTy,
2562                                           Int32Ty,  EventPtrTy, EventPtrTy,
2563                                           Int8PtrTy};
2564
2565       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2566                                          EventList, ClkEvent, Block};
2567
2568       if (NumArgs == 7) {
2569         // Has events but no variadics.
2570         Name = "__enqueue_kernel_basic_events";
2571         llvm::FunctionType *FTy = llvm::FunctionType::get(
2572             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2573         return RValue::get(
2574             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2575                                llvm::ArrayRef<llvm::Value *>(Args)));
2576       }
2577       // Has event info and variadics
2578       // Pass the number of variadics to the runtime function too.
2579       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2580       ArgTys.push_back(Int32Ty);
2581       Name = "__enqueue_kernel_events_vaargs";
2582
2583       // Each of the following arguments specifies the size of the corresponding
2584       // argument passed to the enqueued block.
2585       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2586         Args.push_back(
2587             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2588
2589       llvm::FunctionType *FTy = llvm::FunctionType::get(
2590           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2591       return RValue::get(
2592           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2593                              llvm::ArrayRef<llvm::Value *>(Args)));
2594     }
2595   }
2596   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2597   // parameter.
2598   case Builtin::BIget_kernel_work_group_size: {
2599     Value *Arg = EmitScalarExpr(E->getArg(0));
2600     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2601     return RValue::get(
2602         Builder.CreateCall(CGM.CreateRuntimeFunction(
2603                                llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2604                                "__get_kernel_work_group_size_impl"),
2605                            Arg));
2606   }
2607   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2608     Value *Arg = EmitScalarExpr(E->getArg(0));
2609     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2610     return RValue::get(Builder.CreateCall(
2611         CGM.CreateRuntimeFunction(
2612             llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2613             "__get_kernel_preferred_work_group_multiple_impl"),
2614         Arg));
2615   }
2616   case Builtin::BIprintf:
2617     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2618       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2619     break;
2620   case Builtin::BI__builtin_canonicalize:
2621   case Builtin::BI__builtin_canonicalizef:
2622   case Builtin::BI__builtin_canonicalizel:
2623     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2624
2625   case Builtin::BI__builtin_thread_pointer: {
2626     if (!getContext().getTargetInfo().isTLSSupported())
2627       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2628     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2629     break;
2630   }
2631   case Builtin::BI__builtin_os_log_format: {
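    // The buffer passed as the first argument is laid out as a summary byte,
    // a byte holding the number of arguments, and then one descriptor byte,
    // one size byte, and the payload for each formatted item, as computed by
    // computeOSLogBufferLayout below.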
2632     assert(E->getNumArgs() >= 2 &&
2633            "__builtin_os_log_format takes at least 2 arguments");
2634     analyze_os_log::OSLogBufferLayout Layout;
2635     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2636     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2637     // Ignore argument 1, the format string. It is not currently used.
2638     CharUnits Offset;
2639     Builder.CreateStore(
2640         Builder.getInt8(Layout.getSummaryByte()),
2641         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2642     Builder.CreateStore(
2643         Builder.getInt8(Layout.getNumArgsByte()),
2644         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2645
2646     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2647     for (const auto &Item : Layout.Items) {
2648       Builder.CreateStore(
2649           Builder.getInt8(Item.getDescriptorByte()),
2650           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2651       Builder.CreateStore(
2652           Builder.getInt8(Item.getSizeByte()),
2653           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2654       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2655       if (const Expr *TheExpr = Item.getExpr()) {
2656         Addr = Builder.CreateElementBitCast(
2657             Addr, ConvertTypeForMem(TheExpr->getType()));
2658         // Check if this is a retainable type.
2659         if (TheExpr->getType()->isObjCRetainableType()) {
2660           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2661                  "Only scalars can be ObjC retainable types");
2662           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2663           RValue RV = RValue::get(SV);
2664           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2665           EmitStoreThroughLValue(RV, LV);
2666           // Check if the object is constant; if not, save it in
2667           // RetainableOperands.
2668           if (!isa<Constant>(SV))
2669             RetainableOperands.push_back(SV);
2670         } else {
2671           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2672         }
2673       } else {
2674         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2675         Builder.CreateStore(
2676             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2677       }
2678       Offset += Item.size();
2679     }
2680
2681     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2682     // cleanup will cause the use to appear after the final log call, keeping
2683     // the object valid while it's held in the log buffer. Note that if there's
2684     // a release cleanup on the object, it will already be active; since
2685     // cleanups are emitted in reverse order, the use will occur before the
2686     // object is released.
2687     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2688         CGM.getCodeGenOpts().OptimizationLevel != 0)
2689       for (llvm::Value *object : RetainableOperands)
2690         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2691
2692     return RValue::get(BufAddr.getPointer());
2693   }
2694
2695   case Builtin::BI__builtin_os_log_format_buffer_size: {
2696     analyze_os_log::OSLogBufferLayout Layout;
2697     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2698     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2699                                         Layout.size().getQuantity()));
2700   }
2701   }
2702
2703   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2704   // the call using the normal call path, but using the unmangled
2705   // version of the function name.
2706   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2707     return emitLibraryCall(*this, FD, E,
2708                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2709
2710   // If this is a predefined lib function (e.g. malloc), emit the call
2711   // using exactly the normal call path.
2712   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2713     return emitLibraryCall(*this, FD, E,
2714                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2715
2716   // Check that a call to a target specific builtin has the correct target
2717   // features.
2718   // This is placed down here to avoid checking features for non-target-specific
2719   // builtins; however, if generic builtins start to require generic target
2720   // features, this check can move up to the beginning of the function.
2721   checkTargetFeatures(E, FD);
2722
2723   // See if we have a target specific intrinsic.
2724   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2725   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2726   StringRef Prefix =
2727       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2728   if (!Prefix.empty()) {
2729     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2730     // NOTE: we don't need to perform a compatibility flag check here, since the
2731     // MS builtins are declared in Builtins*.def via LANGBUILTIN with
2732     // ALL_MS_LANGUAGES and are filtered out earlier.
2733     if (IntrinsicID == Intrinsic::not_intrinsic)
2734       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2735   }
2736
2737   if (IntrinsicID != Intrinsic::not_intrinsic) {
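    // Emit each argument (constant-folding the ones required to be integer
    // constant expressions), bitcast arguments and the result where the
    // intrinsic's types differ from the builtin's, and emit the call.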
2738     SmallVector<Value*, 16> Args;
2739
2740     // Find out if any arguments are required to be integer constant
2741     // expressions.
2742     unsigned ICEArguments = 0;
2743     ASTContext::GetBuiltinTypeError Error;
2744     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2745     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2746
2747     Function *F = CGM.getIntrinsic(IntrinsicID);
2748     llvm::FunctionType *FTy = F->getFunctionType();
2749
2750     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2751       Value *ArgValue;
2752       // If this is a normal argument, just emit it as a scalar.
2753       if ((ICEArguments & (1 << i)) == 0) {
2754         ArgValue = EmitScalarExpr(E->getArg(i));
2755       } else {
2756         // If this is required to be a constant, constant fold it so that we
2757         // know that the generated intrinsic gets a ConstantInt.
2758         llvm::APSInt Result;
2759         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2760         assert(IsConst && "Constant arg isn't actually constant?");
2761         (void)IsConst;
2762         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2763       }
2764
2765       // If the intrinsic arg type is different from the builtin arg type
2766       // we need to do a bit cast.
2767       llvm::Type *PTy = FTy->getParamType(i);
2768       if (PTy != ArgValue->getType()) {
2769         assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
2770                "Must be able to losslessly bit cast to param");
2771         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2772       }
2773
2774       Args.push_back(ArgValue);
2775     }
2776
2777     Value *V = Builder.CreateCall(F, Args);
2778     QualType BuiltinRetType = E->getType();
2779
2780     llvm::Type *RetTy = VoidTy;
2781     if (!BuiltinRetType->isVoidType())
2782       RetTy = ConvertType(BuiltinRetType);
2783
2784     if (RetTy != V->getType()) {
2785       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2786              "Must be able to losslessly bit cast result type");
2787       V = Builder.CreateBitCast(V, RetTy);
2788     }
2789
2790     return RValue::get(V);
2791   }
2792
2793   // See if we have a target specific builtin that needs to be lowered.
2794   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2795     return RValue::get(V);
2796
2797   ErrorUnsupported(E, "builtin function");
2798
2799   // Unknown builtin, for now just dump it out and return undef.
2800   return GetUndefRValue(E->getType());
2801 }
2802
2803 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2804                                         unsigned BuiltinID, const CallExpr *E,
2805                                         llvm::Triple::ArchType Arch) {
2806   switch (Arch) {
2807   case llvm::Triple::arm:
2808   case llvm::Triple::armeb:
2809   case llvm::Triple::thumb:
2810   case llvm::Triple::thumbeb:
2811     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2812   case llvm::Triple::aarch64:
2813   case llvm::Triple::aarch64_be:
2814     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2815   case llvm::Triple::x86:
2816   case llvm::Triple::x86_64:
2817     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2818   case llvm::Triple::ppc:
2819   case llvm::Triple::ppc64:
2820   case llvm::Triple::ppc64le:
2821     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2822   case llvm::Triple::r600:
2823   case llvm::Triple::amdgcn:
2824     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2825   case llvm::Triple::systemz:
2826     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2827   case llvm::Triple::nvptx:
2828   case llvm::Triple::nvptx64:
2829     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2830   case llvm::Triple::wasm32:
2831   case llvm::Triple::wasm64:
2832     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2833   default:
2834     return nullptr;
2835   }
2836 }
2837
2838 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2839                                               const CallExpr *E) {
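  // An aux builtin belongs to the auxiliary target (for example, a host
  // builtin encountered while compiling device code), so dispatch it on the
  // aux target's architecture rather than the current target's.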
2840   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2841     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2842     return EmitTargetArchBuiltinExpr(
2843         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2844         getContext().getAuxTargetInfo()->getTriple().getArch());
2845   }
2846
2847   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2848                                    getTarget().getTriple().getArch());
2849 }
2850
2851 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2852                                      NeonTypeFlags TypeFlags,
2853                                      bool V1Ty=false) {
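  // The quad flag selects 128-bit vectors: each lane count below doubles when
  // IsQuad is set (e.g. Int8 yields <16 x i8> rather than <8 x i8>), unless
  // V1Ty forces a single-element vector.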
2854   int IsQuad = TypeFlags.isQuad();
2855   switch (TypeFlags.getEltType()) {
2856   case NeonTypeFlags::Int8:
2857   case NeonTypeFlags::Poly8:
2858     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2859   case NeonTypeFlags::Int16:
2860   case NeonTypeFlags::Poly16:
2861   case NeonTypeFlags::Float16:
2862     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2863   case NeonTypeFlags::Int32:
2864     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2865   case NeonTypeFlags::Int64:
2866   case NeonTypeFlags::Poly64:
2867     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2868   case NeonTypeFlags::Poly128:
2869     // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
2870     // much of the i128 and f128 API is missing,
2871     // so we use v16i8 to represent poly128 and let it get pattern matched.
2872     return llvm::VectorType::get(CGF->Int8Ty, 16);
2873   case NeonTypeFlags::Float32:
2874     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2875   case NeonTypeFlags::Float64:
2876     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2877   }
2878   llvm_unreachable("Unknown vector element type!");
2879 }
2880
2881 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2882                                           NeonTypeFlags IntTypeFlags) {
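  // Map an integer NEON type to the floating-point vector type of the same
  // lane width and count (Int32 -> float, Int64 -> double).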
2883   int IsQuad = IntTypeFlags.isQuad();
2884   switch (IntTypeFlags.getEltType()) {
2885   case NeonTypeFlags::Int32:
2886     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2887   case NeonTypeFlags::Int64:
2888     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2889   default:
2890     llvm_unreachable("Type can't be converted to floating-point!");
2891   }
2892 }
2893
2894 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2895   unsigned nElts = V->getType()->getVectorNumElements();
2896   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2897   return Builder.CreateShuffleVector(V, V, SV, "lane");
2898 }
2899
2900 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2901                                      const char *name,
2902                                      unsigned shift, bool rightshift) {
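  // Bitcast each operand to the corresponding intrinsic parameter type; the
  // operand at index 'shift' (when nonzero) is instead rebuilt as a splatted
  // shift-amount constant, negated for right shifts.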
2903   unsigned j = 0;
2904   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2905        ai != ae; ++ai, ++j)
2906     if (shift > 0 && shift == j)
2907       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2908     else
2909       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2910
2911   return Builder.CreateCall(F, Ops, name);
2912 }
2913
2914 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2915                                             bool neg) {
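  // Splat the (possibly negated) constant shift amount across every lane of
  // the vector type Ty.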
2916   int SV = cast<ConstantInt>(V)->getSExtValue();
2917   return ConstantInt::get(Ty, neg ? -SV : SV);
2918 }
2919
2920 /// \brief Right-shift a vector by a constant.
2921 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2922                                           llvm::Type *Ty, bool usgn,
2923                                           const char *name) {
2924   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2925
2926   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2927   int EltSize = VTy->getScalarSizeInBits();
2928
2929   Vec = Builder.CreateBitCast(Vec, Ty);
2930
2931   // lshr/ashr are undefined when the shift amount is equal to the vector
2932   // element size.
2933   if (ShiftAmt == EltSize) {
2934     if (usgn) {
2935       // Right-shifting an unsigned value by its size yields 0.
2936       return llvm::ConstantAggregateZero::get(VTy);
2937     } else {
2938       // Right-shifting a signed value by its size is equivalent
2939       // to a shift of size-1.
2940       --ShiftAmt;
2941       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2942     }
2943   }
2944
2945   Shift = EmitNeonShiftVector(Shift, Ty, false);
2946   if (usgn)
2947     return Builder.CreateLShr(Vec, Shift, name);
2948   else
2949     return Builder.CreateAShr(Vec, Shift, name);
2950 }
2951
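// Type-modifier flags recorded in the NeonIntrinsicInfo tables below. The
// Add*/Vectorize*/Use*BitVectors flags are consumed by LookupNeonLLVMIntrinsic
// to build the overloaded type list handed to CGM.getIntrinsic; UnsignedAlts
// marks entries whose two intrinsics are the unsigned and signed alternatives.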
2952 enum {
2953   AddRetType = (1 << 0),
2954   Add1ArgType = (1 << 1),
2955   Add2ArgTypes = (1 << 2),
2956
2957   VectorizeRetType = (1 << 3),
2958   VectorizeArgTypes = (1 << 4),
2959
2960   InventFloatType = (1 << 5),
2961   UnsignedAlts = (1 << 6),
2962
2963   Use64BitVectors = (1 << 7),
2964   Use128BitVectors = (1 << 8),
2965
2966   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
2967   VectorRet = AddRetType | VectorizeRetType,
2968   VectorRetGetArgs01 =
2969       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
2970   FpCmpzModifiers =
2971       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
2972 };
2973
2974 namespace {
2975 struct NeonIntrinsicInfo {
2976   const char *NameHint;
2977   unsigned BuiltinID;
2978   unsigned LLVMIntrinsic;
2979   unsigned AltLLVMIntrinsic;
2980   unsigned TypeModifier;
2981
2982   bool operator<(unsigned RHSBuiltinID) const {
2983     return BuiltinID < RHSBuiltinID;
2984   }
2985   bool operator<(const NeonIntrinsicInfo &TE) const {
2986     return BuiltinID < TE.BuiltinID;
2987   }
2988 };
2989 } // end anonymous namespace
2990
2991 #define NEONMAP0(NameBase) \
2992   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
2993
2994 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
2995   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2996       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
2997
2998 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
2999   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3000       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3001       TypeModifier }
3002
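// For example, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// while NEONMAP0 entries leave both intrinsic fields zero. The tables must stay
// sorted by BuiltinID (findNeonIntrinsicInMap binary-searches and asserts this).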
3003 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
3004   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3005   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3006   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3007   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3008   NEONMAP0(vaddhn_v),
3009   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3010   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3011   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3012   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3013   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3014   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3015   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3016   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3017   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3018   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3019   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3020   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3021   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3022   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3023   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3024   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3025   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3026   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3027   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3028   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3029   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3030   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3031   NEONMAP0(vcvt_f32_v),
3032   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3033   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3034   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3035   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3036   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3037   NEONMAP0(vcvt_s32_v),
3038   NEONMAP0(vcvt_s64_v),
3039   NEONMAP0(vcvt_u32_v),
3040   NEONMAP0(vcvt_u64_v),
3041   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3042   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3043   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3044   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3045   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3046   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3047   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3048   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3049   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3050   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3051   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3052   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3053   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3054   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3055   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3056   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3057   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3058   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3059   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3060   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3061   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3062   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3063   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3064   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3065   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3066   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3067   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3068   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3069   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3070   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3071   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3072   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3073   NEONMAP0(vcvtq_f32_v),
3074   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3075   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3076   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3077   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3078   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3079   NEONMAP0(vcvtq_s32_v),
3080   NEONMAP0(vcvtq_s64_v),
3081   NEONMAP0(vcvtq_u32_v),
3082   NEONMAP0(vcvtq_u64_v),
3083   NEONMAP0(vext_v),
3084   NEONMAP0(vextq_v),
3085   NEONMAP0(vfma_v),
3086   NEONMAP0(vfmaq_v),
3087   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3088   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3089   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3090   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3091   NEONMAP0(vld1_dup_v),
3092   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3093   NEONMAP0(vld1q_dup_v),
3094   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3095   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3096   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3097   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3098   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3099   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3100   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3101   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3102   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3103   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3104   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3105   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3106   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3107   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3108   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3109   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3110   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3111   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3112   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3113   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3114   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3115   NEONMAP0(vmovl_v),
3116   NEONMAP0(vmovn_v),
3117   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3118   NEONMAP0(vmull_v),
3119   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3120   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3121   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3122   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3123   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3124   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3125   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3126   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3127   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3128   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3129   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3130   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3131   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3132   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3133   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3134   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3135   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3136   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3137   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3138   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3139   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3140   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3141   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3142   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3143   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3144   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3145   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3146   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3147   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3148   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3149   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3150   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3151   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3152   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3153   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3154   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3155   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3156   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3157   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3158   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3159   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3160   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3161   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3162   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3163   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3164   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3165   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3166   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3167   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3168   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3169   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3170   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3171   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3172   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3173   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3174   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3175   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3176   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3177   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3178   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3179   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3180   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3181   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3182   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3183   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3184   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3185   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3186   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3187   NEONMAP0(vshl_n_v),
3188   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3189   NEONMAP0(vshll_n_v),
3190   NEONMAP0(vshlq_n_v),
3191   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3192   NEONMAP0(vshr_n_v),
3193   NEONMAP0(vshrn_n_v),
3194   NEONMAP0(vshrq_n_v),
3195   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3196   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3197   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3198   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3199   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3200   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3201   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3202   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3203   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3204   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3205   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3206   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3207   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3208   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3209   NEONMAP0(vsubhn_v),
3210   NEONMAP0(vtrn_v),
3211   NEONMAP0(vtrnq_v),
3212   NEONMAP0(vtst_v),
3213   NEONMAP0(vtstq_v),
3214   NEONMAP0(vuzp_v),
3215   NEONMAP0(vuzpq_v),
3216   NEONMAP0(vzip_v),
3217   NEONMAP0(vzipq_v)
3218 };
3219
3220 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3221   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3222   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3223   NEONMAP0(vaddhn_v),
3224   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3225   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3226   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3227   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3228   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3229   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3230   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3231   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3232   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3233   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3234   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3235   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3236   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3237   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3238   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3239   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3240   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3241   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3242   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3243   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3244   NEONMAP0(vcvt_f32_v),
3245   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3246   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3247   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3248   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3249   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3250   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3251   NEONMAP0(vcvtq_f32_v),
3252   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3253   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3254   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3255   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3256   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3257   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3258   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3259   NEONMAP0(vext_v),
3260   NEONMAP0(vextq_v),
3261   NEONMAP0(vfma_v),
3262   NEONMAP0(vfmaq_v),
3263   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3264   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3265   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3266   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3267   NEONMAP0(vmovl_v),
3268   NEONMAP0(vmovn_v),
3269   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3270   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3271   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3272   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3273   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3274   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3275   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3276   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3277   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3278   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3279   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3280   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3281   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3282   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3283   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3284   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3285   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3286   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3287   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3288   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3289   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3290   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3291   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3292   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3293   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3294   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3295   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3296   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3297   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3298   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3299   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3300   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3301   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3302   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3303   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3304   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3305   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3306   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3307   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3308   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3309   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3310   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3311   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3312   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3313   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3314   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3315   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3316   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3317   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3318   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3319   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3320   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3321   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3322   NEONMAP0(vshl_n_v),
3323   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3324   NEONMAP0(vshll_n_v),
3325   NEONMAP0(vshlq_n_v),
3326   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3327   NEONMAP0(vshr_n_v),
3328   NEONMAP0(vshrn_n_v),
3329   NEONMAP0(vshrq_n_v),
3330   NEONMAP0(vsubhn_v),
3331   NEONMAP0(vtst_v),
3332   NEONMAP0(vtstq_v),
3333 };
3334
3335 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3336   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3337   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3338   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3339   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3340   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3341   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3342   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3343   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3344   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3345   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3346   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3347   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3348   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3349   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3350   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3351   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3352   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3353   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3354   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3355   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3356   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3357   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3358   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3359   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3360   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3361   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3362   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3363   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3364   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3365   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3366   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3367   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3368   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3369   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3370   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3371   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3372   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3373   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3374   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3375   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3376   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3377   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3378   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3379   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3380   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3381   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3382   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3383   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3384   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3385   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3386   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3387   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3388   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3389   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3390   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3391   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3392   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3393   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3394   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3395   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3396   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3397   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3398   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3399   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3400   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3401   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3402   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3403   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3404   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3405   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3406   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3407   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3408   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3409   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3410   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3411   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3412   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3413   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3414   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3415   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3416   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3417   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3418   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3419   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3420   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3421   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3422   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3423   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3424   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3425   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3426   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3427   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3428   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3429   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3430   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3431   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3432   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3433   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3434   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3435   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3436   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3437   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3438   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3439   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3440   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3441   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3442   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3443   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3444   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3445   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3446   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3447   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3448   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3449   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3450   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3451   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3452   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3453   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3454   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3455   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3456   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3457   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3458   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3459   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3460   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3461   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3462   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3463   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3464   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3465   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3466   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3467   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3468   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3469   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3470   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3471   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3472   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3473   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3474   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3475   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3476   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3477   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3478   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3479   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3480   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3481   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3482   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3483   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3484   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3485   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3486   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3487   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3488   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3489   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3490   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3491   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3492   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3493   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3494   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3495   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3496   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3497   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3498   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3499   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3500   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3501   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3502   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3503   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3504   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3505   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3506   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3507   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3508   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3509   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3510   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3511   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3512   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3513   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3514   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3515   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3516   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3517   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3518   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3519   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3520   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3522   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3523   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3524   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3525   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3526   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3527   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3528 };
3529
3530 #undef NEONMAP0
3531 #undef NEONMAP1
3532 #undef NEONMAP2
3533
3534 static bool NEONSIMDIntrinsicsProvenSorted = false;
3535
3536 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3537 static bool AArch64SISDIntrinsicsProvenSorted = false;
3538
3539
3540 static const NeonIntrinsicInfo *
3541 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3542                        unsigned BuiltinID, bool &MapProvenSorted) {
3543
3544 #ifndef NDEBUG
3545   if (!MapProvenSorted) {
3546     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3547     MapProvenSorted = true;
3548   }
3549 #endif
3550
3551   const NeonIntrinsicInfo *Builtin =
3552       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3553
3554   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3555     return Builtin;
3556
3557   return nullptr;
3558 }
3559
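     /// Build the list of overloaded return/argument types implied by a NEON
     /// intrinsic's type-modifier flags and return the corresponding LLVM
     /// intrinsic declaration.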
3560 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3561                                                    unsigned Modifier,
3562                                                    llvm::Type *ArgType,
3563                                                    const CallExpr *E) {
3564   int VectorSize = 0;
3565   if (Modifier & Use64BitVectors)
3566     VectorSize = 64;
3567   else if (Modifier & Use128BitVectors)
3568     VectorSize = 128;
3569
3570   // Return type.
3571   SmallVector<llvm::Type *, 3> Tys;
3572   if (Modifier & AddRetType) {
3573     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3574     if (Modifier & VectorizeRetType)
3575       Ty = llvm::VectorType::get(
3576           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3577
3578     Tys.push_back(Ty);
3579   }
3580
3581   // Arguments.
3582   if (Modifier & VectorizeArgTypes) {
3583     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3584     ArgType = llvm::VectorType::get(ArgType, Elts);
3585   }
3586
3587   if (Modifier & (Add1ArgType | Add2ArgTypes))
3588     Tys.push_back(ArgType);
3589
3590   if (Modifier & Add2ArgTypes)
3591     Tys.push_back(ArgType);
3592
3593   if (Modifier & InventFloatType)
3594     Tys.push_back(FloatTy);
3595
3596   return CGM.getIntrinsic(IntrinsicID, Tys);
3597 }
3598
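     /// Emit a scalar (SISD) NEON builtin through the table-driven path. Scalar
     /// operands are widened to one-element vectors where the LLVM intrinsic
     /// expects vectors, and the result is extracted or bitcast back to the
     /// scalar type the builtin returns.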
3599 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3600                                             const NeonIntrinsicInfo &SISDInfo,
3601                                             SmallVectorImpl<Value *> &Ops,
3602                                             const CallExpr *E) {
3603   unsigned BuiltinID = SISDInfo.BuiltinID;
3604   unsigned Int = SISDInfo.LLVMIntrinsic;
3605   unsigned Modifier = SISDInfo.TypeModifier;
3606   const char *s = SISDInfo.NameHint;
3607
3608   switch (BuiltinID) {
3609   case NEON::BI__builtin_neon_vcled_s64:
3610   case NEON::BI__builtin_neon_vcled_u64:
3611   case NEON::BI__builtin_neon_vcles_f32:
3612   case NEON::BI__builtin_neon_vcled_f64:
3613   case NEON::BI__builtin_neon_vcltd_s64:
3614   case NEON::BI__builtin_neon_vcltd_u64:
3615   case NEON::BI__builtin_neon_vclts_f32:
3616   case NEON::BI__builtin_neon_vcltd_f64:
3617   case NEON::BI__builtin_neon_vcales_f32:
3618   case NEON::BI__builtin_neon_vcaled_f64:
3619   case NEON::BI__builtin_neon_vcalts_f32:
3620   case NEON::BI__builtin_neon_vcaltd_f64:
3621     // Only one direction of comparisons actually exists; cmle is actually a cmge
3622     // with swapped operands. The table gives us the right intrinsic, but we
3623     // still need to do the swap.
3624     std::swap(Ops[0], Ops[1]);
3625     break;
3626   }
3627
3628   assert(Int && "Generic code assumes a valid intrinsic");
3629
3630   // Determine the type(s) of this overloaded AArch64 intrinsic.
3631   const Expr *Arg = E->getArg(0);
3632   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3633   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3634
3635   int j = 0;
3636   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3637   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3638        ai != ae; ++ai, ++j) {
3639     llvm::Type *ArgTy = ai->getType();
3640     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3641              ArgTy->getPrimitiveSizeInBits())
3642       continue;
3643
3644     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3645     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3646     // it before inserting.
3647     Ops[j] =
3648         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3649     Ops[j] =
3650         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3651   }
3652
3653   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3654   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3655   if (ResultType->getPrimitiveSizeInBits() <
3656       Result->getType()->getPrimitiveSizeInBits())
3657     return CGF.Builder.CreateExtractElement(Result, C0);
3658
3659   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3660 }
3661
3662 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3663     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3664     const char *NameHint, unsigned Modifier, const CallExpr *E,
3665     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3666   // Get the last argument, which specifies the vector type.
3667   llvm::APSInt NeonTypeConst;
3668   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3669   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3670     return nullptr;
3671
3672   // Determine the type of this overloaded NEON intrinsic.
3673   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3674   bool Usgn = Type.isUnsigned();
3675   bool Quad = Type.isQuad();
3676
3677   llvm::VectorType *VTy = GetNeonType(this, Type);
3678   llvm::Type *Ty = VTy;
3679   if (!Ty)
3680     return nullptr;
3681
3682   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3683     return Builder.getInt32(addr.getAlignment().getQuantity());
3684   };
3685
3686   unsigned Int = LLVMIntrinsic;
3687   if ((Modifier & UnsignedAlts) && !Usgn)
3688     Int = AltLLVMIntrinsic;
3689
3690   switch (BuiltinID) {
3691   default: break;
3692   case NEON::BI__builtin_neon_vabs_v:
3693   case NEON::BI__builtin_neon_vabsq_v:
3694     if (VTy->getElementType()->isFloatingPointTy())
3695       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3696     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3697   case NEON::BI__builtin_neon_vaddhn_v: {
3698     llvm::VectorType *SrcTy =
3699         llvm::VectorType::getExtendedElementVectorType(VTy);
3700
3701     // %sum = add <4 x i32> %lhs, %rhs
3702     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3703     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3704     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3705
3706     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3707     Constant *ShiftAmt =
3708         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3709     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3710
3711     // %res = trunc <4 x i32> %high to <4 x i16>
3712     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3713   }
3714   case NEON::BI__builtin_neon_vcale_v:
3715   case NEON::BI__builtin_neon_vcaleq_v:
3716   case NEON::BI__builtin_neon_vcalt_v:
3717   case NEON::BI__builtin_neon_vcaltq_v:
3718     std::swap(Ops[0], Ops[1]);
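         // fall through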
3719   case NEON::BI__builtin_neon_vcage_v:
3720   case NEON::BI__builtin_neon_vcageq_v:
3721   case NEON::BI__builtin_neon_vcagt_v:
3722   case NEON::BI__builtin_neon_vcagtq_v: {
3723     llvm::Type *VecFlt = llvm::VectorType::get(
3724         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3725         VTy->getNumElements());
3726     llvm::Type *Tys[] = { VTy, VecFlt };
3727     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3728     return EmitNeonCall(F, Ops, NameHint);
3729   }
3730   case NEON::BI__builtin_neon_vclz_v:
3731   case NEON::BI__builtin_neon_vclzq_v:
3732     // We generate a target-independent intrinsic, which needs a second argument
3733     // for whether or not clz of zero is undefined; on ARM it isn't.
3734     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3735     break;
3736   case NEON::BI__builtin_neon_vcvt_f32_v:
3737   case NEON::BI__builtin_neon_vcvtq_f32_v:
3738     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3739     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3740     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3741                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3742   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3743   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3744   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3745   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3746     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3747     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3748     Function *F = CGM.getIntrinsic(Int, Tys);
3749     return EmitNeonCall(F, Ops, "vcvt_n");
3750   }
3751   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3752   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3753   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3754   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3755   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3756   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3757   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3758   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3759     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3760     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3761     return EmitNeonCall(F, Ops, "vcvt_n");
3762   }
3763   case NEON::BI__builtin_neon_vcvt_s32_v:
3764   case NEON::BI__builtin_neon_vcvt_u32_v:
3765   case NEON::BI__builtin_neon_vcvt_s64_v:
3766   case NEON::BI__builtin_neon_vcvt_u64_v:
3767   case NEON::BI__builtin_neon_vcvtq_s32_v:
3768   case NEON::BI__builtin_neon_vcvtq_u32_v:
3769   case NEON::BI__builtin_neon_vcvtq_s64_v:
3770   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3771     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3772     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3773                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3774   }
3775   case NEON::BI__builtin_neon_vcvta_s32_v:
3776   case NEON::BI__builtin_neon_vcvta_s64_v:
3777   case NEON::BI__builtin_neon_vcvta_u32_v:
3778   case NEON::BI__builtin_neon_vcvta_u64_v:
3779   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3780   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3781   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3782   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3783   case NEON::BI__builtin_neon_vcvtn_s32_v:
3784   case NEON::BI__builtin_neon_vcvtn_s64_v:
3785   case NEON::BI__builtin_neon_vcvtn_u32_v:
3786   case NEON::BI__builtin_neon_vcvtn_u64_v:
3787   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3788   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3789   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3790   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3791   case NEON::BI__builtin_neon_vcvtp_s32_v:
3792   case NEON::BI__builtin_neon_vcvtp_s64_v:
3793   case NEON::BI__builtin_neon_vcvtp_u32_v:
3794   case NEON::BI__builtin_neon_vcvtp_u64_v:
3795   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3796   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3797   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3798   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3799   case NEON::BI__builtin_neon_vcvtm_s32_v:
3800   case NEON::BI__builtin_neon_vcvtm_s64_v:
3801   case NEON::BI__builtin_neon_vcvtm_u32_v:
3802   case NEON::BI__builtin_neon_vcvtm_u64_v:
3803   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3804   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3805   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3806   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3807     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3808     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3809   }
3810   case NEON::BI__builtin_neon_vext_v:
3811   case NEON::BI__builtin_neon_vextq_v: {
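         // vext extracts a vector from the concatenation of the two operands,
         // starting at the lane given by the immediate; lower it to a
         // shufflevector with indices CV, CV+1, ..., CV+NumElts-1.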
3812     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3813     SmallVector<uint32_t, 16> Indices;
3814     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3815       Indices.push_back(i+CV);
3816
3817     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3818     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3819     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3820   }
3821   case NEON::BI__builtin_neon_vfma_v:
3822   case NEON::BI__builtin_neon_vfmaq_v: {
3823     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3824     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3825     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3826     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3827
3828     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3829     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3830   }
3831   case NEON::BI__builtin_neon_vld1_v:
3832   case NEON::BI__builtin_neon_vld1q_v: {
3833     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3834     Ops.push_back(getAlignmentValue32(PtrOp0));
3835     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3836   }
3837   case NEON::BI__builtin_neon_vld2_v:
3838   case NEON::BI__builtin_neon_vld2q_v:
3839   case NEON::BI__builtin_neon_vld3_v:
3840   case NEON::BI__builtin_neon_vld3q_v:
3841   case NEON::BI__builtin_neon_vld4_v:
3842   case NEON::BI__builtin_neon_vld4q_v: {
3843     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3844     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3845     Value *Align = getAlignmentValue32(PtrOp1);
3846     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3847     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3848     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3849     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3850   }
3851   case NEON::BI__builtin_neon_vld1_dup_v:
3852   case NEON::BI__builtin_neon_vld1q_dup_v: {
3853     Value *V = UndefValue::get(Ty);
3854     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3855     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3856     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3857     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3858     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3859     return EmitNeonSplat(Ops[0], CI);
3860   }
3861   case NEON::BI__builtin_neon_vld2_lane_v:
3862   case NEON::BI__builtin_neon_vld2q_lane_v:
3863   case NEON::BI__builtin_neon_vld3_lane_v:
3864   case NEON::BI__builtin_neon_vld3q_lane_v:
3865   case NEON::BI__builtin_neon_vld4_lane_v:
3866   case NEON::BI__builtin_neon_vld4q_lane_v: {
3867     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3868     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3869     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3870       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3871     Ops.push_back(getAlignmentValue32(PtrOp1));
3872     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3873     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3874     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3875     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3876   }
3877   case NEON::BI__builtin_neon_vmovl_v: {
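         // vmovl lengthens each element: bitcast the operand to the narrow
         // vector type, then zero- or sign-extend to the wide result type.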
3878     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3879     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3880     if (Usgn)
3881       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3882     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3883   }
3884   case NEON::BI__builtin_neon_vmovn_v: {
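         // vmovn narrows each element: bitcast the operand to the wide vector
         // type, then truncate to the narrow result type.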
3885     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3886     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3887     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3888   }
3889   case NEON::BI__builtin_neon_vmull_v:
3890     // FIXME: the integer vmull operations could be emitted in terms of pure
3891     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3892     // hoisting the exts outside loops. Until global ISel comes along that can
3893     // see through such movement, this leads to bad CodeGen. So we need an
3894     // intrinsic for now.
3895     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3896     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3897     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3898   case NEON::BI__builtin_neon_vpadal_v:
3899   case NEON::BI__builtin_neon_vpadalq_v: {
3900     // The source operand type has twice as many elements of half the size.
3901     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3902     llvm::Type *EltTy =
3903       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3904     llvm::Type *NarrowTy =
3905       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3906     llvm::Type *Tys[2] = { Ty, NarrowTy };
3907     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3908   }
3909   case NEON::BI__builtin_neon_vpaddl_v:
3910   case NEON::BI__builtin_neon_vpaddlq_v: {
3911     // The source operand type has twice as many elements of half the size.
3912     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3913     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3914     llvm::Type *NarrowTy =
3915       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3916     llvm::Type *Tys[2] = { Ty, NarrowTy };
3917     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3918   }
3919   case NEON::BI__builtin_neon_vqdmlal_v:
3920   case NEON::BI__builtin_neon_vqdmlsl_v: {
3921     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3922     Ops[1] =
3923         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3924     Ops.resize(2);
3925     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3926   }
3927   case NEON::BI__builtin_neon_vqshl_n_v:
3928   case NEON::BI__builtin_neon_vqshlq_n_v:
3929     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3930                         1, false);
3931   case NEON::BI__builtin_neon_vqshlu_n_v:
3932   case NEON::BI__builtin_neon_vqshluq_n_v:
3933     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3934                         1, false);
3935   case NEON::BI__builtin_neon_vrecpe_v:
3936   case NEON::BI__builtin_neon_vrecpeq_v:
3937   case NEON::BI__builtin_neon_vrsqrte_v:
3938   case NEON::BI__builtin_neon_vrsqrteq_v:
3939     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3940     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3941
3942   case NEON::BI__builtin_neon_vrshr_n_v:
3943   case NEON::BI__builtin_neon_vrshrq_n_v:
3944     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3945                         1, true);
3946   case NEON::BI__builtin_neon_vshl_n_v:
3947   case NEON::BI__builtin_neon_vshlq_n_v:
3948     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3949     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3950                              "vshl_n");
3951   case NEON::BI__builtin_neon_vshll_n_v: {
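         // vshll_n widens each element, then shifts left by the immediate.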
3952     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3953     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3954     if (Usgn)
3955       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3956     else
3957       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3958     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3959     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3960   }
3961   case NEON::BI__builtin_neon_vshrn_n_v: {
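         // vshrn_n shifts right in the wide source type, then truncates each
         // element to the narrow result type.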
3962     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3963     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3964     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3965     if (Usgn)
3966       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3967     else
3968       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3969     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3970   }
3971   case NEON::BI__builtin_neon_vshr_n_v:
3972   case NEON::BI__builtin_neon_vshrq_n_v:
3973     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3974   case NEON::BI__builtin_neon_vst1_v:
3975   case NEON::BI__builtin_neon_vst1q_v:
3976   case NEON::BI__builtin_neon_vst2_v:
3977   case NEON::BI__builtin_neon_vst2q_v:
3978   case NEON::BI__builtin_neon_vst3_v:
3979   case NEON::BI__builtin_neon_vst3q_v:
3980   case NEON::BI__builtin_neon_vst4_v:
3981   case NEON::BI__builtin_neon_vst4q_v:
3982   case NEON::BI__builtin_neon_vst2_lane_v:
3983   case NEON::BI__builtin_neon_vst2q_lane_v:
3984   case NEON::BI__builtin_neon_vst3_lane_v:
3985   case NEON::BI__builtin_neon_vst3q_lane_v:
3986   case NEON::BI__builtin_neon_vst4_lane_v:
3987   case NEON::BI__builtin_neon_vst4q_lane_v: {
3988     llvm::Type *Tys[] = {Int8PtrTy, Ty};
3989     Ops.push_back(getAlignmentValue32(PtrOp0));
3990     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3991   }
3992   case NEON::BI__builtin_neon_vsubhn_v: {
3993     llvm::VectorType *SrcTy =
3994         llvm::VectorType::getExtendedElementVectorType(VTy);
3995
3996     // %diff = sub <4 x i32> %lhs, %rhs
3997     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3998     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3999     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4000
4001     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
4002     Constant *ShiftAmt =
4003         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4004     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4005
4006     // %res = trunc <4 x i32> %high to <4 x i16>
4007     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4008   }
4009   case NEON::BI__builtin_neon_vtrn_v:
4010   case NEON::BI__builtin_neon_vtrnq_v: {
4011     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4012     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4013     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4014     Value *SV = nullptr;
4015
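         // Each pass builds one of the two transposed results as a shuffle:
         // vi==0 interleaves the even-numbered lanes of the two inputs, vi==1
         // the odd-numbered lanes; the result is stored to the vi'th vector
         // behind the result pointer.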
4016     for (unsigned vi = 0; vi != 2; ++vi) {
4017       SmallVector<uint32_t, 16> Indices;
4018       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4019         Indices.push_back(i+vi);
4020         Indices.push_back(i+e+vi);
4021       }
4022       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4023       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4024       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4025     }
4026     return SV;
4027   }
4028   case NEON::BI__builtin_neon_vtst_v:
4029   case NEON::BI__builtin_neon_vtstq_v: {
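         // vtst: AND the operands, compare the result against zero, and
         // sign-extend so each lane becomes all-ones or all-zeros.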
4030     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4031     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4032     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4033     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4034                                 ConstantAggregateZero::get(Ty));
4035     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4036   }
4037   case NEON::BI__builtin_neon_vuzp_v:
4038   case NEON::BI__builtin_neon_vuzpq_v: {
4039     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4040     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4041     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4042     Value *SV = nullptr;
4043
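         // Each pass de-interleaves one result: vi==0 gathers the even-numbered
         // lanes of the concatenated operands, vi==1 the odd-numbered lanes;
         // the result is stored to the vi'th vector behind the result pointer.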
4044     for (unsigned vi = 0; vi != 2; ++vi) {
4045       SmallVector<uint32_t, 16> Indices;
4046       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4047         Indices.push_back(2*i+vi);
4048
4049       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4050       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4051       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4052     }
4053     return SV;
4054   }
4055   case NEON::BI__builtin_neon_vzip_v:
4056   case NEON::BI__builtin_neon_vzipq_v: {
4057     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4058     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4059     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4060     Value *SV = nullptr;
4061
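         // Each pass zips one result: vi==0 interleaves the low halves of the
         // two inputs, vi==1 the high halves; the result is stored to the vi'th
         // vector behind the result pointer.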
4062     for (unsigned vi = 0; vi != 2; ++vi) {
4063       SmallVector<uint32_t, 16> Indices;
4064       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4065         Indices.push_back((i + vi*e) >> 1);
4066         Indices.push_back(((i + vi*e) >> 1)+e);
4067       }
4068       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4069       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4070       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4071     }
4072     return SV;
4073   }
4074   }
4075
4076   assert(Int && "Expected valid intrinsic number");
4077
4078   // Determine the type(s) of this overloaded AArch64 intrinsic.
4079   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4080
4081   Value *Result = EmitNeonCall(F, Ops, NameHint);
4082   llvm::Type *ResultType = ConvertType(E->getType());
4083   // An AArch64 intrinsic may return a one-element vector type; cast the result
4084   // back to the scalar type expected by the builtin.
4085   return Builder.CreateBitCast(Result, ResultType, NameHint);
4086 }
4087
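     /// Emit a NEON compare-against-zero: compare Op with zero using the
     /// floating-point predicate Fp or the integer predicate Ip, then
     /// sign-extend the boolean result to Ty.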
4088 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4089     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4090     const CmpInst::Predicate Ip, const Twine &Name) {
4091   llvm::Type *OTy = Op->getType();
4092
4093   // FIXME: this is utterly horrific. We should not be looking at previous
4094   // codegen context to find out what needs doing. Unfortunately TableGen
4095   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4096   // (etc).
4097   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4098     OTy = BI->getOperand(0)->getType();
4099
4100   Op = Builder.CreateBitCast(Op, OTy);
4101   if (OTy->getScalarType()->isFloatingPointTy()) {
4102     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4103   } else {
4104     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4105   }
4106   return Builder.CreateSExt(Op, Ty, Name);
4107 }
4108
4109 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4110                                  Value *ExtOp, Value *IndexOp,
4111                                  llvm::Type *ResTy, unsigned IntID,
4112                                  const char *Name) {
4113   SmallVector<Value *, 2> TblOps;
4114   if (ExtOp)
4115     TblOps.push_back(ExtOp);
4116
4117   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4118   SmallVector<uint32_t, 16> Indices;
4119   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4120   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4121     Indices.push_back(2*i);
4122     Indices.push_back(2*i+1);
4123   }
4124
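       // Concatenate adjacent pairs of 64-bit table registers into the 128-bit
       // vectors the TBL/TBX intrinsics operate on.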
4125   int PairPos = 0, End = Ops.size() - 1;
4126   while (PairPos < End) {
4127     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4128                                                      Ops[PairPos+1], Indices,
4129                                                      Name));
4130     PairPos += 2;
4131   }
4132
4133   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4134   // of the final 128-bit lookup table with zero.
4135   if (PairPos == End) {
4136     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4137     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4138                                                      ZeroTbl, Indices, Name));
4139   }
4140
4141   Function *TblF;
4142   TblOps.push_back(IndexOp);
4143   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4144
4145   return CGF.EmitNeonCall(TblF, TblOps, Name);
4146 }
4147
4148 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4149   unsigned Value;
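       // Immediate operand for the llvm.arm.hint intrinsic:
       // 0 = NOP, 1 = YIELD, 2 = WFE, 3 = WFI, 4 = SEV, 5 = SEVL.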
4150   switch (BuiltinID) {
4151   default:
4152     return nullptr;
4153   case ARM::BI__builtin_arm_nop:
4154     Value = 0;
4155     break;
4156   case ARM::BI__builtin_arm_yield:
4157   case ARM::BI__yield:
4158     Value = 1;
4159     break;
4160   case ARM::BI__builtin_arm_wfe:
4161   case ARM::BI__wfe:
4162     Value = 2;
4163     break;
4164   case ARM::BI__builtin_arm_wfi:
4165   case ARM::BI__wfi:
4166     Value = 3;
4167     break;
4168   case ARM::BI__builtin_arm_sev:
4169   case ARM::BI__sev:
4170     Value = 4;
4171     break;
4172   case ARM::BI__builtin_arm_sevl:
4173   case ARM::BI__sevl:
4174     Value = 5;
4175     break;
4176   }
4177
4178   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4179                             llvm::ConstantInt::get(Int32Ty, Value));
4180 }
4181
4182 // Generates the IR for the read/write special register builtin,
4183 // ValueType is the type of the value that is to be written or read,
4184 // RegisterType is the type of the register being written to or read from.
4185 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4186                                          const CallExpr *E,
4187                                          llvm::Type *RegisterType,
4188                                          llvm::Type *ValueType,
4189                                          bool IsRead,
4190                                          StringRef SysReg = "") {
4191   // Read/write register intrinsics only support 32- and 64-bit operations.
4192   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4193           && "Unsupported size for register.");
4194
4195   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4196   CodeGen::CodeGenModule &CGM = CGF.CGM;
4197   LLVMContext &Context = CGM.getLLVMContext();
4198
4199   if (SysReg.empty()) {
4200     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4201     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4202   }
4203
4204   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4205   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4206   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4207
4208   llvm::Type *Types[] = { RegisterType };
4209
4210   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4211   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4212             && "Can't fit 64-bit value in 32-bit register");
4213
4214   if (IsRead) {
4215     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4216     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4217
4218     if (MixedTypes)
4219       // Read into 64 bit register and then truncate result to 32 bit.
4220       return Builder.CreateTrunc(Call, ValueType);
4221
4222     if (ValueType->isPointerTy())
4223       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4224       return Builder.CreateIntToPtr(Call, ValueType);
4225
4226     return Call;
4227   }
4228
4229   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4230   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4231   if (MixedTypes) {
4232     // Extend 32 bit write value to 64 bit to pass to write.
4233     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4234     return Builder.CreateCall(F, { Metadata, ArgValue });
4235   }
4236
4237   if (ValueType->isPointerTy()) {
4238     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4239     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4240     return Builder.CreateCall(F, { Metadata, ArgValue });
4241   }
4242
4243   return Builder.CreateCall(F, { Metadata, ArgValue });
4244 }
4245
4246 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4247 /// argument that specifies the vector type.
4248 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4249   switch (BuiltinID) {
4250   default: break;
4251   case NEON::BI__builtin_neon_vget_lane_i8:
4252   case NEON::BI__builtin_neon_vget_lane_i16:
4253   case NEON::BI__builtin_neon_vget_lane_i32:
4254   case NEON::BI__builtin_neon_vget_lane_i64:
4255   case NEON::BI__builtin_neon_vget_lane_f32:
4256   case NEON::BI__builtin_neon_vgetq_lane_i8:
4257   case NEON::BI__builtin_neon_vgetq_lane_i16:
4258   case NEON::BI__builtin_neon_vgetq_lane_i32:
4259   case NEON::BI__builtin_neon_vgetq_lane_i64:
4260   case NEON::BI__builtin_neon_vgetq_lane_f32:
4261   case NEON::BI__builtin_neon_vset_lane_i8:
4262   case NEON::BI__builtin_neon_vset_lane_i16:
4263   case NEON::BI__builtin_neon_vset_lane_i32:
4264   case NEON::BI__builtin_neon_vset_lane_i64:
4265   case NEON::BI__builtin_neon_vset_lane_f32:
4266   case NEON::BI__builtin_neon_vsetq_lane_i8:
4267   case NEON::BI__builtin_neon_vsetq_lane_i16:
4268   case NEON::BI__builtin_neon_vsetq_lane_i32:
4269   case NEON::BI__builtin_neon_vsetq_lane_i64:
4270   case NEON::BI__builtin_neon_vsetq_lane_f32:
4271   case NEON::BI__builtin_neon_vsha1h_u32:
4272   case NEON::BI__builtin_neon_vsha1cq_u32:
4273   case NEON::BI__builtin_neon_vsha1pq_u32:
4274   case NEON::BI__builtin_neon_vsha1mq_u32:
4275   case ARM::BI_MoveToCoprocessor:
4276   case ARM::BI_MoveToCoprocessor2:
4277     return false;
4278   }
4279   return true;
4280 }
4281
4282 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4283                                            const CallExpr *E) {
4284   if (auto Hint = GetValueForARMHint(BuiltinID))
4285     return Hint;
4286
4287   if (BuiltinID == ARM::BI__emit) {
4288     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4289     llvm::FunctionType *FTy =
4290         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4291
4292     APSInt Value;
4293     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4294       llvm_unreachable("Sema will ensure that the parameter is constant");
4295
4296     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4297
4298     llvm::InlineAsm *Emit =
4299         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4300                                  /*SideEffects=*/true)
4301                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4302                                  /*SideEffects=*/true);
4303
4304     return Builder.CreateCall(Emit);
4305   }
4306
4307   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4308     Value *Option = EmitScalarExpr(E->getArg(0));
4309     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4310   }
4311
4312   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4313     Value *Address = EmitScalarExpr(E->getArg(0));
4314     Value *RW      = EmitScalarExpr(E->getArg(1));
4315     Value *IsData  = EmitScalarExpr(E->getArg(2));
4316
4317     // Locality is not supported on the ARM target.
4318     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4319
4320     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4321     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4322   }
4323
4324   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4325     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4326     return Builder.CreateCall(
4327         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4328   }
4329
4330   if (BuiltinID == ARM::BI__clear_cache) {
4331     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4332     const FunctionDecl *FD = E->getDirectCallee();
4333     Value *Ops[2];
4334     for (unsigned i = 0; i < 2; i++)
4335       Ops[i] = EmitScalarExpr(E->getArg(i));
4336     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4337     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4338     StringRef Name = FD->getName();
4339     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4340   }
4341
4342   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4343       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4344     Function *F;
4345
4346     switch (BuiltinID) {
4347     default: llvm_unreachable("unexpected builtin");
4348     case ARM::BI__builtin_arm_mcrr:
4349       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4350       break;
4351     case ARM::BI__builtin_arm_mcrr2:
4352       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4353       break;
4354     }
4355
4356     // The MCRR{2} instruction has 5 operands, but
4357     // the intrinsic has only 4 because Rt and Rt2
4358     // are represented as a single unsigned 64-bit
4359     // integer in the intrinsic definition, while
4360     // internally they are represented as two
4361     // 32-bit integers.
4362
4363     Value *Coproc = EmitScalarExpr(E->getArg(0));
4364     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4365     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4366     Value *CRm = EmitScalarExpr(E->getArg(3));
4367
4368     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4369     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4370     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4371     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4372
4373     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4374   }
4375
4376   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4377       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4378     Function *F;
4379
4380     switch (BuiltinID) {
4381     default: llvm_unreachable("unexpected builtin");
4382     case ARM::BI__builtin_arm_mrrc:
4383       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4384       break;
4385     case ARM::BI__builtin_arm_mrrc2:
4386       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4387       break;
4388     }
4389
4390     Value *Coproc = EmitScalarExpr(E->getArg(0));
4391     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4392     Value *CRm  = EmitScalarExpr(E->getArg(2));
4393     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4394
4395     // Returns an unsigned 64 bit integer, represented
4396     // as two 32 bit integers.
4397
4398     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4399     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4400     Rt = Builder.CreateZExt(Rt, Int64Ty);
4401     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4402
4403     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4404     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4405     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4406
4407     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4408   }
4409
4410   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4411       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4412         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4413        getContext().getTypeSize(E->getType()) == 64) ||
4414       BuiltinID == ARM::BI__ldrexd) {
4415     Function *F;
4416
4417     switch (BuiltinID) {
4418     default: llvm_unreachable("unexpected builtin");
4419     case ARM::BI__builtin_arm_ldaex:
4420       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4421       break;
4422     case ARM::BI__builtin_arm_ldrexd:
4423     case ARM::BI__builtin_arm_ldrex:
4424     case ARM::BI__ldrexd:
4425       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4426       break;
4427     }
4428
4429     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4430     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4431                                     "ldrexd");
4432
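         // Reassemble the 64-bit result from the two 32-bit halves returned by
         // ldrexd.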
4433     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4434     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4435     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4436     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4437
4438     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4439     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4440     Val = Builder.CreateOr(Val, Val1);
4441     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4442   }
4443
4444   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4445       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4446     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4447
4448     QualType Ty = E->getType();
4449     llvm::Type *RealResTy = ConvertType(Ty);
4450     llvm::Type *PtrTy = llvm::IntegerType::get(
4451         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4452     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4453
4454     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4455                                        ? Intrinsic::arm_ldaex
4456                                        : Intrinsic::arm_ldrex,
4457                                    PtrTy);
4458     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4459
4460     if (RealResTy->isPointerTy())
4461       return Builder.CreateIntToPtr(Val, RealResTy);
4462     else {
4463       llvm::Type *IntResTy = llvm::IntegerType::get(
4464           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4465       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4466       return Builder.CreateBitCast(Val, RealResTy);
4467     }
4468   }
4469
4470   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4471       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4472         BuiltinID == ARM::BI__builtin_arm_strex) &&
4473        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4474     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4475                                        ? Intrinsic::arm_stlexd
4476                                        : Intrinsic::arm_strexd);
4477     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4478
4479     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4480     Value *Val = EmitScalarExpr(E->getArg(0));
4481     Builder.CreateStore(Val, Tmp);
4482
4483     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4484     Val = Builder.CreateLoad(LdPtr);
4485
4486     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4487     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4488     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4489     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4490   }
4491
4492   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4493       BuiltinID == ARM::BI__builtin_arm_stlex) {
4494     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4495     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4496
4497     QualType Ty = E->getArg(0)->getType();
4498     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4499                                                  getContext().getTypeSize(Ty));
4500     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4501
4502     if (StoreVal->getType()->isPointerTy())
4503       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4504     else {
4505       llvm::Type *IntTy = llvm::IntegerType::get(
4506           getLLVMContext(),
4507           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4508       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4509       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4510     }
4511
4512     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4513                                        ? Intrinsic::arm_stlex
4514                                        : Intrinsic::arm_strex,
4515                                    StoreAddr->getType());
4516     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4517   }
4518
4519   switch (BuiltinID) {
4520   case ARM::BI__iso_volatile_load8:
4521   case ARM::BI__iso_volatile_load16:
4522   case ARM::BI__iso_volatile_load32:
4523   case ARM::BI__iso_volatile_load64: {
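         // Lower the __iso_volatile load to a plain volatile load of an integer
         // of the pointee's width.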
4524     Value *Ptr = EmitScalarExpr(E->getArg(0));
4525     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4526     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4527     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4528                                              LoadSize.getQuantity() * 8);
4529     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4530     llvm::LoadInst *Load =
4531       Builder.CreateAlignedLoad(Ptr, LoadSize);
4532     Load->setVolatile(true);
4533     return Load;
4534   }
4535   case ARM::BI__iso_volatile_store8:
4536   case ARM::BI__iso_volatile_store16:
4537   case ARM::BI__iso_volatile_store32:
4538   case ARM::BI__iso_volatile_store64: {
4539     Value *Ptr = EmitScalarExpr(E->getArg(0));
4540     Value *Value = EmitScalarExpr(E->getArg(1));
4541     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4542     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4543     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4544                                              StoreSize.getQuantity() * 8);
4545     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4546     llvm::StoreInst *Store =
4547       Builder.CreateAlignedStore(Value, Ptr,
4548                                  StoreSize);
4549     Store->setVolatile(true);
4550     return Store;
4551   }
4552   }
4553
4554   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4555     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4556     return Builder.CreateCall(F);
4557   }
4558
4559   // CRC32
4560   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4561   switch (BuiltinID) {
4562   case ARM::BI__builtin_arm_crc32b:
4563     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4564   case ARM::BI__builtin_arm_crc32cb:
4565     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4566   case ARM::BI__builtin_arm_crc32h:
4567     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4568   case ARM::BI__builtin_arm_crc32ch:
4569     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4570   case ARM::BI__builtin_arm_crc32w:
4571   case ARM::BI__builtin_arm_crc32d:
4572     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4573   case ARM::BI__builtin_arm_crc32cw:
4574   case ARM::BI__builtin_arm_crc32cd:
4575     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4576   }
4577
4578   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4579     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4580     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4581
4582     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4583     // intrinsics, hence we need different codegen for these cases.
4584     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4585         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4586       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4587       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4588       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4589       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4590
4591       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4592       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4593       return Builder.CreateCall(F, {Res, Arg1b});
4594     } else {
4595       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4596
4597       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4598       return Builder.CreateCall(F, {Arg0, Arg1});
4599     }
4600   }
4601
4602   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4603       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4604       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4605       BuiltinID == ARM::BI__builtin_arm_wsr ||
4606       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4607       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4608
4609     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4610                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4611                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4612
4613     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4614                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4615
4616     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4617                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4618
4619     llvm::Type *ValueType;
4620     llvm::Type *RegisterType;
4621     if (IsPointerBuiltin) {
4622       ValueType = VoidPtrTy;
4623       RegisterType = Int32Ty;
4624     } else if (Is64Bit) {
4625       ValueType = RegisterType = Int64Ty;
4626     } else {
4627       ValueType = RegisterType = Int32Ty;
4628     }
4629
4630     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4631   }
4632
4633   // Find out if any arguments are required to be integer constant
4634   // expressions.
4635   unsigned ICEArguments = 0;
4636   ASTContext::GetBuiltinTypeError Error;
4637   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4638   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4639
4640   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4641     return Builder.getInt32(addr.getAlignment().getQuantity());
4642   };
4643
4644   Address PtrOp0 = Address::invalid();
4645   Address PtrOp1 = Address::invalid();
4646   SmallVector<Value*, 4> Ops;
4647   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4648   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4649   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4650     if (i == 0) {
4651       switch (BuiltinID) {
4652       case NEON::BI__builtin_neon_vld1_v:
4653       case NEON::BI__builtin_neon_vld1q_v:
4654       case NEON::BI__builtin_neon_vld1q_lane_v:
4655       case NEON::BI__builtin_neon_vld1_lane_v:
4656       case NEON::BI__builtin_neon_vld1_dup_v:
4657       case NEON::BI__builtin_neon_vld1q_dup_v:
4658       case NEON::BI__builtin_neon_vst1_v:
4659       case NEON::BI__builtin_neon_vst1q_v:
4660       case NEON::BI__builtin_neon_vst1q_lane_v:
4661       case NEON::BI__builtin_neon_vst1_lane_v:
4662       case NEON::BI__builtin_neon_vst2_v:
4663       case NEON::BI__builtin_neon_vst2q_v:
4664       case NEON::BI__builtin_neon_vst2_lane_v:
4665       case NEON::BI__builtin_neon_vst2q_lane_v:
4666       case NEON::BI__builtin_neon_vst3_v:
4667       case NEON::BI__builtin_neon_vst3q_v:
4668       case NEON::BI__builtin_neon_vst3_lane_v:
4669       case NEON::BI__builtin_neon_vst3q_lane_v:
4670       case NEON::BI__builtin_neon_vst4_v:
4671       case NEON::BI__builtin_neon_vst4q_v:
4672       case NEON::BI__builtin_neon_vst4_lane_v:
4673       case NEON::BI__builtin_neon_vst4q_lane_v:
4674         // Get the alignment for the argument in addition to the value;
4675         // we'll use it later.
4676         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4677         Ops.push_back(PtrOp0.getPointer());
4678         continue;
4679       }
4680     }
4681     if (i == 1) {
4682       switch (BuiltinID) {
4683       case NEON::BI__builtin_neon_vld2_v:
4684       case NEON::BI__builtin_neon_vld2q_v:
4685       case NEON::BI__builtin_neon_vld3_v:
4686       case NEON::BI__builtin_neon_vld3q_v:
4687       case NEON::BI__builtin_neon_vld4_v:
4688       case NEON::BI__builtin_neon_vld4q_v:
4689       case NEON::BI__builtin_neon_vld2_lane_v:
4690       case NEON::BI__builtin_neon_vld2q_lane_v:
4691       case NEON::BI__builtin_neon_vld3_lane_v:
4692       case NEON::BI__builtin_neon_vld3q_lane_v:
4693       case NEON::BI__builtin_neon_vld4_lane_v:
4694       case NEON::BI__builtin_neon_vld4q_lane_v:
4695       case NEON::BI__builtin_neon_vld2_dup_v:
4696       case NEON::BI__builtin_neon_vld3_dup_v:
4697       case NEON::BI__builtin_neon_vld4_dup_v:
4698         // Get the alignment for the argument in addition to the value;
4699         // we'll use it later.
4700         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4701         Ops.push_back(PtrOp1.getPointer());
4702         continue;
4703       }
4704     }
4705
4706     if ((ICEArguments & (1 << i)) == 0) {
4707       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4708     } else {
4709       // If this is required to be a constant, constant fold it so that we know
4710       // that the generated intrinsic gets a ConstantInt.
4711       llvm::APSInt Result;
4712       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4713       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4714       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4715     }
4716   }
4717
4718   switch (BuiltinID) {
4719   default: break;
4720
4721   case NEON::BI__builtin_neon_vget_lane_i8:
4722   case NEON::BI__builtin_neon_vget_lane_i16:
4723   case NEON::BI__builtin_neon_vget_lane_i32:
4724   case NEON::BI__builtin_neon_vget_lane_i64:
4725   case NEON::BI__builtin_neon_vget_lane_f32:
4726   case NEON::BI__builtin_neon_vgetq_lane_i8:
4727   case NEON::BI__builtin_neon_vgetq_lane_i16:
4728   case NEON::BI__builtin_neon_vgetq_lane_i32:
4729   case NEON::BI__builtin_neon_vgetq_lane_i64:
4730   case NEON::BI__builtin_neon_vgetq_lane_f32:
4731     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4732
4733   case NEON::BI__builtin_neon_vset_lane_i8:
4734   case NEON::BI__builtin_neon_vset_lane_i16:
4735   case NEON::BI__builtin_neon_vset_lane_i32:
4736   case NEON::BI__builtin_neon_vset_lane_i64:
4737   case NEON::BI__builtin_neon_vset_lane_f32:
4738   case NEON::BI__builtin_neon_vsetq_lane_i8:
4739   case NEON::BI__builtin_neon_vsetq_lane_i16:
4740   case NEON::BI__builtin_neon_vsetq_lane_i32:
4741   case NEON::BI__builtin_neon_vsetq_lane_i64:
4742   case NEON::BI__builtin_neon_vsetq_lane_f32:
4743     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4744
4745   case NEON::BI__builtin_neon_vsha1h_u32:
4746     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4747                         "vsha1h");
4748   case NEON::BI__builtin_neon_vsha1cq_u32:
4749     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4750                         "vsha1h");
4751   case NEON::BI__builtin_neon_vsha1pq_u32:
4752     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4753                         "vsha1h");
4754   case NEON::BI__builtin_neon_vsha1mq_u32:
4755     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4756                         "vsha1h");
4757
4758   // The ARM _MoveToCoprocessor builtins put the input register value as
4759   // the first argument, but the LLVM intrinsic expects it as the third one.
4760   case ARM::BI_MoveToCoprocessor:
4761   case ARM::BI_MoveToCoprocessor2: {
4762     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4763                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4764     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4765                                   Ops[3], Ops[4], Ops[5]});
4766   }
4767   case ARM::BI_BitScanForward:
4768   case ARM::BI_BitScanForward64:
4769     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4770   case ARM::BI_BitScanReverse:
4771   case ARM::BI_BitScanReverse64:
4772     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4773
4774   case ARM::BI_InterlockedAnd64:
4775     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4776   case ARM::BI_InterlockedExchange64:
4777     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4778   case ARM::BI_InterlockedExchangeAdd64:
4779     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4780   case ARM::BI_InterlockedExchangeSub64:
4781     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4782   case ARM::BI_InterlockedOr64:
4783     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4784   case ARM::BI_InterlockedXor64:
4785     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4786   case ARM::BI_InterlockedDecrement64:
4787     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4788   case ARM::BI_InterlockedIncrement64:
4789     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4790   }
4791
4792   // Get the last argument, which specifies the vector type.
4793   assert(HasExtraArg);
4794   llvm::APSInt Result;
4795   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4796   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4797     return nullptr;
4798
4799   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4800       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4801     // Determine the overloaded type of this builtin.
4802     llvm::Type *Ty;
4803     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4804       Ty = FloatTy;
4805     else
4806       Ty = DoubleTy;
4807
4808     // Determine whether this is an unsigned conversion or not.
4809     bool usgn = Result.getZExtValue() == 1;
4810     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4811
4812     // Call the appropriate intrinsic.
4813     Function *F = CGM.getIntrinsic(Int, Ty);
4814     return Builder.CreateCall(F, Ops, "vcvtr");
4815   }
4816
4817   // Determine the type of this overloaded NEON intrinsic.
4818   NeonTypeFlags Type(Result.getZExtValue());
4819   bool usgn = Type.isUnsigned();
4820   bool rightShift = false;
4821
4822   llvm::VectorType *VTy = GetNeonType(this, Type);
4823   llvm::Type *Ty = VTy;
4824   if (!Ty)
4825     return nullptr;
4826
4827   // Many NEON builtins have identical semantics and uses in ARM and
4828   // AArch64. Emit these in a single function.
4829   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4830   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4831       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4832   if (Builtin)
4833     return EmitCommonNeonBuiltinExpr(
4834         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4835         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4836
4837   unsigned Int;
4838   switch (BuiltinID) {
4839   default: return nullptr;
4840   case NEON::BI__builtin_neon_vld1q_lane_v:
4841     // Handle 64-bit integer elements as a special case.  Use shuffles of
4842     // one-element vectors to avoid poor code for i64 in the backend.
4843     if (VTy->getElementType()->isIntegerTy(64)) {
4844       // Extract the other lane.
4845       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4846       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4847       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4848       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4849       // Load the value as a one-element vector.
4850       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4851       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4852       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4853       Value *Align = getAlignmentValue32(PtrOp0);
4854       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4855       // Combine them.
4856       uint32_t Indices[] = {1 - Lane, Lane};
4857       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4858       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4859     }
4860     // fall through
4861   case NEON::BI__builtin_neon_vld1_lane_v: {
4862     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4863     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4864     Value *Ld = Builder.CreateLoad(PtrOp0);
4865     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4866   }
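  // A sketch of the vldN_dup lowering below: for 64-bit elements a plain vldN
  // load suffices, since each result vector holds a single element; otherwise
  // a vldNlane load of lane 0 into undef vectors is emitted and lane 0 is then
  // splatted across each result vector. For example (illustrative), vld2_dup
  // of u8 data becomes a lane-0 call to the arm.neon.vld2lane intrinsic
  // followed by per-vector shufflevector splats.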
4867   case NEON::BI__builtin_neon_vld2_dup_v:
4868   case NEON::BI__builtin_neon_vld3_dup_v:
4869   case NEON::BI__builtin_neon_vld4_dup_v: {
4870     // Handle 64-bit elements as a special case.  There is no "dup" needed.
4871     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4872       switch (BuiltinID) {
4873       case NEON::BI__builtin_neon_vld2_dup_v:
4874         Int = Intrinsic::arm_neon_vld2;
4875         break;
4876       case NEON::BI__builtin_neon_vld3_dup_v:
4877         Int = Intrinsic::arm_neon_vld3;
4878         break;
4879       case NEON::BI__builtin_neon_vld4_dup_v:
4880         Int = Intrinsic::arm_neon_vld4;
4881         break;
4882       default: llvm_unreachable("unknown vld_dup intrinsic?");
4883       }
4884       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4885       Function *F = CGM.getIntrinsic(Int, Tys);
4886       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4887       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4888       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4889       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4890       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4891     }
4892     switch (BuiltinID) {
4893     case NEON::BI__builtin_neon_vld2_dup_v:
4894       Int = Intrinsic::arm_neon_vld2lane;
4895       break;
4896     case NEON::BI__builtin_neon_vld3_dup_v:
4897       Int = Intrinsic::arm_neon_vld3lane;
4898       break;
4899     case NEON::BI__builtin_neon_vld4_dup_v:
4900       Int = Intrinsic::arm_neon_vld4lane;
4901       break;
4902     default: llvm_unreachable("unknown vld_dup intrinsic?");
4903     }
4904     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4905     Function *F = CGM.getIntrinsic(Int, Tys);
4906     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4907
4908     SmallVector<Value*, 6> Args;
4909     Args.push_back(Ops[1]);
4910     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4911
4912     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4913     Args.push_back(CI);
4914     Args.push_back(getAlignmentValue32(PtrOp1));
4915
4916     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4917     // splat lane 0 to all elts in each vector of the result.
4918     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4919       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4920       Value *Elt = Builder.CreateBitCast(Val, Ty);
4921       Elt = EmitNeonSplat(Elt, CI);
4922       Elt = Builder.CreateBitCast(Elt, Val->getType());
4923       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4924     }
4925     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4926     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4927     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4928   }
4929   case NEON::BI__builtin_neon_vqrshrn_n_v:
4930     Int =
4931       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4932     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4933                         1, true);
4934   case NEON::BI__builtin_neon_vqrshrun_n_v:
4935     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4936                         Ops, "vqrshrun_n", 1, true);
4937   case NEON::BI__builtin_neon_vqshrn_n_v:
4938     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4939     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4940                         1, true);
4941   case NEON::BI__builtin_neon_vqshrun_n_v:
4942     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4943                         Ops, "vqshrun_n", 1, true);
4944   case NEON::BI__builtin_neon_vrecpe_v:
4945   case NEON::BI__builtin_neon_vrecpeq_v:
4946     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4947                         Ops, "vrecpe");
4948   case NEON::BI__builtin_neon_vrshrn_n_v:
4949     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4950                         Ops, "vrshrn_n", 1, true);
4951   case NEON::BI__builtin_neon_vrsra_n_v:
4952   case NEON::BI__builtin_neon_vrsraq_n_v:
4953     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4954     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4955     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4956     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4957     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4958     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
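  // vsri_n and vsli_n share the arm.neon.vshiftins emission below; the only
  // difference is that for the right-shift-and-insert forms the shift-amount
  // vector is negated (rightShift == true), following the NEON convention of
  // encoding right shifts as negative shift counts.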
4959   case NEON::BI__builtin_neon_vsri_n_v:
4960   case NEON::BI__builtin_neon_vsriq_n_v:
4961     rightShift = true;  // fall through
4962   case NEON::BI__builtin_neon_vsli_n_v:
4963   case NEON::BI__builtin_neon_vsliq_n_v:
4964     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4965     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4966                         Ops, "vsli_n");
4967   case NEON::BI__builtin_neon_vsra_n_v:
4968   case NEON::BI__builtin_neon_vsraq_n_v:
4969     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4970     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4971     return Builder.CreateAdd(Ops[0], Ops[1]);
4972   case NEON::BI__builtin_neon_vst1q_lane_v:
4973     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4974     // a one-element vector and avoid poor code for i64 in the backend.
4975     if (VTy->getElementType()->isIntegerTy(64)) {
4976       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4977       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4978       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4979       Ops[2] = getAlignmentValue32(PtrOp0);
4980       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4981       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4982                                                  Tys), Ops);
4983     }
4984     // fall through
4985   case NEON::BI__builtin_neon_vst1_lane_v: {
4986     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4987     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4988     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4989     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4990     return St;
4991   }
4992   case NEON::BI__builtin_neon_vtbl1_v:
4993     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4994                         Ops, "vtbl1");
4995   case NEON::BI__builtin_neon_vtbl2_v:
4996     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4997                         Ops, "vtbl2");
4998   case NEON::BI__builtin_neon_vtbl3_v:
4999     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5000                         Ops, "vtbl3");
5001   case NEON::BI__builtin_neon_vtbl4_v:
5002     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5003                         Ops, "vtbl4");
5004   case NEON::BI__builtin_neon_vtbx1_v:
5005     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5006                         Ops, "vtbx1");
5007   case NEON::BI__builtin_neon_vtbx2_v:
5008     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5009                         Ops, "vtbx2");
5010   case NEON::BI__builtin_neon_vtbx3_v:
5011     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5012                         Ops, "vtbx3");
5013   case NEON::BI__builtin_neon_vtbx4_v:
5014     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5015                         Ops, "vtbx4");
5016   }
5017 }
5018
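// A sketch of the strategy below: the 64-bit (d-register) vtbl/vtbx forms have
// no direct AArch64 instruction, so the table operands are concatenated into
// 128-bit vectors by packTBLDVectorList (defined earlier in this file) and the
// q-register tbl/tbx intrinsics are used instead. The vqtbl*/vqtbx* builtins
// map directly onto the aarch64.neon.tbl*/tbx* intrinsics.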
5019 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5020                                       const CallExpr *E,
5021                                       SmallVectorImpl<Value *> &Ops) {
5022   unsigned int Int = 0;
5023   const char *s = nullptr;
5024
5025   switch (BuiltinID) {
5026   default:
5027     return nullptr;
5028   case NEON::BI__builtin_neon_vtbl1_v:
5029   case NEON::BI__builtin_neon_vqtbl1_v:
5030   case NEON::BI__builtin_neon_vqtbl1q_v:
5031   case NEON::BI__builtin_neon_vtbl2_v:
5032   case NEON::BI__builtin_neon_vqtbl2_v:
5033   case NEON::BI__builtin_neon_vqtbl2q_v:
5034   case NEON::BI__builtin_neon_vtbl3_v:
5035   case NEON::BI__builtin_neon_vqtbl3_v:
5036   case NEON::BI__builtin_neon_vqtbl3q_v:
5037   case NEON::BI__builtin_neon_vtbl4_v:
5038   case NEON::BI__builtin_neon_vqtbl4_v:
5039   case NEON::BI__builtin_neon_vqtbl4q_v:
5040     break;
5041   case NEON::BI__builtin_neon_vtbx1_v:
5042   case NEON::BI__builtin_neon_vqtbx1_v:
5043   case NEON::BI__builtin_neon_vqtbx1q_v:
5044   case NEON::BI__builtin_neon_vtbx2_v:
5045   case NEON::BI__builtin_neon_vqtbx2_v:
5046   case NEON::BI__builtin_neon_vqtbx2q_v:
5047   case NEON::BI__builtin_neon_vtbx3_v:
5048   case NEON::BI__builtin_neon_vqtbx3_v:
5049   case NEON::BI__builtin_neon_vqtbx3q_v:
5050   case NEON::BI__builtin_neon_vtbx4_v:
5051   case NEON::BI__builtin_neon_vqtbx4_v:
5052   case NEON::BI__builtin_neon_vqtbx4q_v:
5053     break;
5054   }
5055
5056   assert(E->getNumArgs() >= 3);
5057
5058   // Get the last argument, which specifies the vector type.
5059   llvm::APSInt Result;
5060   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5061   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5062     return nullptr;
5063
5064   // Determine the type of this overloaded NEON intrinsic.
5065   NeonTypeFlags Type(Result.getZExtValue());
5066   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5067   if (!Ty)
5068     return nullptr;
5069
5070   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5071
5072   // AArch64 scalar builtins are not overloaded; they do not have an extra
5073   // argument that specifies the vector type, so handle each case separately.
5074   switch (BuiltinID) {
5075   case NEON::BI__builtin_neon_vtbl1_v: {
5076     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5077                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5078                               "vtbl1");
5079   }
5080   case NEON::BI__builtin_neon_vtbl2_v: {
5081     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5082                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5083                               "vtbl1");
5084   }
5085   case NEON::BI__builtin_neon_vtbl3_v: {
5086     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5087                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5088                               "vtbl2");
5089   }
5090   case NEON::BI__builtin_neon_vtbl4_v: {
5091     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5092                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5093                               "vtbl2");
5094   }
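  // vtbx1 and vtbx3 have no single-instruction equivalent, so they are
  // emulated (sketch): perform the TBL lookup, then compare the index vector
  // against the table size (8 or 24 bytes) and use the resulting mask to keep
  // the original destination bytes wherever the index was out of range.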
5095   case NEON::BI__builtin_neon_vtbx1_v: {
5096     Value *TblRes =
5097         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5098                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5099
5100     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5101     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5102     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5103
5104     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5105     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5106     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5107   }
5108   case NEON::BI__builtin_neon_vtbx2_v: {
5109     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5110                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5111                               "vtbx1");
5112   }
5113   case NEON::BI__builtin_neon_vtbx3_v: {
5114     Value *TblRes =
5115         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5116                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5117
5118     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5119     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5120                                            TwentyFourV);
5121     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5122
5123     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5124     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5125     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5126   }
5127   case NEON::BI__builtin_neon_vtbx4_v: {
5128     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5129                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5130                               "vtbx2");
5131   }
5132   case NEON::BI__builtin_neon_vqtbl1_v:
5133   case NEON::BI__builtin_neon_vqtbl1q_v:
5134     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5135   case NEON::BI__builtin_neon_vqtbl2_v:
5136   case NEON::BI__builtin_neon_vqtbl2q_v:
5137     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5138   case NEON::BI__builtin_neon_vqtbl3_v:
5139   case NEON::BI__builtin_neon_vqtbl3q_v:
5140     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5141   case NEON::BI__builtin_neon_vqtbl4_v:
5142   case NEON::BI__builtin_neon_vqtbl4q_v:
5143     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5144   case NEON::BI__builtin_neon_vqtbx1_v:
5145   case NEON::BI__builtin_neon_vqtbx1q_v:
5146     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5147   case NEON::BI__builtin_neon_vqtbx2_v:
5148   case NEON::BI__builtin_neon_vqtbx2q_v:
5149     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5150   case NEON::BI__builtin_neon_vqtbx3_v:
5151   case NEON::BI__builtin_neon_vqtbx3q_v:
5152     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5153   case NEON::BI__builtin_neon_vqtbx4_v:
5154   case NEON::BI__builtin_neon_vqtbx4q_v:
5155     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5156   }
5158
5159   if (!Int)
5160     return nullptr;
5161
5162   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5163   return CGF.EmitNeonCall(F, Ops, s);
5164 }
5165
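// Helper used by the scalar saturating-multiply builtins below: it places a
// scalar i16 into lane 0 of an undef <4 x i16> so that the vector NEON
// intrinsics can be reused; callers extract lane 0 of the result afterwards.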
5166 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5167   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5168   Op = Builder.CreateBitCast(Op, Int16Ty);
5169   Value *V = UndefValue::get(VTy);
5170   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5171   Op = Builder.CreateInsertElement(V, Op, CI);
5172   return Op;
5173 }
5174
5175 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5176                                                const CallExpr *E) {
5177   unsigned HintID = static_cast<unsigned>(-1);
5178   switch (BuiltinID) {
5179   default: break;
5180   case AArch64::BI__builtin_arm_nop:
5181     HintID = 0;
5182     break;
5183   case AArch64::BI__builtin_arm_yield:
5184     HintID = 1;
5185     break;
5186   case AArch64::BI__builtin_arm_wfe:
5187     HintID = 2;
5188     break;
5189   case AArch64::BI__builtin_arm_wfi:
5190     HintID = 3;
5191     break;
5192   case AArch64::BI__builtin_arm_sev:
5193     HintID = 4;
5194     break;
5195   case AArch64::BI__builtin_arm_sevl:
5196     HintID = 5;
5197     break;
5198   }
5199
5200   if (HintID != static_cast<unsigned>(-1)) {
5201     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5202     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5203   }
5204
5205   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5206     Value *Address         = EmitScalarExpr(E->getArg(0));
5207     Value *RW              = EmitScalarExpr(E->getArg(1));
5208     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5209     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5210     Value *IsData          = EmitScalarExpr(E->getArg(4));
5211
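    // Mapping sketch (assuming the ACLE 0-based cache-level encoding, where
    // 0 = L1, 1 = L2, 2 = L3): a temporal fetch at level N becomes
    // llvm.prefetch locality 3 - N, so L1 -> 3, L2 -> 2, L3 -> 1; streaming
    // fetches always use locality 0.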
5212     Value *Locality = nullptr;
5213     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5214       // Temporal fetch: convert the cache level to an LLVM locality value.
5215       Locality = llvm::ConstantInt::get(Int32Ty,
5216         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5217     } else {
5218       // Streaming fetch.
5219       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5220     }
5221
5222     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5223     // PLDL3STRM or PLDL2STRM.
5224     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5225     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5226   }
5227
5228   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5229     assert((getContext().getTypeSize(E->getType()) == 32) &&
5230            "rbit of unusual size!");
5231     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5232     return Builder.CreateCall(
5233         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5234   }
5235   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5236     assert((getContext().getTypeSize(E->getType()) == 64) &&
5237            "rbit of unusual size!");
5238     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5239     return Builder.CreateCall(
5240         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5241   }
5242
5243   if (BuiltinID == AArch64::BI__clear_cache) {
5244     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5245     const FunctionDecl *FD = E->getDirectCallee();
5246     Value *Ops[2];
5247     for (unsigned i = 0; i < 2; i++)
5248       Ops[i] = EmitScalarExpr(E->getArg(i));
5249     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5250     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5251     StringRef Name = FD->getName();
5252     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5253   }
5254
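  // For 128-bit ldrex/ldaex the value is loaded as a pair of i64 halves via
  // ldxp/ldaxp and recombined (sketch): both halves are zero-extended to i128
  // and merged as (Hi << 64) | Lo, with struct element 1 treated as the high
  // half, before being bitcast to the declared return type.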
5255   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5256       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5257       getContext().getTypeSize(E->getType()) == 128) {
5258     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5259                                        ? Intrinsic::aarch64_ldaxp
5260                                        : Intrinsic::aarch64_ldxp);
5261
5262     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5263     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5264                                     "ldxp");
5265
5266     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5267     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5268     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5269     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5270     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5271
5272     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5273     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5274     Val = Builder.CreateOr(Val, Val1);
5275     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5276   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5277              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5278     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5279
5280     QualType Ty = E->getType();
5281     llvm::Type *RealResTy = ConvertType(Ty);
5282     llvm::Type *PtrTy = llvm::IntegerType::get(
5283         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5284     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5285
5286     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5287                                        ? Intrinsic::aarch64_ldaxr
5288                                        : Intrinsic::aarch64_ldxr,
5289                                    PtrTy);
5290     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5291
5292     if (RealResTy->isPointerTy())
5293       return Builder.CreateIntToPtr(Val, RealResTy);
5294
5295     llvm::Type *IntResTy = llvm::IntegerType::get(
5296         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5297     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5298     return Builder.CreateBitCast(Val, RealResTy);
5299   }
5300
5301   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5302        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5303       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5304     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5305                                        ? Intrinsic::aarch64_stlxp
5306                                        : Intrinsic::aarch64_stxp);
5307     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
5308
5309     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5310     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5311
5312     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5313     llvm::Value *Val = Builder.CreateLoad(Tmp);
5314
5315     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5316     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5317     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5318                                          Int8PtrTy);
5319     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5320   }
5321
5322   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5323       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5324     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5325     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5326
5327     QualType Ty = E->getArg(0)->getType();
5328     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5329                                                  getContext().getTypeSize(Ty));
5330     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5331
5332     if (StoreVal->getType()->isPointerTy())
5333       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5334     else {
5335       llvm::Type *IntTy = llvm::IntegerType::get(
5336           getLLVMContext(),
5337           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5338       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5339       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5340     }
5341
5342     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5343                                        ? Intrinsic::aarch64_stlxr
5344                                        : Intrinsic::aarch64_stxr,
5345                                    StoreAddr->getType());
5346     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5347   }
5348
5349   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5350     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5351     return Builder.CreateCall(F);
5352   }
5353
5354   // CRC32
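  // These map directly onto the aarch64.crc32* intrinsics; note that the
  // 64-bit data forms use crc32x/crc32cx. Illustratively,
  // __builtin_arm_crc32b(crc, b) is emitted roughly as
  //   %r = call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %b.zext)
  // with the data operand zero-extended to the intrinsic's parameter type.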
5355   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5356   switch (BuiltinID) {
5357   case AArch64::BI__builtin_arm_crc32b:
5358     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5359   case AArch64::BI__builtin_arm_crc32cb:
5360     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5361   case AArch64::BI__builtin_arm_crc32h:
5362     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5363   case AArch64::BI__builtin_arm_crc32ch:
5364     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5365   case AArch64::BI__builtin_arm_crc32w:
5366     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5367   case AArch64::BI__builtin_arm_crc32cw:
5368     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5369   case AArch64::BI__builtin_arm_crc32d:
5370     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5371   case AArch64::BI__builtin_arm_crc32cd:
5372     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5373   }
5374
5375   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5376     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5377     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5378     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5379
5380     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5381     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5382
5383     return Builder.CreateCall(F, {Arg0, Arg1});
5384   }
5385
5386   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5387       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5388       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5389       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5390       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5391       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5392
5393     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5394                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5395                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5396
5397     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5398                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5399
5400     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5401                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5402
5403     llvm::Type *ValueType;
5404     llvm::Type *RegisterType = Int64Ty;
5405     if (IsPointerBuiltin) {
5406       ValueType = VoidPtrTy;
5407     } else if (Is64Bit) {
5408       ValueType = Int64Ty;
5409     } else {
5410       ValueType = Int32Ty;
5411     }
5412
5413     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5414   }
5415
5416   // Find out if any arguments are required to be integer constant
5417   // expressions.
5418   unsigned ICEArguments = 0;
5419   ASTContext::GetBuiltinTypeError Error;
5420   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5421   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5422
5423   llvm::SmallVector<Value*, 4> Ops;
5424   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5425     if ((ICEArguments & (1 << i)) == 0) {
5426       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5427     } else {
5428       // If this is required to be a constant, constant fold it so that we know
5429       // that the generated intrinsic gets a ConstantInt.
5430       llvm::APSInt Result;
5431       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5432       assert(IsConst && "Constant arg isn't actually constant?");
5433       (void)IsConst;
5434       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5435     }
5436   }
5437
5438   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5439   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5440       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5441
5442   if (Builtin) {
5443     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5444     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5445     assert(Result && "SISD intrinsic should have been handled");
5446     return Result;
5447   }
5448
5449   llvm::APSInt Result;
5450   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5451   NeonTypeFlags Type(0);
5452   if (Arg->isIntegerConstantExpr(Result, getContext()))
5453     // Determine the type of this overloaded NEON intrinsic.
5454     Type = NeonTypeFlags(Result.getZExtValue());
5455
5456   bool usgn = Type.isUnsigned();
5457   bool quad = Type.isQuad();
5458
5459   // Handle non-overloaded intrinsics first.
5460   switch (BuiltinID) {
5461   default: break;
5462   case NEON::BI__builtin_neon_vldrq_p128: {
5463     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5464     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5465     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5466     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5467                                      CharUnits::fromQuantity(16));
5468   }
5469   case NEON::BI__builtin_neon_vstrq_p128: {
5470     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5471     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5472     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5473   }
5474   case NEON::BI__builtin_neon_vcvts_u32_f32:
5475   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5476     usgn = true;
5477     // FALL THROUGH
5478   case NEON::BI__builtin_neon_vcvts_s32_f32:
5479   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5480     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5481     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5482     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5483     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5484     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5485     if (usgn)
5486       return Builder.CreateFPToUI(Ops[0], InTy);
5487     return Builder.CreateFPToSI(Ops[0], InTy);
5488   }
5489   case NEON::BI__builtin_neon_vcvts_f32_u32:
5490   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5491     usgn = true;
5492     // FALL THROUGH
5493   case NEON::BI__builtin_neon_vcvts_f32_s32:
5494   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5495     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5496     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5497     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5498     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5499     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5500     if (usgn)
5501       return Builder.CreateUIToFP(Ops[0], FTy);
5502     return Builder.CreateSIToFP(Ops[0], FTy);
5503   }
5504   case NEON::BI__builtin_neon_vpaddd_s64: {
5505     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5506     Value *Vec = EmitScalarExpr(E->getArg(0));
5507     // The vector is v2i64, so make sure it's bitcast to that.
5508     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5509     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5510     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5511     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5512     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5513     // Pairwise addition of a v2i64 into a scalar i64.
5514     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5515   }
5516   case NEON::BI__builtin_neon_vpaddd_f64: {
5517     llvm::Type *Ty =
5518       llvm::VectorType::get(DoubleTy, 2);
5519     Value *Vec = EmitScalarExpr(E->getArg(0));
5520     // The vector is v2f64, so make sure it's bitcast to that.
5521     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5522     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5523     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5524     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5525     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5526     // Pairwise addition of a v2f64 into a scalar f64.
5527     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5528   }
5529   case NEON::BI__builtin_neon_vpadds_f32: {
5530     llvm::Type *Ty =
5531       llvm::VectorType::get(FloatTy, 2);
5532     Value *Vec = EmitScalarExpr(E->getArg(0));
5533     // The vector is v2f32, so make sure it's bitcast to that.
5534     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5535     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5536     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5537     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5538     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5539     // Pairwise addition of a v2f32 into a scalar f32.
5540     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5541   }
5542   case NEON::BI__builtin_neon_vceqzd_s64:
5543   case NEON::BI__builtin_neon_vceqzd_f64:
5544   case NEON::BI__builtin_neon_vceqzs_f32:
5545     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5546     return EmitAArch64CompareBuiltinExpr(
5547         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5548         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5549   case NEON::BI__builtin_neon_vcgezd_s64:
5550   case NEON::BI__builtin_neon_vcgezd_f64:
5551   case NEON::BI__builtin_neon_vcgezs_f32:
5552     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5553     return EmitAArch64CompareBuiltinExpr(
5554         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5555         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5556   case NEON::BI__builtin_neon_vclezd_s64:
5557   case NEON::BI__builtin_neon_vclezd_f64:
5558   case NEON::BI__builtin_neon_vclezs_f32:
5559     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5560     return EmitAArch64CompareBuiltinExpr(
5561         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5562         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5563   case NEON::BI__builtin_neon_vcgtzd_s64:
5564   case NEON::BI__builtin_neon_vcgtzd_f64:
5565   case NEON::BI__builtin_neon_vcgtzs_f32:
5566     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5567     return EmitAArch64CompareBuiltinExpr(
5568         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5569         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5570   case NEON::BI__builtin_neon_vcltzd_s64:
5571   case NEON::BI__builtin_neon_vcltzd_f64:
5572   case NEON::BI__builtin_neon_vcltzs_f32:
5573     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5574     return EmitAArch64CompareBuiltinExpr(
5575         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5576         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5577
5578   case NEON::BI__builtin_neon_vceqzd_u64: {
5579     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5580     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5581     Ops[0] =
5582         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5583     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5584   }
5585   case NEON::BI__builtin_neon_vceqd_f64:
5586   case NEON::BI__builtin_neon_vcled_f64:
5587   case NEON::BI__builtin_neon_vcltd_f64:
5588   case NEON::BI__builtin_neon_vcged_f64:
5589   case NEON::BI__builtin_neon_vcgtd_f64: {
5590     llvm::CmpInst::Predicate P;
5591     switch (BuiltinID) {
5592     default: llvm_unreachable("missing builtin ID in switch!");
5593     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5594     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5595     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5596     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5597     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5598     }
5599     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5600     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5601     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5602     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5603     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5604   }
5605   case NEON::BI__builtin_neon_vceqs_f32:
5606   case NEON::BI__builtin_neon_vcles_f32:
5607   case NEON::BI__builtin_neon_vclts_f32:
5608   case NEON::BI__builtin_neon_vcges_f32:
5609   case NEON::BI__builtin_neon_vcgts_f32: {
5610     llvm::CmpInst::Predicate P;
5611     switch (BuiltinID) {
5612     default: llvm_unreachable("missing builtin ID in switch!");
5613     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5614     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5615     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5616     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5617     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5618     }
5619     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5620     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5621     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5622     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5623     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5624   }
5625   case NEON::BI__builtin_neon_vceqd_s64:
5626   case NEON::BI__builtin_neon_vceqd_u64:
5627   case NEON::BI__builtin_neon_vcgtd_s64:
5628   case NEON::BI__builtin_neon_vcgtd_u64:
5629   case NEON::BI__builtin_neon_vcltd_s64:
5630   case NEON::BI__builtin_neon_vcltd_u64:
5631   case NEON::BI__builtin_neon_vcged_u64:
5632   case NEON::BI__builtin_neon_vcged_s64:
5633   case NEON::BI__builtin_neon_vcled_u64:
5634   case NEON::BI__builtin_neon_vcled_s64: {
5635     llvm::CmpInst::Predicate P;
5636     switch (BuiltinID) {
5637     default: llvm_unreachable("missing builtin ID in switch!");
5638     case NEON::BI__builtin_neon_vceqd_s64:
5639     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5640     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5641     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5642     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5643     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5644     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5645     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5646     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5647     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5648     }
5649     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5650     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5651     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5652     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5653     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5654   }
5655   case NEON::BI__builtin_neon_vtstd_s64:
5656   case NEON::BI__builtin_neon_vtstd_u64: {
5657     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5658     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5659     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5660     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5661     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5662                                 llvm::Constant::getNullValue(Int64Ty));
5663     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5664   }
5665   case NEON::BI__builtin_neon_vset_lane_i8:
5666   case NEON::BI__builtin_neon_vset_lane_i16:
5667   case NEON::BI__builtin_neon_vset_lane_i32:
5668   case NEON::BI__builtin_neon_vset_lane_i64:
5669   case NEON::BI__builtin_neon_vset_lane_f32:
5670   case NEON::BI__builtin_neon_vsetq_lane_i8:
5671   case NEON::BI__builtin_neon_vsetq_lane_i16:
5672   case NEON::BI__builtin_neon_vsetq_lane_i32:
5673   case NEON::BI__builtin_neon_vsetq_lane_i64:
5674   case NEON::BI__builtin_neon_vsetq_lane_f32:
5675     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5676     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5677   case NEON::BI__builtin_neon_vset_lane_f64:
5678     // The vector type needs a cast for the v1f64 variant.
5679     Ops[1] = Builder.CreateBitCast(Ops[1],
5680                                    llvm::VectorType::get(DoubleTy, 1));
5681     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5682     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5683   case NEON::BI__builtin_neon_vsetq_lane_f64:
5684     // The vector type needs a cast for the v2f64 variant.
5685     Ops[1] = Builder.CreateBitCast(Ops[1],
5686         llvm::VectorType::get(DoubleTy, 2));
5687     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5688     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5689
5690   case NEON::BI__builtin_neon_vget_lane_i8:
5691   case NEON::BI__builtin_neon_vdupb_lane_i8:
5692     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5693     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5694                                         "vget_lane");
5695   case NEON::BI__builtin_neon_vgetq_lane_i8:
5696   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5697     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5698     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5699                                         "vgetq_lane");
5700   case NEON::BI__builtin_neon_vget_lane_i16:
5701   case NEON::BI__builtin_neon_vduph_lane_i16:
5702     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5703     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5704                                         "vget_lane");
5705   case NEON::BI__builtin_neon_vgetq_lane_i16:
5706   case NEON::BI__builtin_neon_vduph_laneq_i16:
5707     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5708     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5709                                         "vgetq_lane");
5710   case NEON::BI__builtin_neon_vget_lane_i32:
5711   case NEON::BI__builtin_neon_vdups_lane_i32:
5712     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5713     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5714                                         "vget_lane");
5715   case NEON::BI__builtin_neon_vdups_lane_f32:
5716     Ops[0] = Builder.CreateBitCast(Ops[0],
5717         llvm::VectorType::get(FloatTy, 2));
5718     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5719                                         "vdups_lane");
5720   case NEON::BI__builtin_neon_vgetq_lane_i32:
5721   case NEON::BI__builtin_neon_vdups_laneq_i32:
5722     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5723     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5724                                         "vgetq_lane");
5725   case NEON::BI__builtin_neon_vget_lane_i64:
5726   case NEON::BI__builtin_neon_vdupd_lane_i64:
5727     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5728     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5729                                         "vget_lane");
5730   case NEON::BI__builtin_neon_vdupd_lane_f64:
5731     Ops[0] = Builder.CreateBitCast(Ops[0],
5732         llvm::VectorType::get(DoubleTy, 1));
5733     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5734                                         "vdupd_lane");
5735   case NEON::BI__builtin_neon_vgetq_lane_i64:
5736   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5737     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5738     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5739                                         "vgetq_lane");
5740   case NEON::BI__builtin_neon_vget_lane_f32:
5741     Ops[0] = Builder.CreateBitCast(Ops[0],
5742         llvm::VectorType::get(FloatTy, 2));
5743     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5744                                         "vget_lane");
5745   case NEON::BI__builtin_neon_vget_lane_f64:
5746     Ops[0] = Builder.CreateBitCast(Ops[0],
5747         llvm::VectorType::get(DoubleTy, 1));
5748     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5749                                         "vget_lane");
5750   case NEON::BI__builtin_neon_vgetq_lane_f32:
5751   case NEON::BI__builtin_neon_vdups_laneq_f32:
5752     Ops[0] = Builder.CreateBitCast(Ops[0],
5753         llvm::VectorType::get(FloatTy, 4));
5754     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5755                                         "vgetq_lane");
5756   case NEON::BI__builtin_neon_vgetq_lane_f64:
5757   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5758     Ops[0] = Builder.CreateBitCast(Ops[0],
5759         llvm::VectorType::get(DoubleTy, 2));
5760     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5761                                         "vgetq_lane");
5762   case NEON::BI__builtin_neon_vaddd_s64:
5763   case NEON::BI__builtin_neon_vaddd_u64:
5764     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5765   case NEON::BI__builtin_neon_vsubd_s64:
5766   case NEON::BI__builtin_neon_vsubd_u64:
5767     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
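  // The scalar saturating doubling multiply-accumulate builtins have no i16
  // scalar intrinsic, so they are emulated (sketch): wrap the i16 operands
  // into lane 0 of <4 x i16> vectors (vectorWrapScalar16), perform a vector
  // sqdmull, extract lane 0 of the i32 result, and finally sqadd/sqsub it
  // into the accumulator.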
5768   case NEON::BI__builtin_neon_vqdmlalh_s16:
5769   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5770     SmallVector<Value *, 2> ProductOps;
5771     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5772     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5773     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5774     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5775                           ProductOps, "vqdmlXl");
5776     Constant *CI = ConstantInt::get(SizeTy, 0);
5777     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5778
5779     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5780                                         ? Intrinsic::aarch64_neon_sqadd
5781                                         : Intrinsic::aarch64_neon_sqsub;
5782     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5783   }
5784   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5785     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5786     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5787     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5788                         Ops, "vqshlu_n");
5789   }
5790   case NEON::BI__builtin_neon_vqshld_n_u64:
5791   case NEON::BI__builtin_neon_vqshld_n_s64: {
5792     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5793                                    ? Intrinsic::aarch64_neon_uqshl
5794                                    : Intrinsic::aarch64_neon_sqshl;
5795     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5796     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5797     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5798   }
5799   case NEON::BI__builtin_neon_vrshrd_n_u64:
5800   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5801     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5802                                    ? Intrinsic::aarch64_neon_urshl
5803                                    : Intrinsic::aarch64_neon_srshl;
5804     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5805     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5806     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5807     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5808   }
5809   case NEON::BI__builtin_neon_vrsrad_n_u64:
5810   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5811     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5812                                    ? Intrinsic::aarch64_neon_urshl
5813                                    : Intrinsic::aarch64_neon_srshl;
5814     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5815     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5816     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5817                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5818     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5819   }
5820   case NEON::BI__builtin_neon_vshld_n_s64:
5821   case NEON::BI__builtin_neon_vshld_n_u64: {
5822     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5823     return Builder.CreateShl(
5824         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5825   }
5826   case NEON::BI__builtin_neon_vshrd_n_s64: {
5827     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5828     return Builder.CreateAShr(
5829         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5830                                                    Amt->getZExtValue())),
5831         "shrd_n");
5832   }
5833   case NEON::BI__builtin_neon_vshrd_n_u64: {
5834     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5835     uint64_t ShiftAmt = Amt->getZExtValue();
5836     // Right-shifting an unsigned value by its size yields 0.
5837     if (ShiftAmt == 64)
5838       return ConstantInt::get(Int64Ty, 0);
5839     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5840                               "shrd_n");
5841   }
5842   case NEON::BI__builtin_neon_vsrad_n_s64: {
5843     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5844     Ops[1] = Builder.CreateAShr(
5845         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5846                                                    Amt->getZExtValue())),
5847         "shrd_n");
5848     return Builder.CreateAdd(Ops[0], Ops[1]);
5849   }
5850   case NEON::BI__builtin_neon_vsrad_n_u64: {
5851     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5852     uint64_t ShiftAmt = Amt->getZExtValue();
5853     // Right-shifting an unsigned value by its size yields 0.
5854     // As Op + 0 = Op, return Ops[0] directly.
5855     if (ShiftAmt == 64)
5856       return Ops[0];
5857     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5858                                 "shrd_n");
5859     return Builder.CreateAdd(Ops[0], Ops[1]);
5860   }
5861   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5862   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5863   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5864   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5865     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5866                                           "lane");
5867     SmallVector<Value *, 2> ProductOps;
5868     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5869     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5870     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5871     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5872                           ProductOps, "vqdmlXl");
5873     Constant *CI = ConstantInt::get(SizeTy, 0);
5874     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5875     Ops.pop_back();
5876
5877     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5878                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5879                           ? Intrinsic::aarch64_neon_sqadd
5880                           : Intrinsic::aarch64_neon_sqsub;
5881     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5882   }
5883   case NEON::BI__builtin_neon_vqdmlals_s32:
5884   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5885     SmallVector<Value *, 2> ProductOps;
5886     ProductOps.push_back(Ops[1]);
5887     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5888     Ops[1] =
5889         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5890                      ProductOps, "vqdmlXl");
5891
5892     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5893                                         ? Intrinsic::aarch64_neon_sqadd
5894                                         : Intrinsic::aarch64_neon_sqsub;
5895     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5896   }
5897   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5898   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5899   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5900   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5901     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5902                                           "lane");
5903     SmallVector<Value *, 2> ProductOps;
5904     ProductOps.push_back(Ops[1]);
5905     ProductOps.push_back(Ops[2]);
5906     Ops[1] =
5907         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5908                      ProductOps, "vqdmlXl");
5909     Ops.pop_back();
5910
5911     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5912                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5913                           ? Intrinsic::aarch64_neon_sqadd
5914                           : Intrinsic::aarch64_neon_sqsub;
5915     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5916   }
5917   }
5918
5919   llvm::VectorType *VTy = GetNeonType(this, Type);
5920   llvm::Type *Ty = VTy;
5921   if (!Ty)
5922     return nullptr;
5923
5924   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5925   // defer to common code if it's been added to our special map.
5926   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5927                                    AArch64SIMDIntrinsicsProvenSorted);
5928
5929   if (Builtin)
5930     return EmitCommonNeonBuiltinExpr(
5931         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5932         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5933         /*never use addresses*/ Address::invalid(), Address::invalid());
5934
5935   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5936     return V;
5937
5938   unsigned Int;
5939   switch (BuiltinID) {
5940   default: return nullptr;
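  // vbsl is emitted as plain bitwise IR -- (mask & a) | (~mask & b) -- rather
  // than an intrinsic; the backend can typically pattern-match this back into
  // a BSL/BIT/BIF instruction.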
5941   case NEON::BI__builtin_neon_vbsl_v:
5942   case NEON::BI__builtin_neon_vbslq_v: {
5943     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5944     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5945     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5946     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5947
5948     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5949     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5950     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5951     return Builder.CreateBitCast(Ops[0], Ty);
5952   }
5953   case NEON::BI__builtin_neon_vfma_lane_v:
5954   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5955     // The ARM builtins (and instructions) have the addend as the first
5956     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5957     Value *Addend = Ops[0];
5958     Value *Multiplicand = Ops[1];
5959     Value *LaneSource = Ops[2];
5960     Ops[0] = Multiplicand;
5961     Ops[1] = LaneSource;
5962     Ops[2] = Addend;
5963
5964     // Now adjust things to handle the lane access.
5965     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5966       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5967       VTy;
5968     llvm::Constant *cst = cast<Constant>(Ops[3]);
5969     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5970     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5971     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5972
5973     Ops.pop_back();
5974     Int = Intrinsic::fma;
5975     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5976   }
5977   case NEON::BI__builtin_neon_vfma_laneq_v: {
5978     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5979     // v1f64 fma should be mapped to Neon scalar f64 fma
5980     if (VTy && VTy->getElementType() == DoubleTy) {
5981       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5982       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5983       llvm::Type *VTy = GetNeonType(this,
5984         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5985       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5986       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5987       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5988       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5989       return Builder.CreateBitCast(Result, Ty);
5990     }
5991     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5992     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5993     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5994
5995     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5996                                             VTy->getNumElements() * 2);
5997     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5998     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5999                                                cast<ConstantInt>(Ops[3]));
6000     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6001
6002     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6003   }
6004   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6005     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6006     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6007     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6008
6009     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6010     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6011     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6012   }
6013   case NEON::BI__builtin_neon_vfmas_lane_f32:
6014   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6015   case NEON::BI__builtin_neon_vfmad_lane_f64:
6016   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
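    // Scalar FMA by lane: the lane index (arg 3) selects one element of the
    // vector multiplicand, which is extracted and fed to llvm.fma with the
    // addend (Ops[0]) as the last operand.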
6017     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6018     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6019     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6020     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6021     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6022   }
6023   case NEON::BI__builtin_neon_vmull_v:
6024     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6025     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6026     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6027     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6028   case NEON::BI__builtin_neon_vmax_v:
6029   case NEON::BI__builtin_neon_vmaxq_v:
6030     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6031     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6032     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6033     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6034   case NEON::BI__builtin_neon_vmin_v:
6035   case NEON::BI__builtin_neon_vminq_v:
6036     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6037     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6038     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6039     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6040   case NEON::BI__builtin_neon_vabd_v:
6041   case NEON::BI__builtin_neon_vabdq_v:
6042     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6043     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6044     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6045     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6046   case NEON::BI__builtin_neon_vpadal_v:
6047   case NEON::BI__builtin_neon_vpadalq_v: {
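    // vpadal (pairwise add and accumulate long) has no single intrinsic: do
    // the pairwise widening add with [su]addlp, then an ordinary vector add
    // into the accumulator.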
6048     unsigned ArgElts = VTy->getNumElements();
6049     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6050     unsigned BitWidth = EltTy->getBitWidth();
6051     llvm::Type *ArgTy = llvm::VectorType::get(
6052         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6053     llvm::Type* Tys[2] = { VTy, ArgTy };
6054     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6055     SmallVector<llvm::Value*, 1> TmpOps;
6056     TmpOps.push_back(Ops[1]);
6057     Function *F = CGM.getIntrinsic(Int, Tys);
6058     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6059     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6060     return Builder.CreateAdd(tmp, addend);
6061   }
6062   case NEON::BI__builtin_neon_vpmin_v:
6063   case NEON::BI__builtin_neon_vpminq_v:
6064     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6065     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6066     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6067     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6068   case NEON::BI__builtin_neon_vpmax_v:
6069   case NEON::BI__builtin_neon_vpmaxq_v:
6070     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6071     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6072     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6073     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6074   case NEON::BI__builtin_neon_vminnm_v:
6075   case NEON::BI__builtin_neon_vminnmq_v:
6076     Int = Intrinsic::aarch64_neon_fminnm;
6077     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6078   case NEON::BI__builtin_neon_vmaxnm_v:
6079   case NEON::BI__builtin_neon_vmaxnmq_v:
6080     Int = Intrinsic::aarch64_neon_fmaxnm;
6081     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6082   case NEON::BI__builtin_neon_vrecpss_f32: {
6083     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6084     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6085                         Ops, "vrecps");
6086   }
6087   case NEON::BI__builtin_neon_vrecpsd_f64: {
6088     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6089     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6090                         Ops, "vrecps");
6091   }
6092   case NEON::BI__builtin_neon_vqshrun_n_v:
6093     Int = Intrinsic::aarch64_neon_sqshrun;
6094     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6095   case NEON::BI__builtin_neon_vqrshrun_n_v:
6096     Int = Intrinsic::aarch64_neon_sqrshrun;
6097     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6098   case NEON::BI__builtin_neon_vqshrn_n_v:
6099     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6100     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6101   case NEON::BI__builtin_neon_vrshrn_n_v:
6102     Int = Intrinsic::aarch64_neon_rshrn;
6103     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6104   case NEON::BI__builtin_neon_vqrshrn_n_v:
6105     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6106     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6107   case NEON::BI__builtin_neon_vrnda_v:
6108   case NEON::BI__builtin_neon_vrndaq_v: {
6109     Int = Intrinsic::round;
6110     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6111   }
6112   case NEON::BI__builtin_neon_vrndi_v:
6113   case NEON::BI__builtin_neon_vrndiq_v: {
6114     Int = Intrinsic::nearbyint;
6115     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6116   }
6117   case NEON::BI__builtin_neon_vrndm_v:
6118   case NEON::BI__builtin_neon_vrndmq_v: {
6119     Int = Intrinsic::floor;
6120     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6121   }
6122   case NEON::BI__builtin_neon_vrndn_v:
6123   case NEON::BI__builtin_neon_vrndnq_v: {
6124     Int = Intrinsic::aarch64_neon_frintn;
6125     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6126   }
6127   case NEON::BI__builtin_neon_vrndp_v:
6128   case NEON::BI__builtin_neon_vrndpq_v: {
6129     Int = Intrinsic::ceil;
6130     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6131   }
6132   case NEON::BI__builtin_neon_vrndx_v:
6133   case NEON::BI__builtin_neon_vrndxq_v: {
6134     Int = Intrinsic::rint;
6135     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6136   }
6137   case NEON::BI__builtin_neon_vrnd_v:
6138   case NEON::BI__builtin_neon_vrndq_v: {
6139     Int = Intrinsic::trunc;
6140     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6141   }
6142   case NEON::BI__builtin_neon_vceqz_v:
6143   case NEON::BI__builtin_neon_vceqzq_v:
6144     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6145                                          ICmpInst::ICMP_EQ, "vceqz");
6146   case NEON::BI__builtin_neon_vcgez_v:
6147   case NEON::BI__builtin_neon_vcgezq_v:
6148     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6149                                          ICmpInst::ICMP_SGE, "vcgez");
6150   case NEON::BI__builtin_neon_vclez_v:
6151   case NEON::BI__builtin_neon_vclezq_v:
6152     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6153                                          ICmpInst::ICMP_SLE, "vclez");
6154   case NEON::BI__builtin_neon_vcgtz_v:
6155   case NEON::BI__builtin_neon_vcgtzq_v:
6156     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6157                                          ICmpInst::ICMP_SGT, "vcgtz");
6158   case NEON::BI__builtin_neon_vcltz_v:
6159   case NEON::BI__builtin_neon_vcltzq_v:
6160     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6161                                          ICmpInst::ICMP_SLT, "vcltz");
6162   case NEON::BI__builtin_neon_vcvt_f64_v:
6163   case NEON::BI__builtin_neon_vcvtq_f64_v:
6164     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6165     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6166     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6167                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6168   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6169     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6170            "unexpected vcvt_f64_f32 builtin");
6171     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6172     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6173
6174     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6175   }
6176   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6177     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6178            "unexpected vcvt_f32_f64 builtin");
6179     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6180     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6181
6182     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6183   }
6184   case NEON::BI__builtin_neon_vcvt_s32_v:
6185   case NEON::BI__builtin_neon_vcvt_u32_v:
6186   case NEON::BI__builtin_neon_vcvt_s64_v:
6187   case NEON::BI__builtin_neon_vcvt_u64_v:
6188   case NEON::BI__builtin_neon_vcvtq_s32_v:
6189   case NEON::BI__builtin_neon_vcvtq_u32_v:
6190   case NEON::BI__builtin_neon_vcvtq_s64_v:
6191   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6192     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6193     if (usgn)
6194       return Builder.CreateFPToUI(Ops[0], Ty);
6195     return Builder.CreateFPToSI(Ops[0], Ty);
6196   }
6197   case NEON::BI__builtin_neon_vcvta_s32_v:
6198   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6199   case NEON::BI__builtin_neon_vcvta_u32_v:
6200   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6201   case NEON::BI__builtin_neon_vcvta_s64_v:
6202   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6203   case NEON::BI__builtin_neon_vcvta_u64_v:
6204   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
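    // vcvta* converts with round-to-nearest, ties away from zero; the vcvtm*,
    // vcvtn* and vcvtp* cases below round toward minus infinity, to nearest
    // (ties to even) and toward plus infinity, respectively.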
6205     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6206     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6207     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6208   }
6209   case NEON::BI__builtin_neon_vcvtm_s32_v:
6210   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6211   case NEON::BI__builtin_neon_vcvtm_u32_v:
6212   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6213   case NEON::BI__builtin_neon_vcvtm_s64_v:
6214   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6215   case NEON::BI__builtin_neon_vcvtm_u64_v:
6216   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6217     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6218     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6219     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6220   }
6221   case NEON::BI__builtin_neon_vcvtn_s32_v:
6222   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6223   case NEON::BI__builtin_neon_vcvtn_u32_v:
6224   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6225   case NEON::BI__builtin_neon_vcvtn_s64_v:
6226   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6227   case NEON::BI__builtin_neon_vcvtn_u64_v:
6228   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6229     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6230     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6231     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6232   }
6233   case NEON::BI__builtin_neon_vcvtp_s32_v:
6234   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6235   case NEON::BI__builtin_neon_vcvtp_u32_v:
6236   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6237   case NEON::BI__builtin_neon_vcvtp_s64_v:
6238   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6239   case NEON::BI__builtin_neon_vcvtp_u64_v:
6240   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6241     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6242     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6243     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6244   }
6245   case NEON::BI__builtin_neon_vmulx_v:
6246   case NEON::BI__builtin_neon_vmulxq_v: {
6247     Int = Intrinsic::aarch64_neon_fmulx;
6248     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6249   }
6250   case NEON::BI__builtin_neon_vmul_lane_v:
6251   case NEON::BI__builtin_neon_vmul_laneq_v: {
6252     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6253     bool Quad = false;
6254     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6255       Quad = true;
6256     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6257     llvm::Type *VTy = GetNeonType(this,
6258       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6259     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6260     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6261     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6262     return Builder.CreateBitCast(Result, Ty);
6263   }
6264   case NEON::BI__builtin_neon_vnegd_s64:
6265     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6266   case NEON::BI__builtin_neon_vpmaxnm_v:
6267   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6268     Int = Intrinsic::aarch64_neon_fmaxnmp;
6269     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6270   }
6271   case NEON::BI__builtin_neon_vpminnm_v:
6272   case NEON::BI__builtin_neon_vpminnmq_v: {
6273     Int = Intrinsic::aarch64_neon_fminnmp;
6274     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6275   }
6276   case NEON::BI__builtin_neon_vsqrt_v:
6277   case NEON::BI__builtin_neon_vsqrtq_v: {
6278     Int = Intrinsic::sqrt;
6279     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6280     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6281   }
6282   case NEON::BI__builtin_neon_vrbit_v:
6283   case NEON::BI__builtin_neon_vrbitq_v: {
6284     Int = Intrinsic::aarch64_neon_rbit;
6285     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6286   }
6287   case NEON::BI__builtin_neon_vaddv_u8:
6288     // FIXME: These are handled by the AArch64 scalar code.
6289     usgn = true;
6290     // FALLTHROUGH
6291   case NEON::BI__builtin_neon_vaddv_s8: {
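    // Across-vector reduction: the aarch64.neon.[su]addv intrinsic is called
    // with an i32 result type, so the value is truncated back to the element
    // width. The remaining vaddv/vmaxv/vminv/vaddlv cases follow this pattern.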
6292     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6293     Ty = Int32Ty;
6294     VTy = llvm::VectorType::get(Int8Ty, 8);
6295     llvm::Type *Tys[2] = { Ty, VTy };
6296     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6297     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6298     return Builder.CreateTrunc(Ops[0], Int8Ty);
6299   }
6300   case NEON::BI__builtin_neon_vaddv_u16:
6301     usgn = true;
6302     // FALLTHROUGH
6303   case NEON::BI__builtin_neon_vaddv_s16: {
6304     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6305     Ty = Int32Ty;
6306     VTy = llvm::VectorType::get(Int16Ty, 4);
6307     llvm::Type *Tys[2] = { Ty, VTy };
6308     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6309     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6310     return Builder.CreateTrunc(Ops[0], Int16Ty);
6311   }
6312   case NEON::BI__builtin_neon_vaddvq_u8:
6313     usgn = true;
6314     // FALLTHROUGH
6315   case NEON::BI__builtin_neon_vaddvq_s8: {
6316     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6317     Ty = Int32Ty;
6318     VTy = llvm::VectorType::get(Int8Ty, 16);
6319     llvm::Type *Tys[2] = { Ty, VTy };
6320     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6321     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6322     return Builder.CreateTrunc(Ops[0], Int8Ty);
6323   }
6324   case NEON::BI__builtin_neon_vaddvq_u16:
6325     usgn = true;
6326     // FALLTHROUGH
6327   case NEON::BI__builtin_neon_vaddvq_s16: {
6328     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6329     Ty = Int32Ty;
6330     VTy = llvm::VectorType::get(Int16Ty, 8);
6331     llvm::Type *Tys[2] = { Ty, VTy };
6332     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6333     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6334     return Builder.CreateTrunc(Ops[0], Int16Ty);
6335   }
6336   case NEON::BI__builtin_neon_vmaxv_u8: {
6337     Int = Intrinsic::aarch64_neon_umaxv;
6338     Ty = Int32Ty;
6339     VTy = llvm::VectorType::get(Int8Ty, 8);
6340     llvm::Type *Tys[2] = { Ty, VTy };
6341     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6342     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6343     return Builder.CreateTrunc(Ops[0], Int8Ty);
6344   }
6345   case NEON::BI__builtin_neon_vmaxv_u16: {
6346     Int = Intrinsic::aarch64_neon_umaxv;
6347     Ty = Int32Ty;
6348     VTy = llvm::VectorType::get(Int16Ty, 4);
6349     llvm::Type *Tys[2] = { Ty, VTy };
6350     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6351     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6352     return Builder.CreateTrunc(Ops[0], Int16Ty);
6353   }
6354   case NEON::BI__builtin_neon_vmaxvq_u8: {
6355     Int = Intrinsic::aarch64_neon_umaxv;
6356     Ty = Int32Ty;
6357     VTy = llvm::VectorType::get(Int8Ty, 16);
6358     llvm::Type *Tys[2] = { Ty, VTy };
6359     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6360     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6361     return Builder.CreateTrunc(Ops[0], Int8Ty);
6362   }
6363   case NEON::BI__builtin_neon_vmaxvq_u16: {
6364     Int = Intrinsic::aarch64_neon_umaxv;
6365     Ty = Int32Ty;
6366     VTy = llvm::VectorType::get(Int16Ty, 8);
6367     llvm::Type *Tys[2] = { Ty, VTy };
6368     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6369     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6370     return Builder.CreateTrunc(Ops[0], Int16Ty);
6371   }
6372   case NEON::BI__builtin_neon_vmaxv_s8: {
6373     Int = Intrinsic::aarch64_neon_smaxv;
6374     Ty = Int32Ty;
6375     VTy = llvm::VectorType::get(Int8Ty, 8);
6376     llvm::Type *Tys[2] = { Ty, VTy };
6377     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6378     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6379     return Builder.CreateTrunc(Ops[0], Int8Ty);
6380   }
6381   case NEON::BI__builtin_neon_vmaxv_s16: {
6382     Int = Intrinsic::aarch64_neon_smaxv;
6383     Ty = Int32Ty;
6384     VTy = llvm::VectorType::get(Int16Ty, 4);
6385     llvm::Type *Tys[2] = { Ty, VTy };
6386     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6387     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6388     return Builder.CreateTrunc(Ops[0], Int16Ty);
6389   }
6390   case NEON::BI__builtin_neon_vmaxvq_s8: {
6391     Int = Intrinsic::aarch64_neon_smaxv;
6392     Ty = Int32Ty;
6393     VTy = llvm::VectorType::get(Int8Ty, 16);
6394     llvm::Type *Tys[2] = { Ty, VTy };
6395     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6396     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6397     return Builder.CreateTrunc(Ops[0], Int8Ty);
6398   }
6399   case NEON::BI__builtin_neon_vmaxvq_s16: {
6400     Int = Intrinsic::aarch64_neon_smaxv;
6401     Ty = Int32Ty;
6402     VTy = llvm::VectorType::get(Int16Ty, 8);
6403     llvm::Type *Tys[2] = { Ty, VTy };
6404     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6405     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6406     return Builder.CreateTrunc(Ops[0], Int16Ty);
6407   }
6408   case NEON::BI__builtin_neon_vminv_u8: {
6409     Int = Intrinsic::aarch64_neon_uminv;
6410     Ty = Int32Ty;
6411     VTy = llvm::VectorType::get(Int8Ty, 8);
6412     llvm::Type *Tys[2] = { Ty, VTy };
6413     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6414     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6415     return Builder.CreateTrunc(Ops[0], Int8Ty);
6416   }
6417   case NEON::BI__builtin_neon_vminv_u16: {
6418     Int = Intrinsic::aarch64_neon_uminv;
6419     Ty = Int32Ty;
6420     VTy = llvm::VectorType::get(Int16Ty, 4);
6421     llvm::Type *Tys[2] = { Ty, VTy };
6422     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6423     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6424     return Builder.CreateTrunc(Ops[0], Int16Ty);
6425   }
6426   case NEON::BI__builtin_neon_vminvq_u8: {
6427     Int = Intrinsic::aarch64_neon_uminv;
6428     Ty = Int32Ty;
6429     VTy = llvm::VectorType::get(Int8Ty, 16);
6430     llvm::Type *Tys[2] = { Ty, VTy };
6431     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6432     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6433     return Builder.CreateTrunc(Ops[0], Int8Ty);
6434   }
6435   case NEON::BI__builtin_neon_vminvq_u16: {
6436     Int = Intrinsic::aarch64_neon_uminv;
6437     Ty = Int32Ty;
6438     VTy = llvm::VectorType::get(Int16Ty, 8);
6439     llvm::Type *Tys[2] = { Ty, VTy };
6440     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6441     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6442     return Builder.CreateTrunc(Ops[0], Int16Ty);
6443   }
6444   case NEON::BI__builtin_neon_vminv_s8: {
6445     Int = Intrinsic::aarch64_neon_sminv;
6446     Ty = Int32Ty;
6447     VTy = llvm::VectorType::get(Int8Ty, 8);
6448     llvm::Type *Tys[2] = { Ty, VTy };
6449     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6450     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6451     return Builder.CreateTrunc(Ops[0], Int8Ty);
6452   }
6453   case NEON::BI__builtin_neon_vminv_s16: {
6454     Int = Intrinsic::aarch64_neon_sminv;
6455     Ty = Int32Ty;
6456     VTy = llvm::VectorType::get(Int16Ty, 4);
6457     llvm::Type *Tys[2] = { Ty, VTy };
6458     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6459     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6460     return Builder.CreateTrunc(Ops[0], Int16Ty);
6461   }
6462   case NEON::BI__builtin_neon_vminvq_s8: {
6463     Int = Intrinsic::aarch64_neon_sminv;
6464     Ty = Int32Ty;
6465     VTy = llvm::VectorType::get(Int8Ty, 16);
6466     llvm::Type *Tys[2] = { Ty, VTy };
6467     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6468     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6469     return Builder.CreateTrunc(Ops[0], Int8Ty);
6470   }
6471   case NEON::BI__builtin_neon_vminvq_s16: {
6472     Int = Intrinsic::aarch64_neon_sminv;
6473     Ty = Int32Ty;
6474     VTy = llvm::VectorType::get(Int16Ty, 8);
6475     llvm::Type *Tys[2] = { Ty, VTy };
6476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6477     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6478     return Builder.CreateTrunc(Ops[0], Int16Ty);
6479   }
6480   case NEON::BI__builtin_neon_vmul_n_f64: {
6481     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6482     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6483     return Builder.CreateFMul(Ops[0], RHS);
6484   }
6485   case NEON::BI__builtin_neon_vaddlv_u8: {
6486     Int = Intrinsic::aarch64_neon_uaddlv;
6487     Ty = Int32Ty;
6488     VTy = llvm::VectorType::get(Int8Ty, 8);
6489     llvm::Type *Tys[2] = { Ty, VTy };
6490     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6491     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6492     return Builder.CreateTrunc(Ops[0], Int16Ty);
6493   }
6494   case NEON::BI__builtin_neon_vaddlv_u16: {
6495     Int = Intrinsic::aarch64_neon_uaddlv;
6496     Ty = Int32Ty;
6497     VTy = llvm::VectorType::get(Int16Ty, 4);
6498     llvm::Type *Tys[2] = { Ty, VTy };
6499     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6500     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6501   }
6502   case NEON::BI__builtin_neon_vaddlvq_u8: {
6503     Int = Intrinsic::aarch64_neon_uaddlv;
6504     Ty = Int32Ty;
6505     VTy = llvm::VectorType::get(Int8Ty, 16);
6506     llvm::Type *Tys[2] = { Ty, VTy };
6507     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6508     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6509     return Builder.CreateTrunc(Ops[0], Int16Ty);
6510   }
6511   case NEON::BI__builtin_neon_vaddlvq_u16: {
6512     Int = Intrinsic::aarch64_neon_uaddlv;
6513     Ty = Int32Ty;
6514     VTy = llvm::VectorType::get(Int16Ty, 8);
6515     llvm::Type *Tys[2] = { Ty, VTy };
6516     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6517     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6518   }
6519   case NEON::BI__builtin_neon_vaddlv_s8: {
6520     Int = Intrinsic::aarch64_neon_saddlv;
6521     Ty = Int32Ty;
6522     VTy = llvm::VectorType::get(Int8Ty, 8);
6523     llvm::Type *Tys[2] = { Ty, VTy };
6524     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6525     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6526     return Builder.CreateTrunc(Ops[0], Int16Ty);
6527   }
6528   case NEON::BI__builtin_neon_vaddlv_s16: {
6529     Int = Intrinsic::aarch64_neon_saddlv;
6530     Ty = Int32Ty;
6531     VTy = llvm::VectorType::get(Int16Ty, 4);
6532     llvm::Type *Tys[2] = { Ty, VTy };
6533     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6534     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6535   }
6536   case NEON::BI__builtin_neon_vaddlvq_s8: {
6537     Int = Intrinsic::aarch64_neon_saddlv;
6538     Ty = Int32Ty;
6539     VTy = llvm::VectorType::get(Int8Ty, 16);
6540     llvm::Type *Tys[2] = { Ty, VTy };
6541     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6542     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6543     return Builder.CreateTrunc(Ops[0], Int16Ty);
6544   }
6545   case NEON::BI__builtin_neon_vaddlvq_s16: {
6546     Int = Intrinsic::aarch64_neon_saddlv;
6547     Ty = Int32Ty;
6548     VTy = llvm::VectorType::get(Int16Ty, 8);
6549     llvm::Type *Tys[2] = { Ty, VTy };
6550     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6551     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6552   }
6553   case NEON::BI__builtin_neon_vsri_n_v:
6554   case NEON::BI__builtin_neon_vsriq_n_v: {
6555     Int = Intrinsic::aarch64_neon_vsri;
6556     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6557     return EmitNeonCall(Intrin, Ops, "vsri_n");
6558   }
6559   case NEON::BI__builtin_neon_vsli_n_v:
6560   case NEON::BI__builtin_neon_vsliq_n_v: {
6561     Int = Intrinsic::aarch64_neon_vsli;
6562     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6563     return EmitNeonCall(Intrin, Ops, "vsli_n");
6564   }
6565   case NEON::BI__builtin_neon_vsra_n_v:
6566   case NEON::BI__builtin_neon_vsraq_n_v:
6567     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6568     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6569     return Builder.CreateAdd(Ops[0], Ops[1]);
6570   case NEON::BI__builtin_neon_vrsra_n_v:
6571   case NEON::BI__builtin_neon_vrsraq_n_v: {
6572     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6573     SmallVector<llvm::Value*,2> TmpOps;
6574     TmpOps.push_back(Ops[1]);
6575     TmpOps.push_back(Ops[2]);
6576     Function* F = CGM.getIntrinsic(Int, Ty);
6577     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6578     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6579     return Builder.CreateAdd(Ops[0], tmp);
6580   }
6581     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6582     // of an Align parameter here.
6583   case NEON::BI__builtin_neon_vld1_x2_v:
6584   case NEON::BI__builtin_neon_vld1q_x2_v:
6585   case NEON::BI__builtin_neon_vld1_x3_v:
6586   case NEON::BI__builtin_neon_vld1q_x3_v:
6587   case NEON::BI__builtin_neon_vld1_x4_v:
6588   case NEON::BI__builtin_neon_vld1q_x4_v: {
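    // The ld1xN intrinsics return a struct of N vectors; call the intrinsic on
    // the source pointer and store the whole aggregate through the result
    // pointer passed in Ops[0].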
6589     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6590     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6591     llvm::Type *Tys[2] = { VTy, PTy };
6592     unsigned Int;
6593     switch (BuiltinID) {
6594     case NEON::BI__builtin_neon_vld1_x2_v:
6595     case NEON::BI__builtin_neon_vld1q_x2_v:
6596       Int = Intrinsic::aarch64_neon_ld1x2;
6597       break;
6598     case NEON::BI__builtin_neon_vld1_x3_v:
6599     case NEON::BI__builtin_neon_vld1q_x3_v:
6600       Int = Intrinsic::aarch64_neon_ld1x3;
6601       break;
6602     case NEON::BI__builtin_neon_vld1_x4_v:
6603     case NEON::BI__builtin_neon_vld1q_x4_v:
6604       Int = Intrinsic::aarch64_neon_ld1x4;
6605       break;
6606     }
6607     Function *F = CGM.getIntrinsic(Int, Tys);
6608     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6609     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6610     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6611     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6612   }
6613   case NEON::BI__builtin_neon_vst1_x2_v:
6614   case NEON::BI__builtin_neon_vst1q_x2_v:
6615   case NEON::BI__builtin_neon_vst1_x3_v:
6616   case NEON::BI__builtin_neon_vst1q_x3_v:
6617   case NEON::BI__builtin_neon_vst1_x4_v:
6618   case NEON::BI__builtin_neon_vst1q_x4_v: {
6619     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6620     llvm::Type *Tys[2] = { VTy, PTy };
6621     unsigned Int;
6622     switch (BuiltinID) {
6623     case NEON::BI__builtin_neon_vst1_x2_v:
6624     case NEON::BI__builtin_neon_vst1q_x2_v:
6625       Int = Intrinsic::aarch64_neon_st1x2;
6626       break;
6627     case NEON::BI__builtin_neon_vst1_x3_v:
6628     case NEON::BI__builtin_neon_vst1q_x3_v:
6629       Int = Intrinsic::aarch64_neon_st1x3;
6630       break;
6631     case NEON::BI__builtin_neon_vst1_x4_v:
6632     case NEON::BI__builtin_neon_vst1q_x4_v:
6633       Int = Intrinsic::aarch64_neon_st1x4;
6634       break;
6635     }
6636     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6637     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6638   }
6639   case NEON::BI__builtin_neon_vld1_v:
6640   case NEON::BI__builtin_neon_vld1q_v: {
6641     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6642     auto Alignment = CharUnits::fromQuantity(
6643         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6644     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6645   }
6646   case NEON::BI__builtin_neon_vst1_v:
6647   case NEON::BI__builtin_neon_vst1q_v:
6648     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6649     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6650     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6651   case NEON::BI__builtin_neon_vld1_lane_v:
6652   case NEON::BI__builtin_neon_vld1q_lane_v: {
6653     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6654     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6655     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6656     auto Alignment = CharUnits::fromQuantity(
6657         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6658     Ops[0] =
6659         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6660     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6661   }
6662   case NEON::BI__builtin_neon_vld1_dup_v:
6663   case NEON::BI__builtin_neon_vld1q_dup_v: {
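    // Load-and-duplicate: load a single element, insert it into lane 0 of an
    // undef vector, then splat it across all lanes with a shufflevector.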
6664     Value *V = UndefValue::get(Ty);
6665     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6666     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6667     auto Alignment = CharUnits::fromQuantity(
6668         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6669     Ops[0] =
6670         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6671     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6672     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6673     return EmitNeonSplat(Ops[0], CI);
6674   }
6675   case NEON::BI__builtin_neon_vst1_lane_v:
6676   case NEON::BI__builtin_neon_vst1q_lane_v:
6677     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6678     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6679     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6680     return Builder.CreateDefaultAlignedStore(Ops[1],
6681                                              Builder.CreateBitCast(Ops[0], Ty));
6682   case NEON::BI__builtin_neon_vld2_v:
6683   case NEON::BI__builtin_neon_vld2q_v: {
6684     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6685     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6686     llvm::Type *Tys[2] = { VTy, PTy };
6687     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6688     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6689     Ops[0] = Builder.CreateBitCast(Ops[0],
6690                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6691     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6692   }
6693   case NEON::BI__builtin_neon_vld3_v:
6694   case NEON::BI__builtin_neon_vld3q_v: {
6695     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6696     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6697     llvm::Type *Tys[2] = { VTy, PTy };
6698     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6699     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6700     Ops[0] = Builder.CreateBitCast(Ops[0],
6701                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6702     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6703   }
6704   case NEON::BI__builtin_neon_vld4_v:
6705   case NEON::BI__builtin_neon_vld4q_v: {
6706     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6707     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6708     llvm::Type *Tys[2] = { VTy, PTy };
6709     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6710     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6711     Ops[0] = Builder.CreateBitCast(Ops[0],
6712                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6713     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6714   }
6715   case NEON::BI__builtin_neon_vld2_dup_v:
6716   case NEON::BI__builtin_neon_vld2q_dup_v: {
6717     llvm::Type *PTy =
6718       llvm::PointerType::getUnqual(VTy->getElementType());
6719     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6720     llvm::Type *Tys[2] = { VTy, PTy };
6721     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6722     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6723     Ops[0] = Builder.CreateBitCast(Ops[0],
6724                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6725     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6726   }
6727   case NEON::BI__builtin_neon_vld3_dup_v:
6728   case NEON::BI__builtin_neon_vld3q_dup_v: {
6729     llvm::Type *PTy =
6730       llvm::PointerType::getUnqual(VTy->getElementType());
6731     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6732     llvm::Type *Tys[2] = { VTy, PTy };
6733     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6734     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6735     Ops[0] = Builder.CreateBitCast(Ops[0],
6736                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6737     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6738   }
6739   case NEON::BI__builtin_neon_vld4_dup_v:
6740   case NEON::BI__builtin_neon_vld4q_dup_v: {
6741     llvm::Type *PTy =
6742       llvm::PointerType::getUnqual(VTy->getElementType());
6743     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6744     llvm::Type *Tys[2] = { VTy, PTy };
6745     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6746     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6747     Ops[0] = Builder.CreateBitCast(Ops[0],
6748                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6749     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6750   }
6751   case NEON::BI__builtin_neon_vld2_lane_v:
6752   case NEON::BI__builtin_neon_vld2q_lane_v: {
6753     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6754     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6755     Ops.push_back(Ops[1]);
6756     Ops.erase(Ops.begin()+1);
6757     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6758     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6759     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6760     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6761     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6762     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6763     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6764   }
6765   case NEON::BI__builtin_neon_vld3_lane_v:
6766   case NEON::BI__builtin_neon_vld3q_lane_v: {
6767     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6768     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6769     Ops.push_back(Ops[1]);
6770     Ops.erase(Ops.begin()+1);
6771     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6772     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6773     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6774     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6775     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6776     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6777     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6778     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6779   }
6780   case NEON::BI__builtin_neon_vld4_lane_v:
6781   case NEON::BI__builtin_neon_vld4q_lane_v: {
6782     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6783     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6784     Ops.push_back(Ops[1]);
6785     Ops.erase(Ops.begin()+1);
6786     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6787     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6788     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6789     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6790     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6791     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6792     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6793     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6794     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6795   }
6796   case NEON::BI__builtin_neon_vst2_v:
6797   case NEON::BI__builtin_neon_vst2q_v: {
6798     Ops.push_back(Ops[0]);
6799     Ops.erase(Ops.begin());
6800     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6801     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6802                         Ops, "");
6803   }
6804   case NEON::BI__builtin_neon_vst2_lane_v:
6805   case NEON::BI__builtin_neon_vst2q_lane_v: {
6806     Ops.push_back(Ops[0]);
6807     Ops.erase(Ops.begin());
6808     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6809     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6810     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6811                         Ops, "");
6812   }
6813   case NEON::BI__builtin_neon_vst3_v:
6814   case NEON::BI__builtin_neon_vst3q_v: {
6815     Ops.push_back(Ops[0]);
6816     Ops.erase(Ops.begin());
6817     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6818     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6819                         Ops, "");
6820   }
6821   case NEON::BI__builtin_neon_vst3_lane_v:
6822   case NEON::BI__builtin_neon_vst3q_lane_v: {
6823     Ops.push_back(Ops[0]);
6824     Ops.erase(Ops.begin());
6825     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6826     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6827     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6828                         Ops, "");
6829   }
6830   case NEON::BI__builtin_neon_vst4_v:
6831   case NEON::BI__builtin_neon_vst4q_v: {
6832     Ops.push_back(Ops[0]);
6833     Ops.erase(Ops.begin());
6834     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6835     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6836                         Ops, "");
6837   }
6838   case NEON::BI__builtin_neon_vst4_lane_v:
6839   case NEON::BI__builtin_neon_vst4q_lane_v: {
6840     Ops.push_back(Ops[0]);
6841     Ops.erase(Ops.begin());
6842     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6843     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6844     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6845                         Ops, "");
6846   }
6847   case NEON::BI__builtin_neon_vtrn_v:
6848   case NEON::BI__builtin_neon_vtrnq_v: {
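    // vtrn writes a pair of result vectors through the pointer in Ops[0]: each
    // half of the transpose is formed by a shufflevector and stored to the
    // corresponding slot. The vuzp (unzip) and vzip (zip) cases below use the
    // same store-through-pointer pattern with different shuffle masks.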
6849     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6850     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6851     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6852     Value *SV = nullptr;
6853
6854     for (unsigned vi = 0; vi != 2; ++vi) {
6855       SmallVector<uint32_t, 16> Indices;
6856       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6857         Indices.push_back(i+vi);
6858         Indices.push_back(i+e+vi);
6859       }
6860       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6861       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6862       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6863     }
6864     return SV;
6865   }
6866   case NEON::BI__builtin_neon_vuzp_v:
6867   case NEON::BI__builtin_neon_vuzpq_v: {
6868     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6869     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6870     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6871     Value *SV = nullptr;
6872
6873     for (unsigned vi = 0; vi != 2; ++vi) {
6874       SmallVector<uint32_t, 16> Indices;
6875       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6876         Indices.push_back(2*i+vi);
6877
6878       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6879       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6880       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6881     }
6882     return SV;
6883   }
6884   case NEON::BI__builtin_neon_vzip_v:
6885   case NEON::BI__builtin_neon_vzipq_v: {
6886     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6887     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6888     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6889     Value *SV = nullptr;
6890
6891     for (unsigned vi = 0; vi != 2; ++vi) {
6892       SmallVector<uint32_t, 16> Indices;
6893       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6894         Indices.push_back((i + vi*e) >> 1);
6895         Indices.push_back(((i + vi*e) >> 1)+e);
6896       }
6897       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6898       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6899       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6900     }
6901     return SV;
6902   }
6903   case NEON::BI__builtin_neon_vqtbl1q_v: {
6904     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6905                         Ops, "vtbl1");
6906   }
6907   case NEON::BI__builtin_neon_vqtbl2q_v: {
6908     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6909                         Ops, "vtbl2");
6910   }
6911   case NEON::BI__builtin_neon_vqtbl3q_v: {
6912     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6913                         Ops, "vtbl3");
6914   }
6915   case NEON::BI__builtin_neon_vqtbl4q_v: {
6916     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6917                         Ops, "vtbl4");
6918   }
6919   case NEON::BI__builtin_neon_vqtbx1q_v: {
6920     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6921                         Ops, "vtbx1");
6922   }
6923   case NEON::BI__builtin_neon_vqtbx2q_v: {
6924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6925                         Ops, "vtbx2");
6926   }
6927   case NEON::BI__builtin_neon_vqtbx3q_v: {
6928     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6929                         Ops, "vtbx3");
6930   }
6931   case NEON::BI__builtin_neon_vqtbx4q_v: {
6932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6933                         Ops, "vtbx4");
6934   }
6935   case NEON::BI__builtin_neon_vsqadd_v:
6936   case NEON::BI__builtin_neon_vsqaddq_v: {
6937     Int = Intrinsic::aarch64_neon_usqadd;
6938     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6939   }
6940   case NEON::BI__builtin_neon_vuqadd_v:
6941   case NEON::BI__builtin_neon_vuqaddq_v: {
6942     Int = Intrinsic::aarch64_neon_suqadd;
6943     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6944   }
6945   }
6946 }
6947
6948 llvm::Value *CodeGenFunction::
6949 BuildVector(ArrayRef<llvm::Value*> Ops) {
6950   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6951          "Not a power-of-two sized vector!");
6952   bool AllConstants = true;
6953   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6954     AllConstants &= isa<Constant>(Ops[i]);
6955
6956   // If this is a constant vector, create a ConstantVector.
6957   if (AllConstants) {
6958     SmallVector<llvm::Constant*, 16> CstOps;
6959     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6960       CstOps.push_back(cast<Constant>(Ops[i]));
6961     return llvm::ConstantVector::get(CstOps);
6962   }
6963
6964   // Otherwise, insertelement the values to build the vector.
6965   Value *Result =
6966     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6967
6968   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6969     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6970
6971   return Result;
6972 }
6973
6974 // Convert the mask from an integer type to a vector of i1.
6975 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6976                               unsigned NumElts) {
6977
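  // The mask arrives as an iN integer with one bit per element; bitcast it to
  // an <N x i1> vector first. (Illustrative: an i8 mask paired with a
  // 4-element operation becomes <8 x i1>, and the low 4 lanes are extracted
  // below.)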
6978   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6979                          cast<IntegerType>(Mask->getType())->getBitWidth());
6980   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6981
6982   // If we have fewer than 8 elements, then the starting mask was an i8 and
6983   // we need to extract down to the right number of elements.
6984   if (NumElts < 8) {
6985     uint32_t Indices[4];
6986     for (unsigned i = 0; i != NumElts; ++i)
6987       Indices[i] = i;
6988     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6989                                              makeArrayRef(Indices, NumElts),
6990                                              "extract");
6991   }
6992   return MaskVec;
6993 }
6994
6995 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6996                                  SmallVectorImpl<Value *> &Ops,
6997                                  unsigned Align) {
6998   // Cast the pointer to right type.
6999   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7000                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7001
7002   // If the mask is all ones just emit a regular store.
7003   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7004     if (C->isAllOnesValue())
7005       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7006
7007   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7008                                    Ops[1]->getType()->getVectorNumElements());
7009
7010   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7011 }
7012
7013 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7014                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7015   // Cast the pointer to the right type.
7016   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7017                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7018
7019   // If the mask is all ones just emit a regular load.
7020   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7021     if (C->isAllOnesValue())
7022       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7023
7024   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7025                                    Ops[1]->getType()->getVectorNumElements());
7026
7027   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7028 }
7029
7030 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7031                                         SmallVectorImpl<Value *> &Ops,
7032                                         llvm::Type *DstTy,
7033                                         unsigned SrcSizeInBits,
7034                                         unsigned Align) {
7035   // Load the subvector.
7036   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7037
7038   // Create broadcast mask.
7039   unsigned NumDstElts = DstTy->getVectorNumElements();
7040   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7041
7042   SmallVector<uint32_t, 8> Mask;
7043   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7044     for (unsigned j = 0; j != NumSrcElts; ++j)
7045       Mask.push_back(j);
7046
7047   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7048 }
7049
7050 static Value *EmitX86Select(CodeGenFunction &CGF,
7051                             Value *Mask, Value *Op0, Value *Op1) {
7052
7053   // If the mask is all ones just return the first argument.
7054   if (const auto *C = dyn_cast<Constant>(Mask))
7055     if (C->isAllOnesValue())
7056       return Op0;
7057
7058   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7059
7060   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7061 }
7062
7063 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7064                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7065   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7066   Value *Cmp;
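  // CC uses the AVX-512 integer-compare predicate encoding: 0=eq, 1=lt, 2=le,
  // 3=always false, 4=ne, 5=ge, 6=gt, 7=always true. The 3 and 7 cases fold
  // directly to constant masks.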
7067
7068   if (CC == 3) {
7069     Cmp = Constant::getNullValue(
7070                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7071   } else if (CC == 7) {
7072     Cmp = Constant::getAllOnesValue(
7073                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7074   } else {
7075     ICmpInst::Predicate Pred;
7076     switch (CC) {
7077     default: llvm_unreachable("Unknown condition code");
7078     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7079     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7080     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7081     case 4: Pred = ICmpInst::ICMP_NE;  break;
7082     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7083     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7084     }
7085     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7086   }
7087
7088   const auto *C = dyn_cast<Constant>(Ops.back());
7089   if (!C || !C->isAllOnesValue())
7090     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7091
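  // Results narrower than 8 elements are widened with zero lanes so that the
  // mask can be bitcast to an integer of at least 8 bits.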
7092   if (NumElts < 8) {
7093     uint32_t Indices[8];
7094     for (unsigned i = 0; i != NumElts; ++i)
7095       Indices[i] = i;
7096     for (unsigned i = NumElts; i != 8; ++i)
7097       Indices[i] = i % NumElts + NumElts;
7098     Cmp = CGF.Builder.CreateShuffleVector(
7099         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7100   }
7101   return CGF.Builder.CreateBitCast(Cmp,
7102                                    IntegerType::get(CGF.getLLVMContext(),
7103                                                     std::max(NumElts, 8U)));
7104 }
7105
7106 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7107                             ArrayRef<Value *> Ops) {
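  // Integer min/max is open-coded as icmp+select. The 4-operand forms are the
  // masked variants; they additionally blend the result with the passthrough
  // operand via EmitX86Select.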
7108   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7109   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7110
7111   if (Ops.size() == 2)
7112     return Res;
7113
7114   assert(Ops.size() == 4);
7115   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7116 }
7117
7118 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7119                                            const CallExpr *E) {
7120   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7121       BuiltinID == X86::BI__builtin_ms_va_end)
7122     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7123                           BuiltinID == X86::BI__builtin_ms_va_start);
7124   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7125     // Lower this manually. We can't reliably determine whether or not any
7126     // given va_copy() is for a Win64 va_list from the calling convention
7127     // alone, because it's legal to do this from a System V ABI function.
7128     // With opaque pointer types, we won't have enough information in LLVM
7129     // IR to determine this from the argument types, either. Best to do it
7130     // now, while we have enough information.
7131     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7132     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7133
7134     llvm::Type *BPP = Int8PtrPtrTy;
7135
7136     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7137                        DestAddr.getAlignment());
7138     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7139                       SrcAddr.getAlignment());
7140
7141     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7142     return Builder.CreateStore(ArgPtr, DestAddr);
7143   }
7144
7145   SmallVector<Value*, 4> Ops;
7146
7147   // Find out if any arguments are required to be integer constant expressions.
7148   unsigned ICEArguments = 0;
7149   ASTContext::GetBuiltinTypeError Error;
7150   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7151   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7152
7153   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7154     // If this is a normal argument, just emit it as a scalar.
7155     if ((ICEArguments & (1 << i)) == 0) {
7156       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7157       continue;
7158     }
7159
7160     // If this is required to be a constant, constant fold it so that we know
7161     // that the generated intrinsic gets a ConstantInt.
7162     llvm::APSInt Result;
7163     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7164     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7165     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7166   }
7167
7168   // These exist so that the builtin that takes an immediate can be bounds
7169   // checked by clang to avoid passing bad immediates to the backend. Since
7170   // AVX has a larger immediate range than SSE, we would need separate builtins
7171   // to do the different bounds checking. Rather than create a clang-specific,
7172   // SSE-only builtin, this implements eight separate builtins to match the
7173   // gcc implementation.
7174   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7175     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7176     llvm::Function *F = CGM.getIntrinsic(ID);
7177     return Builder.CreateCall(F, Ops);
7178   };
7179
7180   // For the vector forms of FP comparisons, translate the builtins directly to
7181   // IR.
7182   // TODO: The builtins could be removed if the SSE header files used vector
7183   // extension comparisons directly (vector ordered/unordered may need
7184   // additional support via __builtin_isnan()).
7185   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7186     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7187     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7188     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7189     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7190     return Builder.CreateBitCast(Sext, FPVecTy);
7191   };
7192
7193   switch (BuiltinID) {
7194   default: return nullptr;
7195   case X86::BI__builtin_cpu_supports: {
7196     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7197     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7198
7199     // TODO: When/if this becomes more than x86-specific, use a TargetInfo-based
7200     // mapping.
7201     // Processor features and mapping to processor feature value.
7202     enum X86Features {
7203       CMOV = 0,
7204       MMX,
7205       POPCNT,
7206       SSE,
7207       SSE2,
7208       SSE3,
7209       SSSE3,
7210       SSE4_1,
7211       SSE4_2,
7212       AVX,
7213       AVX2,
7214       SSE4_A,
7215       FMA4,
7216       XOP,
7217       FMA,
7218       AVX512F,
7219       BMI,
7220       BMI2,
7221       AES,
7222       PCLMUL,
7223       AVX512VL,
7224       AVX512BW,
7225       AVX512DQ,
7226       AVX512CD,
7227       AVX512ER,
7228       AVX512PF,
7229       AVX512VBMI,
7230       AVX512IFMA,
7231       MAX
7232     };
7233
7234     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
7235                               .Case("cmov", X86Features::CMOV)
7236                               .Case("mmx", X86Features::MMX)
7237                               .Case("popcnt", X86Features::POPCNT)
7238                               .Case("sse", X86Features::SSE)
7239                               .Case("sse2", X86Features::SSE2)
7240                               .Case("sse3", X86Features::SSE3)
7241                               .Case("ssse3", X86Features::SSSE3)
7242                               .Case("sse4.1", X86Features::SSE4_1)
7243                               .Case("sse4.2", X86Features::SSE4_2)
7244                               .Case("avx", X86Features::AVX)
7245                               .Case("avx2", X86Features::AVX2)
7246                               .Case("sse4a", X86Features::SSE4_A)
7247                               .Case("fma4", X86Features::FMA4)
7248                               .Case("xop", X86Features::XOP)
7249                               .Case("fma", X86Features::FMA)
7250                               .Case("avx512f", X86Features::AVX512F)
7251                               .Case("bmi", X86Features::BMI)
7252                               .Case("bmi2", X86Features::BMI2)
7253                               .Case("aes", X86Features::AES)
7254                               .Case("pclmul", X86Features::PCLMUL)
7255                               .Case("avx512vl", X86Features::AVX512VL)
7256                               .Case("avx512bw", X86Features::AVX512BW)
7257                               .Case("avx512dq", X86Features::AVX512DQ)
7258                               .Case("avx512cd", X86Features::AVX512CD)
7259                               .Case("avx512er", X86Features::AVX512ER)
7260                               .Case("avx512pf", X86Features::AVX512PF)
7261                               .Case("avx512vbmi", X86Features::AVX512VBMI)
7262                               .Case("avx512ifma", X86Features::AVX512IFMA)
7263                               .Default(X86Features::MAX);
7264     assert(Feature != X86Features::MAX && "Invalid feature!");
7265
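    // The lowering tests a bit of __cpu_model.__cpu_features[0]; for example,
    // __builtin_cpu_supports("avx2") loads that word and tests the AVX2 bit.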
7266     // Matching the struct layout from the compiler-rt/libgcc structure that is
7267     // filled in:
7268     // unsigned int __cpu_vendor;
7269     // unsigned int __cpu_type;
7270     // unsigned int __cpu_subtype;
7271     // unsigned int __cpu_features[1];
7272     llvm::Type *STy = llvm::StructType::get(
7273         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
7274
7275     // Grab the global __cpu_model.
7276     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7277
7278     // Grab the first (0th) element of the __cpu_features field of the
7279     // __cpu_model global, indexing through the struct type STy.
7280     Value *Idxs[] = {
7281       ConstantInt::get(Int32Ty, 0),
7282       ConstantInt::get(Int32Ty, 3),
7283       ConstantInt::get(Int32Ty, 0)
7284     };
7285     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7286     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7287                                                 CharUnits::fromQuantity(4));
7288
7289     // Check the value of the bit corresponding to the feature requested.
7290     Value *Bitset = Builder.CreateAnd(
7291         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7292     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7293   }
7294   case X86::BI_mm_prefetch: {
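    // llvm.prefetch takes (address, rw, locality, cache type); this builtin
    // always emits a read (RW = 0) prefetch of the data cache (Data = 1), with
    // the locality hint taken from the second argument.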
7295     Value *Address = Ops[0];
7296     Value *RW = ConstantInt::get(Int32Ty, 0);
7297     Value *Locality = Ops[1];
7298     Value *Data = ConstantInt::get(Int32Ty, 1);
7299     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7300     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7301   }
7302   case X86::BI_mm_clflush: {
7303     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7304                               Ops[0]);
7305   }
7306   case X86::BI_mm_lfence: {
7307     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7308   }
7309   case X86::BI_mm_mfence: {
7310     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7311   }
7312   case X86::BI_mm_sfence: {
7313     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7314   }
7315   case X86::BI_mm_pause: {
7316     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7317   }
7318   case X86::BI__rdtsc: {
7319     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7320   }
7321   case X86::BI__builtin_ia32_undef128:
7322   case X86::BI__builtin_ia32_undef256:
7323   case X86::BI__builtin_ia32_undef512:
7324     return UndefValue::get(ConvertType(E->getType()));
7325   case X86::BI__builtin_ia32_vec_init_v8qi:
7326   case X86::BI__builtin_ia32_vec_init_v4hi:
7327   case X86::BI__builtin_ia32_vec_init_v2si:
7328     return Builder.CreateBitCast(BuildVector(Ops),
7329                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7330   case X86::BI__builtin_ia32_vec_ext_v2si:
7331     return Builder.CreateExtractElement(Ops[0],
7332                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7333   case X86::BI_mm_setcsr:
7334   case X86::BI__builtin_ia32_ldmxcsr: {
7335     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7336     Builder.CreateStore(Ops[0], Tmp);
7337     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7338                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7339   }
7340   case X86::BI_mm_getcsr:
7341   case X86::BI__builtin_ia32_stmxcsr: {
7342     Address Tmp = CreateMemTemp(E->getType());
7343     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7344                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7345     return Builder.CreateLoad(Tmp, "stmxcsr");
7346   }
7347   case X86::BI__builtin_ia32_xsave:
7348   case X86::BI__builtin_ia32_xsave64:
7349   case X86::BI__builtin_ia32_xrstor:
7350   case X86::BI__builtin_ia32_xrstor64:
7351   case X86::BI__builtin_ia32_xsaveopt:
7352   case X86::BI__builtin_ia32_xsaveopt64:
7353   case X86::BI__builtin_ia32_xrstors:
7354   case X86::BI__builtin_ia32_xrstors64:
7355   case X86::BI__builtin_ia32_xsavec:
7356   case X86::BI__builtin_ia32_xsavec64:
7357   case X86::BI__builtin_ia32_xsaves:
7358   case X86::BI__builtin_ia32_xsaves64: {
7359     Intrinsic::ID ID;
7360 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7361     case X86::BI__builtin_ia32_##NAME: \
7362       ID = Intrinsic::x86_##NAME; \
7363       break
7364     switch (BuiltinID) {
7365     default: llvm_unreachable("Unsupported intrinsic!");
7366     INTRINSIC_X86_XSAVE_ID(xsave);
7367     INTRINSIC_X86_XSAVE_ID(xsave64);
7368     INTRINSIC_X86_XSAVE_ID(xrstor);
7369     INTRINSIC_X86_XSAVE_ID(xrstor64);
7370     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7371     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7372     INTRINSIC_X86_XSAVE_ID(xrstors);
7373     INTRINSIC_X86_XSAVE_ID(xrstors64);
7374     INTRINSIC_X86_XSAVE_ID(xsavec);
7375     INTRINSIC_X86_XSAVE_ID(xsavec64);
7376     INTRINSIC_X86_XSAVE_ID(xsaves);
7377     INTRINSIC_X86_XSAVE_ID(xsaves64);
7378     }
7379 #undef INTRINSIC_X86_XSAVE_ID
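    // The xsave family takes its feature mask in EDX:EAX, so split the 64-bit
    // mask operand into its high and low 32-bit halves.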
7380     Value *Mhi = Builder.CreateTrunc(
7381       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7382     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7383     Ops[1] = Mhi;
7384     Ops.push_back(Mlo);
7385     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7386   }
7387   case X86::BI__builtin_ia32_storedqudi128_mask:
7388   case X86::BI__builtin_ia32_storedqusi128_mask:
7389   case X86::BI__builtin_ia32_storedquhi128_mask:
7390   case X86::BI__builtin_ia32_storedquqi128_mask:
7391   case X86::BI__builtin_ia32_storeupd128_mask:
7392   case X86::BI__builtin_ia32_storeups128_mask:
7393   case X86::BI__builtin_ia32_storedqudi256_mask:
7394   case X86::BI__builtin_ia32_storedqusi256_mask:
7395   case X86::BI__builtin_ia32_storedquhi256_mask:
7396   case X86::BI__builtin_ia32_storedquqi256_mask:
7397   case X86::BI__builtin_ia32_storeupd256_mask:
7398   case X86::BI__builtin_ia32_storeups256_mask:
7399   case X86::BI__builtin_ia32_storedqudi512_mask:
7400   case X86::BI__builtin_ia32_storedqusi512_mask:
7401   case X86::BI__builtin_ia32_storedquhi512_mask:
7402   case X86::BI__builtin_ia32_storedquqi512_mask:
7403   case X86::BI__builtin_ia32_storeupd512_mask:
7404   case X86::BI__builtin_ia32_storeups512_mask:
7405     return EmitX86MaskedStore(*this, Ops, 1);
7406
7407   case X86::BI__builtin_ia32_storess128_mask:
7408   case X86::BI__builtin_ia32_storesd128_mask: {
7409     return EmitX86MaskedStore(*this, Ops, 16);
7410   }
7411   case X86::BI__builtin_ia32_movdqa32store128_mask:
7412   case X86::BI__builtin_ia32_movdqa64store128_mask:
7413   case X86::BI__builtin_ia32_storeaps128_mask:
7414   case X86::BI__builtin_ia32_storeapd128_mask:
7415   case X86::BI__builtin_ia32_movdqa32store256_mask:
7416   case X86::BI__builtin_ia32_movdqa64store256_mask:
7417   case X86::BI__builtin_ia32_storeaps256_mask:
7418   case X86::BI__builtin_ia32_storeapd256_mask:
7419   case X86::BI__builtin_ia32_movdqa32store512_mask:
7420   case X86::BI__builtin_ia32_movdqa64store512_mask:
7421   case X86::BI__builtin_ia32_storeaps512_mask:
7422   case X86::BI__builtin_ia32_storeapd512_mask: {
7423     unsigned Align =
7424       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7425     return EmitX86MaskedStore(*this, Ops, Align);
7426   }
7427   case X86::BI__builtin_ia32_loadups128_mask:
7428   case X86::BI__builtin_ia32_loadups256_mask:
7429   case X86::BI__builtin_ia32_loadups512_mask:
7430   case X86::BI__builtin_ia32_loadupd128_mask:
7431   case X86::BI__builtin_ia32_loadupd256_mask:
7432   case X86::BI__builtin_ia32_loadupd512_mask:
7433   case X86::BI__builtin_ia32_loaddquqi128_mask:
7434   case X86::BI__builtin_ia32_loaddquqi256_mask:
7435   case X86::BI__builtin_ia32_loaddquqi512_mask:
7436   case X86::BI__builtin_ia32_loaddquhi128_mask:
7437   case X86::BI__builtin_ia32_loaddquhi256_mask:
7438   case X86::BI__builtin_ia32_loaddquhi512_mask:
7439   case X86::BI__builtin_ia32_loaddqusi128_mask:
7440   case X86::BI__builtin_ia32_loaddqusi256_mask:
7441   case X86::BI__builtin_ia32_loaddqusi512_mask:
7442   case X86::BI__builtin_ia32_loaddqudi128_mask:
7443   case X86::BI__builtin_ia32_loaddqudi256_mask:
7444   case X86::BI__builtin_ia32_loaddqudi512_mask:
7445     return EmitX86MaskedLoad(*this, Ops, 1);
7446
7447   case X86::BI__builtin_ia32_loadss128_mask:
7448   case X86::BI__builtin_ia32_loadsd128_mask:
7449     return EmitX86MaskedLoad(*this, Ops, 16);
7450
7451   case X86::BI__builtin_ia32_loadaps128_mask:
7452   case X86::BI__builtin_ia32_loadaps256_mask:
7453   case X86::BI__builtin_ia32_loadaps512_mask:
7454   case X86::BI__builtin_ia32_loadapd128_mask:
7455   case X86::BI__builtin_ia32_loadapd256_mask:
7456   case X86::BI__builtin_ia32_loadapd512_mask:
7457   case X86::BI__builtin_ia32_movdqa32load128_mask:
7458   case X86::BI__builtin_ia32_movdqa32load256_mask:
7459   case X86::BI__builtin_ia32_movdqa32load512_mask:
7460   case X86::BI__builtin_ia32_movdqa64load128_mask:
7461   case X86::BI__builtin_ia32_movdqa64load256_mask:
7462   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7463     unsigned Align =
7464       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7465     return EmitX86MaskedLoad(*this, Ops, Align);
7466   }
7467
7468   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7469   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7470     llvm::Type *DstTy = ConvertType(E->getType());
7471     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7472   }
7473
7474   case X86::BI__builtin_ia32_storehps:
7475   case X86::BI__builtin_ia32_storelps: {
7476     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7477     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7478
7479     // Cast the value to v2i64.
7480     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7481
7482     // Extract element 0 (storelps) or 1 (storehps).
7483     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7484     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7485     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7486
7487     // cast pointer to i64 & store
7488     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7489     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7490   }
7491   case X86::BI__builtin_ia32_palignr128:
7492   case X86::BI__builtin_ia32_palignr256:
7493   case X86::BI__builtin_ia32_palignr512_mask: {
7494     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7495
7496     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7497     assert(NumElts % 16 == 0);
7498
7499     // If palignr is shifting the pair of vectors more than the size of two
7500     // lanes, emit zero.
7501     if (ShiftVal >= 32)
7502       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7503
7504     // If palignr is shifting the pair of input vectors more than one lane,
7505     // but less than two lanes, convert to shifting in zeroes.
7506     if (ShiftVal > 16) {
7507       ShiftVal -= 16;
7508       Ops[1] = Ops[0];
7509       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7510     }
7511
7512     uint32_t Indices[64];
7513     // palignr operates on 128-bit lanes, so wider vectors are handled one lane at a time.
7514     for (unsigned l = 0; l != NumElts; l += 16) {
7515       for (unsigned i = 0; i != 16; ++i) {
7516         unsigned Idx = ShiftVal + i;
7517         if (Idx >= 16)
7518           Idx += NumElts - 16; // End of lane, switch operand.
7519         Indices[l + i] = Idx + l;
7520       }
7521     }
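    // For a single 128-bit lane with ShiftVal == 4, for instance, the indices
    // are 4..19: the tail of Ops[1] followed by the first four bytes of Ops[0].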
7522
7523     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7524                                                makeArrayRef(Indices, NumElts),
7525                                                "palignr");
7526
7527     // If this isn't a masked builtin, just return the align operation.
7528     if (Ops.size() == 3)
7529       return Align;
7530
7531     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7532   }
7533
7534   case X86::BI__builtin_ia32_movnti:
7535   case X86::BI__builtin_ia32_movnti64:
7536   case X86::BI__builtin_ia32_movntsd:
7537   case X86::BI__builtin_ia32_movntss: {
7538     llvm::MDNode *Node = llvm::MDNode::get(
7539         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7540
7541     Value *Ptr = Ops[0];
7542     Value *Src = Ops[1];
7543
7544     // Extract the 0'th element of the source vector.
7545     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7546         BuiltinID == X86::BI__builtin_ia32_movntss)
7547       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7548
7549     // Convert the type of the pointer to a pointer to the stored type.
7550     Value *BC = Builder.CreateBitCast(
7551         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7552
7553     // Unaligned nontemporal store of the scalar value.
7554     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7555     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7556     SI->setAlignment(1);
7557     return SI;
7558   }
7559
7560   case X86::BI__builtin_ia32_selectb_128:
7561   case X86::BI__builtin_ia32_selectb_256:
7562   case X86::BI__builtin_ia32_selectb_512:
7563   case X86::BI__builtin_ia32_selectw_128:
7564   case X86::BI__builtin_ia32_selectw_256:
7565   case X86::BI__builtin_ia32_selectw_512:
7566   case X86::BI__builtin_ia32_selectd_128:
7567   case X86::BI__builtin_ia32_selectd_256:
7568   case X86::BI__builtin_ia32_selectd_512:
7569   case X86::BI__builtin_ia32_selectq_128:
7570   case X86::BI__builtin_ia32_selectq_256:
7571   case X86::BI__builtin_ia32_selectq_512:
7572   case X86::BI__builtin_ia32_selectps_128:
7573   case X86::BI__builtin_ia32_selectps_256:
7574   case X86::BI__builtin_ia32_selectps_512:
7575   case X86::BI__builtin_ia32_selectpd_128:
7576   case X86::BI__builtin_ia32_selectpd_256:
7577   case X86::BI__builtin_ia32_selectpd_512:
7578     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7579   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7580   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7581   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7582   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7583   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7584   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7585   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7586   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7587   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7588   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7589   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7590   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7591     return EmitX86MaskedCompare(*this, 0, false, Ops);
7592   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7593   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7594   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7595   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7596   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7597   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7598   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7599   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7600   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7601   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7602   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7603   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7604     return EmitX86MaskedCompare(*this, 6, true, Ops);
7605   case X86::BI__builtin_ia32_cmpb128_mask:
7606   case X86::BI__builtin_ia32_cmpb256_mask:
7607   case X86::BI__builtin_ia32_cmpb512_mask:
7608   case X86::BI__builtin_ia32_cmpw128_mask:
7609   case X86::BI__builtin_ia32_cmpw256_mask:
7610   case X86::BI__builtin_ia32_cmpw512_mask:
7611   case X86::BI__builtin_ia32_cmpd128_mask:
7612   case X86::BI__builtin_ia32_cmpd256_mask:
7613   case X86::BI__builtin_ia32_cmpd512_mask:
7614   case X86::BI__builtin_ia32_cmpq128_mask:
7615   case X86::BI__builtin_ia32_cmpq256_mask:
7616   case X86::BI__builtin_ia32_cmpq512_mask: {
7617     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7618     return EmitX86MaskedCompare(*this, CC, true, Ops);
7619   }
7620   case X86::BI__builtin_ia32_ucmpb128_mask:
7621   case X86::BI__builtin_ia32_ucmpb256_mask:
7622   case X86::BI__builtin_ia32_ucmpb512_mask:
7623   case X86::BI__builtin_ia32_ucmpw128_mask:
7624   case X86::BI__builtin_ia32_ucmpw256_mask:
7625   case X86::BI__builtin_ia32_ucmpw512_mask:
7626   case X86::BI__builtin_ia32_ucmpd128_mask:
7627   case X86::BI__builtin_ia32_ucmpd256_mask:
7628   case X86::BI__builtin_ia32_ucmpd512_mask:
7629   case X86::BI__builtin_ia32_ucmpq128_mask:
7630   case X86::BI__builtin_ia32_ucmpq256_mask:
7631   case X86::BI__builtin_ia32_ucmpq512_mask: {
7632     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7633     return EmitX86MaskedCompare(*this, CC, false, Ops);
7634   }
7635
7636   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7637   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7638   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7639   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7640   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7641   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
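    // Lower to the generic ctlz intrinsic with a defined result for zero input
    // (second operand false), then apply the writemask via a select.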
7642     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7643     return EmitX86Select(*this, Ops[2],
7644                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7645                          Ops[1]);
7646   }
7647
7648   case X86::BI__builtin_ia32_pmaxsb128:
7649   case X86::BI__builtin_ia32_pmaxsw128:
7650   case X86::BI__builtin_ia32_pmaxsd128:
7651   case X86::BI__builtin_ia32_pmaxsq128_mask:
7652   case X86::BI__builtin_ia32_pmaxsb256:
7653   case X86::BI__builtin_ia32_pmaxsw256:
7654   case X86::BI__builtin_ia32_pmaxsd256:
7655   case X86::BI__builtin_ia32_pmaxsq256_mask:
7656   case X86::BI__builtin_ia32_pmaxsb512_mask:
7657   case X86::BI__builtin_ia32_pmaxsw512_mask:
7658   case X86::BI__builtin_ia32_pmaxsd512_mask:
7659   case X86::BI__builtin_ia32_pmaxsq512_mask:
7660     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7661   case X86::BI__builtin_ia32_pmaxub128:
7662   case X86::BI__builtin_ia32_pmaxuw128:
7663   case X86::BI__builtin_ia32_pmaxud128:
7664   case X86::BI__builtin_ia32_pmaxuq128_mask:
7665   case X86::BI__builtin_ia32_pmaxub256:
7666   case X86::BI__builtin_ia32_pmaxuw256:
7667   case X86::BI__builtin_ia32_pmaxud256:
7668   case X86::BI__builtin_ia32_pmaxuq256_mask:
7669   case X86::BI__builtin_ia32_pmaxub512_mask:
7670   case X86::BI__builtin_ia32_pmaxuw512_mask:
7671   case X86::BI__builtin_ia32_pmaxud512_mask:
7672   case X86::BI__builtin_ia32_pmaxuq512_mask:
7673     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7674   case X86::BI__builtin_ia32_pminsb128:
7675   case X86::BI__builtin_ia32_pminsw128:
7676   case X86::BI__builtin_ia32_pminsd128:
7677   case X86::BI__builtin_ia32_pminsq128_mask:
7678   case X86::BI__builtin_ia32_pminsb256:
7679   case X86::BI__builtin_ia32_pminsw256:
7680   case X86::BI__builtin_ia32_pminsd256:
7681   case X86::BI__builtin_ia32_pminsq256_mask:
7682   case X86::BI__builtin_ia32_pminsb512_mask:
7683   case X86::BI__builtin_ia32_pminsw512_mask:
7684   case X86::BI__builtin_ia32_pminsd512_mask:
7685   case X86::BI__builtin_ia32_pminsq512_mask:
7686     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7687   case X86::BI__builtin_ia32_pminub128:
7688   case X86::BI__builtin_ia32_pminuw128:
7689   case X86::BI__builtin_ia32_pminud128:
7690   case X86::BI__builtin_ia32_pminuq128_mask:
7691   case X86::BI__builtin_ia32_pminub256:
7692   case X86::BI__builtin_ia32_pminuw256:
7693   case X86::BI__builtin_ia32_pminud256:
7694   case X86::BI__builtin_ia32_pminuq256_mask:
7695   case X86::BI__builtin_ia32_pminub512_mask:
7696   case X86::BI__builtin_ia32_pminuw512_mask:
7697   case X86::BI__builtin_ia32_pminud512_mask:
7698   case X86::BI__builtin_ia32_pminuq512_mask:
7699     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7700
7701   // 3DNow!
7702   case X86::BI__builtin_ia32_pswapdsf:
7703   case X86::BI__builtin_ia32_pswapdsi: {
7704     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7705     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7706     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7707     return Builder.CreateCall(F, Ops, "pswapd");
7708   }
7709   case X86::BI__builtin_ia32_rdrand16_step:
7710   case X86::BI__builtin_ia32_rdrand32_step:
7711   case X86::BI__builtin_ia32_rdrand64_step:
7712   case X86::BI__builtin_ia32_rdseed16_step:
7713   case X86::BI__builtin_ia32_rdseed32_step:
7714   case X86::BI__builtin_ia32_rdseed64_step: {
7715     Intrinsic::ID ID;
7716     switch (BuiltinID) {
7717     default: llvm_unreachable("Unsupported intrinsic!");
7718     case X86::BI__builtin_ia32_rdrand16_step:
7719       ID = Intrinsic::x86_rdrand_16;
7720       break;
7721     case X86::BI__builtin_ia32_rdrand32_step:
7722       ID = Intrinsic::x86_rdrand_32;
7723       break;
7724     case X86::BI__builtin_ia32_rdrand64_step:
7725       ID = Intrinsic::x86_rdrand_64;
7726       break;
7727     case X86::BI__builtin_ia32_rdseed16_step:
7728       ID = Intrinsic::x86_rdseed_16;
7729       break;
7730     case X86::BI__builtin_ia32_rdseed32_step:
7731       ID = Intrinsic::x86_rdseed_32;
7732       break;
7733     case X86::BI__builtin_ia32_rdseed64_step:
7734       ID = Intrinsic::x86_rdseed_64;
7735       break;
7736     }
7737
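    // The intrinsic returns {random value, success flag}; store the value
    // through the pointer operand and return the flag.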
7738     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7739     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7740                                       Ops[0]);
7741     return Builder.CreateExtractValue(Call, 1);
7742   }
7743
7744   // SSE packed comparison intrinsics
7745   case X86::BI__builtin_ia32_cmpeqps:
7746   case X86::BI__builtin_ia32_cmpeqpd:
7747     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7748   case X86::BI__builtin_ia32_cmpltps:
7749   case X86::BI__builtin_ia32_cmpltpd:
7750     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7751   case X86::BI__builtin_ia32_cmpleps:
7752   case X86::BI__builtin_ia32_cmplepd:
7753     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7754   case X86::BI__builtin_ia32_cmpunordps:
7755   case X86::BI__builtin_ia32_cmpunordpd:
7756     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7757   case X86::BI__builtin_ia32_cmpneqps:
7758   case X86::BI__builtin_ia32_cmpneqpd:
7759     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7760   case X86::BI__builtin_ia32_cmpnltps:
7761   case X86::BI__builtin_ia32_cmpnltpd:
7762     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7763   case X86::BI__builtin_ia32_cmpnleps:
7764   case X86::BI__builtin_ia32_cmpnlepd:
7765     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7766   case X86::BI__builtin_ia32_cmpordps:
7767   case X86::BI__builtin_ia32_cmpordpd:
7768     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7769   case X86::BI__builtin_ia32_cmpps:
7770   case X86::BI__builtin_ia32_cmpps256:
7771   case X86::BI__builtin_ia32_cmppd:
7772   case X86::BI__builtin_ia32_cmppd256: {
7773     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7774     // If this is one of the SSE immediates, we can use native IR.
7775     if (CC < 8) {
7776       FCmpInst::Predicate Pred;
7777       switch (CC) {
7778       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7779       case 1: Pred = FCmpInst::FCMP_OLT; break;
7780       case 2: Pred = FCmpInst::FCMP_OLE; break;
7781       case 3: Pred = FCmpInst::FCMP_UNO; break;
7782       case 4: Pred = FCmpInst::FCMP_UNE; break;
7783       case 5: Pred = FCmpInst::FCMP_UGE; break;
7784       case 6: Pred = FCmpInst::FCMP_UGT; break;
7785       case 7: Pred = FCmpInst::FCMP_ORD; break;
7786       }
7787       return getVectorFCmpIR(Pred);
7788     }
7789
7790     // We can't handle immediates 8-31 with native IR; use the intrinsic.
7791     Intrinsic::ID ID;
7792     switch (BuiltinID) {
7793     default: llvm_unreachable("Unsupported intrinsic!");
7794     case X86::BI__builtin_ia32_cmpps:
7795       ID = Intrinsic::x86_sse_cmp_ps;
7796       break;
7797     case X86::BI__builtin_ia32_cmpps256:
7798       ID = Intrinsic::x86_avx_cmp_ps_256;
7799       break;
7800     case X86::BI__builtin_ia32_cmppd:
7801       ID = Intrinsic::x86_sse2_cmp_pd;
7802       break;
7803     case X86::BI__builtin_ia32_cmppd256:
7804       ID = Intrinsic::x86_avx_cmp_pd_256;
7805       break;
7806     }
7807
7808     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7809   }
7810
7811   // SSE scalar comparison intrinsics
7812   case X86::BI__builtin_ia32_cmpeqss:
7813     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7814   case X86::BI__builtin_ia32_cmpltss:
7815     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7816   case X86::BI__builtin_ia32_cmpless:
7817     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7818   case X86::BI__builtin_ia32_cmpunordss:
7819     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7820   case X86::BI__builtin_ia32_cmpneqss:
7821     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7822   case X86::BI__builtin_ia32_cmpnltss:
7823     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7824   case X86::BI__builtin_ia32_cmpnless:
7825     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7826   case X86::BI__builtin_ia32_cmpordss:
7827     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7828   case X86::BI__builtin_ia32_cmpeqsd:
7829     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7830   case X86::BI__builtin_ia32_cmpltsd:
7831     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7832   case X86::BI__builtin_ia32_cmplesd:
7833     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7834   case X86::BI__builtin_ia32_cmpunordsd:
7835     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7836   case X86::BI__builtin_ia32_cmpneqsd:
7837     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7838   case X86::BI__builtin_ia32_cmpnltsd:
7839     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7840   case X86::BI__builtin_ia32_cmpnlesd:
7841     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7842   case X86::BI__builtin_ia32_cmpordsd:
7843     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7844
7845   case X86::BI__emul:
7846   case X86::BI__emulu: {
7847     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7848     bool isSigned = (BuiltinID == X86::BI__emul);
7849     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7850     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7851     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7852   }
7853   case X86::BI__mulh:
7854   case X86::BI__umulh:
7855   case X86::BI_mul128:
7856   case X86::BI_umul128: {
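    // Widen both operands to 128 bits and multiply; the high 64 bits of the
    // product are the __mulh/__umulh result, while _mul128/_umul128 also
    // return the low half and store the high half through the third argument.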
7857     llvm::Type *ResType = ConvertType(E->getType());
7858     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7859
7860     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7861     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7862     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7863
7864     Value *MulResult, *HigherBits;
7865     if (IsSigned) {
7866       MulResult = Builder.CreateNSWMul(LHS, RHS);
7867       HigherBits = Builder.CreateAShr(MulResult, 64);
7868     } else {
7869       MulResult = Builder.CreateNUWMul(LHS, RHS);
7870       HigherBits = Builder.CreateLShr(MulResult, 64);
7871     }
7872     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
7873
7874     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
7875       return HigherBits;
7876
7877     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
7878     Builder.CreateStore(HigherBits, HighBitsAddress);
7879     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
7880   }
7881
7882   case X86::BI__faststorefence: {
7883     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7884                                llvm::CrossThread);
7885   }
7886   case X86::BI_ReadWriteBarrier:
7887   case X86::BI_ReadBarrier:
7888   case X86::BI_WriteBarrier: {
7889     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7890                                llvm::SingleThread);
7891   }
7892   case X86::BI_BitScanForward:
7893   case X86::BI_BitScanForward64:
7894     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
7895   case X86::BI_BitScanReverse:
7896   case X86::BI_BitScanReverse64:
7897     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
7898
7899   case X86::BI_InterlockedAnd64:
7900     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
7901   case X86::BI_InterlockedExchange64:
7902     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
7903   case X86::BI_InterlockedExchangeAdd64:
7904     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
7905   case X86::BI_InterlockedExchangeSub64:
7906     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
7907   case X86::BI_InterlockedOr64:
7908     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
7909   case X86::BI_InterlockedXor64:
7910     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
7911   case X86::BI_InterlockedDecrement64:
7912     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
7913   case X86::BI_InterlockedIncrement64:
7914     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
7915
7916   case X86::BI_AddressOfReturnAddress: {
7917     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7918     return Builder.CreateCall(F);
7919   }
7920   case X86::BI__stosb: {
7921     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
7922     // instruction, but it will create a memset that won't be optimized away.
7923     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
7924   }
7925   }
7926 }
7927
7928
7929 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7930                                            const CallExpr *E) {
7931   SmallVector<Value*, 4> Ops;
7932
7933   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7934     Ops.push_back(EmitScalarExpr(E->getArg(i)));
7935
7936   Intrinsic::ID ID = Intrinsic::not_intrinsic;
7937
7938   switch (BuiltinID) {
7939   default: return nullptr;
7940
7941   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7942   // call __builtin_readcyclecounter.
7943   case PPC::BI__builtin_ppc_get_timebase:
7944     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7945
7946   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
7947   case PPC::BI__builtin_altivec_lvx:
7948   case PPC::BI__builtin_altivec_lvxl:
7949   case PPC::BI__builtin_altivec_lvebx:
7950   case PPC::BI__builtin_altivec_lvehx:
7951   case PPC::BI__builtin_altivec_lvewx:
7952   case PPC::BI__builtin_altivec_lvsl:
7953   case PPC::BI__builtin_altivec_lvsr:
7954   case PPC::BI__builtin_vsx_lxvd2x:
7955   case PPC::BI__builtin_vsx_lxvw4x:
7956   case PPC::BI__builtin_vsx_lxvd2x_be:
7957   case PPC::BI__builtin_vsx_lxvw4x_be:
7958   case PPC::BI__builtin_vsx_lxvl:
7959   case PPC::BI__builtin_vsx_lxvll:
7960   {
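    // lxvl/lxvll take a raw pointer plus a separate length operand, so only
    // the pointer is cast; the other load builtins fold their offset and base
    // pointer into a single address with a GEP and drop the extra operand.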
7961     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
7962         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
7963       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
7964     } else {
7965       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7966       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7967       Ops.pop_back();
7968     }
7969
7970     switch (BuiltinID) {
7971     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7972     case PPC::BI__builtin_altivec_lvx:
7973       ID = Intrinsic::ppc_altivec_lvx;
7974       break;
7975     case PPC::BI__builtin_altivec_lvxl:
7976       ID = Intrinsic::ppc_altivec_lvxl;
7977       break;
7978     case PPC::BI__builtin_altivec_lvebx:
7979       ID = Intrinsic::ppc_altivec_lvebx;
7980       break;
7981     case PPC::BI__builtin_altivec_lvehx:
7982       ID = Intrinsic::ppc_altivec_lvehx;
7983       break;
7984     case PPC::BI__builtin_altivec_lvewx:
7985       ID = Intrinsic::ppc_altivec_lvewx;
7986       break;
7987     case PPC::BI__builtin_altivec_lvsl:
7988       ID = Intrinsic::ppc_altivec_lvsl;
7989       break;
7990     case PPC::BI__builtin_altivec_lvsr:
7991       ID = Intrinsic::ppc_altivec_lvsr;
7992       break;
7993     case PPC::BI__builtin_vsx_lxvd2x:
7994       ID = Intrinsic::ppc_vsx_lxvd2x;
7995       break;
7996     case PPC::BI__builtin_vsx_lxvw4x:
7997       ID = Intrinsic::ppc_vsx_lxvw4x;
7998       break;
7999     case PPC::BI__builtin_vsx_lxvd2x_be:
8000       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8001       break;
8002     case PPC::BI__builtin_vsx_lxvw4x_be:
8003       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8004       break;
8005     case PPC::BI__builtin_vsx_lxvl:
8006       ID = Intrinsic::ppc_vsx_lxvl;
8007       break;
8008     case PPC::BI__builtin_vsx_lxvll:
8009       ID = Intrinsic::ppc_vsx_lxvll;
8010       break;
8011     }
8012     llvm::Function *F = CGM.getIntrinsic(ID);
8013     return Builder.CreateCall(F, Ops, "");
8014   }
8015
8016   // vec_st, vec_xst_be
8017   case PPC::BI__builtin_altivec_stvx:
8018   case PPC::BI__builtin_altivec_stvxl:
8019   case PPC::BI__builtin_altivec_stvebx:
8020   case PPC::BI__builtin_altivec_stvehx:
8021   case PPC::BI__builtin_altivec_stvewx:
8022   case PPC::BI__builtin_vsx_stxvd2x:
8023   case PPC::BI__builtin_vsx_stxvw4x:
8024   case PPC::BI__builtin_vsx_stxvd2x_be:
8025   case PPC::BI__builtin_vsx_stxvw4x_be:
8026   case PPC::BI__builtin_vsx_stxvl:
8027   case PPC::BI__builtin_vsx_stxvll:
8028   {
8029     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8030         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8031       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8032     } else {
8033       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8034       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8035       Ops.pop_back();
8036     }
8037
8038     switch (BuiltinID) {
8039     default: llvm_unreachable("Unsupported st intrinsic!");
8040     case PPC::BI__builtin_altivec_stvx:
8041       ID = Intrinsic::ppc_altivec_stvx;
8042       break;
8043     case PPC::BI__builtin_altivec_stvxl:
8044       ID = Intrinsic::ppc_altivec_stvxl;
8045       break;
8046     case PPC::BI__builtin_altivec_stvebx:
8047       ID = Intrinsic::ppc_altivec_stvebx;
8048       break;
8049     case PPC::BI__builtin_altivec_stvehx:
8050       ID = Intrinsic::ppc_altivec_stvehx;
8051       break;
8052     case PPC::BI__builtin_altivec_stvewx:
8053       ID = Intrinsic::ppc_altivec_stvewx;
8054       break;
8055     case PPC::BI__builtin_vsx_stxvd2x:
8056       ID = Intrinsic::ppc_vsx_stxvd2x;
8057       break;
8058     case PPC::BI__builtin_vsx_stxvw4x:
8059       ID = Intrinsic::ppc_vsx_stxvw4x;
8060       break;
8061     case PPC::BI__builtin_vsx_stxvd2x_be:
8062       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8063       break;
8064     case PPC::BI__builtin_vsx_stxvw4x_be:
8065       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8066       break;
8067     case PPC::BI__builtin_vsx_stxvl:
8068       ID = Intrinsic::ppc_vsx_stxvl;
8069       break;
8070     case PPC::BI__builtin_vsx_stxvll:
8071       ID = Intrinsic::ppc_vsx_stxvll;
8072       break;
8073     }
8074     llvm::Function *F = CGM.getIntrinsic(ID);
8075     return Builder.CreateCall(F, Ops, "");
8076   }
8077   // Square root
8078   case PPC::BI__builtin_vsx_xvsqrtsp:
8079   case PPC::BI__builtin_vsx_xvsqrtdp: {
8080     llvm::Type *ResultType = ConvertType(E->getType());
8081     Value *X = EmitScalarExpr(E->getArg(0));
8082     ID = Intrinsic::sqrt;
8083     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8084     return Builder.CreateCall(F, X);
8085   }
8086   // Count leading zeros
8087   case PPC::BI__builtin_altivec_vclzb:
8088   case PPC::BI__builtin_altivec_vclzh:
8089   case PPC::BI__builtin_altivec_vclzw:
8090   case PPC::BI__builtin_altivec_vclzd: {
8091     llvm::Type *ResultType = ConvertType(E->getType());
8092     Value *X = EmitScalarExpr(E->getArg(0));
8093     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8094     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8095     return Builder.CreateCall(F, {X, Undef});
8096   }
8097   case PPC::BI__builtin_altivec_vctzb:
8098   case PPC::BI__builtin_altivec_vctzh:
8099   case PPC::BI__builtin_altivec_vctzw:
8100   case PPC::BI__builtin_altivec_vctzd: {
8101     llvm::Type *ResultType = ConvertType(E->getType());
8102     Value *X = EmitScalarExpr(E->getArg(0));
8103     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8104     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8105     return Builder.CreateCall(F, {X, Undef});
8106   }
8107   case PPC::BI__builtin_altivec_vpopcntb:
8108   case PPC::BI__builtin_altivec_vpopcnth:
8109   case PPC::BI__builtin_altivec_vpopcntw:
8110   case PPC::BI__builtin_altivec_vpopcntd: {
8111     llvm::Type *ResultType = ConvertType(E->getType());
8112     Value *X = EmitScalarExpr(E->getArg(0));
8113     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8114     return Builder.CreateCall(F, X);
8115   }
8116   // Copy sign
8117   case PPC::BI__builtin_vsx_xvcpsgnsp:
8118   case PPC::BI__builtin_vsx_xvcpsgndp: {
8119     llvm::Type *ResultType = ConvertType(E->getType());
8120     Value *X = EmitScalarExpr(E->getArg(0));
8121     Value *Y = EmitScalarExpr(E->getArg(1));
8122     ID = Intrinsic::copysign;
8123     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8124     return Builder.CreateCall(F, {X, Y});
8125   }
8126   // Rounding/truncation
8127   case PPC::BI__builtin_vsx_xvrspip:
8128   case PPC::BI__builtin_vsx_xvrdpip:
8129   case PPC::BI__builtin_vsx_xvrdpim:
8130   case PPC::BI__builtin_vsx_xvrspim:
8131   case PPC::BI__builtin_vsx_xvrdpi:
8132   case PPC::BI__builtin_vsx_xvrspi:
8133   case PPC::BI__builtin_vsx_xvrdpic:
8134   case PPC::BI__builtin_vsx_xvrspic:
8135   case PPC::BI__builtin_vsx_xvrdpiz:
8136   case PPC::BI__builtin_vsx_xvrspiz: {
8137     llvm::Type *ResultType = ConvertType(E->getType());
8138     Value *X = EmitScalarExpr(E->getArg(0));
8139     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8140         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8141       ID = Intrinsic::floor;
8142     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8143              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8144       ID = Intrinsic::round;
8145     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8146              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8147       ID = Intrinsic::nearbyint;
8148     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8149              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8150       ID = Intrinsic::ceil;
8151     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8152              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8153       ID = Intrinsic::trunc;
8154     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8155     return Builder.CreateCall(F, X);
8156   }
8157
8158   // Absolute value
8159   case PPC::BI__builtin_vsx_xvabsdp:
8160   case PPC::BI__builtin_vsx_xvabssp: {
8161     llvm::Type *ResultType = ConvertType(E->getType());
8162     Value *X = EmitScalarExpr(E->getArg(0));
8163     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8164     return Builder.CreateCall(F, X);
8165   }
8166
8167   // FMA variations
8168   case PPC::BI__builtin_vsx_xvmaddadp:
8169   case PPC::BI__builtin_vsx_xvmaddasp:
8170   case PPC::BI__builtin_vsx_xvnmaddadp:
8171   case PPC::BI__builtin_vsx_xvnmaddasp:
8172   case PPC::BI__builtin_vsx_xvmsubadp:
8173   case PPC::BI__builtin_vsx_xvmsubasp:
8174   case PPC::BI__builtin_vsx_xvnmsubadp:
8175   case PPC::BI__builtin_vsx_xvnmsubasp: {
8176     llvm::Type *ResultType = ConvertType(E->getType());
8177     Value *X = EmitScalarExpr(E->getArg(0));
8178     Value *Y = EmitScalarExpr(E->getArg(1));
8179     Value *Z = EmitScalarExpr(E->getArg(2));
8180     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
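    // Zero is the negation zero (-0.0 for floating point), so CreateFSub(Zero, V)
    // below forms the IR negation of V for the nmadd/msub/nmsub variants.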
8181     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8182     switch (BuiltinID) {
8183       case PPC::BI__builtin_vsx_xvmaddadp:
8184       case PPC::BI__builtin_vsx_xvmaddasp:
8185         return Builder.CreateCall(F, {X, Y, Z});
8186       case PPC::BI__builtin_vsx_xvnmaddadp:
8187       case PPC::BI__builtin_vsx_xvnmaddasp:
8188         return Builder.CreateFSub(Zero,
8189                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8190       case PPC::BI__builtin_vsx_xvmsubadp:
8191       case PPC::BI__builtin_vsx_xvmsubasp:
8192         return Builder.CreateCall(F,
8193                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8194       case PPC::BI__builtin_vsx_xvnmsubadp:
8195       case PPC::BI__builtin_vsx_xvnmsubasp:
8196         Value *FsubRes =
8197           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8198         return Builder.CreateFSub(Zero, FsubRes, "sub");
8199     }
8200     llvm_unreachable("Unknown FMA operation");
8201     return nullptr; // Suppress no-return warning
8202   }
8203
8204   case PPC::BI__builtin_vsx_insertword: {
8205     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8206
8207     // The third argument is a compile-time constant int. It must be clamped
8208     // to the range [0, 12].
8209     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8210     assert(ArgCI &&
8211            "Third arg to xxinsertw intrinsic must be constant integer");
8212     const int64_t MaxIndex = 12;
8213     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8214
8215     // The builtin semantics don't exactly match the xxinsertw instruction's
8216     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8217     // word from the first argument, and inserts it in the second argument. The
8218     // instruction extracts the word from its second input register and inserts
8219     // it into its first input register, so swap the first and second arguments.
8220     std::swap(Ops[0], Ops[1]);
8221
8222     // Need to cast the second argument from a vector of unsigned int to a
8223     // vector of long long.
8224     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8225
8226     if (getTarget().isLittleEndian()) {
8227       // Create a shuffle mask of (1, 0)
8228       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8229                                    ConstantInt::get(Int32Ty, 0)
8230                                  };
8231       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8232
8233       // Reverse the double words in the vector we will extract from.
8234       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8235       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8236
8237       // Reverse the index.
8238       Index = MaxIndex - Index;
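      // For example, a big-endian byte offset of 4 becomes 12 - 4 = 8 after
      // the doubleword swap above.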
8239     }
8240
8241     // Intrinsic expects the first arg to be a vector of int.
8242     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8243     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8244     return Builder.CreateCall(F, Ops);
8245   }
8246
8247   case PPC::BI__builtin_vsx_extractuword: {
8248     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8249
8250     // Intrinsic expects the first argument to be a vector of doublewords.
8251     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8252
8253     // The second argument is a compile time constant int that needs to
8254     // be clamped to the range [0, 12].
8255     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8256     assert(ArgCI &&
8257            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8258     const int64_t MaxIndex = 12;
8259     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8260
8261     if (getTarget().isLittleEndian()) {
8262       // Reverse the index.
8263       Index = MaxIndex - Index;
8264       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8265
8266       // Emit the call, then reverse the double words of the result vector.
8267       Value *Call = Builder.CreateCall(F, Ops);
8268
8269       // Create a shuffle mask of (1, 0)
8270       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8271                                    ConstantInt::get(Int32Ty, 0)
8272                                  };
8273       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8274
8275       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8276       return ShuffleCall;
8277     } else {
8278       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8279       return Builder.CreateCall(F, Ops);
8280     }
8281   }
8282   }
8283 }
8284
8285 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8286                                               const CallExpr *E) {
8287   switch (BuiltinID) {
8288   case AMDGPU::BI__builtin_amdgcn_div_scale:
8289   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8290     // Translate from the intrinsic's struct return to the builtin's out
8291     // argument.
8292
8293     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8294
8295     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8296     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8297     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8298
8299     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8300                                            X->getType());
8301
8302     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8303
8304     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8305     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8306
8307     llvm::Type *RealFlagType
8308       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8309
8310     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8311     Builder.CreateStore(FlagExt, FlagOutPtr);
8312     return Result;
8313   }
8314   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8315   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8316     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8317     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8318     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8319     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8320
8321     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8322                                       Src0->getType());
8323     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8324     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8325   }
8326
8327   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8328     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8329   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8330   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8331   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8332     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8333   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8334   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8335     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8336   case AMDGPU::BI__builtin_amdgcn_rcp:
8337   case AMDGPU::BI__builtin_amdgcn_rcpf:
8338   case AMDGPU::BI__builtin_amdgcn_rcph:
8339     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8340   case AMDGPU::BI__builtin_amdgcn_rsq:
8341   case AMDGPU::BI__builtin_amdgcn_rsqf:
8342   case AMDGPU::BI__builtin_amdgcn_rsqh:
8343     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8344   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8345   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8346     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8347   case AMDGPU::BI__builtin_amdgcn_sinf:
8348   case AMDGPU::BI__builtin_amdgcn_sinh:
8349     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8350   case AMDGPU::BI__builtin_amdgcn_cosf:
8351   case AMDGPU::BI__builtin_amdgcn_cosh:
8352     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8353   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8354     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8355   case AMDGPU::BI__builtin_amdgcn_ldexp:
8356   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8357   case AMDGPU::BI__builtin_amdgcn_ldexph:
8358     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8359   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8360   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8361   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8362     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8363   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8364   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8365     Value *Src0 = EmitScalarExpr(E->getArg(0));
8366     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8367                                 { Builder.getInt32Ty(), Src0->getType() });
8368     return Builder.CreateCall(F, Src0);
8369   }
8370   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8371     Value *Src0 = EmitScalarExpr(E->getArg(0));
8372     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8373                                 { Builder.getInt16Ty(), Src0->getType() });
8374     return Builder.CreateCall(F, Src0);
8375   }
8376   case AMDGPU::BI__builtin_amdgcn_fract:
8377   case AMDGPU::BI__builtin_amdgcn_fractf:
8378   case AMDGPU::BI__builtin_amdgcn_fracth:
8379     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8380   case AMDGPU::BI__builtin_amdgcn_lerp:
8381     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8382   case AMDGPU::BI__builtin_amdgcn_uicmp:
8383   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8384   case AMDGPU::BI__builtin_amdgcn_sicmp:
8385   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8386     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8387   case AMDGPU::BI__builtin_amdgcn_fcmp:
8388   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8389     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8390   case AMDGPU::BI__builtin_amdgcn_class:
8391   case AMDGPU::BI__builtin_amdgcn_classf:
8392   case AMDGPU::BI__builtin_amdgcn_classh:
8393     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8394
8395   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8396     CallInst *CI = cast<CallInst>(
8397       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
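    // Reading the exec mask is marked convergent so the call cannot be moved
    // across control flow that might change the set of active lanes.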
8398     CI->setConvergent();
8399     return CI;
8400   }
8401
8402   // amdgcn workitem
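  // The [0, 1024) range attached below presumably reflects the maximum
  // workgroup size per dimension supported on these targets.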
8403   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8404     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8405   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8406     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8407   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8408     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8409
8410   // r600 intrinsics
8411   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8412   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8413     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8414   case AMDGPU::BI__builtin_r600_read_tidig_x:
8415     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8416   case AMDGPU::BI__builtin_r600_read_tidig_y:
8417     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8418   case AMDGPU::BI__builtin_r600_read_tidig_z:
8419     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8420   default:
8421     return nullptr;
8422   }
8423 }
8424
8425 /// Handle a SystemZ function in which the final argument is a pointer
8426 /// to an int that receives the post-instruction CC value.  At the LLVM level
8427 /// this is represented as a function that returns a {result, cc} pair.
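/// For example (an illustrative sketch, not tied to a particular builtin),
/// a call such as __builtin_s390_vceqbs(a, b, &cc) is lowered roughly to
///   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
/// where element 1 of %pair is stored through the cc pointer and element 0
/// becomes the builtin's return value (exact types depend on the builtin).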
8428 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8429                                          unsigned IntrinsicID,
8430                                          const CallExpr *E) {
8431   unsigned NumArgs = E->getNumArgs() - 1;
8432   SmallVector<Value *, 8> Args(NumArgs);
8433   for (unsigned I = 0; I < NumArgs; ++I)
8434     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8435   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8436   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8437   Value *Call = CGF.Builder.CreateCall(F, Args);
8438   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8439   CGF.Builder.CreateStore(CC, CCPtr);
8440   return CGF.Builder.CreateExtractValue(Call, 0);
8441 }
8442
8443 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8444                                                const CallExpr *E) {
8445   switch (BuiltinID) {
8446   case SystemZ::BI__builtin_tbegin: {
8447     Value *TDB = EmitScalarExpr(E->getArg(0));
8448     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8449     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8450     return Builder.CreateCall(F, {TDB, Control});
8451   }
8452   case SystemZ::BI__builtin_tbegin_nofloat: {
8453     Value *TDB = EmitScalarExpr(E->getArg(0));
8454     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8455     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8456     return Builder.CreateCall(F, {TDB, Control});
8457   }
8458   case SystemZ::BI__builtin_tbeginc: {
8459     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8460     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8461     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8462     return Builder.CreateCall(F, {TDB, Control});
8463   }
8464   case SystemZ::BI__builtin_tabort: {
8465     Value *Data = EmitScalarExpr(E->getArg(0));
8466     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8467     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8468   }
8469   case SystemZ::BI__builtin_non_tx_store: {
8470     Value *Address = EmitScalarExpr(E->getArg(0));
8471     Value *Data = EmitScalarExpr(E->getArg(1));
8472     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8473     return Builder.CreateCall(F, {Data, Address});
8474   }
8475
8476   // Vector builtins.  Note that most vector builtins are mapped automatically
8477   // to target-specific LLVM intrinsics.  The ones handled specially here can
8478   // be represented via standard LLVM IR, which is preferable because it
8479   // enables common LLVM optimizations.
8480
8481   case SystemZ::BI__builtin_s390_vpopctb:
8482   case SystemZ::BI__builtin_s390_vpopcth:
8483   case SystemZ::BI__builtin_s390_vpopctf:
8484   case SystemZ::BI__builtin_s390_vpopctg: {
8485     llvm::Type *ResultType = ConvertType(E->getType());
8486     Value *X = EmitScalarExpr(E->getArg(0));
8487     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8488     return Builder.CreateCall(F, X);
8489   }
8490
8491   case SystemZ::BI__builtin_s390_vclzb:
8492   case SystemZ::BI__builtin_s390_vclzh:
8493   case SystemZ::BI__builtin_s390_vclzf:
8494   case SystemZ::BI__builtin_s390_vclzg: {
8495     llvm::Type *ResultType = ConvertType(E->getType());
8496     Value *X = EmitScalarExpr(E->getArg(0));
8497     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8498     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8499     return Builder.CreateCall(F, {X, Undef});
8500   }
8501
8502   case SystemZ::BI__builtin_s390_vctzb:
8503   case SystemZ::BI__builtin_s390_vctzh:
8504   case SystemZ::BI__builtin_s390_vctzf:
8505   case SystemZ::BI__builtin_s390_vctzg: {
8506     llvm::Type *ResultType = ConvertType(E->getType());
8507     Value *X = EmitScalarExpr(E->getArg(0));
8508     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8509     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8510     return Builder.CreateCall(F, {X, Undef});
8511   }
8512
8513   case SystemZ::BI__builtin_s390_vfsqdb: {
8514     llvm::Type *ResultType = ConvertType(E->getType());
8515     Value *X = EmitScalarExpr(E->getArg(0));
8516     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8517     return Builder.CreateCall(F, X);
8518   }
8519   case SystemZ::BI__builtin_s390_vfmadb: {
8520     llvm::Type *ResultType = ConvertType(E->getType());
8521     Value *X = EmitScalarExpr(E->getArg(0));
8522     Value *Y = EmitScalarExpr(E->getArg(1));
8523     Value *Z = EmitScalarExpr(E->getArg(2));
8524     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8525     return Builder.CreateCall(F, {X, Y, Z});
8526   }
8527   case SystemZ::BI__builtin_s390_vfmsdb: {
8528     llvm::Type *ResultType = ConvertType(E->getType());
8529     Value *X = EmitScalarExpr(E->getArg(0));
8530     Value *Y = EmitScalarExpr(E->getArg(1));
8531     Value *Z = EmitScalarExpr(E->getArg(2));
8532     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8533     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8534     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8535   }
8536   case SystemZ::BI__builtin_s390_vflpdb: {
8537     llvm::Type *ResultType = ConvertType(E->getType());
8538     Value *X = EmitScalarExpr(E->getArg(0));
8539     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8540     return Builder.CreateCall(F, X);
8541   }
8542   case SystemZ::BI__builtin_s390_vflndb: {
8543     llvm::Type *ResultType = ConvertType(E->getType());
8544     Value *X = EmitScalarExpr(E->getArg(0));
8545     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8546     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8547     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8548   }
8549   case SystemZ::BI__builtin_s390_vfidb: {
8550     llvm::Type *ResultType = ConvertType(E->getType());
8551     Value *X = EmitScalarExpr(E->getArg(0));
8552     // Constant-fold the M4 and M5 mask arguments.
8553     llvm::APSInt M4, M5;
8554     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8555     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8556     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8557     (void)IsConstM4; (void)IsConstM5;
8558     // Check whether this instance of vfidb can be represented via an LLVM
8559     // standard intrinsic.  We only support some combinations of M4 and M5.
8560     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8561     switch (M4.getZExtValue()) {
8562     default: break;
8563     case 0:  // IEEE-inexact exception allowed
8564       switch (M5.getZExtValue()) {
8565       default: break;
8566       case 0: ID = Intrinsic::rint; break;
8567       }
8568       break;
8569     case 4:  // IEEE-inexact exception suppressed
8570       switch (M5.getZExtValue()) {
8571       default: break;
8572       case 0: ID = Intrinsic::nearbyint; break;
8573       case 1: ID = Intrinsic::round; break;
8574       case 5: ID = Intrinsic::trunc; break;
8575       case 6: ID = Intrinsic::ceil; break;
8576       case 7: ID = Intrinsic::floor; break;
8577       }
8578       break;
8579     }
8580     if (ID != Intrinsic::not_intrinsic) {
8581       Function *F = CGM.getIntrinsic(ID, ResultType);
8582       return Builder.CreateCall(F, X);
8583     }
8584     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8585     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8586     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8587     return Builder.CreateCall(F, {X, M4Value, M5Value});
8588   }
8589
8590   // Vector intrinsics that output the post-instruction CC value.
8591
8592 #define INTRINSIC_WITH_CC(NAME) \
8593     case SystemZ::BI__builtin_##NAME: \
8594       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
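  // For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to:
  //   case SystemZ::BI__builtin_s390_vpkshs:
  //     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);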
8595
8596   INTRINSIC_WITH_CC(s390_vpkshs);
8597   INTRINSIC_WITH_CC(s390_vpksfs);
8598   INTRINSIC_WITH_CC(s390_vpksgs);
8599
8600   INTRINSIC_WITH_CC(s390_vpklshs);
8601   INTRINSIC_WITH_CC(s390_vpklsfs);
8602   INTRINSIC_WITH_CC(s390_vpklsgs);
8603
8604   INTRINSIC_WITH_CC(s390_vceqbs);
8605   INTRINSIC_WITH_CC(s390_vceqhs);
8606   INTRINSIC_WITH_CC(s390_vceqfs);
8607   INTRINSIC_WITH_CC(s390_vceqgs);
8608
8609   INTRINSIC_WITH_CC(s390_vchbs);
8610   INTRINSIC_WITH_CC(s390_vchhs);
8611   INTRINSIC_WITH_CC(s390_vchfs);
8612   INTRINSIC_WITH_CC(s390_vchgs);
8613
8614   INTRINSIC_WITH_CC(s390_vchlbs);
8615   INTRINSIC_WITH_CC(s390_vchlhs);
8616   INTRINSIC_WITH_CC(s390_vchlfs);
8617   INTRINSIC_WITH_CC(s390_vchlgs);
8618
8619   INTRINSIC_WITH_CC(s390_vfaebs);
8620   INTRINSIC_WITH_CC(s390_vfaehs);
8621   INTRINSIC_WITH_CC(s390_vfaefs);
8622
8623   INTRINSIC_WITH_CC(s390_vfaezbs);
8624   INTRINSIC_WITH_CC(s390_vfaezhs);
8625   INTRINSIC_WITH_CC(s390_vfaezfs);
8626
8627   INTRINSIC_WITH_CC(s390_vfeebs);
8628   INTRINSIC_WITH_CC(s390_vfeehs);
8629   INTRINSIC_WITH_CC(s390_vfeefs);
8630
8631   INTRINSIC_WITH_CC(s390_vfeezbs);
8632   INTRINSIC_WITH_CC(s390_vfeezhs);
8633   INTRINSIC_WITH_CC(s390_vfeezfs);
8634
8635   INTRINSIC_WITH_CC(s390_vfenebs);
8636   INTRINSIC_WITH_CC(s390_vfenehs);
8637   INTRINSIC_WITH_CC(s390_vfenefs);
8638
8639   INTRINSIC_WITH_CC(s390_vfenezbs);
8640   INTRINSIC_WITH_CC(s390_vfenezhs);
8641   INTRINSIC_WITH_CC(s390_vfenezfs);
8642
8643   INTRINSIC_WITH_CC(s390_vistrbs);
8644   INTRINSIC_WITH_CC(s390_vistrhs);
8645   INTRINSIC_WITH_CC(s390_vistrfs);
8646
8647   INTRINSIC_WITH_CC(s390_vstrcbs);
8648   INTRINSIC_WITH_CC(s390_vstrchs);
8649   INTRINSIC_WITH_CC(s390_vstrcfs);
8650
8651   INTRINSIC_WITH_CC(s390_vstrczbs);
8652   INTRINSIC_WITH_CC(s390_vstrczhs);
8653   INTRINSIC_WITH_CC(s390_vstrczfs);
8654
8655   INTRINSIC_WITH_CC(s390_vfcedbs);
8656   INTRINSIC_WITH_CC(s390_vfchdbs);
8657   INTRINSIC_WITH_CC(s390_vfchedbs);
8658
8659   INTRINSIC_WITH_CC(s390_vftcidb);
8660
8661 #undef INTRINSIC_WITH_CC
8662
8663   default:
8664     return nullptr;
8665   }
8666 }
8667
8668 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8669                                              const CallExpr *E) {
8670   auto MakeLdg = [&](unsigned IntrinsicID) {
8671     Value *Ptr = EmitScalarExpr(E->getArg(0));
8672     AlignmentSource AlignSource;
8673     clang::CharUnits Align =
8674         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
8675     return Builder.CreateCall(
8676         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8677                                        Ptr->getType()}),
8678         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8679   };
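  // For instance (an illustrative sketch), MakeLdg(Intrinsic::nvvm_ldg_global_i)
  // on an 'int *' argument emits roughly:
  //   call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* %ptr, i32 4)
  // where the second operand is the natural alignment of the pointee type.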
8680   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8681     Value *Ptr = EmitScalarExpr(E->getArg(0));
8682     return Builder.CreateCall(
8683         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8684                                        Ptr->getType()}),
8685         {Ptr, EmitScalarExpr(E->getArg(1))});
8686   };
8687   switch (BuiltinID) {
8688   case NVPTX::BI__nvvm_atom_add_gen_i:
8689   case NVPTX::BI__nvvm_atom_add_gen_l:
8690   case NVPTX::BI__nvvm_atom_add_gen_ll:
8691     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8692
8693   case NVPTX::BI__nvvm_atom_sub_gen_i:
8694   case NVPTX::BI__nvvm_atom_sub_gen_l:
8695   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8696     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8697
8698   case NVPTX::BI__nvvm_atom_and_gen_i:
8699   case NVPTX::BI__nvvm_atom_and_gen_l:
8700   case NVPTX::BI__nvvm_atom_and_gen_ll:
8701     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8702
8703   case NVPTX::BI__nvvm_atom_or_gen_i:
8704   case NVPTX::BI__nvvm_atom_or_gen_l:
8705   case NVPTX::BI__nvvm_atom_or_gen_ll:
8706     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8707
8708   case NVPTX::BI__nvvm_atom_xor_gen_i:
8709   case NVPTX::BI__nvvm_atom_xor_gen_l:
8710   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8711     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8712
8713   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8714   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8715   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8716     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8717
8718   case NVPTX::BI__nvvm_atom_max_gen_i:
8719   case NVPTX::BI__nvvm_atom_max_gen_l:
8720   case NVPTX::BI__nvvm_atom_max_gen_ll:
8721     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8722
8723   case NVPTX::BI__nvvm_atom_max_gen_ui:
8724   case NVPTX::BI__nvvm_atom_max_gen_ul:
8725   case NVPTX::BI__nvvm_atom_max_gen_ull:
8726     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8727
8728   case NVPTX::BI__nvvm_atom_min_gen_i:
8729   case NVPTX::BI__nvvm_atom_min_gen_l:
8730   case NVPTX::BI__nvvm_atom_min_gen_ll:
8731     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8732
8733   case NVPTX::BI__nvvm_atom_min_gen_ui:
8734   case NVPTX::BI__nvvm_atom_min_gen_ul:
8735   case NVPTX::BI__nvvm_atom_min_gen_ull:
8736     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8737
8738   case NVPTX::BI__nvvm_atom_cas_gen_i:
8739   case NVPTX::BI__nvvm_atom_cas_gen_l:
8740   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8741     // __nvvm_atom_cas_gen_* should return the old value rather than the
8742     // success flag.
8743     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8744
8745   case NVPTX::BI__nvvm_atom_add_gen_f: {
8746     Value *Ptr = EmitScalarExpr(E->getArg(0));
8747     Value *Val = EmitScalarExpr(E->getArg(1));
8748     // atomicrmw only deals with integer arguments, so we need to use
8749     // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
8750     Value *FnALAF32 =
8751         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8752     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8753   }
8754
8755   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8756     Value *Ptr = EmitScalarExpr(E->getArg(0));
8757     Value *Val = EmitScalarExpr(E->getArg(1));
8758     Value *FnALI32 =
8759         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8760     return Builder.CreateCall(FnALI32, {Ptr, Val});
8761   }
8762
8763   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8764     Value *Ptr = EmitScalarExpr(E->getArg(0));
8765     Value *Val = EmitScalarExpr(E->getArg(1));
8766     Value *FnALD32 =
8767         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8768     return Builder.CreateCall(FnALD32, {Ptr, Val});
8769   }
8770
8771   case NVPTX::BI__nvvm_ldg_c:
8772   case NVPTX::BI__nvvm_ldg_c2:
8773   case NVPTX::BI__nvvm_ldg_c4:
8774   case NVPTX::BI__nvvm_ldg_s:
8775   case NVPTX::BI__nvvm_ldg_s2:
8776   case NVPTX::BI__nvvm_ldg_s4:
8777   case NVPTX::BI__nvvm_ldg_i:
8778   case NVPTX::BI__nvvm_ldg_i2:
8779   case NVPTX::BI__nvvm_ldg_i4:
8780   case NVPTX::BI__nvvm_ldg_l:
8781   case NVPTX::BI__nvvm_ldg_ll:
8782   case NVPTX::BI__nvvm_ldg_ll2:
8783   case NVPTX::BI__nvvm_ldg_uc:
8784   case NVPTX::BI__nvvm_ldg_uc2:
8785   case NVPTX::BI__nvvm_ldg_uc4:
8786   case NVPTX::BI__nvvm_ldg_us:
8787   case NVPTX::BI__nvvm_ldg_us2:
8788   case NVPTX::BI__nvvm_ldg_us4:
8789   case NVPTX::BI__nvvm_ldg_ui:
8790   case NVPTX::BI__nvvm_ldg_ui2:
8791   case NVPTX::BI__nvvm_ldg_ui4:
8792   case NVPTX::BI__nvvm_ldg_ul:
8793   case NVPTX::BI__nvvm_ldg_ull:
8794   case NVPTX::BI__nvvm_ldg_ull2:
8795     // PTX Interoperability section 2.2: "For a vector with an even number of
8796     // elements, its alignment is set to number of elements times the alignment
8797     // of its member: n*alignof(t)."
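    // For example (illustrative): an int2 loaded via __nvvm_ldg_i2 would be
    // given an alignment of 2 * alignof(int), i.e. 8 bytes on typical targets.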
8798     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8799   case NVPTX::BI__nvvm_ldg_f:
8800   case NVPTX::BI__nvvm_ldg_f2:
8801   case NVPTX::BI__nvvm_ldg_f4:
8802   case NVPTX::BI__nvvm_ldg_d:
8803   case NVPTX::BI__nvvm_ldg_d2:
8804     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8805
8806   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
8807   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
8808   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
8809     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
8810   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
8811   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
8812   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
8813     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
8814   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
8815   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
8816     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
8817   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
8818   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
8819     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
8820   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
8821   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
8822   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
8823     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
8824   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
8825   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
8826   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
8827     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
8828   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
8829   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
8830   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
8831   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
8832   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
8833   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
8834     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
8835   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
8836   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
8837   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
8838   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
8839   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
8840   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
8841     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
8842   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
8843   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
8844   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
8845   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
8846   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
8847   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
8848     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
8849   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
8850   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
8851   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
8852   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
8853   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
8854   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
8855     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
8856   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
8857     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
8858   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
8859     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
8860   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
8861     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
8862   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
8863     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
8864   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
8865   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
8866   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
8867     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
8868   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
8869   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
8870   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
8871     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
8872   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
8873   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
8874   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
8875     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
8876   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
8877   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
8878   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
8879     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
8880   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
8881   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
8882   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
8883     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
8884   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
8885   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
8886   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
8887     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
8888   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
8889   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
8890   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
8891     Value *Ptr = EmitScalarExpr(E->getArg(0));
8892     return Builder.CreateCall(
8893         CGM.getIntrinsic(
8894             Intrinsic::nvvm_atomic_cas_gen_i_cta,
8895             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8896         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8897   }
8898   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
8899   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
8900   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
8901     Value *Ptr = EmitScalarExpr(E->getArg(0));
8902     return Builder.CreateCall(
8903         CGM.getIntrinsic(
8904             Intrinsic::nvvm_atomic_cas_gen_i_sys,
8905             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8906         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8907   }
8908   default:
8909     return nullptr;
8910   }
8911 }
8912
8913 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8914                                                    const CallExpr *E) {
8915   switch (BuiltinID) {
8916   case WebAssembly::BI__builtin_wasm_current_memory: {
8917     llvm::Type *ResultType = ConvertType(E->getType());
8918     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8919     return Builder.CreateCall(Callee);
8920   }
8921   case WebAssembly::BI__builtin_wasm_grow_memory: {
8922     Value *X = EmitScalarExpr(E->getArg(0));
8923     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8924     return Builder.CreateCall(Callee, X);
8925   }
8926
8927   default:
8928     return nullptr;
8929   }
8930 }