1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40   return std::min(High, std::max(Low, Value));
41 }
42
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
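// Illustrative sketch: for a builtin id naming "__builtin_cosf" (and no asm
// label), the "+ 10" above skips the 10-character "__builtin_" prefix, so the
// function declared or reused here is plain "cosf"; which builtins actually
// take this path depends on the callers below.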
66
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75
76   assert(V->getType() == IntType);
77   return V;
78 }
79
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86
87   assert(V->getType() == ResultType);
88   return V;
89 }
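// These two helpers let the atomic paths below work uniformly on integers:
// a pointer-typed operand is converted with ptrtoint before the atomic
// instruction and converted back with inttoptr afterwards, e.g. (sketch)
//
//   void *old = __sync_lock_test_and_set(&p, q);  // p, q have pointer type
//   ; ptrtoint both values, atomicrmw xchg on iN, inttoptr the old value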
90
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
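// For example, with 'int i', a call such as
//
//   int old = __sync_fetch_and_add(&i, 1);
//
// is emitted through this helper roughly as
//
//   %old = atomicrmw add i32* %i.addr, i32 1 seq_cst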
120
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
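// These helpers back __builtin_nontemporal_store/__builtin_nontemporal_load:
// the emitted store or load simply carries !nontemporal metadata, e.g. (sketch)
//
//   __builtin_nontemporal_store(v, ptr);
//   ; store i32 %v, i32* %ptr, align 4, !nontemporal !0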
142
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
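// For example, __sync_add_and_fetch(&i, n) comes through here as an
// 'atomicrmw add' followed by an ordinary 'add' to recompute the post-op
// value; the Invert flag covers __sync_nand_and_fetch, which additionally
// xors the recomputed value with -1.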
186
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
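// Sketch of the two uses: __sync_bool_compare_and_swap maps to ReturnBool=true
// (the i1 success flag of the cmpxchg pair, zero-extended to int), while
// __sync_val_compare_and_swap maps to ReturnBool=false (the old value).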
227
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
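// Illustrative use of the helpers above: the fabs/ceil/floor/... cases below go
// through emitUnaryBuiltin and copysign/fmin/fmax through emitBinaryBuiltin, so
// e.g. __builtin_ceil(x) on a double becomes roughly
//
//   %r = call double @llvm.ceil.f64(double %x)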
272
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating the value in order to extract the higher-order
304     // double, which we will be using to extract the sign from.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
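// For an ordinary IEEE float or double this is just a bitcast to iN followed by
// 'icmp slt iN %bits, 0', i.e. a test of the top (sign) bit; the PPC_FP128 case
// above first narrows the i128 to the half holding the higher-order double.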
311
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
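// For example, a signed checked add goes through here roughly as
//
//   %pair  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum   = extractvalue { i32, i1 } %pair, 0
//   %carry = extractvalue { i32, i1 } %pair, 1
//
// giving the callers below both the result and the overflow flag.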
341
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
352
353 namespace {
354   struct WidthAndSignedness {
355     unsigned Width;
356     bool Signed;
357   };
358 }
359
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Additionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393
394   return {Width, Signed};
395 }
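// Worked example: for {unsigned int, short} the inputs are (32, unsigned) and
// (16, signed); the result must be signed, and the unsigned 32-bit member then
// forces a minimum width of 33, so the encompassing type is a signed 33-bit
// integer.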
396
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
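// I.e. __builtin_va_start/__builtin_va_end become calls to the llvm.va_start /
// llvm.va_end intrinsics on the va_list pointer, bitcast to i8* when needed.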
406
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct
409 static bool areBOSTypesCompatible(int From, int To) {
410   // Note: Our __builtin_object_size implementation currently treats Type=0 and
411   // Type=2 identically. Encoding this implementation detail here may make
412   // improving __builtin_object_size difficult in the future, so it's omitted.
413   return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
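// I.e. the "don't know" answer is -1 (all-ones, i.e. SIZE_MAX) for types 0 and
// 1, and 0 for the minimum-size types 2 and 3, matching the documented
// behaviour of __builtin_object_size.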
420
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType) {
424   uint64_t ObjectSize;
425   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
426     return emitBuiltinObjectSize(E, Type, ResType);
427   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
428 }
429
430 /// Returns a Value corresponding to the size of the given expression.
431 /// This Value may be either of the following:
432 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
433 ///     it)
434 ///   - A call to the @llvm.objectsize intrinsic
435 llvm::Value *
436 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
437                                        llvm::IntegerType *ResType) {
438   // We need to reference an argument if the pointer is a parameter with the
439   // pass_object_size attribute.
440   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
441     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
442     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
443     if (Param != nullptr && PS != nullptr &&
444         areBOSTypesCompatible(PS->getType(), Type)) {
445       auto Iter = SizeArguments.find(Param);
446       assert(Iter != SizeArguments.end());
447
448       const ImplicitParamDecl *D = Iter->second;
449       auto DIter = LocalDeclMap.find(D);
450       assert(DIter != LocalDeclMap.end());
451
452       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
453                               getContext().getSizeType(), E->getLocStart());
454     }
455   }
456
457   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
458   // evaluate E for side-effects. In either case, we shouldn't lower to
459   // @llvm.objectsize.
460   if (Type == 3 || E->HasSideEffects(getContext()))
461     return getDefaultBuiltinObjectSizeResult(Type, ResType);
462
463   // LLVM only supports 0 and 2, so make sure that we pass that along
464   // as a boolean.
465   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
466   // FIXME: Get right address space.
467   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
468   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
469   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
470 }
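// The emitted call looks roughly like
//
//   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %min)
//
// where %min is true for types 2/3 (minimum object size) and false for
// types 0/1.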
471
472 // Many of the MSVC builtins are available on both x64 and ARM; to avoid
473 // repeating code, we handle them here.
474 enum class CodeGenFunction::MSVCIntrin {
475   _BitScanForward,
476   _BitScanReverse,
477   _InterlockedAnd,
478   _InterlockedDecrement,
479   _InterlockedExchange,
480   _InterlockedExchangeAdd,
481   _InterlockedExchangeSub,
482   _InterlockedIncrement,
483   _InterlockedOr,
484   _InterlockedXor,
485 };
486
487 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
488   const CallExpr *E) {
489   switch (BuiltinID) {
490   case MSVCIntrin::_BitScanForward:
491   case MSVCIntrin::_BitScanReverse: {
492     Value *ArgValue = EmitScalarExpr(E->getArg(1));
493
494     llvm::Type *ArgType = ArgValue->getType();
495     llvm::Type *IndexType =
496       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
497     llvm::Type *ResultType = ConvertType(E->getType());
498
499     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
500     Value *ResZero = llvm::Constant::getNullValue(ResultType);
501     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
502
503     BasicBlock *Begin = Builder.GetInsertBlock();
504     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
505     Builder.SetInsertPoint(End);
506     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
507
508     Builder.SetInsertPoint(Begin);
509     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
510     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
511     Builder.CreateCondBr(IsZero, End, NotZero);
512     Result->addIncoming(ResZero, Begin);
513
514     Builder.SetInsertPoint(NotZero);
515     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
516
517     if (BuiltinID == MSVCIntrin::_BitScanForward) {
518       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
519       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
520       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
521       Builder.CreateStore(ZeroCount, IndexAddress, false);
522     } else {
523       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
524       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
525
526       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
527       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
528       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
529       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
530       Builder.CreateStore(Index, IndexAddress, false);
531     }
532     Builder.CreateBr(End);
533     Result->addIncoming(ResOne, NotZero);
534
535     Builder.SetInsertPoint(End);
536     return Result;
537   }
538   case MSVCIntrin::_InterlockedAnd:
539     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
540   case MSVCIntrin::_InterlockedExchange:
541     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
542   case MSVCIntrin::_InterlockedExchangeAdd:
543     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
544   case MSVCIntrin::_InterlockedExchangeSub:
545     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
546   case MSVCIntrin::_InterlockedOr:
547     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
548   case MSVCIntrin::_InterlockedXor:
549     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
550
551   case MSVCIntrin::_InterlockedDecrement: {
552     llvm::Type *IntTy = ConvertType(E->getType());
553     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
554       AtomicRMWInst::Sub,
555       EmitScalarExpr(E->getArg(0)),
556       ConstantInt::get(IntTy, 1),
557       llvm::AtomicOrdering::SequentiallyConsistent);
558     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
559   }
560   case MSVCIntrin::_InterlockedIncrement: {
561     llvm::Type *IntTy = ConvertType(E->getType());
562     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
563       AtomicRMWInst::Add,
564       EmitScalarExpr(E->getArg(0)),
565       ConstantInt::get(IntTy, 1),
566       llvm::AtomicOrdering::SequentiallyConsistent);
567     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
568   }
569   }
570   llvm_unreachable("Incorrect MSVC intrinsic!");
571 }
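// Sketch of the _BitScan lowering above: when the argument is zero the function
// returns 0 and leaves *Index untouched; otherwise it stores cttz(arg) (or, for
// _BitScanReverse, bit-width - 1 - ctlz(arg)) to *Index and returns 1.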
572
573 namespace {
574 // ARC cleanup for __builtin_os_log_format
575 struct CallObjCArcUse final : EHScopeStack::Cleanup {
576   CallObjCArcUse(llvm::Value *object) : object(object) {}
577   llvm::Value *object;
578
579   void Emit(CodeGenFunction &CGF, Flags flags) override {
580     CGF.EmitARCIntrinsicUse(object);
581   }
582 };
583 }
584
585 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
586                                         unsigned BuiltinID, const CallExpr *E,
587                                         ReturnValueSlot ReturnValue) {
588   // See if we can constant fold this builtin.  If so, don't emit it at all.
589   Expr::EvalResult Result;
590   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
591       !Result.hasSideEffects()) {
592     if (Result.Val.isInt())
593       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
594                                                 Result.Val.getInt()));
595     if (Result.Val.isFloat())
596       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
597                                                Result.Val.getFloat()));
598   }
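  // E.g. a call such as __builtin_popcount(0xF0) is folded to the constant 4
  // right here and never reaches the switch below.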
599
600   switch (BuiltinID) {
601   default: break;  // Handle intrinsics and libm functions below.
602   case Builtin::BI__builtin___CFStringMakeConstantString:
603   case Builtin::BI__builtin___NSStringMakeConstantString:
604     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
605   case Builtin::BI__builtin_stdarg_start:
606   case Builtin::BI__builtin_va_start:
607   case Builtin::BI__va_start:
608   case Builtin::BI__builtin_va_end:
609     return RValue::get(
610         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
611                            ? EmitScalarExpr(E->getArg(0))
612                            : EmitVAListRef(E->getArg(0)).getPointer(),
613                        BuiltinID != Builtin::BI__builtin_va_end));
614   case Builtin::BI__builtin_va_copy: {
615     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
616     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
617
618     llvm::Type *Type = Int8PtrTy;
619
620     DstPtr = Builder.CreateBitCast(DstPtr, Type);
621     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
622     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
623                                           {DstPtr, SrcPtr}));
624   }
625   case Builtin::BI__builtin_abs:
626   case Builtin::BI__builtin_labs:
627   case Builtin::BI__builtin_llabs: {
628     Value *ArgValue = EmitScalarExpr(E->getArg(0));
629
630     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
631     Value *CmpResult =
632     Builder.CreateICmpSGE(ArgValue,
633                           llvm::Constant::getNullValue(ArgValue->getType()),
634                                                             "abscond");
635     Value *Result =
636       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
637
638     return RValue::get(Result);
639   }
640   case Builtin::BI__builtin_fabs:
641   case Builtin::BI__builtin_fabsf:
642   case Builtin::BI__builtin_fabsl: {
643     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
644   }
645   case Builtin::BI__builtin_fmod:
646   case Builtin::BI__builtin_fmodf:
647   case Builtin::BI__builtin_fmodl: {
648     Value *Arg1 = EmitScalarExpr(E->getArg(0));
649     Value *Arg2 = EmitScalarExpr(E->getArg(1));
650     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
651     return RValue::get(Result);
652   }
653   case Builtin::BI__builtin_copysign:
654   case Builtin::BI__builtin_copysignf:
655   case Builtin::BI__builtin_copysignl: {
656     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
657   }
658   case Builtin::BI__builtin_ceil:
659   case Builtin::BI__builtin_ceilf:
660   case Builtin::BI__builtin_ceill: {
661     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
662   }
663   case Builtin::BI__builtin_floor:
664   case Builtin::BI__builtin_floorf:
665   case Builtin::BI__builtin_floorl: {
666     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
667   }
668   case Builtin::BI__builtin_trunc:
669   case Builtin::BI__builtin_truncf:
670   case Builtin::BI__builtin_truncl: {
671     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
672   }
673   case Builtin::BI__builtin_rint:
674   case Builtin::BI__builtin_rintf:
675   case Builtin::BI__builtin_rintl: {
676     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
677   }
678   case Builtin::BI__builtin_nearbyint:
679   case Builtin::BI__builtin_nearbyintf:
680   case Builtin::BI__builtin_nearbyintl: {
681     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
682   }
683   case Builtin::BI__builtin_round:
684   case Builtin::BI__builtin_roundf:
685   case Builtin::BI__builtin_roundl: {
686     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
687   }
688   case Builtin::BI__builtin_fmin:
689   case Builtin::BI__builtin_fminf:
690   case Builtin::BI__builtin_fminl: {
691     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
692   }
693   case Builtin::BI__builtin_fmax:
694   case Builtin::BI__builtin_fmaxf:
695   case Builtin::BI__builtin_fmaxl: {
696     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
697   }
698   case Builtin::BI__builtin_conj:
699   case Builtin::BI__builtin_conjf:
700   case Builtin::BI__builtin_conjl: {
701     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
702     Value *Real = ComplexVal.first;
703     Value *Imag = ComplexVal.second;
704     Value *Zero =
705       Imag->getType()->isFPOrFPVectorTy()
706         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
707         : llvm::Constant::getNullValue(Imag->getType());
708
709     Imag = Builder.CreateFSub(Zero, Imag, "sub");
710     return RValue::getComplex(std::make_pair(Real, Imag));
711   }
712   case Builtin::BI__builtin_creal:
713   case Builtin::BI__builtin_crealf:
714   case Builtin::BI__builtin_creall:
715   case Builtin::BIcreal:
716   case Builtin::BIcrealf:
717   case Builtin::BIcreall: {
718     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
719     return RValue::get(ComplexVal.first);
720   }
721
722   case Builtin::BI__builtin_cimag:
723   case Builtin::BI__builtin_cimagf:
724   case Builtin::BI__builtin_cimagl:
725   case Builtin::BIcimag:
726   case Builtin::BIcimagf:
727   case Builtin::BIcimagl: {
728     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
729     return RValue::get(ComplexVal.second);
730   }
731
732   case Builtin::BI__builtin_ctzs:
733   case Builtin::BI__builtin_ctz:
734   case Builtin::BI__builtin_ctzl:
735   case Builtin::BI__builtin_ctzll: {
736     Value *ArgValue = EmitScalarExpr(E->getArg(0));
737
738     llvm::Type *ArgType = ArgValue->getType();
739     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
740
741     llvm::Type *ResultType = ConvertType(E->getType());
742     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
743     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
744     if (Result->getType() != ResultType)
745       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
746                                      "cast");
747     return RValue::get(Result);
748   }
749   case Builtin::BI__builtin_clzs:
750   case Builtin::BI__builtin_clz:
751   case Builtin::BI__builtin_clzl:
752   case Builtin::BI__builtin_clzll: {
753     Value *ArgValue = EmitScalarExpr(E->getArg(0));
754
755     llvm::Type *ArgType = ArgValue->getType();
756     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
757
758     llvm::Type *ResultType = ConvertType(E->getType());
759     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
760     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
761     if (Result->getType() != ResultType)
762       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
763                                      "cast");
764     return RValue::get(Result);
765   }
766   case Builtin::BI__builtin_ffs:
767   case Builtin::BI__builtin_ffsl:
768   case Builtin::BI__builtin_ffsll: {
769     // ffs(x) -> x ? cttz(x) + 1 : 0
770     Value *ArgValue = EmitScalarExpr(E->getArg(0));
771
772     llvm::Type *ArgType = ArgValue->getType();
773     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
774
775     llvm::Type *ResultType = ConvertType(E->getType());
776     Value *Tmp =
777         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
778                           llvm::ConstantInt::get(ArgType, 1));
779     Value *Zero = llvm::Constant::getNullValue(ArgType);
780     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
781     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
782     if (Result->getType() != ResultType)
783       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
784                                      "cast");
785     return RValue::get(Result);
786   }
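  // E.g. for the ffs lowering above, ffs(8) == 4 and ffs(0) == 0.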
787   case Builtin::BI__builtin_parity:
788   case Builtin::BI__builtin_parityl:
789   case Builtin::BI__builtin_parityll: {
790     // parity(x) -> ctpop(x) & 1
791     Value *ArgValue = EmitScalarExpr(E->getArg(0));
792
793     llvm::Type *ArgType = ArgValue->getType();
794     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
795
796     llvm::Type *ResultType = ConvertType(E->getType());
797     Value *Tmp = Builder.CreateCall(F, ArgValue);
798     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
799     if (Result->getType() != ResultType)
800       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
801                                      "cast");
802     return RValue::get(Result);
803   }
804   case Builtin::BI__popcnt16:
805   case Builtin::BI__popcnt:
806   case Builtin::BI__popcnt64:
807   case Builtin::BI__builtin_popcount:
808   case Builtin::BI__builtin_popcountl:
809   case Builtin::BI__builtin_popcountll: {
810     Value *ArgValue = EmitScalarExpr(E->getArg(0));
811
812     llvm::Type *ArgType = ArgValue->getType();
813     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
814
815     llvm::Type *ResultType = ConvertType(E->getType());
816     Value *Result = Builder.CreateCall(F, ArgValue);
817     if (Result->getType() != ResultType)
818       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
819                                      "cast");
820     return RValue::get(Result);
821   }
822   case Builtin::BI_rotr8:
823   case Builtin::BI_rotr16:
824   case Builtin::BI_rotr:
825   case Builtin::BI_lrotr:
826   case Builtin::BI_rotr64: {
827     Value *Val = EmitScalarExpr(E->getArg(0));
828     Value *Shift = EmitScalarExpr(E->getArg(1));
829
830     llvm::Type *ArgType = Val->getType();
831     Shift = Builder.CreateIntCast(Shift, ArgType, false);
832     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
833     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
834     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
835
836     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
837     Shift = Builder.CreateAnd(Shift, Mask);
838     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
839
840     Value *RightShifted = Builder.CreateLShr(Val, Shift);
841     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
842     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
843
844     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
845     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
846     return RValue::get(Result);
847   }
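  // E.g. for the _rotr lowering above, _rotr(0x12345678u, 8) == 0x78123456 via
  // the shift/or sequence; the final select keeps a zero shift count from
  // producing an out-of-range (poison) 'shl' by the full bit width.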
848   case Builtin::BI_rotl8:
849   case Builtin::BI_rotl16:
850   case Builtin::BI_rotl:
851   case Builtin::BI_lrotl:
852   case Builtin::BI_rotl64: {
853     Value *Val = EmitScalarExpr(E->getArg(0));
854     Value *Shift = EmitScalarExpr(E->getArg(1));
855
856     llvm::Type *ArgType = Val->getType();
857     Shift = Builder.CreateIntCast(Shift, ArgType, false);
858     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
859     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
860     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
861
862     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
863     Shift = Builder.CreateAnd(Shift, Mask);
864     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
865
866     Value *LeftShifted = Builder.CreateShl(Val, Shift);
867     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
868     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
869
870     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
871     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
872     return RValue::get(Result);
873   }
874   case Builtin::BI__builtin_unpredictable: {
875     // Always return the argument of __builtin_unpredictable. LLVM does not
876     // handle this builtin. Metadata for this builtin should be added directly
877     // to instructions such as branches or switches that use it.
878     return RValue::get(EmitScalarExpr(E->getArg(0)));
879   }
880   case Builtin::BI__builtin_expect: {
881     Value *ArgValue = EmitScalarExpr(E->getArg(0));
882     llvm::Type *ArgType = ArgValue->getType();
883
884     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
885     // Don't generate llvm.expect on -O0 as the backend won't use it for
886     // anything.
887     // Note, we still IRGen ExpectedValue because it could have side-effects.
888     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
889       return RValue::get(ArgValue);
890
891     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
892     Value *Result =
893         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
894     return RValue::get(Result);
895   }
896   case Builtin::BI__builtin_assume_aligned: {
897     Value *PtrValue = EmitScalarExpr(E->getArg(0));
898     Value *OffsetValue =
899       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
900
901     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
902     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
903     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
904
905     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
906     return RValue::get(PtrValue);
907   }
908   case Builtin::BI__assume:
909   case Builtin::BI__builtin_assume: {
910     if (E->getArg(0)->HasSideEffects(getContext()))
911       return RValue::get(nullptr);
912
913     Value *ArgValue = EmitScalarExpr(E->getArg(0));
914     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
915     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
916   }
917   case Builtin::BI__builtin_bswap16:
918   case Builtin::BI__builtin_bswap32:
919   case Builtin::BI__builtin_bswap64: {
920     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
921   }
922   case Builtin::BI__builtin_bitreverse8:
923   case Builtin::BI__builtin_bitreverse16:
924   case Builtin::BI__builtin_bitreverse32:
925   case Builtin::BI__builtin_bitreverse64: {
926     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
927   }
928   case Builtin::BI__builtin_object_size: {
929     unsigned Type =
930         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
931     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
932
933     // We pass this builtin onto the optimizer so that it can figure out the
934     // object size in more complex cases.
935     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
936   }
937   case Builtin::BI__builtin_prefetch: {
938     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
939     // FIXME: Technically these constants should be of type 'int', yes?
940     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
941       llvm::ConstantInt::get(Int32Ty, 0);
942     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
943       llvm::ConstantInt::get(Int32Ty, 3);
944     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
945     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
946     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
947   }
948   case Builtin::BI__builtin_readcyclecounter: {
949     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
950     return RValue::get(Builder.CreateCall(F));
951   }
952   case Builtin::BI__builtin___clear_cache: {
953     Value *Begin = EmitScalarExpr(E->getArg(0));
954     Value *End = EmitScalarExpr(E->getArg(1));
955     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
956     return RValue::get(Builder.CreateCall(F, {Begin, End}));
957   }
958   case Builtin::BI__builtin_trap:
959     return RValue::get(EmitTrapCall(Intrinsic::trap));
960   case Builtin::BI__debugbreak:
961     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
962   case Builtin::BI__builtin_unreachable: {
963     if (SanOpts.has(SanitizerKind::Unreachable)) {
964       SanitizerScope SanScope(this);
965       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
966                                SanitizerKind::Unreachable),
967                 SanitizerHandler::BuiltinUnreachable,
968                 EmitCheckSourceLocation(E->getExprLoc()), None);
969     } else
970       Builder.CreateUnreachable();
971
972     // We do need to preserve an insertion point.
973     EmitBlock(createBasicBlock("unreachable.cont"));
974
975     return RValue::get(nullptr);
976   }
977
978   case Builtin::BI__builtin_powi:
979   case Builtin::BI__builtin_powif:
980   case Builtin::BI__builtin_powil: {
981     Value *Base = EmitScalarExpr(E->getArg(0));
982     Value *Exponent = EmitScalarExpr(E->getArg(1));
983     llvm::Type *ArgType = Base->getType();
984     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
985     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
986   }
987
988   case Builtin::BI__builtin_isgreater:
989   case Builtin::BI__builtin_isgreaterequal:
990   case Builtin::BI__builtin_isless:
991   case Builtin::BI__builtin_islessequal:
992   case Builtin::BI__builtin_islessgreater:
993   case Builtin::BI__builtin_isunordered: {
994     // Ordered comparisons: we know the arguments to these are matching scalar
995     // floating point values.
996     Value *LHS = EmitScalarExpr(E->getArg(0));
997     Value *RHS = EmitScalarExpr(E->getArg(1));
998
999     switch (BuiltinID) {
1000     default: llvm_unreachable("Unknown ordered comparison");
1001     case Builtin::BI__builtin_isgreater:
1002       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1003       break;
1004     case Builtin::BI__builtin_isgreaterequal:
1005       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1006       break;
1007     case Builtin::BI__builtin_isless:
1008       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1009       break;
1010     case Builtin::BI__builtin_islessequal:
1011       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1012       break;
1013     case Builtin::BI__builtin_islessgreater:
1014       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1015       break;
1016     case Builtin::BI__builtin_isunordered:
1017       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1018       break;
1019     }
1020     // ZExt bool to int type.
1021     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1022   }
1023   case Builtin::BI__builtin_isnan: {
1024     Value *V = EmitScalarExpr(E->getArg(0));
1025     V = Builder.CreateFCmpUNO(V, V, "cmp");
1026     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1027   }
1028
1029   case Builtin::BIfinite:
1030   case Builtin::BI__finite:
1031   case Builtin::BIfinitef:
1032   case Builtin::BI__finitef:
1033   case Builtin::BIfinitel:
1034   case Builtin::BI__finitel:
1035   case Builtin::BI__builtin_isinf:
1036   case Builtin::BI__builtin_isfinite: {
1037     // isinf(x)    --> fabs(x) == infinity
1038     // isfinite(x) --> fabs(x) != infinity
1039     // x != NaN via the ordered compare in either case.
1040     Value *V = EmitScalarExpr(E->getArg(0));
1041     Value *Fabs = EmitFAbs(*this, V);
1042     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1043     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1044                                   ? CmpInst::FCMP_OEQ
1045                                   : CmpInst::FCMP_ONE;
1046     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1047     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1048   }
1049
1050   case Builtin::BI__builtin_isinf_sign: {
1051     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1052     Value *Arg = EmitScalarExpr(E->getArg(0));
1053     Value *AbsArg = EmitFAbs(*this, Arg);
1054     Value *IsInf = Builder.CreateFCmpOEQ(
1055         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1056     Value *IsNeg = EmitSignBit(*this, Arg);
1057
1058     llvm::Type *IntTy = ConvertType(E->getType());
1059     Value *Zero = Constant::getNullValue(IntTy);
1060     Value *One = ConstantInt::get(IntTy, 1);
1061     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1062     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1063     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1064     return RValue::get(Result);
1065   }
1066
1067   case Builtin::BI__builtin_isnormal: {
1068     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1069     Value *V = EmitScalarExpr(E->getArg(0));
1070     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1071
1072     Value *Abs = EmitFAbs(*this, V);
1073     Value *IsLessThanInf =
1074       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1075     APFloat Smallest = APFloat::getSmallestNormalized(
1076                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1077     Value *IsNormal =
1078       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1079                             "isnormal");
1080     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1081     V = Builder.CreateAnd(V, IsNormal, "and");
1082     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1083   }
1084
1085   case Builtin::BI__builtin_fpclassify: {
1086     Value *V = EmitScalarExpr(E->getArg(5));
1087     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1088
1089     // Create Result
1090     BasicBlock *Begin = Builder.GetInsertBlock();
1091     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1092     Builder.SetInsertPoint(End);
1093     PHINode *Result =
1094       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1095                         "fpclassify_result");
1096
1097     // if (V==0) return FP_ZERO
1098     Builder.SetInsertPoint(Begin);
1099     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1100                                           "iszero");
1101     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1102     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1103     Builder.CreateCondBr(IsZero, End, NotZero);
1104     Result->addIncoming(ZeroLiteral, Begin);
1105
1106     // if (V != V) return FP_NAN
1107     Builder.SetInsertPoint(NotZero);
1108     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1109     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1110     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1111     Builder.CreateCondBr(IsNan, End, NotNan);
1112     Result->addIncoming(NanLiteral, NotZero);
1113
1114     // if (fabs(V) == infinity) return FP_INFINITY
1115     Builder.SetInsertPoint(NotNan);
1116     Value *VAbs = EmitFAbs(*this, V);
1117     Value *IsInf =
1118       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1119                             "isinf");
1120     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1121     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1122     Builder.CreateCondBr(IsInf, End, NotInf);
1123     Result->addIncoming(InfLiteral, NotNan);
1124
1125     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1126     Builder.SetInsertPoint(NotInf);
1127     APFloat Smallest = APFloat::getSmallestNormalized(
1128         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1129     Value *IsNormal =
1130       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1131                             "isnormal");
1132     Value *NormalResult =
1133       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1134                            EmitScalarExpr(E->getArg(3)));
1135     Builder.CreateBr(End);
1136     Result->addIncoming(NormalResult, NotInf);
1137
1138     // return Result
1139     Builder.SetInsertPoint(End);
1140     return RValue::get(Result);
1141   }
1142
1143   case Builtin::BIalloca:
1144   case Builtin::BI_alloca:
1145   case Builtin::BI__builtin_alloca: {
1146     Value *Size = EmitScalarExpr(E->getArg(0));
1147     const TargetInfo &TI = getContext().getTargetInfo();
1148     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1149     unsigned SuitableAlignmentInBytes =
1150         CGM.getContext()
1151             .toCharUnitsFromBits(TI.getSuitableAlign())
1152             .getQuantity();
1153     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1154     AI->setAlignment(SuitableAlignmentInBytes);
1155     return RValue::get(AI);
1156   }
1157
1158   case Builtin::BI__builtin_alloca_with_align: {
1159     Value *Size = EmitScalarExpr(E->getArg(0));
1160     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1161     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1162     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1163     unsigned AlignmentInBytes =
1164         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1165     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1166     AI->setAlignment(AlignmentInBytes);
1167     return RValue::get(AI);
1168   }
1169
1170   case Builtin::BIbzero:
1171   case Builtin::BI__builtin_bzero: {
1172     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1173     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1174     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1175                         E->getArg(0)->getExprLoc(), FD, 0);
1176     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1177     return RValue::get(Dest.getPointer());
1178   }
1179   case Builtin::BImemcpy:
1180   case Builtin::BI__builtin_memcpy: {
1181     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1182     Address Src = EmitPointerWithAlignment(E->getArg(1));
1183     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1184     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1185                         E->getArg(0)->getExprLoc(), FD, 0);
1186     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1187                         E->getArg(1)->getExprLoc(), FD, 1);
1188     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1189     return RValue::get(Dest.getPointer());
1190   }
1191
1192   case Builtin::BI__builtin___memcpy_chk: {
1193     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1194     llvm::APSInt Size, DstSize;
1195     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1196         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1197       break;
1198     if (Size.ugt(DstSize))
1199       break;
1200     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1201     Address Src = EmitPointerWithAlignment(E->getArg(1));
1202     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1203     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1204     return RValue::get(Dest.getPointer());
1205   }
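  // E.g. __builtin___memcpy_chk(d, s, 16, 32) folds to a plain 16-byte memcpy
  // since 16 <= 32; when the sizes are not constants or the copy may overflow,
  // the 'break' above falls through to the normal library-call path.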
1206
1207   case Builtin::BI__builtin_objc_memmove_collectable: {
1208     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1209     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1210     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1211     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1212                                                   DestAddr, SrcAddr, SizeVal);
1213     return RValue::get(DestAddr.getPointer());
1214   }
1215
1216   case Builtin::BI__builtin___memmove_chk: {
1217     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1218     llvm::APSInt Size, DstSize;
1219     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1220         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1221       break;
1222     if (Size.ugt(DstSize))
1223       break;
1224     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1225     Address Src = EmitPointerWithAlignment(E->getArg(1));
1226     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1227     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1228     return RValue::get(Dest.getPointer());
1229   }
1230
1231   case Builtin::BImemmove:
1232   case Builtin::BI__builtin_memmove: {
1233     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1234     Address Src = EmitPointerWithAlignment(E->getArg(1));
1235     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1236     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1237                         E->getArg(0)->getExprLoc(), FD, 0);
1238     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1239                         E->getArg(1)->getExprLoc(), FD, 1);
1240     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1241     return RValue::get(Dest.getPointer());
1242   }
1243   case Builtin::BImemset:
1244   case Builtin::BI__builtin_memset: {
1245     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1246     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1247                                          Builder.getInt8Ty());
1248     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1249     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1250                         E->getArg(0)->getExprLoc(), FD, 0);
1251     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1252     return RValue::get(Dest.getPointer());
1253   }
1254   case Builtin::BI__builtin___memset_chk: {
1255     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1256     llvm::APSInt Size, DstSize;
1257     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1258         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1259       break;
1260     if (Size.ugt(DstSize))
1261       break;
1262     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1263     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1264                                          Builder.getInt8Ty());
1265     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1266     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1267     return RValue::get(Dest.getPointer());
1268   }
1269   case Builtin::BI__builtin_dwarf_cfa: {
1270     // The offset in bytes from the first argument to the CFA.
1271     //
1272     // Why on earth is this in the frontend?  Is there any reason at
1273     // all that the backend can't reasonably determine this while
1274     // lowering llvm.eh.dwarf.cfa()?
1275     //
1276     // TODO: If there's a satisfactory reason, add a target hook for
1277     // this instead of hard-coding 0, which is correct for most targets.
1278     int32_t Offset = 0;
1279
1280     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1281     return RValue::get(Builder.CreateCall(F,
1282                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1283   }
1284   case Builtin::BI__builtin_return_address: {
1285     Value *Depth =
1286         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1287     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1288     return RValue::get(Builder.CreateCall(F, Depth));
1289   }
1290   case Builtin::BI_ReturnAddress: {
1291     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1292     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1293   }
1294   case Builtin::BI__builtin_frame_address: {
1295     Value *Depth =
1296         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1297     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1298     return RValue::get(Builder.CreateCall(F, Depth));
1299   }
1300   case Builtin::BI__builtin_extract_return_addr: {
1301     Value *Address = EmitScalarExpr(E->getArg(0));
1302     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1303     return RValue::get(Result);
1304   }
1305   case Builtin::BI__builtin_frob_return_addr: {
1306     Value *Address = EmitScalarExpr(E->getArg(0));
1307     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1308     return RValue::get(Result);
1309   }
1310   case Builtin::BI__builtin_dwarf_sp_column: {
1311     llvm::IntegerType *Ty
1312       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1313     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1314     if (Column == -1) {
1315       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1316       return RValue::get(llvm::UndefValue::get(Ty));
1317     }
1318     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1319   }
1320   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1321     Value *Address = EmitScalarExpr(E->getArg(0));
1322     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1323       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1324     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1325   }
1326   case Builtin::BI__builtin_eh_return: {
1327     Value *Int = EmitScalarExpr(E->getArg(0));
1328     Value *Ptr = EmitScalarExpr(E->getArg(1));
1329
1330     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1331     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1332            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1333     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1334                                   ? Intrinsic::eh_return_i32
1335                                   : Intrinsic::eh_return_i64);
1336     Builder.CreateCall(F, {Int, Ptr});
1337     Builder.CreateUnreachable();
1338
1339     // We do need to preserve an insertion point.
1340     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1341
1342     return RValue::get(nullptr);
1343   }
1344   case Builtin::BI__builtin_unwind_init: {
1345     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1346     return RValue::get(Builder.CreateCall(F));
1347   }
1348   case Builtin::BI__builtin_extend_pointer: {
1349     // Extends a pointer to the size of an _Unwind_Word, which is
1350     // uint64_t on all platforms.  Generally this gets poked into a
1351     // register and eventually used as an address, so if the
1352     // addressing registers are wider than pointers and the platform
1353     // doesn't implicitly ignore high-order bits when doing
1354     // addressing, we need to make sure we zext / sext based on
1355     // the platform's expectations.
1356     //
1357     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
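         //
         // For example, on a hypothetical 32-bit target whose hook requests
         // sign extension, this lowers roughly to:
         //   %extend.cast = ptrtoint i8* %p to i32
         //   %extend.sext = sext i32 %extend.cast to i64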
1358
1359     // Cast the pointer to intptr_t.
1360     Value *Ptr = EmitScalarExpr(E->getArg(0));
1361     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1362
1363     // If that's 64 bits, we're done.
1364     if (IntPtrTy->getBitWidth() == 64)
1365       return RValue::get(Result);
1366
1367     // Otherwise, ask the target hooks whether to sign- or zero-extend.
1368     if (getTargetHooks().extendPointerWithSExt())
1369       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1370     else
1371       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1372   }
1373   case Builtin::BI__builtin_setjmp: {
1374     // Buffer is a void**.
1375     Address Buf = EmitPointerWithAlignment(E->getArg(0));
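         // Slot 0 holds the frame pointer and slot 2 holds the stack pointer;
         // slot 1 is reserved for the lowering of llvm.eh.sjlj.setjmp to fill
         // in with the address at which longjmp should resume.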
1376
1377     // Store the frame pointer to the setjmp buffer.
1378     Value *FrameAddr =
1379       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1380                          ConstantInt::get(Int32Ty, 0));
1381     Builder.CreateStore(FrameAddr, Buf);
1382
1383     // Store the stack pointer to the setjmp buffer.
1384     Value *StackAddr =
1385         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1386     Address StackSaveSlot =
1387       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1388     Builder.CreateStore(StackAddr, StackSaveSlot);
1389
1390     // Call LLVM's EH setjmp, which is lightweight.
1391     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1392     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1393     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1394   }
1395   case Builtin::BI__builtin_longjmp: {
1396     Value *Buf = EmitScalarExpr(E->getArg(0));
1397     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1398
1399     // Call LLVM's EH longjmp, which is lightweight.
1400     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1401
1402     // longjmp doesn't return; mark this as unreachable.
1403     Builder.CreateUnreachable();
1404
1405     // We do need to preserve an insertion point.
1406     EmitBlock(createBasicBlock("longjmp.cont"));
1407
1408     return RValue::get(nullptr);
1409   }
1410   case Builtin::BI__sync_fetch_and_add:
1411   case Builtin::BI__sync_fetch_and_sub:
1412   case Builtin::BI__sync_fetch_and_or:
1413   case Builtin::BI__sync_fetch_and_and:
1414   case Builtin::BI__sync_fetch_and_xor:
1415   case Builtin::BI__sync_fetch_and_nand:
1416   case Builtin::BI__sync_add_and_fetch:
1417   case Builtin::BI__sync_sub_and_fetch:
1418   case Builtin::BI__sync_and_and_fetch:
1419   case Builtin::BI__sync_or_and_fetch:
1420   case Builtin::BI__sync_xor_and_fetch:
1421   case Builtin::BI__sync_nand_and_fetch:
1422   case Builtin::BI__sync_val_compare_and_swap:
1423   case Builtin::BI__sync_bool_compare_and_swap:
1424   case Builtin::BI__sync_lock_test_and_set:
1425   case Builtin::BI__sync_lock_release:
1426   case Builtin::BI__sync_swap:
1427     llvm_unreachable("Shouldn't make it through sema");
1428   case Builtin::BI__sync_fetch_and_add_1:
1429   case Builtin::BI__sync_fetch_and_add_2:
1430   case Builtin::BI__sync_fetch_and_add_4:
1431   case Builtin::BI__sync_fetch_and_add_8:
1432   case Builtin::BI__sync_fetch_and_add_16:
1433     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1434   case Builtin::BI__sync_fetch_and_sub_1:
1435   case Builtin::BI__sync_fetch_and_sub_2:
1436   case Builtin::BI__sync_fetch_and_sub_4:
1437   case Builtin::BI__sync_fetch_and_sub_8:
1438   case Builtin::BI__sync_fetch_and_sub_16:
1439     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1440   case Builtin::BI__sync_fetch_and_or_1:
1441   case Builtin::BI__sync_fetch_and_or_2:
1442   case Builtin::BI__sync_fetch_and_or_4:
1443   case Builtin::BI__sync_fetch_and_or_8:
1444   case Builtin::BI__sync_fetch_and_or_16:
1445     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1446   case Builtin::BI__sync_fetch_and_and_1:
1447   case Builtin::BI__sync_fetch_and_and_2:
1448   case Builtin::BI__sync_fetch_and_and_4:
1449   case Builtin::BI__sync_fetch_and_and_8:
1450   case Builtin::BI__sync_fetch_and_and_16:
1451     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1452   case Builtin::BI__sync_fetch_and_xor_1:
1453   case Builtin::BI__sync_fetch_and_xor_2:
1454   case Builtin::BI__sync_fetch_and_xor_4:
1455   case Builtin::BI__sync_fetch_and_xor_8:
1456   case Builtin::BI__sync_fetch_and_xor_16:
1457     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1458   case Builtin::BI__sync_fetch_and_nand_1:
1459   case Builtin::BI__sync_fetch_and_nand_2:
1460   case Builtin::BI__sync_fetch_and_nand_4:
1461   case Builtin::BI__sync_fetch_and_nand_8:
1462   case Builtin::BI__sync_fetch_and_nand_16:
1463     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1464
1465   // Clang extensions: not overloaded yet.
1466   case Builtin::BI__sync_fetch_and_min:
1467     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1468   case Builtin::BI__sync_fetch_and_max:
1469     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1470   case Builtin::BI__sync_fetch_and_umin:
1471     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1472   case Builtin::BI__sync_fetch_and_umax:
1473     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1474
1475   case Builtin::BI__sync_add_and_fetch_1:
1476   case Builtin::BI__sync_add_and_fetch_2:
1477   case Builtin::BI__sync_add_and_fetch_4:
1478   case Builtin::BI__sync_add_and_fetch_8:
1479   case Builtin::BI__sync_add_and_fetch_16:
1480     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1481                                 llvm::Instruction::Add);
1482   case Builtin::BI__sync_sub_and_fetch_1:
1483   case Builtin::BI__sync_sub_and_fetch_2:
1484   case Builtin::BI__sync_sub_and_fetch_4:
1485   case Builtin::BI__sync_sub_and_fetch_8:
1486   case Builtin::BI__sync_sub_and_fetch_16:
1487     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1488                                 llvm::Instruction::Sub);
1489   case Builtin::BI__sync_and_and_fetch_1:
1490   case Builtin::BI__sync_and_and_fetch_2:
1491   case Builtin::BI__sync_and_and_fetch_4:
1492   case Builtin::BI__sync_and_and_fetch_8:
1493   case Builtin::BI__sync_and_and_fetch_16:
1494     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1495                                 llvm::Instruction::And);
1496   case Builtin::BI__sync_or_and_fetch_1:
1497   case Builtin::BI__sync_or_and_fetch_2:
1498   case Builtin::BI__sync_or_and_fetch_4:
1499   case Builtin::BI__sync_or_and_fetch_8:
1500   case Builtin::BI__sync_or_and_fetch_16:
1501     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1502                                 llvm::Instruction::Or);
1503   case Builtin::BI__sync_xor_and_fetch_1:
1504   case Builtin::BI__sync_xor_and_fetch_2:
1505   case Builtin::BI__sync_xor_and_fetch_4:
1506   case Builtin::BI__sync_xor_and_fetch_8:
1507   case Builtin::BI__sync_xor_and_fetch_16:
1508     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1509                                 llvm::Instruction::Xor);
1510   case Builtin::BI__sync_nand_and_fetch_1:
1511   case Builtin::BI__sync_nand_and_fetch_2:
1512   case Builtin::BI__sync_nand_and_fetch_4:
1513   case Builtin::BI__sync_nand_and_fetch_8:
1514   case Builtin::BI__sync_nand_and_fetch_16:
1515     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1516                                 llvm::Instruction::And, true);
1517
1518   case Builtin::BI__sync_val_compare_and_swap_1:
1519   case Builtin::BI__sync_val_compare_and_swap_2:
1520   case Builtin::BI__sync_val_compare_and_swap_4:
1521   case Builtin::BI__sync_val_compare_and_swap_8:
1522   case Builtin::BI__sync_val_compare_and_swap_16:
1523     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1524
1525   case Builtin::BI__sync_bool_compare_and_swap_1:
1526   case Builtin::BI__sync_bool_compare_and_swap_2:
1527   case Builtin::BI__sync_bool_compare_and_swap_4:
1528   case Builtin::BI__sync_bool_compare_and_swap_8:
1529   case Builtin::BI__sync_bool_compare_and_swap_16:
1530     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1531
1532   case Builtin::BI__sync_swap_1:
1533   case Builtin::BI__sync_swap_2:
1534   case Builtin::BI__sync_swap_4:
1535   case Builtin::BI__sync_swap_8:
1536   case Builtin::BI__sync_swap_16:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1538
1539   case Builtin::BI__sync_lock_test_and_set_1:
1540   case Builtin::BI__sync_lock_test_and_set_2:
1541   case Builtin::BI__sync_lock_test_and_set_4:
1542   case Builtin::BI__sync_lock_test_and_set_8:
1543   case Builtin::BI__sync_lock_test_and_set_16:
1544     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1545
1546   case Builtin::BI__sync_lock_release_1:
1547   case Builtin::BI__sync_lock_release_2:
1548   case Builtin::BI__sync_lock_release_4:
1549   case Builtin::BI__sync_lock_release_8:
1550   case Builtin::BI__sync_lock_release_16: {
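         // __sync_lock_release writes the constant 0 with release ordering, so
         // emit an atomic release store of zero at the width of the pointee.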
1551     Value *Ptr = EmitScalarExpr(E->getArg(0));
1552     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1553     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1554     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1555                                              StoreSize.getQuantity() * 8);
1556     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1557     llvm::StoreInst *Store =
1558       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1559                                  StoreSize);
1560     Store->setAtomic(llvm::AtomicOrdering::Release);
1561     return RValue::get(nullptr);
1562   }
1563
1564   case Builtin::BI__sync_synchronize: {
1565     // We assume this is supposed to correspond to a C++0x-style
1566     // sequentially-consistent fence (i.e. this is only usable for
1567     // synchronization, not device I/O or anything like that). This intrinsic
1568     // is really badly designed in the sense that in theory, there isn't
1569     // any way to safely use it... but in practice, it mostly works
1570     // to use it with non-atomic loads and stores to get acquire/release
1571     // semantics.
1572     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1573     return RValue::get(nullptr);
1574   }
1575
1576   case Builtin::BI__builtin_nontemporal_load:
1577     return RValue::get(EmitNontemporalLoad(*this, E));
1578   case Builtin::BI__builtin_nontemporal_store:
1579     return RValue::get(EmitNontemporalStore(*this, E));
1580   case Builtin::BI__c11_atomic_is_lock_free:
1581   case Builtin::BI__atomic_is_lock_free: {
1582     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1583     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1584     // _Atomic(T) is always properly-aligned.
1585     const char *LibCallName = "__atomic_is_lock_free";
1586     CallArgList Args;
1587     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1588              getContext().getSizeType());
1589     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1590       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1591                getContext().VoidPtrTy);
1592     else
1593       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1594                getContext().VoidPtrTy);
1595     const CGFunctionInfo &FuncInfo =
1596         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1597     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1598     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1599     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1600                     ReturnValueSlot(), Args);
1601   }
1602
1603   case Builtin::BI__atomic_test_and_set: {
1604     // Look at the argument type to decide whether the operation is volatile;
1605     // the builtin's declared parameter type is always volatile-qualified.
1606     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1607     bool Volatile =
1608         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1609
1610     Value *Ptr = EmitScalarExpr(E->getArg(0));
1611     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1612     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1613     Value *NewVal = Builder.getInt8(1);
1614     Value *Order = EmitScalarExpr(E->getArg(1));
1615     if (isa<llvm::ConstantInt>(Order)) {
1616       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1617       AtomicRMWInst *Result = nullptr;
1618       switch (ord) {
1619       case 0:  // memory_order_relaxed
1620       default: // invalid order
1621         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1622                                          llvm::AtomicOrdering::Monotonic);
1623         break;
1624       case 1: // memory_order_consume
1625       case 2: // memory_order_acquire
1626         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1627                                          llvm::AtomicOrdering::Acquire);
1628         break;
1629       case 3: // memory_order_release
1630         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1631                                          llvm::AtomicOrdering::Release);
1632         break;
1633       case 4: // memory_order_acq_rel
1634
1635         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1636                                          llvm::AtomicOrdering::AcquireRelease);
1637         break;
1638       case 5: // memory_order_seq_cst
1639         Result = Builder.CreateAtomicRMW(
1640             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1641             llvm::AtomicOrdering::SequentiallyConsistent);
1642         break;
1643       }
1644       Result->setVolatile(Volatile);
1645       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1646     }
1647
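         // The ordering is not a compile-time constant: switch on the runtime
         // memory_order value, emit one exchange per ordering, and merge the
         // results with a PHI in the continuation block.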
1648     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1649
1650     llvm::BasicBlock *BBs[5] = {
1651       createBasicBlock("monotonic", CurFn),
1652       createBasicBlock("acquire", CurFn),
1653       createBasicBlock("release", CurFn),
1654       createBasicBlock("acqrel", CurFn),
1655       createBasicBlock("seqcst", CurFn)
1656     };
1657     llvm::AtomicOrdering Orders[5] = {
1658         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1659         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1660         llvm::AtomicOrdering::SequentiallyConsistent};
1661
1662     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1663     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1664
1665     Builder.SetInsertPoint(ContBB);
1666     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1667
1668     for (unsigned i = 0; i < 5; ++i) {
1669       Builder.SetInsertPoint(BBs[i]);
1670       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1671                                                    Ptr, NewVal, Orders[i]);
1672       RMW->setVolatile(Volatile);
1673       Result->addIncoming(RMW, BBs[i]);
1674       Builder.CreateBr(ContBB);
1675     }
1676
1677     SI->addCase(Builder.getInt32(0), BBs[0]);
1678     SI->addCase(Builder.getInt32(1), BBs[1]);
1679     SI->addCase(Builder.getInt32(2), BBs[1]);
1680     SI->addCase(Builder.getInt32(3), BBs[2]);
1681     SI->addCase(Builder.getInt32(4), BBs[3]);
1682     SI->addCase(Builder.getInt32(5), BBs[4]);
1683
1684     Builder.SetInsertPoint(ContBB);
1685     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1686   }
1687
1688   case Builtin::BI__atomic_clear: {
1689     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1690     bool Volatile =
1691         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1692
1693     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1694     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1695     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1696     Value *NewVal = Builder.getInt8(0);
1697     Value *Order = EmitScalarExpr(E->getArg(1));
1698     if (isa<llvm::ConstantInt>(Order)) {
1699       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1700       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1701       switch (ord) {
1702       case 0:  // memory_order_relaxed
1703       default: // invalid order
1704         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1705         break;
1706       case 3:  // memory_order_release
1707         Store->setOrdering(llvm::AtomicOrdering::Release);
1708         break;
1709       case 5:  // memory_order_seq_cst
1710         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1711         break;
1712       }
1713       return RValue::get(nullptr);
1714     }
1715
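         // Non-constant ordering: branch on the runtime value. Only relaxed,
         // release and seq_cst are meaningful for a clear, so only those three
         // blocks are emitted.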
1716     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1717
1718     llvm::BasicBlock *BBs[3] = {
1719       createBasicBlock("monotonic", CurFn),
1720       createBasicBlock("release", CurFn),
1721       createBasicBlock("seqcst", CurFn)
1722     };
1723     llvm::AtomicOrdering Orders[3] = {
1724         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1725         llvm::AtomicOrdering::SequentiallyConsistent};
1726
1727     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1728     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1729
1730     for (unsigned i = 0; i < 3; ++i) {
1731       Builder.SetInsertPoint(BBs[i]);
1732       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1733       Store->setOrdering(Orders[i]);
1734       Builder.CreateBr(ContBB);
1735     }
1736
1737     SI->addCase(Builder.getInt32(0), BBs[0]);
1738     SI->addCase(Builder.getInt32(3), BBs[1]);
1739     SI->addCase(Builder.getInt32(5), BBs[2]);
1740
1741     Builder.SetInsertPoint(ContBB);
1742     return RValue::get(nullptr);
1743   }
1744
1745   case Builtin::BI__atomic_thread_fence:
1746   case Builtin::BI__atomic_signal_fence:
1747   case Builtin::BI__c11_atomic_thread_fence:
1748   case Builtin::BI__c11_atomic_signal_fence: {
1749     llvm::SynchronizationScope Scope;
1750     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1751         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1752       Scope = llvm::SingleThread;
1753     else
1754       Scope = llvm::CrossThread;
1755     Value *Order = EmitScalarExpr(E->getArg(0));
1756     if (isa<llvm::ConstantInt>(Order)) {
1757       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1758       switch (ord) {
1759       case 0:  // memory_order_relaxed
1760       default: // invalid order
1761         break;
1762       case 1:  // memory_order_consume
1763       case 2:  // memory_order_acquire
1764         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1765         break;
1766       case 3:  // memory_order_release
1767         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1768         break;
1769       case 4:  // memory_order_acq_rel
1770         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1771         break;
1772       case 5:  // memory_order_seq_cst
1773         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1774                             Scope);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779
1780     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1781     AcquireBB = createBasicBlock("acquire", CurFn);
1782     ReleaseBB = createBasicBlock("release", CurFn);
1783     AcqRelBB = createBasicBlock("acqrel", CurFn);
1784     SeqCstBB = createBasicBlock("seqcst", CurFn);
1785     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1786
1787     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1788     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1789
1790     Builder.SetInsertPoint(AcquireBB);
1791     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1792     Builder.CreateBr(ContBB);
1793     SI->addCase(Builder.getInt32(1), AcquireBB);
1794     SI->addCase(Builder.getInt32(2), AcquireBB);
1795
1796     Builder.SetInsertPoint(ReleaseBB);
1797     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1798     Builder.CreateBr(ContBB);
1799     SI->addCase(Builder.getInt32(3), ReleaseBB);
1800
1801     Builder.SetInsertPoint(AcqRelBB);
1802     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1803     Builder.CreateBr(ContBB);
1804     SI->addCase(Builder.getInt32(4), AcqRelBB);
1805
1806     Builder.SetInsertPoint(SeqCstBB);
1807     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1808     Builder.CreateBr(ContBB);
1809     SI->addCase(Builder.getInt32(5), SeqCstBB);
1810
1811     Builder.SetInsertPoint(ContBB);
1812     return RValue::get(nullptr);
1813   }
1814
1815     // Library functions with special handling.
1816   case Builtin::BIsqrt:
1817   case Builtin::BIsqrtf:
1818   case Builtin::BIsqrtl: {
1819     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1820     // in finite- or unsafe-math mode (the intrinsic has different semantics
1821     // for handling negative numbers compared to the library function, so
1822     // -fmath-errno=0 is not enough).
1823     if (!FD->hasAttr<ConstAttr>())
1824       break;
1825     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1826           CGM.getCodeGenOpts().NoNaNsFPMath))
1827       break;
1828     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1829     llvm::Type *ArgType = Arg0->getType();
1830     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1831     return RValue::get(Builder.CreateCall(F, Arg0));
1832   }
1833
1834   case Builtin::BI__builtin_pow:
1835   case Builtin::BI__builtin_powf:
1836   case Builtin::BI__builtin_powl:
1837   case Builtin::BIpow:
1838   case Builtin::BIpowf:
1839   case Builtin::BIpowl: {
1840     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1841     if (!FD->hasAttr<ConstAttr>())
1842       break;
1843     Value *Base = EmitScalarExpr(E->getArg(0));
1844     Value *Exponent = EmitScalarExpr(E->getArg(1));
1845     llvm::Type *ArgType = Base->getType();
1846     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1847     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1848   }
1849
1850   case Builtin::BIfma:
1851   case Builtin::BIfmaf:
1852   case Builtin::BIfmal:
1853   case Builtin::BI__builtin_fma:
1854   case Builtin::BI__builtin_fmaf:
1855   case Builtin::BI__builtin_fmal: {
1856     // Rewrite fma to intrinsic.
1857     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1858     llvm::Type *ArgType = FirstArg->getType();
1859     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1860     return RValue::get(
1861         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1862                                EmitScalarExpr(E->getArg(2))}));
1863   }
1864
1865   case Builtin::BI__builtin_signbit:
1866   case Builtin::BI__builtin_signbitf:
1867   case Builtin::BI__builtin_signbitl: {
1868     return RValue::get(
1869         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1870                            ConvertType(E->getType())));
1871   }
1872   case Builtin::BI__builtin_annotation: {
1873     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1874     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1875                                       AnnVal->getType());
1876
1877     // Get the annotation string, go through casts. Sema requires this to be a
1878     // non-wide string literal, potentially cast, so the cast<> is safe.
1879     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1880     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1881     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1882   }
1883   case Builtin::BI__builtin_addcb:
1884   case Builtin::BI__builtin_addcs:
1885   case Builtin::BI__builtin_addc:
1886   case Builtin::BI__builtin_addcl:
1887   case Builtin::BI__builtin_addcll:
1888   case Builtin::BI__builtin_subcb:
1889   case Builtin::BI__builtin_subcs:
1890   case Builtin::BI__builtin_subc:
1891   case Builtin::BI__builtin_subcl:
1892   case Builtin::BI__builtin_subcll: {
1893
1894     // We translate all of these builtins from expressions of the form:
1895     //   int x = ..., y = ..., carryin = ..., carryout, result;
1896     //   result = __builtin_addc(x, y, carryin, &carryout);
1897     //
1898     // to LLVM IR of the form:
1899     //
1900     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1901     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1902     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1903     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1904     //                                                       i32 %carryin)
1905     //   %result = extractvalue {i32, i1} %tmp2, 0
1906     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1907     //   %tmp3 = or i1 %carry1, %carry2
1908     //   %tmp4 = zext i1 %tmp3 to i32
1909     //   store i32 %tmp4, i32* %carryout
1910
1911     // Scalarize our inputs.
1912     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1913     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1914     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1915     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1916
1917     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1918     llvm::Intrinsic::ID IntrinsicId;
1919     switch (BuiltinID) {
1920     default: llvm_unreachable("Unknown multiprecision builtin id.");
1921     case Builtin::BI__builtin_addcb:
1922     case Builtin::BI__builtin_addcs:
1923     case Builtin::BI__builtin_addc:
1924     case Builtin::BI__builtin_addcl:
1925     case Builtin::BI__builtin_addcll:
1926       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1927       break;
1928     case Builtin::BI__builtin_subcb:
1929     case Builtin::BI__builtin_subcs:
1930     case Builtin::BI__builtin_subc:
1931     case Builtin::BI__builtin_subcl:
1932     case Builtin::BI__builtin_subcll:
1933       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1934       break;
1935     }
1936
1937     // Construct our resulting LLVM IR expression.
1938     llvm::Value *Carry1;
1939     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1940                                               X, Y, Carry1);
1941     llvm::Value *Carry2;
1942     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1943                                               Sum1, Carryin, Carry2);
1944     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1945                                                X->getType());
1946     Builder.CreateStore(CarryOut, CarryOutPtr);
1947     return RValue::get(Sum2);
1948   }
1949
1950   case Builtin::BI__builtin_add_overflow:
1951   case Builtin::BI__builtin_sub_overflow:
1952   case Builtin::BI__builtin_mul_overflow: {
1953     const clang::Expr *LeftArg = E->getArg(0);
1954     const clang::Expr *RightArg = E->getArg(1);
1955     const clang::Expr *ResultArg = E->getArg(2);
1956
1957     clang::QualType ResultQTy =
1958         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1959
1960     WidthAndSignedness LeftInfo =
1961         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1962     WidthAndSignedness RightInfo =
1963         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1964     WidthAndSignedness ResultInfo =
1965         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1966     WidthAndSignedness EncompassingInfo =
1967         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
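         // For example (a sketch), __builtin_add_overflow(a, b, &r) with 'a' an
         // int, 'b' an unsigned int and 'r' a short performs the addition in a
         // type wide enough to represent every operand exactly, then checks
         // below whether the value survives truncation to short.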
1968
1969     llvm::Type *EncompassingLLVMTy =
1970         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1971
1972     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1973
1974     llvm::Intrinsic::ID IntrinsicId;
1975     switch (BuiltinID) {
1976     default:
1977       llvm_unreachable("Unknown overflow builtin id.");
1978     case Builtin::BI__builtin_add_overflow:
1979       IntrinsicId = EncompassingInfo.Signed
1980                         ? llvm::Intrinsic::sadd_with_overflow
1981                         : llvm::Intrinsic::uadd_with_overflow;
1982       break;
1983     case Builtin::BI__builtin_sub_overflow:
1984       IntrinsicId = EncompassingInfo.Signed
1985                         ? llvm::Intrinsic::ssub_with_overflow
1986                         : llvm::Intrinsic::usub_with_overflow;
1987       break;
1988     case Builtin::BI__builtin_mul_overflow:
1989       IntrinsicId = EncompassingInfo.Signed
1990                         ? llvm::Intrinsic::smul_with_overflow
1991                         : llvm::Intrinsic::umul_with_overflow;
1992       break;
1993     }
1994
1995     llvm::Value *Left = EmitScalarExpr(LeftArg);
1996     llvm::Value *Right = EmitScalarExpr(RightArg);
1997     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1998
1999     // Extend each operand to the encompassing type.
2000     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2001     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2002
2003     // Perform the operation on the extended values.
2004     llvm::Value *Overflow, *Result;
2005     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2006
2007     if (EncompassingInfo.Width > ResultInfo.Width) {
2008       // The encompassing type is wider than the result type, so we need to
2009       // truncate it.
2010       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2011
2012       // To see if the truncation caused an overflow, we will extend
2013       // the result and then compare it to the original result.
2014       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2015           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2016       llvm::Value *TruncationOverflow =
2017           Builder.CreateICmpNE(Result, ResultTruncExt);
2018
2019       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2020       Result = ResultTrunc;
2021     }
2022
2023     // Finally, store the result using the pointer.
2024     bool isVolatile =
2025       ResultArg->getType()->getPointeeType().isVolatileQualified();
2026     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2027
2028     return RValue::get(Overflow);
2029   }
2030
2031   case Builtin::BI__builtin_uadd_overflow:
2032   case Builtin::BI__builtin_uaddl_overflow:
2033   case Builtin::BI__builtin_uaddll_overflow:
2034   case Builtin::BI__builtin_usub_overflow:
2035   case Builtin::BI__builtin_usubl_overflow:
2036   case Builtin::BI__builtin_usubll_overflow:
2037   case Builtin::BI__builtin_umul_overflow:
2038   case Builtin::BI__builtin_umull_overflow:
2039   case Builtin::BI__builtin_umulll_overflow:
2040   case Builtin::BI__builtin_sadd_overflow:
2041   case Builtin::BI__builtin_saddl_overflow:
2042   case Builtin::BI__builtin_saddll_overflow:
2043   case Builtin::BI__builtin_ssub_overflow:
2044   case Builtin::BI__builtin_ssubl_overflow:
2045   case Builtin::BI__builtin_ssubll_overflow:
2046   case Builtin::BI__builtin_smul_overflow:
2047   case Builtin::BI__builtin_smull_overflow:
2048   case Builtin::BI__builtin_smulll_overflow: {
2049
2050     // We translate all of these builtins directly to the relevant LLVM intrinsic.
2051
2052     // Scalarize our inputs.
2053     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2054     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2055     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2056
2057     // Decide which of the overflow intrinsics we are lowering to:
2058     llvm::Intrinsic::ID IntrinsicId;
2059     switch (BuiltinID) {
2060     default: llvm_unreachable("Unknown overflow builtin id.");
2061     case Builtin::BI__builtin_uadd_overflow:
2062     case Builtin::BI__builtin_uaddl_overflow:
2063     case Builtin::BI__builtin_uaddll_overflow:
2064       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2065       break;
2066     case Builtin::BI__builtin_usub_overflow:
2067     case Builtin::BI__builtin_usubl_overflow:
2068     case Builtin::BI__builtin_usubll_overflow:
2069       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2070       break;
2071     case Builtin::BI__builtin_umul_overflow:
2072     case Builtin::BI__builtin_umull_overflow:
2073     case Builtin::BI__builtin_umulll_overflow:
2074       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2075       break;
2076     case Builtin::BI__builtin_sadd_overflow:
2077     case Builtin::BI__builtin_saddl_overflow:
2078     case Builtin::BI__builtin_saddll_overflow:
2079       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2080       break;
2081     case Builtin::BI__builtin_ssub_overflow:
2082     case Builtin::BI__builtin_ssubl_overflow:
2083     case Builtin::BI__builtin_ssubll_overflow:
2084       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2085       break;
2086     case Builtin::BI__builtin_smul_overflow:
2087     case Builtin::BI__builtin_smull_overflow:
2088     case Builtin::BI__builtin_smulll_overflow:
2089       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2090       break;
2091     }
2092
2093
2094     llvm::Value *Carry;
2095     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2096     Builder.CreateStore(Sum, SumOutPtr);
2097
2098     return RValue::get(Carry);
2099   }
2100   case Builtin::BI__builtin_addressof:
2101     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2102   case Builtin::BI__builtin_operator_new:
2103     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2104                                     E->getArg(0), false);
2105   case Builtin::BI__builtin_operator_delete:
2106     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2107                                     E->getArg(0), true);
2108   case Builtin::BI__noop:
2109     // __noop always evaluates to an integer literal zero.
2110     return RValue::get(ConstantInt::get(IntTy, 0));
2111   case Builtin::BI__builtin_call_with_static_chain: {
2112     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2113     const Expr *Chain = E->getArg(1);
2114     return EmitCall(Call->getCallee()->getType(),
2115                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2116                     EmitScalarExpr(Chain));
2117   }
2118   case Builtin::BI_InterlockedExchange8:
2119   case Builtin::BI_InterlockedExchange16:
2120   case Builtin::BI_InterlockedExchange:
2121   case Builtin::BI_InterlockedExchangePointer:
2122     return RValue::get(
2123         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2124   case Builtin::BI_InterlockedCompareExchangePointer: {
2125     llvm::Type *RTy;
2126     llvm::IntegerType *IntType =
2127       IntegerType::get(getLLVMContext(),
2128                        getContext().getTypeSize(E->getType()));
2129     llvm::Type *IntPtrType = IntType->getPointerTo();
2130
2131     llvm::Value *Destination =
2132       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2133
2134     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2135     RTy = Exchange->getType();
2136     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2137
2138     llvm::Value *Comparand =
2139       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2140
2141     auto Result =
2142         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2143                                     AtomicOrdering::SequentiallyConsistent,
2144                                     AtomicOrdering::SequentiallyConsistent);
2145     Result->setVolatile(true);
2146
2147     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2148                                                                          0),
2149                                               RTy));
2150   }
2151   case Builtin::BI_InterlockedCompareExchange8:
2152   case Builtin::BI_InterlockedCompareExchange16:
2153   case Builtin::BI_InterlockedCompareExchange:
2154   case Builtin::BI_InterlockedCompareExchange64: {
2155     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2156         EmitScalarExpr(E->getArg(0)),
2157         EmitScalarExpr(E->getArg(2)),
2158         EmitScalarExpr(E->getArg(1)),
2159         AtomicOrdering::SequentiallyConsistent,
2160         AtomicOrdering::SequentiallyConsistent);
2161     CXI->setVolatile(true);
2162     return RValue::get(Builder.CreateExtractValue(CXI, 0));
2163   }
2164   case Builtin::BI_InterlockedIncrement16:
2165   case Builtin::BI_InterlockedIncrement:
2166     return RValue::get(
2167         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2168   case Builtin::BI_InterlockedDecrement16:
2169   case Builtin::BI_InterlockedDecrement:
2170     return RValue::get(
2171         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2172   case Builtin::BI_InterlockedAnd8:
2173   case Builtin::BI_InterlockedAnd16:
2174   case Builtin::BI_InterlockedAnd:
2175     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2176   case Builtin::BI_InterlockedExchangeAdd8:
2177   case Builtin::BI_InterlockedExchangeAdd16:
2178   case Builtin::BI_InterlockedExchangeAdd:
2179     return RValue::get(
2180         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2181   case Builtin::BI_InterlockedExchangeSub8:
2182   case Builtin::BI_InterlockedExchangeSub16:
2183   case Builtin::BI_InterlockedExchangeSub:
2184     return RValue::get(
2185         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2186   case Builtin::BI_InterlockedOr8:
2187   case Builtin::BI_InterlockedOr16:
2188   case Builtin::BI_InterlockedOr:
2189     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2190   case Builtin::BI_InterlockedXor8:
2191   case Builtin::BI_InterlockedXor16:
2192   case Builtin::BI_InterlockedXor:
2193     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2194   case Builtin::BI__readfsdword: {
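         // Reads a 32-bit value at the given offset from the FS segment base;
         // LLVM models the x86 FS segment as address space 257.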
2195     llvm::Type *IntTy = ConvertType(E->getType());
2196     Value *IntToPtr =
2197       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
2198                              llvm::PointerType::get(IntTy, 257));
2199     LoadInst *Load = Builder.CreateAlignedLoad(
2200         IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType()));
2201     Load->setVolatile(true);
2202     return RValue::get(Load);
2203   }
2204
2205   case Builtin::BI__exception_code:
2206   case Builtin::BI_exception_code:
2207     return RValue::get(EmitSEHExceptionCode());
2208   case Builtin::BI__exception_info:
2209   case Builtin::BI_exception_info:
2210     return RValue::get(EmitSEHExceptionInfo());
2211   case Builtin::BI__abnormal_termination:
2212   case Builtin::BI_abnormal_termination:
2213     return RValue::get(EmitSEHAbnormalTermination());
2214   case Builtin::BI_setjmpex: {
2215     if (getTarget().getTriple().isOSMSVCRT()) {
2216       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2217       llvm::AttributeSet ReturnsTwiceAttr =
2218           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2219                             llvm::Attribute::ReturnsTwice);
2220       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2221           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2222           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2223       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2224           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2225       llvm::Value *FrameAddr =
2226           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2227                              ConstantInt::get(Int32Ty, 0));
2228       llvm::Value *Args[] = {Buf, FrameAddr};
2229       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2230       CS.setAttributes(ReturnsTwiceAttr);
2231       return RValue::get(CS.getInstruction());
2232     }
2233     break;
2234   }
2235   case Builtin::BI_setjmp: {
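         // On MSVC runtimes, setjmp needs extra state so the unwinder can find
         // the frame: x86 calls _setjmp3(buf, count, ...), other targets call
         // _setjmp(buf, frameaddress). Either way the call is marked
         // returns_twice.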
2236     if (getTarget().getTriple().isOSMSVCRT()) {
2237       llvm::AttributeSet ReturnsTwiceAttr =
2238           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2239                             llvm::Attribute::ReturnsTwice);
2240       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2241           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2242       llvm::CallSite CS;
2243       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2244         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2245         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2246             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2247             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2248         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2249         llvm::Value *Args[] = {Buf, Count};
2250         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2251       } else {
2252         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2253         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2254             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2255             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2256         llvm::Value *FrameAddr =
2257             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2258                                ConstantInt::get(Int32Ty, 0));
2259         llvm::Value *Args[] = {Buf, FrameAddr};
2260         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2261       }
2262       CS.setAttributes(ReturnsTwiceAttr);
2263       return RValue::get(CS.getInstruction());
2264     }
2265     break;
2266   }
2267
2268   case Builtin::BI__GetExceptionInfo: {
2269     if (llvm::GlobalVariable *GV =
2270             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2271       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2272     break;
2273   }
2274
2275   case Builtin::BI__builtin_coro_size: {
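         // llvm.coro.size returns the size, in bytes, of the coroutine frame;
         // instantiate it at the bit width of size_t.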
2276     auto &Context = getContext();
2277     auto SizeTy = Context.getSizeType();
2278     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2279     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2280     return RValue::get(Builder.CreateCall(F));
2281   }
2282
2283   case Builtin::BI__builtin_coro_id:
2284     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2285   case Builtin::BI__builtin_coro_promise:
2286     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2287   case Builtin::BI__builtin_coro_resume:
2288     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2289   case Builtin::BI__builtin_coro_frame:
2290     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2291   case Builtin::BI__builtin_coro_free:
2292     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2293   case Builtin::BI__builtin_coro_destroy:
2294     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2295   case Builtin::BI__builtin_coro_done:
2296     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2297   case Builtin::BI__builtin_coro_alloc:
2298     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2299   case Builtin::BI__builtin_coro_begin:
2300     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2301   case Builtin::BI__builtin_coro_end:
2302     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2303   case Builtin::BI__builtin_coro_suspend:
2304     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2305   case Builtin::BI__builtin_coro_param:
2306     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2307
2308   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2309   case Builtin::BIread_pipe:
2310   case Builtin::BIwrite_pipe: {
2311     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2312           *Arg1 = EmitScalarExpr(E->getArg(1));
2313     CGOpenCLRuntime OpenCLRT(CGM);
2314     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2315     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2316
2317     // Type of the generic packet parameter.
2318     unsigned GenericAS =
2319         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2320     llvm::Type *I8PTy = llvm::PointerType::get(
2321         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2322
2323     // Determine which overloaded version to call.
2324     if (2U == E->getNumArgs()) {
2325       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2326                                                              : "__write_pipe_2";
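           // For the two-argument form, e.g. read_pipe(p, &v), this lowers to a
           // call roughly of the form (a sketch; the pipe type is target-defined):
           //   %r = call i32 @__read_pipe_2(%pipe %p, i8 addrspace(4)* %v,
           //                                i32 <elem size>, i32 <elem align>)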
2327       // Create a generic function type so the call works with any built-in or
2328       // user-defined element type.
2329       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2330       llvm::FunctionType *FTy = llvm::FunctionType::get(
2331           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2332       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2333       return RValue::get(
2334           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2335                              {Arg0, BCast, PacketSize, PacketAlign}));
2336     } else {
2337       assert(4 == E->getNumArgs() &&
2338              "Illegal number of parameters to pipe function");
2339       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2340                                                              : "__write_pipe_4";
2341
2342       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2343                               Int32Ty, Int32Ty};
2344       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2345             *Arg3 = EmitScalarExpr(E->getArg(3));
2346       llvm::FunctionType *FTy = llvm::FunctionType::get(
2347           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2348       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2349       // We know the third argument is an integer type, but we may need to cast
2350       // it to i32.
2351       if (Arg2->getType() != Int32Ty)
2352         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2353       return RValue::get(Builder.CreateCall(
2354           CGM.CreateRuntimeFunction(FTy, Name),
2355           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2356     }
2357   }
2358   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2359   // functions
2360   case Builtin::BIreserve_read_pipe:
2361   case Builtin::BIreserve_write_pipe:
2362   case Builtin::BIwork_group_reserve_read_pipe:
2363   case Builtin::BIwork_group_reserve_write_pipe:
2364   case Builtin::BIsub_group_reserve_read_pipe:
2365   case Builtin::BIsub_group_reserve_write_pipe: {
2366     // Compose the name of the runtime function to call.
2367     const char *Name;
2368     if (BuiltinID == Builtin::BIreserve_read_pipe)
2369       Name = "__reserve_read_pipe";
2370     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2371       Name = "__reserve_write_pipe";
2372     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2373       Name = "__work_group_reserve_read_pipe";
2374     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2375       Name = "__work_group_reserve_write_pipe";
2376     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2377       Name = "__sub_group_reserve_read_pipe";
2378     else
2379       Name = "__sub_group_reserve_write_pipe";
2380
2381     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2382           *Arg1 = EmitScalarExpr(E->getArg(1));
2383     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2384     CGOpenCLRuntime OpenCLRT(CGM);
2385     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2386     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2387
2388     // Building the generic function prototype.
2389     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2390     llvm::FunctionType *FTy = llvm::FunctionType::get(
2391         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392     // We know the second argument is an integer type, but we may need to cast
2393     // it to i32.
2394     if (Arg1->getType() != Int32Ty)
2395       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2396     return RValue::get(
2397         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2398                            {Arg0, Arg1, PacketSize, PacketAlign}));
2399   }
2400   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2401   // functions
2402   case Builtin::BIcommit_read_pipe:
2403   case Builtin::BIcommit_write_pipe:
2404   case Builtin::BIwork_group_commit_read_pipe:
2405   case Builtin::BIwork_group_commit_write_pipe:
2406   case Builtin::BIsub_group_commit_read_pipe:
2407   case Builtin::BIsub_group_commit_write_pipe: {
2408     const char *Name;
2409     if (BuiltinID == Builtin::BIcommit_read_pipe)
2410       Name = "__commit_read_pipe";
2411     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2412       Name = "__commit_write_pipe";
2413     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2414       Name = "__work_group_commit_read_pipe";
2415     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2416       Name = "__work_group_commit_write_pipe";
2417     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2418       Name = "__sub_group_commit_read_pipe";
2419     else
2420       Name = "__sub_group_commit_write_pipe";
2421
2422     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2423           *Arg1 = EmitScalarExpr(E->getArg(1));
2424     CGOpenCLRuntime OpenCLRT(CGM);
2425     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2426     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2427
2428     // Building the generic function prototype.
2429     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2430     llvm::FunctionType *FTy =
2431         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2432                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2433
2434     return RValue::get(
2435         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2436                            {Arg0, Arg1, PacketSize, PacketAlign}));
2437   }
2438   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2439   case Builtin::BIget_pipe_num_packets:
2440   case Builtin::BIget_pipe_max_packets: {
2441     const char *Name;
2442     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2443       Name = "__get_pipe_num_packets";
2444     else
2445       Name = "__get_pipe_max_packets";
2446
2447     // Building the generic function prototype.
2448     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2449     CGOpenCLRuntime OpenCLRT(CGM);
2450     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2451     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2452     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2453     llvm::FunctionType *FTy = llvm::FunctionType::get(
2454         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2455
2456     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2457                                           {Arg0, PacketSize, PacketAlign}));
2458   }
2459
2460   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2461   case Builtin::BIto_global:
2462   case Builtin::BIto_local:
2463   case Builtin::BIto_private: {
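         // e.g. to_global(p) becomes a call to __to_global on the generic
         // address-space i8*, with the result cast back to the expected
         // pointer type.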
2464     auto Arg0 = EmitScalarExpr(E->getArg(0));
2465     auto NewArgT = llvm::PointerType::get(Int8Ty,
2466       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2467     auto NewRetT = llvm::PointerType::get(Int8Ty,
2468       CGM.getContext().getTargetAddressSpace(
2469         E->getType()->getPointeeType().getAddressSpace()));
2470     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2471     llvm::Value *NewArg;
2472     if (Arg0->getType()->getPointerAddressSpace() !=
2473         NewArgT->getPointerAddressSpace())
2474       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2475     else
2476       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2477     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2478     auto NewCall =
2479         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2480     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2481       ConvertType(E->getType())));
2482   }
2483
2484   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2485   // It contains four different overload formats specified in Table 6.13.17.1.
2486   case Builtin::BIenqueue_kernel: {
2487     StringRef Name; // Generated function call name
2488     unsigned NumArgs = E->getNumArgs();
2489
2490     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2491     llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2492
2493     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2494     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2495     llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2496
2497     if (NumArgs == 4) {
2498       // The most basic form of the call with parameters:
2499       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2500       Name = "__enqueue_kernel_basic";
2501       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2502       llvm::FunctionType *FTy = llvm::FunctionType::get(
2503           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2504
2505       llvm::Value *Block =
2506           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2507
2508       return RValue::get(Builder.CreateCall(
2509           CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2510     }
2511     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2512
2513     // Could have events and/or vaargs.
2514     if (E->getArg(3)->getType()->isBlockPointerType()) {
2515       // No events passed, but has variadic arguments.
2516       Name = "__enqueue_kernel_vaargs";
2517       llvm::Value *Block =
2518           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2519       // Create a vector of the arguments, as well as a constant value to
2520       // express to the runtime the number of variadic arguments.
2521       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2522                                          ConstantInt::get(IntTy, NumArgs - 4)};
2523       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2524                                           IntTy};
2525
2526       // Each of the following arguments specifies the size of the corresponding
2527       // argument passed to the enqueued block.
2528       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2529         Args.push_back(
2530             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2531
2532       llvm::FunctionType *FTy = llvm::FunctionType::get(
2533           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2534       return RValue::get(
2535           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2536                              llvm::ArrayRef<llvm::Value *>(Args)));
2537     }
2538     // All remaining call forms take event arguments.
2539     if (NumArgs >= 7) {
2540       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2541       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2542           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2543
2544       llvm::Value *NumEvents =
2545           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2546       llvm::Value *EventList =
2547           E->getArg(4)->getType()->isArrayType()
2548               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2549               : EmitScalarExpr(E->getArg(4));
2550       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2551       // Convert to generic address space.
2552       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2553       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2554       llvm::Value *Block =
2555           Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2556
2557       std::vector<llvm::Type *> ArgTys = {QueueTy,  Int32Ty,    RangeTy,
2558                                           Int32Ty,  EventPtrTy, EventPtrTy,
2559                                           Int8PtrTy};
2560
2561       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2562                                          EventList, ClkEvent, Block};
2563
2564       if (NumArgs == 7) {
2565         // Has events but no variadics.
2566         Name = "__enqueue_kernel_basic_events";
2567         llvm::FunctionType *FTy = llvm::FunctionType::get(
2568             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2569         return RValue::get(
2570             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2571                                llvm::ArrayRef<llvm::Value *>(Args)));
2572       }
2573       // Has event info and variadics.
2574       // Pass the number of variadics to the runtime function too.
2575       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2576       ArgTys.push_back(Int32Ty);
2577       Name = "__enqueue_kernel_events_vaargs";
2578
2579       // Each of the following arguments specifies the size of the corresponding
2580       // argument passed to the enqueued block.
2581       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2582         Args.push_back(
2583             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2584
2585       llvm::FunctionType *FTy = llvm::FunctionType::get(
2586           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2587       return RValue::get(
2588           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2589                              llvm::ArrayRef<llvm::Value *>(Args)));
2590     }
2591   }
2592   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2593   // parameter.
2594   case Builtin::BIget_kernel_work_group_size: {
2595     Value *Arg = EmitScalarExpr(E->getArg(0));
2596     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2597     return RValue::get(
2598         Builder.CreateCall(CGM.CreateRuntimeFunction(
2599                                llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2600                                "__get_kernel_work_group_size_impl"),
2601                            Arg));
2602   }
2603   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2604     Value *Arg = EmitScalarExpr(E->getArg(0));
2605     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2606     return RValue::get(Builder.CreateCall(
2607         CGM.CreateRuntimeFunction(
2608             llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2609             "__get_kernel_preferred_work_group_multiple_impl"),
2610         Arg));
2611   }
2612   case Builtin::BIprintf:
2613     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2614       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2615     break;
2616   case Builtin::BI__builtin_canonicalize:
2617   case Builtin::BI__builtin_canonicalizef:
2618   case Builtin::BI__builtin_canonicalizel:
2619     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2620
2621   case Builtin::BI__builtin_thread_pointer: {
2622     if (!getContext().getTargetInfo().isTLSSupported())
2623       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2624     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2625     break;
2626   }
2627   case Builtin::BI__builtin_os_log_format: {
2628     assert(E->getNumArgs() >= 2 &&
2629            "__builtin_os_log_format takes at least 2 arguments");
2630     analyze_os_log::OSLogBufferLayout Layout;
2631     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2632     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2633     // Ignore argument 1, the format string. It is not currently used.
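    // The buffer written below is laid out as: a summary byte, an argument
    // count byte, and then, for each item, a descriptor byte, a size byte and
    // the argument payload itself.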
2634     CharUnits Offset;
2635     Builder.CreateStore(
2636         Builder.getInt8(Layout.getSummaryByte()),
2637         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2638     Builder.CreateStore(
2639         Builder.getInt8(Layout.getNumArgsByte()),
2640         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2641
2642     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2643     for (const auto &Item : Layout.Items) {
2644       Builder.CreateStore(
2645           Builder.getInt8(Item.getDescriptorByte()),
2646           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2647       Builder.CreateStore(
2648           Builder.getInt8(Item.getSizeByte()),
2649           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2650       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2651       if (const Expr *TheExpr = Item.getExpr()) {
2652         Addr = Builder.CreateElementBitCast(
2653             Addr, ConvertTypeForMem(TheExpr->getType()));
2654         // Check if this is a retainable type.
2655         if (TheExpr->getType()->isObjCRetainableType()) {
2656           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2657                  "Only scalar can be a ObjC retainable type");
2658           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2659           RValue RV = RValue::get(SV);
2660           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2661           EmitStoreThroughLValue(RV, LV);
2662           // Check if the object is constant, if not, save it in
2663           // RetainableOperands.
2664           if (!isa<Constant>(SV))
2665             RetainableOperands.push_back(SV);
2666         } else {
2667           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2668         }
2669       } else {
2670         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2671         Builder.CreateStore(
2672             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2673       }
2674       Offset += Item.size();
2675     }
2676
2677     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2678     // cleanup will cause the use to appear after the final log call, keeping
2679     // the object valid while it's held in the log buffer.  Note that if there's
2680     // a release cleanup on the object, it will already be active; since
2681     // cleanups are emitted in reverse order, the use will occur before the
2682     // object is released.
2683     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2684         CGM.getCodeGenOpts().OptimizationLevel != 0)
2685       for (llvm::Value *object : RetainableOperands)
2686         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2687
2688     return RValue::get(BufAddr.getPointer());
2689   }
2690
2691   case Builtin::BI__builtin_os_log_format_buffer_size: {
2692     analyze_os_log::OSLogBufferLayout Layout;
2693     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2694     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2695                                         Layout.size().getQuantity()));
2696   }
2697   }
2698
2699   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2700   // the call using the normal call path, but using the unmangled
2701   // version of the function name.
2702   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2703     return emitLibraryCall(*this, FD, E,
2704                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2705
2706   // If this is a predefined lib function (e.g. malloc), emit the call
2707   // using exactly the normal call path.
2708   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2709     return emitLibraryCall(*this, FD, E,
2710                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2711
2712   // Check that a call to a target specific builtin has the correct target
2713   // features.
2714   // This is done down here so the check only applies to target-specific
2715   // builtins; however, if generic builtins ever start to require generic
2716   // target features, this check can move up to the beginning of the function.
2717   checkTargetFeatures(E, FD);
2718
2719   // See if we have a target specific intrinsic.
2720   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2721   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2722   StringRef Prefix =
2723       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2724   if (!Prefix.empty()) {
2725     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2726     // NOTE: we don't need to perform a compatibility flag check here, since
2727     // these intrinsics are declared in Builtins*.def via LANGBUILTIN, which
2728     // restricts the MS builtins to ALL_MS_LANGUAGES, so they are filtered earlier.
2729     if (IntrinsicID == Intrinsic::not_intrinsic)
2730       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2731   }
2732
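  // If the builtin maps directly to an LLVM intrinsic, emit the intrinsic
  // call: constant-fold any arguments that must be integer constant
  // expressions, and bitcast arguments and the result wherever the builtin's
  // types differ from the intrinsic's prototype.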
2733   if (IntrinsicID != Intrinsic::not_intrinsic) {
2734     SmallVector<Value*, 16> Args;
2735
2736     // Find out if any arguments are required to be integer constant
2737     // expressions.
2738     unsigned ICEArguments = 0;
2739     ASTContext::GetBuiltinTypeError Error;
2740     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2741     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2742
2743     Function *F = CGM.getIntrinsic(IntrinsicID);
2744     llvm::FunctionType *FTy = F->getFunctionType();
2745
2746     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2747       Value *ArgValue;
2748       // If this is a normal argument, just emit it as a scalar.
2749       if ((ICEArguments & (1 << i)) == 0) {
2750         ArgValue = EmitScalarExpr(E->getArg(i));
2751       } else {
2752         // If this is required to be a constant, constant fold it so that we
2753         // know that the generated intrinsic gets a ConstantInt.
2754         llvm::APSInt Result;
2755         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2756         assert(IsConst && "Constant arg isn't actually constant?");
2757         (void)IsConst;
2758         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2759       }
2760
2761       // If the intrinsic arg type is different from the builtin arg type
2762       // we need to do a bit cast.
2763       llvm::Type *PTy = FTy->getParamType(i);
2764       if (PTy != ArgValue->getType()) {
2765         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2766                "Must be able to losslessly bit cast to param");
2767         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2768       }
2769
2770       Args.push_back(ArgValue);
2771     }
2772
2773     Value *V = Builder.CreateCall(F, Args);
2774     QualType BuiltinRetType = E->getType();
2775
2776     llvm::Type *RetTy = VoidTy;
2777     if (!BuiltinRetType->isVoidType())
2778       RetTy = ConvertType(BuiltinRetType);
2779
2780     if (RetTy != V->getType()) {
2781       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2782              "Must be able to losslessly bit cast result type");
2783       V = Builder.CreateBitCast(V, RetTy);
2784     }
2785
2786     return RValue::get(V);
2787   }
2788
2789   // See if we have a target specific builtin that needs to be lowered.
2790   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2791     return RValue::get(V);
2792
2793   ErrorUnsupported(E, "builtin function");
2794
2795   // Unknown builtin: it has been reported as unsupported above, so just return undef.
2796   return GetUndefRValue(E->getType());
2797 }
2798
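// Dispatch a target-specific builtin to the per-architecture emitter that
// matches the given triple; returns nullptr for architectures with no
// target-specific builtin handling here.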
2799 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2800                                         unsigned BuiltinID, const CallExpr *E,
2801                                         llvm::Triple::ArchType Arch) {
2802   switch (Arch) {
2803   case llvm::Triple::arm:
2804   case llvm::Triple::armeb:
2805   case llvm::Triple::thumb:
2806   case llvm::Triple::thumbeb:
2807     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2808   case llvm::Triple::aarch64:
2809   case llvm::Triple::aarch64_be:
2810     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2811   case llvm::Triple::x86:
2812   case llvm::Triple::x86_64:
2813     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2814   case llvm::Triple::ppc:
2815   case llvm::Triple::ppc64:
2816   case llvm::Triple::ppc64le:
2817     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2818   case llvm::Triple::r600:
2819   case llvm::Triple::amdgcn:
2820     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2821   case llvm::Triple::systemz:
2822     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2823   case llvm::Triple::nvptx:
2824   case llvm::Triple::nvptx64:
2825     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2826   case llvm::Triple::wasm32:
2827   case llvm::Triple::wasm64:
2828     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2829   default:
2830     return nullptr;
2831   }
2832 }
2833
2834 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2835                                               const CallExpr *E) {
2836   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2837     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2838     return EmitTargetArchBuiltinExpr(
2839         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2840         getContext().getAuxTargetInfo()->getTriple().getArch());
2841   }
2842
2843   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2844                                    getTarget().getTriple().getArch());
2845 }
2846
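// Map a NeonTypeFlags descriptor onto the corresponding LLVM vector type.
// Quad variants double the lane count; V1Ty requests a single-element vector.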
2847 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2848                                      NeonTypeFlags TypeFlags,
2849                                      bool V1Ty=false) {
2850   int IsQuad = TypeFlags.isQuad();
2851   switch (TypeFlags.getEltType()) {
2852   case NeonTypeFlags::Int8:
2853   case NeonTypeFlags::Poly8:
2854     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2855   case NeonTypeFlags::Int16:
2856   case NeonTypeFlags::Poly16:
2857   case NeonTypeFlags::Float16:
2858     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2859   case NeonTypeFlags::Int32:
2860     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2861   case NeonTypeFlags::Int64:
2862   case NeonTypeFlags::Poly64:
2863     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2864   case NeonTypeFlags::Poly128:
2865     // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
2866     // a lot of the i128 and f128 API is still missing, so we use v16i8 to
2867     // represent poly128 and rely on pattern matching.
2868     return llvm::VectorType::get(CGF->Int8Ty, 16);
2869   case NeonTypeFlags::Float32:
2870     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2871   case NeonTypeFlags::Float64:
2872     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2873   }
2874   llvm_unreachable("Unknown vector element type!");
2875 }
2876
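// Return the floating-point vector type with the same lane count as the given
// integer type flags (Int32 -> float, Int64 -> double).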
2877 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2878                                           NeonTypeFlags IntTypeFlags) {
2879   int IsQuad = IntTypeFlags.isQuad();
2880   switch (IntTypeFlags.getEltType()) {
2881   case NeonTypeFlags::Int32:
2882     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2883   case NeonTypeFlags::Int64:
2884     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2885   default:
2886     llvm_unreachable("Type can't be converted to floating-point!");
2887   }
2888 }
2889
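// Broadcast lane C of V to every lane of the result using a constant
// shuffle mask.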
2890 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2891   unsigned nElts = V->getType()->getVectorNumElements();
2892   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2893   return Builder.CreateShuffleVector(V, V, SV, "lane");
2894 }
2895
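// Emit a call to a NEON intrinsic, bitcasting each operand to the parameter
// type the intrinsic expects. When a non-zero shift operand index is given,
// that operand is instead turned into a constant shift amount via
// EmitNeonShiftVector (negated for right shifts).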
2896 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2897                                      const char *name,
2898                                      unsigned shift, bool rightshift) {
2899   unsigned j = 0;
2900   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2901        ai != ae; ++ai, ++j)
2902     if (shift > 0 && shift == j)
2903       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2904     else
2905       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2906
2907   return Builder.CreateCall(F, Ops, name);
2908 }
2909
2910 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2911                                             bool neg) {
2912   int SV = cast<ConstantInt>(V)->getSExtValue();
2913   return ConstantInt::get(Ty, neg ? -SV : SV);
2914 }
2915
2916 /// \brief Right-shift a vector by a constant.
2917 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2918                                           llvm::Type *Ty, bool usgn,
2919                                           const char *name) {
2920   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2921
2922   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2923   int EltSize = VTy->getScalarSizeInBits();
2924
2925   Vec = Builder.CreateBitCast(Vec, Ty);
2926
2927   // lshr/ashr are undefined when the shift amount is equal to the vector
2928   // element size.
2929   if (ShiftAmt == EltSize) {
2930     if (usgn) {
2931       // Right-shifting an unsigned value by its size yields 0.
2932       return llvm::ConstantAggregateZero::get(VTy);
2933     } else {
2934       // Right-shifting a signed value by its size is equivalent
2935       // to a shift of size-1.
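      // For example, an i8 lane shifted right by 8 is emitted as a shift by 7,
      // which still yields 0 for non-negative lanes and -1 for negative ones.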
2936       --ShiftAmt;
2937       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2938     }
2939   }
2940
2941   Shift = EmitNeonShiftVector(Shift, Ty, false);
2942   if (usgn)
2943     return Builder.CreateLShr(Vec, Shift, name);
2944   else
2945     return Builder.CreateAShr(Vec, Shift, name);
2946 }
2947
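// Type-modifier flags consumed by LookupNeonLLVMIntrinsic below. They
// describe how the overloaded type list of a NEON LLVM intrinsic is built
// from the builtin call: whether to append the return type and/or argument
// types, whether to vectorize them, and at which vector width.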
2948 enum {
2949   AddRetType = (1 << 0),
2950   Add1ArgType = (1 << 1),
2951   Add2ArgTypes = (1 << 2),
2952
2953   VectorizeRetType = (1 << 3),
2954   VectorizeArgTypes = (1 << 4),
2955
2956   InventFloatType = (1 << 5),
2957   UnsignedAlts = (1 << 6),
2958
2959   Use64BitVectors = (1 << 7),
2960   Use128BitVectors = (1 << 8),
2961
2962   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
2963   VectorRet = AddRetType | VectorizeRetType,
2964   VectorRetGetArgs01 =
2965       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
2966   FpCmpzModifiers =
2967       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
2968 };
2969
2970 namespace {
2971 struct NeonIntrinsicInfo {
2972   const char *NameHint;
2973   unsigned BuiltinID;
2974   unsigned LLVMIntrinsic;
2975   unsigned AltLLVMIntrinsic;
2976   unsigned TypeModifier;
2977
2978   bool operator<(unsigned RHSBuiltinID) const {
2979     return BuiltinID < RHSBuiltinID;
2980   }
2981   bool operator<(const NeonIntrinsicInfo &TE) const {
2982     return BuiltinID < TE.BuiltinID;
2983   }
2984 };
2985 } // end anonymous namespace
2986
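// NEONMAP0/1/2 build NeonIntrinsicInfo entries that map a __builtin_neon_*
// builtin to zero, one, or two LLVM intrinsics together with its
// type-modifier flags.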
2987 #define NEONMAP0(NameBase) \
2988   { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
2989
2990 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
2991   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2992       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
2993
2994 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
2995   { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2996       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
2997       TypeModifier }
2998
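// The tables below map NEON builtins to LLVM intrinsics (plus modifier
// flags); each table must stay sorted by BuiltinID, which
// findNeonIntrinsicInMap asserts in assert-enabled builds.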
2999 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3000   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3001   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3002   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3003   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3004   NEONMAP0(vaddhn_v),
3005   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3006   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3007   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3008   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3009   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3010   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3011   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3012   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3013   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3014   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3015   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3016   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3017   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3018   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3019   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3020   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3021   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3022   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3023   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3024   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3025   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3026   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3027   NEONMAP0(vcvt_f32_v),
3028   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3029   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3030   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3031   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3032   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3033   NEONMAP0(vcvt_s32_v),
3034   NEONMAP0(vcvt_s64_v),
3035   NEONMAP0(vcvt_u32_v),
3036   NEONMAP0(vcvt_u64_v),
3037   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3038   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3039   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3040   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3041   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3042   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3043   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3044   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3045   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3046   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3047   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3048   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3049   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3050   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3051   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3052   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3053   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3054   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3055   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3056   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3057   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3058   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3059   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3060   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3061   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3062   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3063   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3064   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3065   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3066   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3067   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3068   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3069   NEONMAP0(vcvtq_f32_v),
3070   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3071   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3072   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3073   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3074   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3075   NEONMAP0(vcvtq_s32_v),
3076   NEONMAP0(vcvtq_s64_v),
3077   NEONMAP0(vcvtq_u32_v),
3078   NEONMAP0(vcvtq_u64_v),
3079   NEONMAP0(vext_v),
3080   NEONMAP0(vextq_v),
3081   NEONMAP0(vfma_v),
3082   NEONMAP0(vfmaq_v),
3083   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3084   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3085   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3086   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3087   NEONMAP0(vld1_dup_v),
3088   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3089   NEONMAP0(vld1q_dup_v),
3090   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3091   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3092   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3093   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3094   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3095   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3096   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3097   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3098   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3099   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3100   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3101   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3102   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3103   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3104   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3105   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3106   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3107   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3108   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3109   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3110   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3111   NEONMAP0(vmovl_v),
3112   NEONMAP0(vmovn_v),
3113   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3114   NEONMAP0(vmull_v),
3115   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3116   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3117   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3118   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3119   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3120   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3121   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3122   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3123   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3124   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3125   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3126   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3127   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3128   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3129   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3130   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3131   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3132   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3133   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3134   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3135   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3136   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3137   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3138   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3139   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3140   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3141   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3142   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3143   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3144   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3145   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3146   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3147   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3148   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3149   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3150   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3151   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3152   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3153   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3154   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3155   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3156   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3157   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3158   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3159   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3160   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3161   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3162   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3163   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3164   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3165   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3166   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3167   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3168   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3169   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3170   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3171   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3172   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3173   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3174   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3175   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3176   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3177   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3178   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3179   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3180   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3181   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3182   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3183   NEONMAP0(vshl_n_v),
3184   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3185   NEONMAP0(vshll_n_v),
3186   NEONMAP0(vshlq_n_v),
3187   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3188   NEONMAP0(vshr_n_v),
3189   NEONMAP0(vshrn_n_v),
3190   NEONMAP0(vshrq_n_v),
3191   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3192   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3193   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3194   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3195   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3196   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3197   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3198   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3199   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3200   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3201   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3202   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3203   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3204   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3205   NEONMAP0(vsubhn_v),
3206   NEONMAP0(vtrn_v),
3207   NEONMAP0(vtrnq_v),
3208   NEONMAP0(vtst_v),
3209   NEONMAP0(vtstq_v),
3210   NEONMAP0(vuzp_v),
3211   NEONMAP0(vuzpq_v),
3212   NEONMAP0(vzip_v),
3213   NEONMAP0(vzipq_v)
3214 };
3215
3216 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3217   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3218   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3219   NEONMAP0(vaddhn_v),
3220   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3221   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3222   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3223   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3224   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3225   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3226   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3227   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3228   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3229   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3230   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3231   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3232   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3233   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3234   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3235   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3236   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3237   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3238   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3239   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3240   NEONMAP0(vcvt_f32_v),
3241   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3242   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3243   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3244   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3245   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3246   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3247   NEONMAP0(vcvtq_f32_v),
3248   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3249   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3250   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3251   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3252   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3253   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3254   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3255   NEONMAP0(vext_v),
3256   NEONMAP0(vextq_v),
3257   NEONMAP0(vfma_v),
3258   NEONMAP0(vfmaq_v),
3259   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3260   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3261   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3262   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3263   NEONMAP0(vmovl_v),
3264   NEONMAP0(vmovn_v),
3265   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3266   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3267   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3268   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3269   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3270   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3271   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3272   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3273   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3274   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3275   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3276   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3277   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3278   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3279   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3280   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3281   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3282   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3283   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3284   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3285   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3286   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3287   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3288   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3289   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3290   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3291   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3292   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3293   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3294   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3295   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3296   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3297   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3298   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3299   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3300   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3301   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3302   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3303   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3304   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3305   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3306   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3307   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3308   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3309   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3310   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3311   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3312   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3313   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3314   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3315   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3316   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3317   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3318   NEONMAP0(vshl_n_v),
3319   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3320   NEONMAP0(vshll_n_v),
3321   NEONMAP0(vshlq_n_v),
3322   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3323   NEONMAP0(vshr_n_v),
3324   NEONMAP0(vshrn_n_v),
3325   NEONMAP0(vshrq_n_v),
3326   NEONMAP0(vsubhn_v),
3327   NEONMAP0(vtst_v),
3328   NEONMAP0(vtstq_v),
3329 };
3330
3331 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3332   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3333   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3334   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3335   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3336   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3337   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3338   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3339   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3340   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3341   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3342   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3343   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3344   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3345   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3346   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3347   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3348   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3349   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3350   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3351   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3352   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3353   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3354   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3355   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3356   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3357   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3358   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3359   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3360   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3361   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3362   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3363   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3364   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3365   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3366   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3367   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3368   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3369   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3370   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3371   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3372   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3373   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3374   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3375   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3376   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3377   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3378   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3379   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3380   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3381   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3382   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3383   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3384   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3385   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3386   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3387   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3388   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3389   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3390   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3391   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3392   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3393   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3394   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3395   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3396   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3397   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3398   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3399   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3400   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3401   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3402   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3403   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3404   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3405   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3406   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3407   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3408   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3409   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3410   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3411   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3412   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3413   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3414   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3415   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3416   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3417   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3418   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3419   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3420   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3421   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3422   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3423   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3424   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3425   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3426   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3427   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3428   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3429   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3430   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3431   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3432   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3433   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3434   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3435   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3436   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3437   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3438   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3439   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3440   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3441   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3442   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3443   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3444   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3445   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3446   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3447   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3448   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3449   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3450   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3451   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3452   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3453   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3454   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3455   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3456   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3457   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3458   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3459   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3460   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3461   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3462   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3463   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3464   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3465   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3466   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3467   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3468   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3469   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3470   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3471   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3472   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3473   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3474   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3475   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3476   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3477   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3478   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3479   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3480   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3481   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3482   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3483   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3484   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3485   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3486   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3487   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3488   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3489   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3490   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3491   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3492   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3493   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3494   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3495   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3496   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3497   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3498   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3499   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3500   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3501   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3502   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3503   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3504   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3505   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3506   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3507   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3508   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3509   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3510   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3511   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3512   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3513   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3514   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3515   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3516   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3517   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3518   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3519   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3520   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3522   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3523   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3524 };
3525
3526 #undef NEONMAP0
3527 #undef NEONMAP1
3528 #undef NEONMAP2
3529
3530 static bool NEONSIMDIntrinsicsProvenSorted = false;
3531
3532 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3533 static bool AArch64SISDIntrinsicsProvenSorted = false;
3534
3535
3536 static const NeonIntrinsicInfo *
3537 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3538                        unsigned BuiltinID, bool &MapProvenSorted) {
3539
3540 #ifndef NDEBUG
3541   if (!MapProvenSorted) {
3542     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3543     MapProvenSorted = true;
3544   }
3545 #endif
3546
3547   const NeonIntrinsicInfo *Builtin =
3548       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3549
3550   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3551     return Builtin;
3552
3553   return nullptr;
3554 }
3555
3556 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3557                                                    unsigned Modifier,
3558                                                    llvm::Type *ArgType,
3559                                                    const CallExpr *E) {
3560   int VectorSize = 0;
3561   if (Modifier & Use64BitVectors)
3562     VectorSize = 64;
3563   else if (Modifier & Use128BitVectors)
3564     VectorSize = 128;
3565
3566   // Return type.
3567   SmallVector<llvm::Type *, 3> Tys;
3568   if (Modifier & AddRetType) {
3569     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3570     if (Modifier & VectorizeRetType)
3571       Ty = llvm::VectorType::get(
3572           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3573
3574     Tys.push_back(Ty);
3575   }
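  // E.g. (illustrative) with AddRetType | VectorizeRetType | Use64BitVectors
  // and an i16 return type, the type pushed above becomes <4 x i16>.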
3576
3577   // Arguments.
3578   if (Modifier & VectorizeArgTypes) {
3579     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3580     ArgType = llvm::VectorType::get(ArgType, Elts);
3581   }
3582
3583   if (Modifier & (Add1ArgType | Add2ArgTypes))
3584     Tys.push_back(ArgType);
3585
3586   if (Modifier & Add2ArgTypes)
3587     Tys.push_back(ArgType);
3588
3589   if (Modifier & InventFloatType)
3590     Tys.push_back(FloatTy);
3591
3592   return CGM.getIntrinsic(IntrinsicID, Tys);
3593 }
3594
3595 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3596                                             const NeonIntrinsicInfo &SISDInfo,
3597                                             SmallVectorImpl<Value *> &Ops,
3598                                             const CallExpr *E) {
3599   unsigned BuiltinID = SISDInfo.BuiltinID;
3600   unsigned int Int = SISDInfo.LLVMIntrinsic;
3601   unsigned Modifier = SISDInfo.TypeModifier;
3602   const char *s = SISDInfo.NameHint;
3603
3604   switch (BuiltinID) {
3605   case NEON::BI__builtin_neon_vcled_s64:
3606   case NEON::BI__builtin_neon_vcled_u64:
3607   case NEON::BI__builtin_neon_vcles_f32:
3608   case NEON::BI__builtin_neon_vcled_f64:
3609   case NEON::BI__builtin_neon_vcltd_s64:
3610   case NEON::BI__builtin_neon_vcltd_u64:
3611   case NEON::BI__builtin_neon_vclts_f32:
3612   case NEON::BI__builtin_neon_vcltd_f64:
3613   case NEON::BI__builtin_neon_vcales_f32:
3614   case NEON::BI__builtin_neon_vcaled_f64:
3615   case NEON::BI__builtin_neon_vcalts_f32:
3616   case NEON::BI__builtin_neon_vcaltd_f64:
3617     // Only one direction of comparisons actually exists; cmle is actually a cmge
3618     // with swapped operands. The table gives us the right intrinsic, but we
3619     // still need to do the swap.
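    // E.g. an le comparison a <= b is emitted as the corresponding ge
    // intrinsic applied to (b, a).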
3620     std::swap(Ops[0], Ops[1]);
3621     break;
3622   }
3623
3624   assert(Int && "Generic code assumes a valid intrinsic");
3625
3626   // Determine the type(s) of this overloaded AArch64 intrinsic.
3627   const Expr *Arg = E->getArg(0);
3628   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3629   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3630
3631   int j = 0;
3632   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3633   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3634        ai != ae; ++ai, ++j) {
3635     llvm::Type *ArgTy = ai->getType();
3636     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3637              ArgTy->getPrimitiveSizeInBits())
3638       continue;
3639
3640     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3641     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3642     // it before inserting.
3643     Ops[j] =
3644         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3645     Ops[j] =
3646         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3647   }
3648
3649   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3650   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3651   if (ResultType->getPrimitiveSizeInBits() <
3652       Result->getType()->getPrimitiveSizeInBits())
3653     return CGF.Builder.CreateExtractElement(Result, C0);
3654
3655   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3656 }
3657
3658 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3659     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3660     const char *NameHint, unsigned Modifier, const CallExpr *E,
3661     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3662   // Get the last argument, which specifies the vector type.
3663   llvm::APSInt NeonTypeConst;
3664   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3665   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3666     return nullptr;
3667
3668   // Determine the type of this overloaded NEON intrinsic.
3669   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3670   bool Usgn = Type.isUnsigned();
3671   bool Quad = Type.isQuad();
3672
3673   llvm::VectorType *VTy = GetNeonType(this, Type);
3674   llvm::Type *Ty = VTy;
3675   if (!Ty)
3676     return nullptr;
3677
3678   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3679     return Builder.getInt32(addr.getAlignment().getQuantity());
3680   };
3681
3682   unsigned Int = LLVMIntrinsic;
3683   if ((Modifier & UnsignedAlts) && !Usgn)
3684     Int = AltLLVMIntrinsic;
3685
3686   switch (BuiltinID) {
3687   default: break;
3688   case NEON::BI__builtin_neon_vabs_v:
3689   case NEON::BI__builtin_neon_vabsq_v:
3690     if (VTy->getElementType()->isFloatingPointTy())
3691       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3692     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3693   case NEON::BI__builtin_neon_vaddhn_v: {
3694     llvm::VectorType *SrcTy =
3695         llvm::VectorType::getExtendedElementVectorType(VTy);
3696
3697     // %sum = add <4 x i32> %lhs, %rhs
3698     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3699     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3700     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3701
3702     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3703     Constant *ShiftAmt =
3704         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3705     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3706
3707     // %res = trunc <4 x i32> %high to <4 x i16>
3708     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3709   }
3710   case NEON::BI__builtin_neon_vcale_v:
3711   case NEON::BI__builtin_neon_vcaleq_v:
3712   case NEON::BI__builtin_neon_vcalt_v:
3713   case NEON::BI__builtin_neon_vcaltq_v:
3714     std::swap(Ops[0], Ops[1]);
3715   case NEON::BI__builtin_neon_vcage_v:
3716   case NEON::BI__builtin_neon_vcageq_v:
3717   case NEON::BI__builtin_neon_vcagt_v:
3718   case NEON::BI__builtin_neon_vcagtq_v: {
3719     llvm::Type *VecFlt = llvm::VectorType::get(
3720         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3721         VTy->getNumElements());
3722     llvm::Type *Tys[] = { VTy, VecFlt };
3723     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3724     return EmitNeonCall(F, Ops, NameHint);
3725   }
3726   case NEON::BI__builtin_neon_vclz_v:
3727   case NEON::BI__builtin_neon_vclzq_v:
3728     // We generate a target-independent intrinsic, which needs a second argument
3729     // indicating whether or not clz of zero is undefined; on ARM it isn't.
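    // E.g. on ARM, vclz_s32(x) lowers roughly to llvm.ctlz.v2i32(x, i1 false),
    // since CLZ of zero is well-defined there.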
3730     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3731     break;
3732   case NEON::BI__builtin_neon_vcvt_f32_v:
3733   case NEON::BI__builtin_neon_vcvtq_f32_v:
3734     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3735     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3736     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3737                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3738   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3739   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3740   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3741   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3742     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3743     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3744     Function *F = CGM.getIntrinsic(Int, Tys);
3745     return EmitNeonCall(F, Ops, "vcvt_n");
3746   }
3747   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3748   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3749   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3750   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3751   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3752   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3753   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3754   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3755     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3756     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3757     return EmitNeonCall(F, Ops, "vcvt_n");
3758   }
3759   case NEON::BI__builtin_neon_vcvt_s32_v:
3760   case NEON::BI__builtin_neon_vcvt_u32_v:
3761   case NEON::BI__builtin_neon_vcvt_s64_v:
3762   case NEON::BI__builtin_neon_vcvt_u64_v:
3763   case NEON::BI__builtin_neon_vcvtq_s32_v:
3764   case NEON::BI__builtin_neon_vcvtq_u32_v:
3765   case NEON::BI__builtin_neon_vcvtq_s64_v:
3766   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3767     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3768     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3769                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3770   }
3771   case NEON::BI__builtin_neon_vcvta_s32_v:
3772   case NEON::BI__builtin_neon_vcvta_s64_v:
3773   case NEON::BI__builtin_neon_vcvta_u32_v:
3774   case NEON::BI__builtin_neon_vcvta_u64_v:
3775   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3776   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3777   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3778   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3779   case NEON::BI__builtin_neon_vcvtn_s32_v:
3780   case NEON::BI__builtin_neon_vcvtn_s64_v:
3781   case NEON::BI__builtin_neon_vcvtn_u32_v:
3782   case NEON::BI__builtin_neon_vcvtn_u64_v:
3783   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3784   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3785   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3786   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3787   case NEON::BI__builtin_neon_vcvtp_s32_v:
3788   case NEON::BI__builtin_neon_vcvtp_s64_v:
3789   case NEON::BI__builtin_neon_vcvtp_u32_v:
3790   case NEON::BI__builtin_neon_vcvtp_u64_v:
3791   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3792   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3793   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3794   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3795   case NEON::BI__builtin_neon_vcvtm_s32_v:
3796   case NEON::BI__builtin_neon_vcvtm_s64_v:
3797   case NEON::BI__builtin_neon_vcvtm_u32_v:
3798   case NEON::BI__builtin_neon_vcvtm_u64_v:
3799   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3800   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3801   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3802   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3803     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3804     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3805   }
3806   case NEON::BI__builtin_neon_vext_v:
3807   case NEON::BI__builtin_neon_vextq_v: {
3808     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3809     SmallVector<uint32_t, 16> Indices;
3810     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3811       Indices.push_back(i+CV);
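    // E.g. for vext_s8(a, b, 3) (8 lanes, CV == 3) this produces the mask
    // <3,4,5,6,7,8,9,10>, i.e. the last five lanes of a followed by the first
    // three lanes of b.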
3812
3813     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3814     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3815     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3816   }
3817   case NEON::BI__builtin_neon_vfma_v:
3818   case NEON::BI__builtin_neon_vfmaq_v: {
3819     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3820     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3821     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3822     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3823
3824     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
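    // E.g. vfmaq_f32(acc, a, b), which computes acc + a*b, is emitted as
    // llvm.fma.v4f32(a, b, acc).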
3825     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3826   }
3827   case NEON::BI__builtin_neon_vld1_v:
3828   case NEON::BI__builtin_neon_vld1q_v: {
3829     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3830     Ops.push_back(getAlignmentValue32(PtrOp0));
3831     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3832   }
3833   case NEON::BI__builtin_neon_vld2_v:
3834   case NEON::BI__builtin_neon_vld2q_v:
3835   case NEON::BI__builtin_neon_vld3_v:
3836   case NEON::BI__builtin_neon_vld3q_v:
3837   case NEON::BI__builtin_neon_vld4_v:
3838   case NEON::BI__builtin_neon_vld4q_v: {
3839     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3840     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3841     Value *Align = getAlignmentValue32(PtrOp1);
3842     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3843     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3844     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3845     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3846   }
3847   case NEON::BI__builtin_neon_vld1_dup_v:
3848   case NEON::BI__builtin_neon_vld1q_dup_v: {
3849     Value *V = UndefValue::get(Ty);
3850     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3851     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3852     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3853     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3854     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3855     return EmitNeonSplat(Ops[0], CI);
3856   }
3857   case NEON::BI__builtin_neon_vld2_lane_v:
3858   case NEON::BI__builtin_neon_vld2q_lane_v:
3859   case NEON::BI__builtin_neon_vld3_lane_v:
3860   case NEON::BI__builtin_neon_vld3q_lane_v:
3861   case NEON::BI__builtin_neon_vld4_lane_v:
3862   case NEON::BI__builtin_neon_vld4q_lane_v: {
3863     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3864     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3865     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3866       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3867     Ops.push_back(getAlignmentValue32(PtrOp1));
3868     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3869     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3870     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3871     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3872   }
3873   case NEON::BI__builtin_neon_vmovl_v: {
3874     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3875     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3876     if (Usgn)
3877       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3878     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3879   }
3880   case NEON::BI__builtin_neon_vmovn_v: {
3881     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3882     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3883     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3884   }
3885   case NEON::BI__builtin_neon_vmull_v:
3886     // FIXME: the integer vmull operations could be emitted in terms of pure
3887     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3888     // hoisting the exts outside loops. Until global ISel comes along that can
3889     // see through such movement this leads to bad CodeGen. So we need an
3890     // intrinsic for now.
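    // The pure-IR form referred to above would be roughly (signed case):
    //   %lhs  = sext <4 x i16> %a to <4 x i32>
    //   %rhs  = sext <4 x i16> %b to <4 x i32>
    //   %prod = mul <4 x i32> %lhs, %rhs
    // with zext instead of sext for the unsigned variants.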
3891     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3892     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3893     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3894   case NEON::BI__builtin_neon_vpadal_v:
3895   case NEON::BI__builtin_neon_vpadalq_v: {
3896     // The source operand type has twice as many elements of half the size.
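    // E.g. for vpadal_s8 the result/accumulator type Ty is <4 x i16> and the
    // NarrowTy computed below is <8 x i8>.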
3897     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3898     llvm::Type *EltTy =
3899       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3900     llvm::Type *NarrowTy =
3901       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3902     llvm::Type *Tys[2] = { Ty, NarrowTy };
3903     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3904   }
3905   case NEON::BI__builtin_neon_vpaddl_v:
3906   case NEON::BI__builtin_neon_vpaddlq_v: {
3907     // The source operand type has twice as many elements of half the size.
3908     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3909     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3910     llvm::Type *NarrowTy =
3911       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3912     llvm::Type *Tys[2] = { Ty, NarrowTy };
3913     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3914   }
3915   case NEON::BI__builtin_neon_vqdmlal_v:
3916   case NEON::BI__builtin_neon_vqdmlsl_v: {
3917     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3918     Ops[1] =
3919         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3920     Ops.resize(2);
3921     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3922   }
3923   case NEON::BI__builtin_neon_vqshl_n_v:
3924   case NEON::BI__builtin_neon_vqshlq_n_v:
3925     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3926                         1, false);
3927   case NEON::BI__builtin_neon_vqshlu_n_v:
3928   case NEON::BI__builtin_neon_vqshluq_n_v:
3929     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3930                         1, false);
3931   case NEON::BI__builtin_neon_vrecpe_v:
3932   case NEON::BI__builtin_neon_vrecpeq_v:
3933   case NEON::BI__builtin_neon_vrsqrte_v:
3934   case NEON::BI__builtin_neon_vrsqrteq_v:
3935     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3936     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3937
3938   case NEON::BI__builtin_neon_vrshr_n_v:
3939   case NEON::BI__builtin_neon_vrshrq_n_v:
3940     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3941                         1, true);
3942   case NEON::BI__builtin_neon_vshl_n_v:
3943   case NEON::BI__builtin_neon_vshlq_n_v:
3944     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3945     return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
3946                              "vshl_n");
3947   case NEON::BI__builtin_neon_vshll_n_v: {
3948     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3949     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3950     if (Usgn)
3951       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3952     else
3953       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3954     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3955     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3956   }
3957   case NEON::BI__builtin_neon_vshrn_n_v: {
3958     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3959     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3960     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3961     if (Usgn)
3962       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3963     else
3964       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3965     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3966   }
3967   case NEON::BI__builtin_neon_vshr_n_v:
3968   case NEON::BI__builtin_neon_vshrq_n_v:
3969     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3970   case NEON::BI__builtin_neon_vst1_v:
3971   case NEON::BI__builtin_neon_vst1q_v:
3972   case NEON::BI__builtin_neon_vst2_v:
3973   case NEON::BI__builtin_neon_vst2q_v:
3974   case NEON::BI__builtin_neon_vst3_v:
3975   case NEON::BI__builtin_neon_vst3q_v:
3976   case NEON::BI__builtin_neon_vst4_v:
3977   case NEON::BI__builtin_neon_vst4q_v:
3978   case NEON::BI__builtin_neon_vst2_lane_v:
3979   case NEON::BI__builtin_neon_vst2q_lane_v:
3980   case NEON::BI__builtin_neon_vst3_lane_v:
3981   case NEON::BI__builtin_neon_vst3q_lane_v:
3982   case NEON::BI__builtin_neon_vst4_lane_v:
3983   case NEON::BI__builtin_neon_vst4q_lane_v: {
3984     llvm::Type *Tys[] = {Int8PtrTy, Ty};
3985     Ops.push_back(getAlignmentValue32(PtrOp0));
3986     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3987   }
3988   case NEON::BI__builtin_neon_vsubhn_v: {
3989     llvm::VectorType *SrcTy =
3990         llvm::VectorType::getExtendedElementVectorType(VTy);
3991
3992     // %diff = sub <4 x i32> %lhs, %rhs
3993     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3994     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3995     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3996
3997     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3998     Constant *ShiftAmt =
3999         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4000     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4001
4002     // %res = trunc <4 x i32> %high to <4 x i16>
4003     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4004   }
4005   case NEON::BI__builtin_neon_vtrn_v:
4006   case NEON::BI__builtin_neon_vtrnq_v: {
4007     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4008     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4009     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4010     Value *SV = nullptr;
4011
4012     for (unsigned vi = 0; vi != 2; ++vi) {
4013       SmallVector<uint32_t, 16> Indices;
4014       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4015         Indices.push_back(i+vi);
4016         Indices.push_back(i+e+vi);
4017       }
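      // E.g. for a 4-element vector this yields mask <0,4,2,6> when vi == 0
      // and <1,5,3,7> when vi == 1 (the two halves of a transpose).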
4018       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4019       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4020       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4021     }
4022     return SV;
4023   }
4024   case NEON::BI__builtin_neon_vtst_v:
4025   case NEON::BI__builtin_neon_vtstq_v: {
4026     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4027     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4028     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4029     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4030                                 ConstantAggregateZero::get(Ty));
4031     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4032   }
4033   case NEON::BI__builtin_neon_vuzp_v:
4034   case NEON::BI__builtin_neon_vuzpq_v: {
4035     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4036     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4037     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4038     Value *SV = nullptr;
4039
4040     for (unsigned vi = 0; vi != 2; ++vi) {
4041       SmallVector<uint32_t, 16> Indices;
4042       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4043         Indices.push_back(2*i+vi);
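      // E.g. for a 4-element vector this yields mask <0,2,4,6> when vi == 0
      // and <1,3,5,7> when vi == 1 (even and odd lanes, respectively).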
4044
4045       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4046       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4047       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4048     }
4049     return SV;
4050   }
4051   case NEON::BI__builtin_neon_vzip_v:
4052   case NEON::BI__builtin_neon_vzipq_v: {
4053     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4054     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4055     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4056     Value *SV = nullptr;
4057
4058     for (unsigned vi = 0; vi != 2; ++vi) {
4059       SmallVector<uint32_t, 16> Indices;
4060       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4061         Indices.push_back((i + vi*e) >> 1);
4062         Indices.push_back(((i + vi*e) >> 1)+e);
4063       }
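      // E.g. for a 4-element vector this yields mask <0,4,1,5> when vi == 0
      // and <2,6,3,7> when vi == 1 (low and high interleaved halves).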
4064       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4065       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4066       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4067     }
4068     return SV;
4069   }
4070   }
4071
4072   assert(Int && "Expected valid intrinsic number");
4073
4074   // Determine the type(s) of this overloaded AArch64 intrinsic.
4075   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4076
4077   Value *Result = EmitNeonCall(F, Ops, NameHint);
4078   llvm::Type *ResultType = ConvertType(E->getType());
4079   // AArch64 intrinsic one-element vector type cast to
4080   // scalar type expected by the builtin
4081   return Builder.CreateBitCast(Result, ResultType, NameHint);
4082 }
4083
4084 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4085     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4086     const CmpInst::Predicate Ip, const Twine &Name) {
4087   llvm::Type *OTy = Op->getType();
4088
4089   // FIXME: this is utterly horrific. We should not be looking at previous
4090   // codegen context to find out what needs doing. Unfortunately TableGen
4091   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4092   // (etc).
4093   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4094     OTy = BI->getOperand(0)->getType();
4095
4096   Op = Builder.CreateBitCast(Op, OTy);
4097   if (OTy->getScalarType()->isFloatingPointTy()) {
4098     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4099   } else {
4100     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4101   }
4102   return Builder.CreateSExt(Op, Ty, Name);
4103 }
4104
4105 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4106                                  Value *ExtOp, Value *IndexOp,
4107                                  llvm::Type *ResTy, unsigned IntID,
4108                                  const char *Name) {
4109   SmallVector<Value *, 2> TblOps;
4110   if (ExtOp)
4111     TblOps.push_back(ExtOp);
4112
4113   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
4114   SmallVector<uint32_t, 16> Indices;
4115   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4116   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4117     Indices.push_back(2*i);
4118     Indices.push_back(2*i+1);
4119   }
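  // Shuffling a pair of 64-bit table registers with this mask concatenates
  // them into a single 128-bit table, e.g. two <8 x i8> inputs into <16 x i8>.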
4120
4121   int PairPos = 0, End = Ops.size() - 1;
4122   while (PairPos < End) {
4123     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4124                                                      Ops[PairPos+1], Indices,
4125                                                      Name));
4126     PairPos += 2;
4127   }
4128
4129   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4130   // of the final 128-bit lookup table with zero.
4131   if (PairPos == End) {
4132     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4133     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4134                                                      ZeroTbl, Indices, Name));
4135   }
4136
4137   Function *TblF;
4138   TblOps.push_back(IndexOp);
4139   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4140
4141   return CGF.EmitNeonCall(TblF, TblOps, Name);
4142 }
4143
4144 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4145   unsigned Value;
4146   switch (BuiltinID) {
4147   default:
4148     return nullptr;
4149   case ARM::BI__builtin_arm_nop:
4150     Value = 0;
4151     break;
4152   case ARM::BI__builtin_arm_yield:
4153   case ARM::BI__yield:
4154     Value = 1;
4155     break;
4156   case ARM::BI__builtin_arm_wfe:
4157   case ARM::BI__wfe:
4158     Value = 2;
4159     break;
4160   case ARM::BI__builtin_arm_wfi:
4161   case ARM::BI__wfi:
4162     Value = 3;
4163     break;
4164   case ARM::BI__builtin_arm_sev:
4165   case ARM::BI__sev:
4166     Value = 4;
4167     break;
4168   case ARM::BI__builtin_arm_sevl:
4169   case ARM::BI__sevl:
4170     Value = 5;
4171     break;
4172   }
4173
4174   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4175                             llvm::ConstantInt::get(Int32Ty, Value));
4176 }
4177
4178 // Generates the IR for the read/write special register builtin.
4179 // ValueType is the type of the value that is to be written or read;
4180 // RegisterType is the type of the register being written to or read from.
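// Illustratively (register name is a placeholder), a 32-bit read such as
// __builtin_arm_rsr("<reg>") becomes a call to llvm.read_register.i32 passing
// an MDString holding the register name; writes use llvm.write_register.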
4181 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4182                                          const CallExpr *E,
4183                                          llvm::Type *RegisterType,
4184                                          llvm::Type *ValueType,
4185                                          bool IsRead,
4186                                          StringRef SysReg = "") {
4187   // The read_register and write_register intrinsics only support 32- and 64-bit operations.
4188   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4189           && "Unsupported size for register.");
4190
4191   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4192   CodeGen::CodeGenModule &CGM = CGF.CGM;
4193   LLVMContext &Context = CGM.getLLVMContext();
4194
4195   if (SysReg.empty()) {
4196     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4197     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4198   }
4199
4200   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4201   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4202   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4203
4204   llvm::Type *Types[] = { RegisterType };
4205
4206   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4207   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4208             && "Can't fit 64-bit value in 32-bit register");
4209
4210   if (IsRead) {
4211     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4212     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4213
4214     if (MixedTypes)
4215       // Read into 64 bit register and then truncate result to 32 bit.
4216       return Builder.CreateTrunc(Call, ValueType);
4217
4218     if (ValueType->isPointerTy())
4219       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4220       return Builder.CreateIntToPtr(Call, ValueType);
4221
4222     return Call;
4223   }
4224
4225   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4226   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4227   if (MixedTypes) {
4228     // Extend 32 bit write value to 64 bit to pass to write.
4229     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4230     return Builder.CreateCall(F, { Metadata, ArgValue });
4231   }
4232
4233   if (ValueType->isPointerTy()) {
4234     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4235     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4236     return Builder.CreateCall(F, { Metadata, ArgValue });
4237   }
4238
4239   return Builder.CreateCall(F, { Metadata, ArgValue });
4240 }
4241
4242 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4243 /// argument that specifies the vector type.
4244 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4245   switch (BuiltinID) {
4246   default: break;
4247   case NEON::BI__builtin_neon_vget_lane_i8:
4248   case NEON::BI__builtin_neon_vget_lane_i16:
4249   case NEON::BI__builtin_neon_vget_lane_i32:
4250   case NEON::BI__builtin_neon_vget_lane_i64:
4251   case NEON::BI__builtin_neon_vget_lane_f32:
4252   case NEON::BI__builtin_neon_vgetq_lane_i8:
4253   case NEON::BI__builtin_neon_vgetq_lane_i16:
4254   case NEON::BI__builtin_neon_vgetq_lane_i32:
4255   case NEON::BI__builtin_neon_vgetq_lane_i64:
4256   case NEON::BI__builtin_neon_vgetq_lane_f32:
4257   case NEON::BI__builtin_neon_vset_lane_i8:
4258   case NEON::BI__builtin_neon_vset_lane_i16:
4259   case NEON::BI__builtin_neon_vset_lane_i32:
4260   case NEON::BI__builtin_neon_vset_lane_i64:
4261   case NEON::BI__builtin_neon_vset_lane_f32:
4262   case NEON::BI__builtin_neon_vsetq_lane_i8:
4263   case NEON::BI__builtin_neon_vsetq_lane_i16:
4264   case NEON::BI__builtin_neon_vsetq_lane_i32:
4265   case NEON::BI__builtin_neon_vsetq_lane_i64:
4266   case NEON::BI__builtin_neon_vsetq_lane_f32:
4267   case NEON::BI__builtin_neon_vsha1h_u32:
4268   case NEON::BI__builtin_neon_vsha1cq_u32:
4269   case NEON::BI__builtin_neon_vsha1pq_u32:
4270   case NEON::BI__builtin_neon_vsha1mq_u32:
4271   case ARM::BI_MoveToCoprocessor:
4272   case ARM::BI_MoveToCoprocessor2:
4273     return false;
4274   }
4275   return true;
4276 }
4277
4278 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4279                                            const CallExpr *E) {
4280   if (auto Hint = GetValueForARMHint(BuiltinID))
4281     return Hint;
4282
4283   if (BuiltinID == ARM::BI__emit) {
4284     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4285     llvm::FunctionType *FTy =
4286         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4287
4288     APSInt Value;
4289     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4290       llvm_unreachable("Sema will ensure that the parameter is constant");
4291
4292     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4293
4294     llvm::InlineAsm *Emit =
4295         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4296                                  /*SideEffects=*/true)
4297                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4298                                  /*SideEffects=*/true);
4299
4300     return Builder.CreateCall(Emit);
4301   }
4302
4303   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4304     Value *Option = EmitScalarExpr(E->getArg(0));
4305     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4306   }
4307
4308   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4309     Value *Address = EmitScalarExpr(E->getArg(0));
4310     Value *RW      = EmitScalarExpr(E->getArg(1));
4311     Value *IsData  = EmitScalarExpr(E->getArg(2));
4312
4313     // Locality is not supported on the ARM target
4314     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4315
4316     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4317     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4318   }
4319
4320   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4321     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
4322                                                EmitScalarExpr(E->getArg(0)),
4323                               "rbit");
4324   }
4325
4326   if (BuiltinID == ARM::BI__clear_cache) {
4327     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4328     const FunctionDecl *FD = E->getDirectCallee();
4329     Value *Ops[2];
4330     for (unsigned i = 0; i < 2; i++)
4331       Ops[i] = EmitScalarExpr(E->getArg(i));
4332     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4333     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4334     StringRef Name = FD->getName();
4335     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4336   }
4337
4338   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4339       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4340     Function *F;
4341
4342     switch (BuiltinID) {
4343     default: llvm_unreachable("unexpected builtin");
4344     case ARM::BI__builtin_arm_mcrr:
4345       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4346       break;
4347     case ARM::BI__builtin_arm_mcrr2:
4348       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4349       break;
4350     }
4351
4352     // The MCRR{2} instruction and intrinsic have 5 operands, but
4353     // the builtin has only 4 because Rt and Rt2 are
4354     // represented as a single unsigned 64-bit integer
4355     // in the builtin definition; it is split back into
4356     // two 32-bit values below before being passed to
4357     // the intrinsic.
4358
4359     Value *Coproc = EmitScalarExpr(E->getArg(0));
4360     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4361     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4362     Value *CRm = EmitScalarExpr(E->getArg(3));
4363
4364     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4365     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4366     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4367     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4368
4369     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4370   }
4371
4372   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4373       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4374     Function *F;
4375
4376     switch (BuiltinID) {
4377     default: llvm_unreachable("unexpected builtin");
4378     case ARM::BI__builtin_arm_mrrc:
4379       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4380       break;
4381     case ARM::BI__builtin_arm_mrrc2:
4382       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4383       break;
4384     }
4385
4386     Value *Coproc = EmitScalarExpr(E->getArg(0));
4387     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4388     Value *CRm  = EmitScalarExpr(E->getArg(2));
4389     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4390
4391     // The intrinsic returns an unsigned 64-bit integer, represented
4392     // as two 32-bit integers.
4393
4394     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4395     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4396     Rt = Builder.CreateZExt(Rt, Int64Ty);
4397     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4398
4399     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4400     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4401     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4402
4403     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4404   }
4405
4406   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4407       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4408         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4409        getContext().getTypeSize(E->getType()) == 64) ||
4410       BuiltinID == ARM::BI__ldrexd) {
4411     Function *F;
4412
4413     switch (BuiltinID) {
4414     default: llvm_unreachable("unexpected builtin");
4415     case ARM::BI__builtin_arm_ldaex:
4416       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4417       break;
4418     case ARM::BI__builtin_arm_ldrexd:
4419     case ARM::BI__builtin_arm_ldrex:
4420     case ARM::BI__ldrexd:
4421       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4422       break;
4423     }
4424
4425     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4426     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4427                                     "ldrexd");
4428
4429     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4430     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4431     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4432     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4433
4434     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4435     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4436     Val = Builder.CreateOr(Val, Val1);
4437     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4438   }
4439
4440   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4441       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4442     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4443
4444     QualType Ty = E->getType();
4445     llvm::Type *RealResTy = ConvertType(Ty);
4446     llvm::Type *PtrTy = llvm::IntegerType::get(
4447         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4448     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4449
4450     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4451                                        ? Intrinsic::arm_ldaex
4452                                        : Intrinsic::arm_ldrex,
4453                                    PtrTy);
4454     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4455
4456     if (RealResTy->isPointerTy())
4457       return Builder.CreateIntToPtr(Val, RealResTy);
4458     else {
4459       llvm::Type *IntResTy = llvm::IntegerType::get(
4460           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4461       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4462       return Builder.CreateBitCast(Val, RealResTy);
4463     }
4464   }
4465
4466   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4467       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4468         BuiltinID == ARM::BI__builtin_arm_strex) &&
4469        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4470     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4471                                        ? Intrinsic::arm_stlexd
4472                                        : Intrinsic::arm_strexd);
4473     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4474
4475     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4476     Value *Val = EmitScalarExpr(E->getArg(0));
4477     Builder.CreateStore(Val, Tmp);
4478
4479     Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4480     Val = Builder.CreateLoad(LdPtr);
4481
4482     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4483     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4484     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4485     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4486   }
4487
4488   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4489       BuiltinID == ARM::BI__builtin_arm_stlex) {
4490     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4491     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4492
4493     QualType Ty = E->getArg(0)->getType();
4494     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4495                                                  getContext().getTypeSize(Ty));
4496     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4497
4498     if (StoreVal->getType()->isPointerTy())
4499       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4500     else {
4501       llvm::Type *IntTy = llvm::IntegerType::get(
4502           getLLVMContext(),
4503           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4504       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4505       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4506     }
4507
4508     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4509                                        ? Intrinsic::arm_stlex
4510                                        : Intrinsic::arm_strex,
4511                                    StoreAddr->getType());
4512     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4513   }
4514
4515   switch (BuiltinID) {
4516   case ARM::BI__iso_volatile_load8:
4517   case ARM::BI__iso_volatile_load16:
4518   case ARM::BI__iso_volatile_load32:
4519   case ARM::BI__iso_volatile_load64: {
4520     Value *Ptr = EmitScalarExpr(E->getArg(0));
4521     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4522     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4523     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4524                                              LoadSize.getQuantity() * 8);
4525     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4526     llvm::LoadInst *Load =
4527       Builder.CreateAlignedLoad(Ptr, LoadSize);
4528     Load->setVolatile(true);
4529     return Load;
4530   }
4531   case ARM::BI__iso_volatile_store8:
4532   case ARM::BI__iso_volatile_store16:
4533   case ARM::BI__iso_volatile_store32:
4534   case ARM::BI__iso_volatile_store64: {
4535     Value *Ptr = EmitScalarExpr(E->getArg(0));
4536     Value *Value = EmitScalarExpr(E->getArg(1));
4537     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4538     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4539     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4540                                              StoreSize.getQuantity() * 8);
4541     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4542     llvm::StoreInst *Store =
4543       Builder.CreateAlignedStore(Value, Ptr,
4544                                  StoreSize);
4545     Store->setVolatile(true);
4546     return Store;
4547   }
4548   }
4549
4550   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4551     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4552     return Builder.CreateCall(F);
4553   }
4554
4555   // CRC32
4556   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4557   switch (BuiltinID) {
4558   case ARM::BI__builtin_arm_crc32b:
4559     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4560   case ARM::BI__builtin_arm_crc32cb:
4561     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4562   case ARM::BI__builtin_arm_crc32h:
4563     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4564   case ARM::BI__builtin_arm_crc32ch:
4565     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4566   case ARM::BI__builtin_arm_crc32w:
4567   case ARM::BI__builtin_arm_crc32d:
4568     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4569   case ARM::BI__builtin_arm_crc32cw:
4570   case ARM::BI__builtin_arm_crc32cd:
4571     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4572   }
4573
4574   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4575     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4576     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4577
4578     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4579     // intrinsics, hence we need different codegen for these cases.
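    // Illustratively: crc32{c,}d(init, x) is computed as
    // crc32{c,}w(crc32{c,}w(init, lo32(x)), hi32(x)), where lo32/hi32 denote
    // the low and high 32-bit halves of x.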
4580     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4581         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4582       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4583       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4584       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4585       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4586
4587       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4588       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4589       return Builder.CreateCall(F, {Res, Arg1b});
4590     } else {
4591       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4592
4593       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4594       return Builder.CreateCall(F, {Arg0, Arg1});
4595     }
4596   }
4597
4598   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4599       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4600       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4601       BuiltinID == ARM::BI__builtin_arm_wsr ||
4602       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4603       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4604
4605     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4606                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4607                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4608
4609     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4610                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4611
4612     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4613                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4614
4615     llvm::Type *ValueType;
4616     llvm::Type *RegisterType;
4617     if (IsPointerBuiltin) {
4618       ValueType = VoidPtrTy;
4619       RegisterType = Int32Ty;
4620     } else if (Is64Bit) {
4621       ValueType = RegisterType = Int64Ty;
4622     } else {
4623       ValueType = RegisterType = Int32Ty;
4624     }
4625
4626     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4627   }
4628
4629   // Find out if any arguments are required to be integer constant
4630   // expressions.
4631   unsigned ICEArguments = 0;
4632   ASTContext::GetBuiltinTypeError Error;
4633   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4634   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4635
4636   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4637     return Builder.getInt32(addr.getAlignment().getQuantity());
4638   };
4639
4640   Address PtrOp0 = Address::invalid();
4641   Address PtrOp1 = Address::invalid();
4642   SmallVector<Value*, 4> Ops;
4643   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4644   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4645   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4646     if (i == 0) {
4647       switch (BuiltinID) {
4648       case NEON::BI__builtin_neon_vld1_v:
4649       case NEON::BI__builtin_neon_vld1q_v:
4650       case NEON::BI__builtin_neon_vld1q_lane_v:
4651       case NEON::BI__builtin_neon_vld1_lane_v:
4652       case NEON::BI__builtin_neon_vld1_dup_v:
4653       case NEON::BI__builtin_neon_vld1q_dup_v:
4654       case NEON::BI__builtin_neon_vst1_v:
4655       case NEON::BI__builtin_neon_vst1q_v:
4656       case NEON::BI__builtin_neon_vst1q_lane_v:
4657       case NEON::BI__builtin_neon_vst1_lane_v:
4658       case NEON::BI__builtin_neon_vst2_v:
4659       case NEON::BI__builtin_neon_vst2q_v:
4660       case NEON::BI__builtin_neon_vst2_lane_v:
4661       case NEON::BI__builtin_neon_vst2q_lane_v:
4662       case NEON::BI__builtin_neon_vst3_v:
4663       case NEON::BI__builtin_neon_vst3q_v:
4664       case NEON::BI__builtin_neon_vst3_lane_v:
4665       case NEON::BI__builtin_neon_vst3q_lane_v:
4666       case NEON::BI__builtin_neon_vst4_v:
4667       case NEON::BI__builtin_neon_vst4q_v:
4668       case NEON::BI__builtin_neon_vst4_lane_v:
4669       case NEON::BI__builtin_neon_vst4q_lane_v:
4670         // Get the alignment for the argument in addition to the value;
4671         // we'll use it later.
4672         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4673         Ops.push_back(PtrOp0.getPointer());
4674         continue;
4675       }
4676     }
4677     if (i == 1) {
4678       switch (BuiltinID) {
4679       case NEON::BI__builtin_neon_vld2_v:
4680       case NEON::BI__builtin_neon_vld2q_v:
4681       case NEON::BI__builtin_neon_vld3_v:
4682       case NEON::BI__builtin_neon_vld3q_v:
4683       case NEON::BI__builtin_neon_vld4_v:
4684       case NEON::BI__builtin_neon_vld4q_v:
4685       case NEON::BI__builtin_neon_vld2_lane_v:
4686       case NEON::BI__builtin_neon_vld2q_lane_v:
4687       case NEON::BI__builtin_neon_vld3_lane_v:
4688       case NEON::BI__builtin_neon_vld3q_lane_v:
4689       case NEON::BI__builtin_neon_vld4_lane_v:
4690       case NEON::BI__builtin_neon_vld4q_lane_v:
4691       case NEON::BI__builtin_neon_vld2_dup_v:
4692       case NEON::BI__builtin_neon_vld3_dup_v:
4693       case NEON::BI__builtin_neon_vld4_dup_v:
4694         // Get the alignment for the argument in addition to the value;
4695         // we'll use it later.
4696         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4697         Ops.push_back(PtrOp1.getPointer());
4698         continue;
4699       }
4700     }
4701
4702     if ((ICEArguments & (1 << i)) == 0) {
4703       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4704     } else {
4705       // If this is required to be a constant, constant fold it so that we know
4706       // that the generated intrinsic gets a ConstantInt.
4707       llvm::APSInt Result;
4708       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4709       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4710       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4711     }
4712   }
4713
4714   switch (BuiltinID) {
4715   default: break;
4716
4717   case NEON::BI__builtin_neon_vget_lane_i8:
4718   case NEON::BI__builtin_neon_vget_lane_i16:
4719   case NEON::BI__builtin_neon_vget_lane_i32:
4720   case NEON::BI__builtin_neon_vget_lane_i64:
4721   case NEON::BI__builtin_neon_vget_lane_f32:
4722   case NEON::BI__builtin_neon_vgetq_lane_i8:
4723   case NEON::BI__builtin_neon_vgetq_lane_i16:
4724   case NEON::BI__builtin_neon_vgetq_lane_i32:
4725   case NEON::BI__builtin_neon_vgetq_lane_i64:
4726   case NEON::BI__builtin_neon_vgetq_lane_f32:
4727     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4728
4729   case NEON::BI__builtin_neon_vset_lane_i8:
4730   case NEON::BI__builtin_neon_vset_lane_i16:
4731   case NEON::BI__builtin_neon_vset_lane_i32:
4732   case NEON::BI__builtin_neon_vset_lane_i64:
4733   case NEON::BI__builtin_neon_vset_lane_f32:
4734   case NEON::BI__builtin_neon_vsetq_lane_i8:
4735   case NEON::BI__builtin_neon_vsetq_lane_i16:
4736   case NEON::BI__builtin_neon_vsetq_lane_i32:
4737   case NEON::BI__builtin_neon_vsetq_lane_i64:
4738   case NEON::BI__builtin_neon_vsetq_lane_f32:
4739     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4740
4741   case NEON::BI__builtin_neon_vsha1h_u32:
4742     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4743                         "vsha1h");
4744   case NEON::BI__builtin_neon_vsha1cq_u32:
4745     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4746                         "vsha1h");
4747   case NEON::BI__builtin_neon_vsha1pq_u32:
4748     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4749                         "vsha1h");
4750   case NEON::BI__builtin_neon_vsha1mq_u32:
4751     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4752                         "vsha1h");
4753
4754   // The ARM _MoveToCoprocessor builtins put the input register value as
4755   // the first argument, but the LLVM intrinsic expects it as the third one.
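  // Illustratively, builtin arguments (value, a, b, c, d, e) are forwarded to
  // the intrinsic as (a, b, value, c, d, e).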
4756   case ARM::BI_MoveToCoprocessor:
4757   case ARM::BI_MoveToCoprocessor2: {
4758     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4759                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4760     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4761                                   Ops[3], Ops[4], Ops[5]});
4762   }
4763   case ARM::BI_BitScanForward:
4764   case ARM::BI_BitScanForward64:
4765     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4766   case ARM::BI_BitScanReverse:
4767   case ARM::BI_BitScanReverse64:
4768     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4769
4770   case ARM::BI_InterlockedAnd64:
4771     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4772   case ARM::BI_InterlockedExchange64:
4773     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4774   case ARM::BI_InterlockedExchangeAdd64:
4775     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4776   case ARM::BI_InterlockedExchangeSub64:
4777     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4778   case ARM::BI_InterlockedOr64:
4779     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4780   case ARM::BI_InterlockedXor64:
4781     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4782   case ARM::BI_InterlockedDecrement64:
4783     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4784   case ARM::BI_InterlockedIncrement64:
4785     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4786   }
4787
4788   // Get the last argument, which specifies the vector type.
4789   assert(HasExtraArg);
4790   llvm::APSInt Result;
4791   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4792   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4793     return nullptr;
4794
4795   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4796       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4797     // Determine the overloaded type of this builtin.
4798     llvm::Type *Ty;
4799     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4800       Ty = FloatTy;
4801     else
4802       Ty = DoubleTy;
4803
4804     // Determine whether this is an unsigned conversion or not.
4805     bool usgn = Result.getZExtValue() == 1;
4806     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4807
4808     // Call the appropriate intrinsic.
4809     Function *F = CGM.getIntrinsic(Int, Ty);
4810     return Builder.CreateCall(F, Ops, "vcvtr");
4811   }
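  // Illustrative note: the trailing constant argument picks the flavour here,
  // so a call such as __builtin_arm_vcvtr_f(x, 1) is expected to use
  // llvm.arm.vcvtru (unsigned) and __builtin_arm_vcvtr_f(x, 0) llvm.arm.vcvtr.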
4812
4813   // Determine the type of this overloaded NEON intrinsic.
4814   NeonTypeFlags Type(Result.getZExtValue());
4815   bool usgn = Type.isUnsigned();
4816   bool rightShift = false;
4817
4818   llvm::VectorType *VTy = GetNeonType(this, Type);
4819   llvm::Type *Ty = VTy;
4820   if (!Ty)
4821     return nullptr;
4822
4823   // Many NEON builtins have identical semantics and uses in ARM and
4824   // AArch64. Emit these in a single function.
4825   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4826   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4827       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4828   if (Builtin)
4829     return EmitCommonNeonBuiltinExpr(
4830         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4831         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4832
4833   unsigned Int;
4834   switch (BuiltinID) {
4835   default: return nullptr;
4836   case NEON::BI__builtin_neon_vld1q_lane_v:
4837     // Handle 64-bit integer elements as a special case.  Use shuffles of
4838     // one-element vectors to avoid poor code for i64 in the backend.
4839     if (VTy->getElementType()->isIntegerTy(64)) {
4840       // Extract the other lane.
4841       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4842       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4843       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4844       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4845       // Load the value as a one-element vector.
4846       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4847       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4848       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4849       Value *Align = getAlignmentValue32(PtrOp0);
4850       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4851       // Combine them.
4852       uint32_t Indices[] = {1 - Lane, Lane};
4853       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4854       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4855     }
4856     // fall through
4857   case NEON::BI__builtin_neon_vld1_lane_v: {
4858     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4859     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4860     Value *Ld = Builder.CreateLoad(PtrOp0);
4861     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4862   }
4863   case NEON::BI__builtin_neon_vld2_dup_v:
4864   case NEON::BI__builtin_neon_vld3_dup_v:
4865   case NEON::BI__builtin_neon_vld4_dup_v: {
4866     // Handle 64-bit elements as a special case; no "dup" is needed.
4867     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4868       switch (BuiltinID) {
4869       case NEON::BI__builtin_neon_vld2_dup_v:
4870         Int = Intrinsic::arm_neon_vld2;
4871         break;
4872       case NEON::BI__builtin_neon_vld3_dup_v:
4873         Int = Intrinsic::arm_neon_vld3;
4874         break;
4875       case NEON::BI__builtin_neon_vld4_dup_v:
4876         Int = Intrinsic::arm_neon_vld4;
4877         break;
4878       default: llvm_unreachable("unknown vld_dup intrinsic?");
4879       }
4880       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4881       Function *F = CGM.getIntrinsic(Int, Tys);
4882       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4883       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4884       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4885       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4886       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4887     }
4888     switch (BuiltinID) {
4889     case NEON::BI__builtin_neon_vld2_dup_v:
4890       Int = Intrinsic::arm_neon_vld2lane;
4891       break;
4892     case NEON::BI__builtin_neon_vld3_dup_v:
4893       Int = Intrinsic::arm_neon_vld3lane;
4894       break;
4895     case NEON::BI__builtin_neon_vld4_dup_v:
4896       Int = Intrinsic::arm_neon_vld4lane;
4897       break;
4898     default: llvm_unreachable("unknown vld_dup intrinsic?");
4899     }
4900     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4901     Function *F = CGM.getIntrinsic(Int, Tys);
4902     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4903
4904     SmallVector<Value*, 6> Args;
4905     Args.push_back(Ops[1]);
4906     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4907
4908     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4909     Args.push_back(CI);
4910     Args.push_back(getAlignmentValue32(PtrOp1));
4911
4912     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4913     // Splat lane 0 to all elements in each vector of the result.
4914     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4915       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4916       Value *Elt = Builder.CreateBitCast(Val, Ty);
4917       Elt = EmitNeonSplat(Elt, CI);
4918       Elt = Builder.CreateBitCast(Elt, Val->getType());
4919       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4920     }
4921     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4922     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4923     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4924   }
4925   case NEON::BI__builtin_neon_vqrshrn_n_v:
4926     Int =
4927       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4928     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4929                         1, true);
4930   case NEON::BI__builtin_neon_vqrshrun_n_v:
4931     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4932                         Ops, "vqrshrun_n", 1, true);
4933   case NEON::BI__builtin_neon_vqshrn_n_v:
4934     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4935     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4936                         1, true);
4937   case NEON::BI__builtin_neon_vqshrun_n_v:
4938     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4939                         Ops, "vqshrun_n", 1, true);
4940   case NEON::BI__builtin_neon_vrecpe_v:
4941   case NEON::BI__builtin_neon_vrecpeq_v:
4942     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4943                         Ops, "vrecpe");
4944   case NEON::BI__builtin_neon_vrshrn_n_v:
4945     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4946                         Ops, "vrshrn_n", 1, true);
4947   case NEON::BI__builtin_neon_vrsra_n_v:
4948   case NEON::BI__builtin_neon_vrsraq_n_v:
4949     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4950     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4951     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4952     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4953     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4954     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4955   case NEON::BI__builtin_neon_vsri_n_v:
4956   case NEON::BI__builtin_neon_vsriq_n_v:
4957     rightShift = true;
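    // fall through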
4958   case NEON::BI__builtin_neon_vsli_n_v:
4959   case NEON::BI__builtin_neon_vsliq_n_v:
4960     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4961     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4962                         Ops, "vsli_n");
4963   case NEON::BI__builtin_neon_vsra_n_v:
4964   case NEON::BI__builtin_neon_vsraq_n_v:
4965     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4966     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4967     return Builder.CreateAdd(Ops[0], Ops[1]);
4968   case NEON::BI__builtin_neon_vst1q_lane_v:
4969     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4970     // a one-element vector and avoid poor code for i64 in the backend.
4971     if (VTy->getElementType()->isIntegerTy(64)) {
4972       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4973       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4974       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4975       Ops[2] = getAlignmentValue32(PtrOp0);
4976       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4977       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4978                                                  Tys), Ops);
4979     }
4980     // fall through
4981   case NEON::BI__builtin_neon_vst1_lane_v: {
4982     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4983     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4984     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4985     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4986     return St;
4987   }
4988   case NEON::BI__builtin_neon_vtbl1_v:
4989     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4990                         Ops, "vtbl1");
4991   case NEON::BI__builtin_neon_vtbl2_v:
4992     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4993                         Ops, "vtbl2");
4994   case NEON::BI__builtin_neon_vtbl3_v:
4995     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4996                         Ops, "vtbl3");
4997   case NEON::BI__builtin_neon_vtbl4_v:
4998     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4999                         Ops, "vtbl4");
5000   case NEON::BI__builtin_neon_vtbx1_v:
5001     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5002                         Ops, "vtbx1");
5003   case NEON::BI__builtin_neon_vtbx2_v:
5004     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5005                         Ops, "vtbx2");
5006   case NEON::BI__builtin_neon_vtbx3_v:
5007     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5008                         Ops, "vtbx3");
5009   case NEON::BI__builtin_neon_vtbx4_v:
5010     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5011                         Ops, "vtbx4");
5012   }
5013 }
5014
5015 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5016                                       const CallExpr *E,
5017                                       SmallVectorImpl<Value *> &Ops) {
5018   unsigned int Int = 0;
5019   const char *s = nullptr;
5020
5021   switch (BuiltinID) {
5022   default:
5023     return nullptr;
5024   case NEON::BI__builtin_neon_vtbl1_v:
5025   case NEON::BI__builtin_neon_vqtbl1_v:
5026   case NEON::BI__builtin_neon_vqtbl1q_v:
5027   case NEON::BI__builtin_neon_vtbl2_v:
5028   case NEON::BI__builtin_neon_vqtbl2_v:
5029   case NEON::BI__builtin_neon_vqtbl2q_v:
5030   case NEON::BI__builtin_neon_vtbl3_v:
5031   case NEON::BI__builtin_neon_vqtbl3_v:
5032   case NEON::BI__builtin_neon_vqtbl3q_v:
5033   case NEON::BI__builtin_neon_vtbl4_v:
5034   case NEON::BI__builtin_neon_vqtbl4_v:
5035   case NEON::BI__builtin_neon_vqtbl4q_v:
5036     break;
5037   case NEON::BI__builtin_neon_vtbx1_v:
5038   case NEON::BI__builtin_neon_vqtbx1_v:
5039   case NEON::BI__builtin_neon_vqtbx1q_v:
5040   case NEON::BI__builtin_neon_vtbx2_v:
5041   case NEON::BI__builtin_neon_vqtbx2_v:
5042   case NEON::BI__builtin_neon_vqtbx2q_v:
5043   case NEON::BI__builtin_neon_vtbx3_v:
5044   case NEON::BI__builtin_neon_vqtbx3_v:
5045   case NEON::BI__builtin_neon_vqtbx3q_v:
5046   case NEON::BI__builtin_neon_vtbx4_v:
5047   case NEON::BI__builtin_neon_vqtbx4_v:
5048   case NEON::BI__builtin_neon_vqtbx4q_v:
5049     break;
5050   }
5051
5052   assert(E->getNumArgs() >= 3);
5053
5054   // Get the last argument, which specifies the vector type.
5055   llvm::APSInt Result;
5056   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5057   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5058     return nullptr;
5059
5060   // Determine the type of this overloaded NEON intrinsic.
5061   NeonTypeFlags Type(Result.getZExtValue());
5062   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5063   if (!Ty)
5064     return nullptr;
5065
5066   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5067
5068   // AArch64 scalar builtins are not overloaded; they lack the extra argument
5069   // that specifies the vector type, so each case must be handled separately.
5070   switch (BuiltinID) {
5071   case NEON::BI__builtin_neon_vtbl1_v: {
5072     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5073                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5074                               "vtbl1");
5075   }
5076   case NEON::BI__builtin_neon_vtbl2_v: {
5077     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5078                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5079                               "vtbl1");
5080   }
5081   case NEON::BI__builtin_neon_vtbl3_v: {
5082     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5083                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5084                               "vtbl2");
5085   }
5086   case NEON::BI__builtin_neon_vtbl4_v: {
5087     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5088                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5089                               "vtbl2");
5090   }
5091   case NEON::BI__builtin_neon_vtbx1_v: {
5092     Value *TblRes =
5093         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5094                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5095
5096     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5097     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5098     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5099
5100     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5101     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5102     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5103   }
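  // Illustrative note on the vtbx1 lowering above: lanes whose index is >= 8
  // (out of range for an 8-byte table) keep the corresponding element of the
  // destination operand, while in-range lanes take the TBL result, which is
  // what the vtbx semantics require.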
5104   case NEON::BI__builtin_neon_vtbx2_v: {
5105     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5106                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5107                               "vtbx1");
5108   }
5109   case NEON::BI__builtin_neon_vtbx3_v: {
5110     Value *TblRes =
5111         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5112                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5113
5114     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5115     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5116                                            TwentyFourV);
5117     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5118
5119     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5120     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5121     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5122   }
5123   case NEON::BI__builtin_neon_vtbx4_v: {
5124     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5125                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5126                               "vtbx2");
5127   }
5128   case NEON::BI__builtin_neon_vqtbl1_v:
5129   case NEON::BI__builtin_neon_vqtbl1q_v:
5130     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5131   case NEON::BI__builtin_neon_vqtbl2_v:
5132   case NEON::BI__builtin_neon_vqtbl2q_v:
5133     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5134   case NEON::BI__builtin_neon_vqtbl3_v:
5135   case NEON::BI__builtin_neon_vqtbl3q_v:
5136     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5137   case NEON::BI__builtin_neon_vqtbl4_v:
5138   case NEON::BI__builtin_neon_vqtbl4q_v:
5139     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5140   case NEON::BI__builtin_neon_vqtbx1_v:
5141   case NEON::BI__builtin_neon_vqtbx1q_v:
5142     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5143   case NEON::BI__builtin_neon_vqtbx2_v:
5144   case NEON::BI__builtin_neon_vqtbx2q_v:
5145     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5146   case NEON::BI__builtin_neon_vqtbx3_v:
5147   case NEON::BI__builtin_neon_vqtbx3q_v:
5148     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5149   case NEON::BI__builtin_neon_vqtbx4_v:
5150   case NEON::BI__builtin_neon_vqtbx4q_v:
5151     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5153   }
5154
5155   if (!Int)
5156     return nullptr;
5157
5158   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5159   return CGF.EmitNeonCall(F, Ops, s);
5160 }
5161
5162 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5163   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5164   Op = Builder.CreateBitCast(Op, Int16Ty);
5165   Value *V = UndefValue::get(VTy);
5166   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5167   Op = Builder.CreateInsertElement(V, Op, CI);
5168   return Op;
5169 }
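// Illustrative note: vectorWrapScalar16 places an i16 scalar into lane 0 of an
// undef <4 x i16> vector so that scalar saturating builtins (e.g. the
// vqdmlalh_s16 handling below) can reuse the vector AArch64 intrinsics and
// then extract lane 0 of the result.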
5170
5171 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5172                                                const CallExpr *E) {
5173   unsigned HintID = static_cast<unsigned>(-1);
5174   switch (BuiltinID) {
5175   default: break;
5176   case AArch64::BI__builtin_arm_nop:
5177     HintID = 0;
5178     break;
5179   case AArch64::BI__builtin_arm_yield:
5180     HintID = 1;
5181     break;
5182   case AArch64::BI__builtin_arm_wfe:
5183     HintID = 2;
5184     break;
5185   case AArch64::BI__builtin_arm_wfi:
5186     HintID = 3;
5187     break;
5188   case AArch64::BI__builtin_arm_sev:
5189     HintID = 4;
5190     break;
5191   case AArch64::BI__builtin_arm_sevl:
5192     HintID = 5;
5193     break;
5194   }
5195
5196   if (HintID != static_cast<unsigned>(-1)) {
5197     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5198     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5199   }
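  // Illustrative note: each hint builtin above becomes a call to
  // llvm.aarch64.hint with the matching immediate, e.g. __builtin_arm_wfi()
  // is expected to emit llvm.aarch64.hint(i32 3).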
5200
5201   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5202     Value *Address         = EmitScalarExpr(E->getArg(0));
5203     Value *RW              = EmitScalarExpr(E->getArg(1));
5204     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5205     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5206     Value *IsData          = EmitScalarExpr(E->getArg(4));
5207
5208     Value *Locality = nullptr;
5209     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5210       // Temporal fetch: convert the cache level to a prefetch locality.
5211       Locality = llvm::ConstantInt::get(Int32Ty,
5212         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5213     } else {
5214       // Streaming fetch.
5215       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5216     }
5217
5218     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5219     // PLDL3STRM or PLDL2STRM.
5220     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5221     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5222   }
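  // Illustrative note: for a temporal fetch the cache level is remapped to the
  // generic prefetch locality as (3 - level), so level 0 (closest cache)
  // becomes locality 3 and level 2 becomes locality 1; streaming fetches use
  // locality 0.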
5223
5224   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5225     assert((getContext().getTypeSize(E->getType()) == 32) &&
5226            "rbit of unusual size!");
5227     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5228     return Builder.CreateCall(
5229         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
5230   }
5231   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5232     assert((getContext().getTypeSize(E->getType()) == 64) &&
5233            "rbit of unusual size!");
5234     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5235     return Builder.CreateCall(
5236         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
5237   }
5238
5239   if (BuiltinID == AArch64::BI__clear_cache) {
5240     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5241     const FunctionDecl *FD = E->getDirectCallee();
5242     Value *Ops[2];
5243     for (unsigned i = 0; i < 2; i++)
5244       Ops[i] = EmitScalarExpr(E->getArg(i));
5245     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5246     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5247     StringRef Name = FD->getName();
5248     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5249   }
5250
5251   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5252       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5253       getContext().getTypeSize(E->getType()) == 128) {
5254     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5255                                        ? Intrinsic::aarch64_ldaxp
5256                                        : Intrinsic::aarch64_ldxp);
5257
5258     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5259     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5260                                     "ldxp");
5261
5262     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5263     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5264     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5265     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5266     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5267
5268     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5269     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5270     Val = Builder.CreateOr(Val, Val1);
5271     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5272   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5273              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5274     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5275
5276     QualType Ty = E->getType();
5277     llvm::Type *RealResTy = ConvertType(Ty);
5278     llvm::Type *PtrTy = llvm::IntegerType::get(
5279         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5280     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5281
5282     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5283                                        ? Intrinsic::aarch64_ldaxr
5284                                        : Intrinsic::aarch64_ldxr,
5285                                    PtrTy);
5286     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5287
5288     if (RealResTy->isPointerTy())
5289       return Builder.CreateIntToPtr(Val, RealResTy);
5290
5291     llvm::Type *IntResTy = llvm::IntegerType::get(
5292         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5293     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5294     return Builder.CreateBitCast(Val, RealResTy);
5295   }
5296
5297   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5298        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5299       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5300     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5301                                        ? Intrinsic::aarch64_stlxp
5302                                        : Intrinsic::aarch64_stxp);
5303     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
5304
5305     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5306     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5307
5308     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5309     llvm::Value *Val = Builder.CreateLoad(Tmp);
5310
5311     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5312     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5313     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5314                                          Int8PtrTy);
5315     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5316   }
5317
5318   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5319       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5320     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5321     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5322
5323     QualType Ty = E->getArg(0)->getType();
5324     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5325                                                  getContext().getTypeSize(Ty));
5326     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5327
5328     if (StoreVal->getType()->isPointerTy())
5329       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5330     else {
5331       llvm::Type *IntTy = llvm::IntegerType::get(
5332           getLLVMContext(),
5333           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5334       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5335       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5336     }
5337
5338     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5339                                        ? Intrinsic::aarch64_stlxr
5340                                        : Intrinsic::aarch64_stxr,
5341                                    StoreAddr->getType());
5342     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5343   }
5344
5345   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5346     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5347     return Builder.CreateCall(F);
5348   }
5349
5350   // CRC32
5351   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5352   switch (BuiltinID) {
5353   case AArch64::BI__builtin_arm_crc32b:
5354     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5355   case AArch64::BI__builtin_arm_crc32cb:
5356     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5357   case AArch64::BI__builtin_arm_crc32h:
5358     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5359   case AArch64::BI__builtin_arm_crc32ch:
5360     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5361   case AArch64::BI__builtin_arm_crc32w:
5362     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5363   case AArch64::BI__builtin_arm_crc32cw:
5364     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5365   case AArch64::BI__builtin_arm_crc32d:
5366     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5367   case AArch64::BI__builtin_arm_crc32cd:
5368     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5369   }
5370
5371   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5372     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5373     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5374     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5375
5376     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5377     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5378
5379     return Builder.CreateCall(F, {Arg0, Arg1});
5380   }
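  // Illustrative note: the data operand is widened to the intrinsic's second
  // parameter type, so a call such as __builtin_arm_crc32b(acc, b) is expected
  // to pass b zero-extended to i32 into llvm.aarch64.crc32b.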
5381
5382   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5383       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5384       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5385       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5386       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5387       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5388
5389     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5390                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5391                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5392
5393     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5394                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5395
5396     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5397                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5398
5399     llvm::Type *ValueType;
5400     llvm::Type *RegisterType = Int64Ty;
5401     if (IsPointerBuiltin) {
5402       ValueType = VoidPtrTy;
5403     } else if (Is64Bit) {
5404       ValueType = Int64Ty;
5405     } else {
5406       ValueType = Int32Ty;
5407     }
5408
5409     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5410   }
5411
5412   // Find out if any arguments are required to be integer constant
5413   // expressions.
5414   unsigned ICEArguments = 0;
5415   ASTContext::GetBuiltinTypeError Error;
5416   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5417   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5418
5419   llvm::SmallVector<Value*, 4> Ops;
5420   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5421     if ((ICEArguments & (1 << i)) == 0) {
5422       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5423     } else {
5424       // If this is required to be a constant, constant fold it so that we know
5425       // that the generated intrinsic gets a ConstantInt.
5426       llvm::APSInt Result;
5427       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5428       assert(IsConst && "Constant arg isn't actually constant?");
5429       (void)IsConst;
5430       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5431     }
5432   }
5433
5434   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5435   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5436       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5437
5438   if (Builtin) {
5439     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5440     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5441     assert(Result && "SISD intrinsic should have been handled");
5442     return Result;
5443   }
5444
5445   llvm::APSInt Result;
5446   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5447   NeonTypeFlags Type(0);
5448   if (Arg->isIntegerConstantExpr(Result, getContext()))
5449     // Determine the type of this overloaded NEON intrinsic.
5450     Type = NeonTypeFlags(Result.getZExtValue());
5451
5452   bool usgn = Type.isUnsigned();
5453   bool quad = Type.isQuad();
5454
5455   // Handle non-overloaded intrinsics first.
5456   switch (BuiltinID) {
5457   default: break;
5458   case NEON::BI__builtin_neon_vldrq_p128: {
5459     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5460     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5461     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5462     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5463                                      CharUnits::fromQuantity(16));
5464   }
5465   case NEON::BI__builtin_neon_vstrq_p128: {
5466     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5467     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5468     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5469   }
5470   case NEON::BI__builtin_neon_vcvts_u32_f32:
5471   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5472     usgn = true;
5473     // FALL THROUGH
5474   case NEON::BI__builtin_neon_vcvts_s32_f32:
5475   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5477     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5478     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5479     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5480     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5481     if (usgn)
5482       return Builder.CreateFPToUI(Ops[0], InTy);
5483     return Builder.CreateFPToSI(Ops[0], InTy);
5484   }
5485   case NEON::BI__builtin_neon_vcvts_f32_u32:
5486   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5487     usgn = true;
5488     // FALL THROUGH
5489   case NEON::BI__builtin_neon_vcvts_f32_s32:
5490   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5491     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5492     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5493     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5494     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5495     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5496     if (usgn)
5497       return Builder.CreateUIToFP(Ops[0], FTy);
5498     return Builder.CreateSIToFP(Ops[0], FTy);
5499   }
5500   case NEON::BI__builtin_neon_vpaddd_s64: {
5501     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5502     Value *Vec = EmitScalarExpr(E->getArg(0));
5503     // The vector is v2i64, so make sure it's bitcast to that.
5504     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5505     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5506     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5507     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5508     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5509     // Pairwise addition of a v2i64 into a scalar i64.
5510     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5511   }
5512   case NEON::BI__builtin_neon_vpaddd_f64: {
5513     llvm::Type *Ty =
5514       llvm::VectorType::get(DoubleTy, 2);
5515     Value *Vec = EmitScalarExpr(E->getArg(0));
5516     // The vector is v2f64, so make sure it's bitcast to that.
5517     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5518     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5519     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5520     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5521     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5522     // Pairwise addition of a v2f64 into a scalar f64.
5523     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5524   }
5525   case NEON::BI__builtin_neon_vpadds_f32: {
5526     llvm::Type *Ty =
5527       llvm::VectorType::get(FloatTy, 2);
5528     Value *Vec = EmitScalarExpr(E->getArg(0));
5529     // The vector is v2f32, so make sure it's bitcast to that.
5530     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5531     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5532     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5533     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5534     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5535     // Pairwise addition of a v2f32 into a scalar f32.
5536     return Builder.CreateFAdd(Op0, Op1, "vpadds");
5537   }
5538   case NEON::BI__builtin_neon_vceqzd_s64:
5539   case NEON::BI__builtin_neon_vceqzd_f64:
5540   case NEON::BI__builtin_neon_vceqzs_f32:
5541     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5542     return EmitAArch64CompareBuiltinExpr(
5543         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5544         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5545   case NEON::BI__builtin_neon_vcgezd_s64:
5546   case NEON::BI__builtin_neon_vcgezd_f64:
5547   case NEON::BI__builtin_neon_vcgezs_f32:
5548     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5549     return EmitAArch64CompareBuiltinExpr(
5550         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5551         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5552   case NEON::BI__builtin_neon_vclezd_s64:
5553   case NEON::BI__builtin_neon_vclezd_f64:
5554   case NEON::BI__builtin_neon_vclezs_f32:
5555     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5556     return EmitAArch64CompareBuiltinExpr(
5557         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5558         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5559   case NEON::BI__builtin_neon_vcgtzd_s64:
5560   case NEON::BI__builtin_neon_vcgtzd_f64:
5561   case NEON::BI__builtin_neon_vcgtzs_f32:
5562     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5563     return EmitAArch64CompareBuiltinExpr(
5564         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5565         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5566   case NEON::BI__builtin_neon_vcltzd_s64:
5567   case NEON::BI__builtin_neon_vcltzd_f64:
5568   case NEON::BI__builtin_neon_vcltzs_f32:
5569     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5570     return EmitAArch64CompareBuiltinExpr(
5571         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5572         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5573
5574   case NEON::BI__builtin_neon_vceqzd_u64: {
5575     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5576     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5577     Ops[0] =
5578         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5579     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5580   }
5581   case NEON::BI__builtin_neon_vceqd_f64:
5582   case NEON::BI__builtin_neon_vcled_f64:
5583   case NEON::BI__builtin_neon_vcltd_f64:
5584   case NEON::BI__builtin_neon_vcged_f64:
5585   case NEON::BI__builtin_neon_vcgtd_f64: {
5586     llvm::CmpInst::Predicate P;
5587     switch (BuiltinID) {
5588     default: llvm_unreachable("missing builtin ID in switch!");
5589     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5590     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5591     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5592     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5593     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5594     }
5595     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5596     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5597     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5598     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5599     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5600   }
5601   case NEON::BI__builtin_neon_vceqs_f32:
5602   case NEON::BI__builtin_neon_vcles_f32:
5603   case NEON::BI__builtin_neon_vclts_f32:
5604   case NEON::BI__builtin_neon_vcges_f32:
5605   case NEON::BI__builtin_neon_vcgts_f32: {
5606     llvm::CmpInst::Predicate P;
5607     switch (BuiltinID) {
5608     default: llvm_unreachable("missing builtin ID in switch!");
5609     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5610     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5611     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5612     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5613     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5614     }
5615     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5616     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5617     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5618     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5619     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmps");
5620   }
5621   case NEON::BI__builtin_neon_vceqd_s64:
5622   case NEON::BI__builtin_neon_vceqd_u64:
5623   case NEON::BI__builtin_neon_vcgtd_s64:
5624   case NEON::BI__builtin_neon_vcgtd_u64:
5625   case NEON::BI__builtin_neon_vcltd_s64:
5626   case NEON::BI__builtin_neon_vcltd_u64:
5627   case NEON::BI__builtin_neon_vcged_u64:
5628   case NEON::BI__builtin_neon_vcged_s64:
5629   case NEON::BI__builtin_neon_vcled_u64:
5630   case NEON::BI__builtin_neon_vcled_s64: {
5631     llvm::CmpInst::Predicate P;
5632     switch (BuiltinID) {
5633     default: llvm_unreachable("missing builtin ID in switch!");
5634     case NEON::BI__builtin_neon_vceqd_s64:
5635     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5636     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5637     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5638     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5639     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5640     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5641     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5642     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5643     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5644     }
5645     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5646     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5647     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5648     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5649     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5650   }
5651   case NEON::BI__builtin_neon_vtstd_s64:
5652   case NEON::BI__builtin_neon_vtstd_u64: {
5653     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5654     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5655     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5656     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5657     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5658                                 llvm::Constant::getNullValue(Int64Ty));
5659     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5660   }
5661   case NEON::BI__builtin_neon_vset_lane_i8:
5662   case NEON::BI__builtin_neon_vset_lane_i16:
5663   case NEON::BI__builtin_neon_vset_lane_i32:
5664   case NEON::BI__builtin_neon_vset_lane_i64:
5665   case NEON::BI__builtin_neon_vset_lane_f32:
5666   case NEON::BI__builtin_neon_vsetq_lane_i8:
5667   case NEON::BI__builtin_neon_vsetq_lane_i16:
5668   case NEON::BI__builtin_neon_vsetq_lane_i32:
5669   case NEON::BI__builtin_neon_vsetq_lane_i64:
5670   case NEON::BI__builtin_neon_vsetq_lane_f32:
5671     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5672     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5673   case NEON::BI__builtin_neon_vset_lane_f64:
5674     // The vector type needs a cast for the v1f64 variant.
5675     Ops[1] = Builder.CreateBitCast(Ops[1],
5676                                    llvm::VectorType::get(DoubleTy, 1));
5677     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5678     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5679   case NEON::BI__builtin_neon_vsetq_lane_f64:
5680     // The vector type needs a cast for the v2f64 variant.
5681     Ops[1] = Builder.CreateBitCast(Ops[1],
5682         llvm::VectorType::get(DoubleTy, 2));
5683     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5684     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5685
5686   case NEON::BI__builtin_neon_vget_lane_i8:
5687   case NEON::BI__builtin_neon_vdupb_lane_i8:
5688     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5689     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5690                                         "vget_lane");
5691   case NEON::BI__builtin_neon_vgetq_lane_i8:
5692   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5693     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5694     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5695                                         "vgetq_lane");
5696   case NEON::BI__builtin_neon_vget_lane_i16:
5697   case NEON::BI__builtin_neon_vduph_lane_i16:
5698     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5699     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5700                                         "vget_lane");
5701   case NEON::BI__builtin_neon_vgetq_lane_i16:
5702   case NEON::BI__builtin_neon_vduph_laneq_i16:
5703     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5704     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5705                                         "vgetq_lane");
5706   case NEON::BI__builtin_neon_vget_lane_i32:
5707   case NEON::BI__builtin_neon_vdups_lane_i32:
5708     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5709     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5710                                         "vget_lane");
5711   case NEON::BI__builtin_neon_vdups_lane_f32:
5712     Ops[0] = Builder.CreateBitCast(Ops[0],
5713         llvm::VectorType::get(FloatTy, 2));
5714     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5715                                         "vdups_lane");
5716   case NEON::BI__builtin_neon_vgetq_lane_i32:
5717   case NEON::BI__builtin_neon_vdups_laneq_i32:
5718     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5719     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5720                                         "vgetq_lane");
5721   case NEON::BI__builtin_neon_vget_lane_i64:
5722   case NEON::BI__builtin_neon_vdupd_lane_i64:
5723     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5724     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5725                                         "vget_lane");
5726   case NEON::BI__builtin_neon_vdupd_lane_f64:
5727     Ops[0] = Builder.CreateBitCast(Ops[0],
5728         llvm::VectorType::get(DoubleTy, 1));
5729     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5730                                         "vdupd_lane");
5731   case NEON::BI__builtin_neon_vgetq_lane_i64:
5732   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5733     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5734     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5735                                         "vgetq_lane");
5736   case NEON::BI__builtin_neon_vget_lane_f32:
5737     Ops[0] = Builder.CreateBitCast(Ops[0],
5738         llvm::VectorType::get(FloatTy, 2));
5739     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5740                                         "vget_lane");
5741   case NEON::BI__builtin_neon_vget_lane_f64:
5742     Ops[0] = Builder.CreateBitCast(Ops[0],
5743         llvm::VectorType::get(DoubleTy, 1));
5744     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5745                                         "vget_lane");
5746   case NEON::BI__builtin_neon_vgetq_lane_f32:
5747   case NEON::BI__builtin_neon_vdups_laneq_f32:
5748     Ops[0] = Builder.CreateBitCast(Ops[0],
5749         llvm::VectorType::get(FloatTy, 4));
5750     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5751                                         "vgetq_lane");
5752   case NEON::BI__builtin_neon_vgetq_lane_f64:
5753   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5754     Ops[0] = Builder.CreateBitCast(Ops[0],
5755         llvm::VectorType::get(DoubleTy, 2));
5756     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5757                                         "vgetq_lane");
5758   case NEON::BI__builtin_neon_vaddd_s64:
5759   case NEON::BI__builtin_neon_vaddd_u64:
5760     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5761   case NEON::BI__builtin_neon_vsubd_s64:
5762   case NEON::BI__builtin_neon_vsubd_u64:
5763     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5764   case NEON::BI__builtin_neon_vqdmlalh_s16:
5765   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5766     SmallVector<Value *, 2> ProductOps;
5767     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5768     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5769     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5770     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5771                           ProductOps, "vqdmlXl");
5772     Constant *CI = ConstantInt::get(SizeTy, 0);
5773     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5774
5775     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5776                                         ? Intrinsic::aarch64_neon_sqadd
5777                                         : Intrinsic::aarch64_neon_sqsub;
5778     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5779   }
5780   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5781     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5782     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5783     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5784                         Ops, "vqshlu_n");
5785   }
5786   case NEON::BI__builtin_neon_vqshld_n_u64:
5787   case NEON::BI__builtin_neon_vqshld_n_s64: {
5788     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5789                                    ? Intrinsic::aarch64_neon_uqshl
5790                                    : Intrinsic::aarch64_neon_sqshl;
5791     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5792     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5793     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5794   }
5795   case NEON::BI__builtin_neon_vrshrd_n_u64:
5796   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5797     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5798                                    ? Intrinsic::aarch64_neon_urshl
5799                                    : Intrinsic::aarch64_neon_srshl;
5800     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5801     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5802     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5803     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5804   }
5805   case NEON::BI__builtin_neon_vrsrad_n_u64:
5806   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5807     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5808                                    ? Intrinsic::aarch64_neon_urshl
5809                                    : Intrinsic::aarch64_neon_srshl;
5810     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5811     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5812     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5813                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5814     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5815   }
5816   case NEON::BI__builtin_neon_vshld_n_s64:
5817   case NEON::BI__builtin_neon_vshld_n_u64: {
5818     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5819     return Builder.CreateShl(
5820         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5821   }
5822   case NEON::BI__builtin_neon_vshrd_n_s64: {
5823     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5824     return Builder.CreateAShr(
5825         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5826                                                    Amt->getZExtValue())),
5827         "shrd_n");
5828   }
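  // Note: the clamp to 63 above is deliberate; shifting an i64 by 64 is
  // undefined in LLVM IR, while an arithmetic shift by 63 produces the same
  // all-sign-bits value the instruction would give for a shift of 64.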
5829   case NEON::BI__builtin_neon_vshrd_n_u64: {
5830     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5831     uint64_t ShiftAmt = Amt->getZExtValue();
5832     // Right-shifting an unsigned value by its size yields 0.
5833     if (ShiftAmt == 64)
5834       return ConstantInt::get(Int64Ty, 0);
5835     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5836                               "shrd_n");
5837   }
5838   case NEON::BI__builtin_neon_vsrad_n_s64: {
5839     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5840     Ops[1] = Builder.CreateAShr(
5841         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5842                                                    Amt->getZExtValue())),
5843         "shrd_n");
5844     return Builder.CreateAdd(Ops[0], Ops[1]);
5845   }
5846   case NEON::BI__builtin_neon_vsrad_n_u64: {
5847     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5848     uint64_t ShiftAmt = Amt->getZExtValue();
5849     // Right-shifting an unsigned value by its size yields 0.
5850     // As Op + 0 = Op, return Ops[0] directly.
5851     if (ShiftAmt == 64)
5852       return Ops[0];
5853     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5854                                 "shrd_n");
5855     return Builder.CreateAdd(Ops[0], Ops[1]);
5856   }
5857   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5858   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5859   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5860   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5861     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5862                                           "lane");
5863     SmallVector<Value *, 2> ProductOps;
5864     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5865     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5866     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5867     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5868                           ProductOps, "vqdmlXl");
5869     Constant *CI = ConstantInt::get(SizeTy, 0);
5870     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5871     Ops.pop_back();
5872
5873     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5874                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5875                           ? Intrinsic::aarch64_neon_sqadd
5876                           : Intrinsic::aarch64_neon_sqsub;
5877     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5878   }
5879   case NEON::BI__builtin_neon_vqdmlals_s32:
5880   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5881     SmallVector<Value *, 2> ProductOps;
5882     ProductOps.push_back(Ops[1]);
5883     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5884     Ops[1] =
5885         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5886                      ProductOps, "vqdmlXl");
5887
5888     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5889                                         ? Intrinsic::aarch64_neon_sqadd
5890                                         : Intrinsic::aarch64_neon_sqsub;
5891     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5892   }
5893   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5894   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5895   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5896   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5897     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5898                                           "lane");
5899     SmallVector<Value *, 2> ProductOps;
5900     ProductOps.push_back(Ops[1]);
5901     ProductOps.push_back(Ops[2]);
5902     Ops[1] =
5903         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5904                      ProductOps, "vqdmlXl");
5905     Ops.pop_back();
5906
5907     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5908                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5909                           ? Intrinsic::aarch64_neon_sqadd
5910                           : Intrinsic::aarch64_neon_sqsub;
5911     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5912   }
5913   }
5914
5915   llvm::VectorType *VTy = GetNeonType(this, Type);
5916   llvm::Type *Ty = VTy;
5917   if (!Ty)
5918     return nullptr;
5919
5920   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5921   // defer to the common code if the builtin has been added to our special map.
5922   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5923                                    AArch64SIMDIntrinsicsProvenSorted);
5924
5925   if (Builtin)
5926     return EmitCommonNeonBuiltinExpr(
5927         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5928         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5929         /*never use addresses*/ Address::invalid(), Address::invalid());
5930
5931   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5932     return V;
5933
5934   unsigned Int;
5935   switch (BuiltinID) {
5936   default: return nullptr;
5937   case NEON::BI__builtin_neon_vbsl_v:
5938   case NEON::BI__builtin_neon_vbslq_v: {
5939     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5940     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5941     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5942     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5943
5944     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5945     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5946     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5947     return Builder.CreateBitCast(Ops[0], Ty);
5948   }
5949   case NEON::BI__builtin_neon_vfma_lane_v:
5950   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5951     // The ARM builtins (and instructions) have the addend as the first
5952     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5953     Value *Addend = Ops[0];
5954     Value *Multiplicand = Ops[1];
5955     Value *LaneSource = Ops[2];
5956     Ops[0] = Multiplicand;
5957     Ops[1] = LaneSource;
5958     Ops[2] = Addend;
5959
5960     // Now adjust things to handle the lane access.
5961     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5962       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5963       VTy;
5964     llvm::Constant *cst = cast<Constant>(Ops[3]);
5965     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5966     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5967     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5968
5969     Ops.pop_back();
5970     Int = Intrinsic::fma;
5971     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5972   }
5973   case NEON::BI__builtin_neon_vfma_laneq_v: {
5974     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5975     // v1f64 fma should be mapped to Neon scalar f64 fma
5976     if (VTy && VTy->getElementType() == DoubleTy) {
5977       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5978       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5979       llvm::Type *VTy = GetNeonType(this,
5980         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5981       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5982       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5983       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5984       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5985       return Builder.CreateBitCast(Result, Ty);
5986     }
5987     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5988     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5989     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5990
5991     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5992                                             VTy->getNumElements() * 2);
5993     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5994     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5995                                                cast<ConstantInt>(Ops[3]));
5996     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5997
5998     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5999   }
6000   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6001     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6002     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6003     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6004
6005     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6006     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6007     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6008   }
6009   case NEON::BI__builtin_neon_vfmas_lane_f32:
6010   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6011   case NEON::BI__builtin_neon_vfmad_lane_f64:
6012   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6013     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6014     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6015     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6016     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6017     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6018   }
6019   case NEON::BI__builtin_neon_vmull_v:
6020     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6021     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6022     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6023     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6024   case NEON::BI__builtin_neon_vmax_v:
6025   case NEON::BI__builtin_neon_vmaxq_v:
6026     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6027     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6028     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6029     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6030   case NEON::BI__builtin_neon_vmin_v:
6031   case NEON::BI__builtin_neon_vminq_v:
6032     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6033     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6034     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6035     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6036   case NEON::BI__builtin_neon_vabd_v:
6037   case NEON::BI__builtin_neon_vabdq_v:
6038     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6039     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6040     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6041     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6042   case NEON::BI__builtin_neon_vpadal_v:
6043   case NEON::BI__builtin_neon_vpadalq_v: {
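         // Pairwise add-long accumulate: widen and pairwise-add the source with
         // uaddlp/saddlp, then add the accumulator on the widened type.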
6044     unsigned ArgElts = VTy->getNumElements();
6045     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6046     unsigned BitWidth = EltTy->getBitWidth();
6047     llvm::Type *ArgTy = llvm::VectorType::get(
6048         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6049     llvm::Type* Tys[2] = { VTy, ArgTy };
6050     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6051     SmallVector<llvm::Value*, 1> TmpOps;
6052     TmpOps.push_back(Ops[1]);
6053     Function *F = CGM.getIntrinsic(Int, Tys);
6054     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6055     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6056     return Builder.CreateAdd(tmp, addend);
6057   }
6058   case NEON::BI__builtin_neon_vpmin_v:
6059   case NEON::BI__builtin_neon_vpminq_v:
6060     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6061     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6062     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6063     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6064   case NEON::BI__builtin_neon_vpmax_v:
6065   case NEON::BI__builtin_neon_vpmaxq_v:
6066     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6067     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6068     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6069     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6070   case NEON::BI__builtin_neon_vminnm_v:
6071   case NEON::BI__builtin_neon_vminnmq_v:
6072     Int = Intrinsic::aarch64_neon_fminnm;
6073     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6074   case NEON::BI__builtin_neon_vmaxnm_v:
6075   case NEON::BI__builtin_neon_vmaxnmq_v:
6076     Int = Intrinsic::aarch64_neon_fmaxnm;
6077     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6078   case NEON::BI__builtin_neon_vrecpss_f32: {
6079     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6080     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6081                         Ops, "vrecps");
6082   }
6083   case NEON::BI__builtin_neon_vrecpsd_f64: {
6084     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6085     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6086                         Ops, "vrecps");
6087   }
6088   case NEON::BI__builtin_neon_vqshrun_n_v:
6089     Int = Intrinsic::aarch64_neon_sqshrun;
6090     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6091   case NEON::BI__builtin_neon_vqrshrun_n_v:
6092     Int = Intrinsic::aarch64_neon_sqrshrun;
6093     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6094   case NEON::BI__builtin_neon_vqshrn_n_v:
6095     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6096     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6097   case NEON::BI__builtin_neon_vrshrn_n_v:
6098     Int = Intrinsic::aarch64_neon_rshrn;
6099     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6100   case NEON::BI__builtin_neon_vqrshrn_n_v:
6101     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6102     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6103   case NEON::BI__builtin_neon_vrnda_v:
6104   case NEON::BI__builtin_neon_vrndaq_v: {
6105     Int = Intrinsic::round;
6106     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6107   }
6108   case NEON::BI__builtin_neon_vrndi_v:
6109   case NEON::BI__builtin_neon_vrndiq_v: {
6110     Int = Intrinsic::nearbyint;
6111     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6112   }
6113   case NEON::BI__builtin_neon_vrndm_v:
6114   case NEON::BI__builtin_neon_vrndmq_v: {
6115     Int = Intrinsic::floor;
6116     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6117   }
6118   case NEON::BI__builtin_neon_vrndn_v:
6119   case NEON::BI__builtin_neon_vrndnq_v: {
6120     Int = Intrinsic::aarch64_neon_frintn;
6121     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6122   }
6123   case NEON::BI__builtin_neon_vrndp_v:
6124   case NEON::BI__builtin_neon_vrndpq_v: {
6125     Int = Intrinsic::ceil;
6126     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6127   }
6128   case NEON::BI__builtin_neon_vrndx_v:
6129   case NEON::BI__builtin_neon_vrndxq_v: {
6130     Int = Intrinsic::rint;
6131     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6132   }
6133   case NEON::BI__builtin_neon_vrnd_v:
6134   case NEON::BI__builtin_neon_vrndq_v: {
6135     Int = Intrinsic::trunc;
6136     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6137   }
6138   case NEON::BI__builtin_neon_vceqz_v:
6139   case NEON::BI__builtin_neon_vceqzq_v:
6140     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6141                                          ICmpInst::ICMP_EQ, "vceqz");
6142   case NEON::BI__builtin_neon_vcgez_v:
6143   case NEON::BI__builtin_neon_vcgezq_v:
6144     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6145                                          ICmpInst::ICMP_SGE, "vcgez");
6146   case NEON::BI__builtin_neon_vclez_v:
6147   case NEON::BI__builtin_neon_vclezq_v:
6148     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6149                                          ICmpInst::ICMP_SLE, "vclez");
6150   case NEON::BI__builtin_neon_vcgtz_v:
6151   case NEON::BI__builtin_neon_vcgtzq_v:
6152     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6153                                          ICmpInst::ICMP_SGT, "vcgtz");
6154   case NEON::BI__builtin_neon_vcltz_v:
6155   case NEON::BI__builtin_neon_vcltzq_v:
6156     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6157                                          ICmpInst::ICMP_SLT, "vcltz");
6158   case NEON::BI__builtin_neon_vcvt_f64_v:
6159   case NEON::BI__builtin_neon_vcvtq_f64_v:
6160     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6161     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6162     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6163                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6164   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6165     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6166            "unexpected vcvt_f64_f32 builtin");
6167     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6168     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6169
6170     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6171   }
6172   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6173     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6174            "unexpected vcvt_f32_f64 builtin");
6175     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6176     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6177
6178     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6179   }
6180   case NEON::BI__builtin_neon_vcvt_s32_v:
6181   case NEON::BI__builtin_neon_vcvt_u32_v:
6182   case NEON::BI__builtin_neon_vcvt_s64_v:
6183   case NEON::BI__builtin_neon_vcvt_u64_v:
6184   case NEON::BI__builtin_neon_vcvtq_s32_v:
6185   case NEON::BI__builtin_neon_vcvtq_u32_v:
6186   case NEON::BI__builtin_neon_vcvtq_s64_v:
6187   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6188     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6189     if (usgn)
6190       return Builder.CreateFPToUI(Ops[0], Ty);
6191     return Builder.CreateFPToSI(Ops[0], Ty);
6192   }
6193   case NEON::BI__builtin_neon_vcvta_s32_v:
6194   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6195   case NEON::BI__builtin_neon_vcvta_u32_v:
6196   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6197   case NEON::BI__builtin_neon_vcvta_s64_v:
6198   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6199   case NEON::BI__builtin_neon_vcvta_u64_v:
6200   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6201     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6202     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6204   }
6205   case NEON::BI__builtin_neon_vcvtm_s32_v:
6206   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6207   case NEON::BI__builtin_neon_vcvtm_u32_v:
6208   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6209   case NEON::BI__builtin_neon_vcvtm_s64_v:
6210   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6211   case NEON::BI__builtin_neon_vcvtm_u64_v:
6212   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6213     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6214     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6215     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6216   }
6217   case NEON::BI__builtin_neon_vcvtn_s32_v:
6218   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6219   case NEON::BI__builtin_neon_vcvtn_u32_v:
6220   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6221   case NEON::BI__builtin_neon_vcvtn_s64_v:
6222   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6223   case NEON::BI__builtin_neon_vcvtn_u64_v:
6224   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6225     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6226     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6227     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6228   }
6229   case NEON::BI__builtin_neon_vcvtp_s32_v:
6230   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6231   case NEON::BI__builtin_neon_vcvtp_u32_v:
6232   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6233   case NEON::BI__builtin_neon_vcvtp_s64_v:
6234   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6235   case NEON::BI__builtin_neon_vcvtp_u64_v:
6236   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6237     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6238     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6239     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6240   }
6241   case NEON::BI__builtin_neon_vmulx_v:
6242   case NEON::BI__builtin_neon_vmulxq_v: {
6243     Int = Intrinsic::aarch64_neon_fmulx;
6244     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6245   }
6246   case NEON::BI__builtin_neon_vmul_lane_v:
6247   case NEON::BI__builtin_neon_vmul_laneq_v: {
6248     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6249     bool Quad = false;
6250     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6251       Quad = true;
6252     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6253     llvm::Type *VTy = GetNeonType(this,
6254       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6255     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6256     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6257     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6258     return Builder.CreateBitCast(Result, Ty);
6259   }
6260   case NEON::BI__builtin_neon_vnegd_s64:
6261     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6262   case NEON::BI__builtin_neon_vpmaxnm_v:
6263   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6264     Int = Intrinsic::aarch64_neon_fmaxnmp;
6265     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6266   }
6267   case NEON::BI__builtin_neon_vpminnm_v:
6268   case NEON::BI__builtin_neon_vpminnmq_v: {
6269     Int = Intrinsic::aarch64_neon_fminnmp;
6270     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6271   }
6272   case NEON::BI__builtin_neon_vsqrt_v:
6273   case NEON::BI__builtin_neon_vsqrtq_v: {
6274     Int = Intrinsic::sqrt;
6275     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6276     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6277   }
6278   case NEON::BI__builtin_neon_vrbit_v:
6279   case NEON::BI__builtin_neon_vrbitq_v: {
6280     Int = Intrinsic::aarch64_neon_rbit;
6281     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6282   }
6283   case NEON::BI__builtin_neon_vaddv_u8:
6284     // FIXME: These are handled by the AArch64 scalar code.
6285     usgn = true;
6286     // FALLTHROUGH
6287   case NEON::BI__builtin_neon_vaddv_s8: {
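         // The across-vector add intrinsic returns an i32; truncate the result
         // back down to the element width.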
6288     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6289     Ty = Int32Ty;
6290     VTy = llvm::VectorType::get(Int8Ty, 8);
6291     llvm::Type *Tys[2] = { Ty, VTy };
6292     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6293     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6294     return Builder.CreateTrunc(Ops[0], Int8Ty);
6295   }
6296   case NEON::BI__builtin_neon_vaddv_u16:
6297     usgn = true;
6298     // FALLTHROUGH
6299   case NEON::BI__builtin_neon_vaddv_s16: {
6300     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6301     Ty = Int32Ty;
6302     VTy = llvm::VectorType::get(Int16Ty, 4);
6303     llvm::Type *Tys[2] = { Ty, VTy };
6304     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6305     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6306     return Builder.CreateTrunc(Ops[0], Int16Ty);
6307   }
6308   case NEON::BI__builtin_neon_vaddvq_u8:
6309     usgn = true;
6310     // FALLTHROUGH
6311   case NEON::BI__builtin_neon_vaddvq_s8: {
6312     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6313     Ty = Int32Ty;
6314     VTy = llvm::VectorType::get(Int8Ty, 16);
6315     llvm::Type *Tys[2] = { Ty, VTy };
6316     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6317     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6318     return Builder.CreateTrunc(Ops[0], Int8Ty);
6319   }
6320   case NEON::BI__builtin_neon_vaddvq_u16:
6321     usgn = true;
6322     // FALLTHROUGH
6323   case NEON::BI__builtin_neon_vaddvq_s16: {
6324     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6325     Ty = Int32Ty;
6326     VTy = llvm::VectorType::get(Int16Ty, 8);
6327     llvm::Type *Tys[2] = { Ty, VTy };
6328     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6329     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6330     return Builder.CreateTrunc(Ops[0], Int16Ty);
6331   }
6332   case NEON::BI__builtin_neon_vmaxv_u8: {
6333     Int = Intrinsic::aarch64_neon_umaxv;
6334     Ty = Int32Ty;
6335     VTy = llvm::VectorType::get(Int8Ty, 8);
6336     llvm::Type *Tys[2] = { Ty, VTy };
6337     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6338     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6339     return Builder.CreateTrunc(Ops[0], Int8Ty);
6340   }
6341   case NEON::BI__builtin_neon_vmaxv_u16: {
6342     Int = Intrinsic::aarch64_neon_umaxv;
6343     Ty = Int32Ty;
6344     VTy = llvm::VectorType::get(Int16Ty, 4);
6345     llvm::Type *Tys[2] = { Ty, VTy };
6346     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6347     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6348     return Builder.CreateTrunc(Ops[0], Int16Ty);
6349   }
6350   case NEON::BI__builtin_neon_vmaxvq_u8: {
6351     Int = Intrinsic::aarch64_neon_umaxv;
6352     Ty = Int32Ty;
6353     VTy = llvm::VectorType::get(Int8Ty, 16);
6354     llvm::Type *Tys[2] = { Ty, VTy };
6355     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6356     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6357     return Builder.CreateTrunc(Ops[0], Int8Ty);
6358   }
6359   case NEON::BI__builtin_neon_vmaxvq_u16: {
6360     Int = Intrinsic::aarch64_neon_umaxv;
6361     Ty = Int32Ty;
6362     VTy = llvm::VectorType::get(Int16Ty, 8);
6363     llvm::Type *Tys[2] = { Ty, VTy };
6364     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6365     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6366     return Builder.CreateTrunc(Ops[0], Int16Ty);
6367   }
6368   case NEON::BI__builtin_neon_vmaxv_s8: {
6369     Int = Intrinsic::aarch64_neon_smaxv;
6370     Ty = Int32Ty;
6371     VTy = llvm::VectorType::get(Int8Ty, 8);
6372     llvm::Type *Tys[2] = { Ty, VTy };
6373     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6374     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6375     return Builder.CreateTrunc(Ops[0], Int8Ty);
6376   }
6377   case NEON::BI__builtin_neon_vmaxv_s16: {
6378     Int = Intrinsic::aarch64_neon_smaxv;
6379     Ty = Int32Ty;
6380     VTy = llvm::VectorType::get(Int16Ty, 4);
6381     llvm::Type *Tys[2] = { Ty, VTy };
6382     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6383     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6384     return Builder.CreateTrunc(Ops[0], Int16Ty);
6385   }
6386   case NEON::BI__builtin_neon_vmaxvq_s8: {
6387     Int = Intrinsic::aarch64_neon_smaxv;
6388     Ty = Int32Ty;
6389     VTy = llvm::VectorType::get(Int8Ty, 16);
6390     llvm::Type *Tys[2] = { Ty, VTy };
6391     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6392     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6393     return Builder.CreateTrunc(Ops[0], Int8Ty);
6394   }
6395   case NEON::BI__builtin_neon_vmaxvq_s16: {
6396     Int = Intrinsic::aarch64_neon_smaxv;
6397     Ty = Int32Ty;
6398     VTy = llvm::VectorType::get(Int16Ty, 8);
6399     llvm::Type *Tys[2] = { Ty, VTy };
6400     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6401     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6402     return Builder.CreateTrunc(Ops[0], Int16Ty);
6403   }
6404   case NEON::BI__builtin_neon_vminv_u8: {
6405     Int = Intrinsic::aarch64_neon_uminv;
6406     Ty = Int32Ty;
6407     VTy = llvm::VectorType::get(Int8Ty, 8);
6408     llvm::Type *Tys[2] = { Ty, VTy };
6409     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6410     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6411     return Builder.CreateTrunc(Ops[0], Int8Ty);
6412   }
6413   case NEON::BI__builtin_neon_vminv_u16: {
6414     Int = Intrinsic::aarch64_neon_uminv;
6415     Ty = Int32Ty;
6416     VTy = llvm::VectorType::get(Int16Ty, 4);
6417     llvm::Type *Tys[2] = { Ty, VTy };
6418     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6419     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6420     return Builder.CreateTrunc(Ops[0], Int16Ty);
6421   }
6422   case NEON::BI__builtin_neon_vminvq_u8: {
6423     Int = Intrinsic::aarch64_neon_uminv;
6424     Ty = Int32Ty;
6425     VTy = llvm::VectorType::get(Int8Ty, 16);
6426     llvm::Type *Tys[2] = { Ty, VTy };
6427     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6428     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6429     return Builder.CreateTrunc(Ops[0], Int8Ty);
6430   }
6431   case NEON::BI__builtin_neon_vminvq_u16: {
6432     Int = Intrinsic::aarch64_neon_uminv;
6433     Ty = Int32Ty;
6434     VTy = llvm::VectorType::get(Int16Ty, 8);
6435     llvm::Type *Tys[2] = { Ty, VTy };
6436     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6437     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6438     return Builder.CreateTrunc(Ops[0], Int16Ty);
6439   }
6440   case NEON::BI__builtin_neon_vminv_s8: {
6441     Int = Intrinsic::aarch64_neon_sminv;
6442     Ty = Int32Ty;
6443     VTy = llvm::VectorType::get(Int8Ty, 8);
6444     llvm::Type *Tys[2] = { Ty, VTy };
6445     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6446     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6447     return Builder.CreateTrunc(Ops[0], Int8Ty);
6448   }
6449   case NEON::BI__builtin_neon_vminv_s16: {
6450     Int = Intrinsic::aarch64_neon_sminv;
6451     Ty = Int32Ty;
6452     VTy = llvm::VectorType::get(Int16Ty, 4);
6453     llvm::Type *Tys[2] = { Ty, VTy };
6454     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6455     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6456     return Builder.CreateTrunc(Ops[0], Int16Ty);
6457   }
6458   case NEON::BI__builtin_neon_vminvq_s8: {
6459     Int = Intrinsic::aarch64_neon_sminv;
6460     Ty = Int32Ty;
6461     VTy = llvm::VectorType::get(Int8Ty, 16);
6462     llvm::Type *Tys[2] = { Ty, VTy };
6463     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6464     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6465     return Builder.CreateTrunc(Ops[0], Int8Ty);
6466   }
6467   case NEON::BI__builtin_neon_vminvq_s16: {
6468     Int = Intrinsic::aarch64_neon_sminv;
6469     Ty = Int32Ty;
6470     VTy = llvm::VectorType::get(Int16Ty, 8);
6471     llvm::Type *Tys[2] = { Ty, VTy };
6472     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6473     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6474     return Builder.CreateTrunc(Ops[0], Int16Ty);
6475   }
6476   case NEON::BI__builtin_neon_vmul_n_f64: {
6477     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6478     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6479     return Builder.CreateFMul(Ops[0], RHS);
6480   }
6481   case NEON::BI__builtin_neon_vaddlv_u8: {
6482     Int = Intrinsic::aarch64_neon_uaddlv;
6483     Ty = Int32Ty;
6484     VTy = llvm::VectorType::get(Int8Ty, 8);
6485     llvm::Type *Tys[2] = { Ty, VTy };
6486     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6487     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6488     return Builder.CreateTrunc(Ops[0], Int16Ty);
6489   }
6490   case NEON::BI__builtin_neon_vaddlv_u16: {
6491     Int = Intrinsic::aarch64_neon_uaddlv;
6492     Ty = Int32Ty;
6493     VTy = llvm::VectorType::get(Int16Ty, 4);
6494     llvm::Type *Tys[2] = { Ty, VTy };
6495     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6496     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6497   }
6498   case NEON::BI__builtin_neon_vaddlvq_u8: {
6499     Int = Intrinsic::aarch64_neon_uaddlv;
6500     Ty = Int32Ty;
6501     VTy = llvm::VectorType::get(Int8Ty, 16);
6502     llvm::Type *Tys[2] = { Ty, VTy };
6503     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6504     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6505     return Builder.CreateTrunc(Ops[0], Int16Ty);
6506   }
6507   case NEON::BI__builtin_neon_vaddlvq_u16: {
6508     Int = Intrinsic::aarch64_neon_uaddlv;
6509     Ty = Int32Ty;
6510     VTy = llvm::VectorType::get(Int16Ty, 8);
6511     llvm::Type *Tys[2] = { Ty, VTy };
6512     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6513     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6514   }
6515   case NEON::BI__builtin_neon_vaddlv_s8: {
6516     Int = Intrinsic::aarch64_neon_saddlv;
6517     Ty = Int32Ty;
6518     VTy = llvm::VectorType::get(Int8Ty, 8);
6519     llvm::Type *Tys[2] = { Ty, VTy };
6520     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6521     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6522     return Builder.CreateTrunc(Ops[0], Int16Ty);
6523   }
6524   case NEON::BI__builtin_neon_vaddlv_s16: {
6525     Int = Intrinsic::aarch64_neon_saddlv;
6526     Ty = Int32Ty;
6527     VTy = llvm::VectorType::get(Int16Ty, 4);
6528     llvm::Type *Tys[2] = { Ty, VTy };
6529     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6530     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6531   }
6532   case NEON::BI__builtin_neon_vaddlvq_s8: {
6533     Int = Intrinsic::aarch64_neon_saddlv;
6534     Ty = Int32Ty;
6535     VTy = llvm::VectorType::get(Int8Ty, 16);
6536     llvm::Type *Tys[2] = { Ty, VTy };
6537     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6538     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6539     return Builder.CreateTrunc(Ops[0], Int16Ty);
6540   }
6541   case NEON::BI__builtin_neon_vaddlvq_s16: {
6542     Int = Intrinsic::aarch64_neon_saddlv;
6543     Ty = Int32Ty;
6544     VTy = llvm::VectorType::get(Int16Ty, 8);
6545     llvm::Type *Tys[2] = { Ty, VTy };
6546     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6547     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6548   }
6549   case NEON::BI__builtin_neon_vsri_n_v:
6550   case NEON::BI__builtin_neon_vsriq_n_v: {
6551     Int = Intrinsic::aarch64_neon_vsri;
6552     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6553     return EmitNeonCall(Intrin, Ops, "vsri_n");
6554   }
6555   case NEON::BI__builtin_neon_vsli_n_v:
6556   case NEON::BI__builtin_neon_vsliq_n_v: {
6557     Int = Intrinsic::aarch64_neon_vsli;
6558     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6559     return EmitNeonCall(Intrin, Ops, "vsli_n");
6560   }
6561   case NEON::BI__builtin_neon_vsra_n_v:
6562   case NEON::BI__builtin_neon_vsraq_n_v:
6563     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6564     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6565     return Builder.CreateAdd(Ops[0], Ops[1]);
6566   case NEON::BI__builtin_neon_vrsra_n_v:
6567   case NEON::BI__builtin_neon_vrsraq_n_v: {
6568     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6569     SmallVector<llvm::Value*,2> TmpOps;
6570     TmpOps.push_back(Ops[1]);
6571     TmpOps.push_back(Ops[2]);
6572     Function* F = CGM.getIntrinsic(Int, Ty);
6573     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6574     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6575     return Builder.CreateAdd(Ops[0], tmp);
6576   }
6577     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6578     // of an Align parameter here.
6579   case NEON::BI__builtin_neon_vld1_x2_v:
6580   case NEON::BI__builtin_neon_vld1q_x2_v:
6581   case NEON::BI__builtin_neon_vld1_x3_v:
6582   case NEON::BI__builtin_neon_vld1q_x3_v:
6583   case NEON::BI__builtin_neon_vld1_x4_v:
6584   case NEON::BI__builtin_neon_vld1q_x4_v: {
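         // Emit the ld1xN structure load and store the returned aggregate
         // through the result pointer in Ops[0].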
6585     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6586     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6587     llvm::Type *Tys[2] = { VTy, PTy };
6588     unsigned Int;
6589     switch (BuiltinID) {
6590     case NEON::BI__builtin_neon_vld1_x2_v:
6591     case NEON::BI__builtin_neon_vld1q_x2_v:
6592       Int = Intrinsic::aarch64_neon_ld1x2;
6593       break;
6594     case NEON::BI__builtin_neon_vld1_x3_v:
6595     case NEON::BI__builtin_neon_vld1q_x3_v:
6596       Int = Intrinsic::aarch64_neon_ld1x3;
6597       break;
6598     case NEON::BI__builtin_neon_vld1_x4_v:
6599     case NEON::BI__builtin_neon_vld1q_x4_v:
6600       Int = Intrinsic::aarch64_neon_ld1x4;
6601       break;
6602     }
6603     Function *F = CGM.getIntrinsic(Int, Tys);
6604     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6605     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6606     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6607     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6608   }
6609   case NEON::BI__builtin_neon_vst1_x2_v:
6610   case NEON::BI__builtin_neon_vst1q_x2_v:
6611   case NEON::BI__builtin_neon_vst1_x3_v:
6612   case NEON::BI__builtin_neon_vst1q_x3_v:
6613   case NEON::BI__builtin_neon_vst1_x4_v:
6614   case NEON::BI__builtin_neon_vst1q_x4_v: {
6615     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6616     llvm::Type *Tys[2] = { VTy, PTy };
6617     unsigned Int;
6618     switch (BuiltinID) {
6619     case NEON::BI__builtin_neon_vst1_x2_v:
6620     case NEON::BI__builtin_neon_vst1q_x2_v:
6621       Int = Intrinsic::aarch64_neon_st1x2;
6622       break;
6623     case NEON::BI__builtin_neon_vst1_x3_v:
6624     case NEON::BI__builtin_neon_vst1q_x3_v:
6625       Int = Intrinsic::aarch64_neon_st1x3;
6626       break;
6627     case NEON::BI__builtin_neon_vst1_x4_v:
6628     case NEON::BI__builtin_neon_vst1q_x4_v:
6629       Int = Intrinsic::aarch64_neon_st1x4;
6630       break;
6631     }
6632     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6633     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6634   }
6635   case NEON::BI__builtin_neon_vld1_v:
6636   case NEON::BI__builtin_neon_vld1q_v: {
6637     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6638     auto Alignment = CharUnits::fromQuantity(
6639         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6640     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6641   }
6642   case NEON::BI__builtin_neon_vst1_v:
6643   case NEON::BI__builtin_neon_vst1q_v:
6644     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6645     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6646     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6647   case NEON::BI__builtin_neon_vld1_lane_v:
6648   case NEON::BI__builtin_neon_vld1q_lane_v: {
6649     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6650     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6651     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6652     auto Alignment = CharUnits::fromQuantity(
6653         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6654     Ops[0] =
6655         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6656     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6657   }
6658   case NEON::BI__builtin_neon_vld1_dup_v:
6659   case NEON::BI__builtin_neon_vld1q_dup_v: {
6660     Value *V = UndefValue::get(Ty);
6661     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6662     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6663     auto Alignment = CharUnits::fromQuantity(
6664         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6665     Ops[0] =
6666         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6667     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6668     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6669     return EmitNeonSplat(Ops[0], CI);
6670   }
6671   case NEON::BI__builtin_neon_vst1_lane_v:
6672   case NEON::BI__builtin_neon_vst1q_lane_v:
6673     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6674     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6675     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6676     return Builder.CreateDefaultAlignedStore(Ops[1],
6677                                              Builder.CreateBitCast(Ops[0], Ty));
6678   case NEON::BI__builtin_neon_vld2_v:
6679   case NEON::BI__builtin_neon_vld2q_v: {
6680     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6681     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6682     llvm::Type *Tys[2] = { VTy, PTy };
6683     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6684     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6685     Ops[0] = Builder.CreateBitCast(Ops[0],
6686                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6687     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6688   }
6689   case NEON::BI__builtin_neon_vld3_v:
6690   case NEON::BI__builtin_neon_vld3q_v: {
6691     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6692     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6693     llvm::Type *Tys[2] = { VTy, PTy };
6694     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6695     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6696     Ops[0] = Builder.CreateBitCast(Ops[0],
6697                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6698     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6699   }
6700   case NEON::BI__builtin_neon_vld4_v:
6701   case NEON::BI__builtin_neon_vld4q_v: {
6702     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6703     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6704     llvm::Type *Tys[2] = { VTy, PTy };
6705     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6706     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6707     Ops[0] = Builder.CreateBitCast(Ops[0],
6708                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6709     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6710   }
6711   case NEON::BI__builtin_neon_vld2_dup_v:
6712   case NEON::BI__builtin_neon_vld2q_dup_v: {
6713     llvm::Type *PTy =
6714       llvm::PointerType::getUnqual(VTy->getElementType());
6715     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6716     llvm::Type *Tys[2] = { VTy, PTy };
6717     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6718     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6719     Ops[0] = Builder.CreateBitCast(Ops[0],
6720                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6721     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6722   }
6723   case NEON::BI__builtin_neon_vld3_dup_v:
6724   case NEON::BI__builtin_neon_vld3q_dup_v: {
6725     llvm::Type *PTy =
6726       llvm::PointerType::getUnqual(VTy->getElementType());
6727     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6728     llvm::Type *Tys[2] = { VTy, PTy };
6729     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6730     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6731     Ops[0] = Builder.CreateBitCast(Ops[0],
6732                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6733     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6734   }
6735   case NEON::BI__builtin_neon_vld4_dup_v:
6736   case NEON::BI__builtin_neon_vld4q_dup_v: {
6737     llvm::Type *PTy =
6738       llvm::PointerType::getUnqual(VTy->getElementType());
6739     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6740     llvm::Type *Tys[2] = { VTy, PTy };
6741     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6742     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6743     Ops[0] = Builder.CreateBitCast(Ops[0],
6744                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6745     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6746   }
6747   case NEON::BI__builtin_neon_vld2_lane_v:
6748   case NEON::BI__builtin_neon_vld2q_lane_v: {
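         // Move the pointer operand to the end, zero-extend the lane index to
         // i64, and emit the ld2lane intrinsic; the aggregate result is stored
         // through the pointer in Ops[0].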
6749     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6750     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6751     Ops.push_back(Ops[1]);
6752     Ops.erase(Ops.begin()+1);
6753     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6754     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6755     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6756     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6757     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6758     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6759     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6760   }
6761   case NEON::BI__builtin_neon_vld3_lane_v:
6762   case NEON::BI__builtin_neon_vld3q_lane_v: {
6763     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6764     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6765     Ops.push_back(Ops[1]);
6766     Ops.erase(Ops.begin()+1);
6767     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6768     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6769     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6770     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6771     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6772     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6773     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6774     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6775   }
6776   case NEON::BI__builtin_neon_vld4_lane_v:
6777   case NEON::BI__builtin_neon_vld4q_lane_v: {
6778     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6779     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6780     Ops.push_back(Ops[1]);
6781     Ops.erase(Ops.begin()+1);
6782     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6783     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6784     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6785     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6786     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6787     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6788     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6789     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6790     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6791   }
6792   case NEON::BI__builtin_neon_vst2_v:
6793   case NEON::BI__builtin_neon_vst2q_v: {
6794     Ops.push_back(Ops[0]);
6795     Ops.erase(Ops.begin());
6796     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6797     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6798                         Ops, "");
6799   }
6800   case NEON::BI__builtin_neon_vst2_lane_v:
6801   case NEON::BI__builtin_neon_vst2q_lane_v: {
6802     Ops.push_back(Ops[0]);
6803     Ops.erase(Ops.begin());
6804     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6805     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6806     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6807                         Ops, "");
6808   }
6809   case NEON::BI__builtin_neon_vst3_v:
6810   case NEON::BI__builtin_neon_vst3q_v: {
6811     Ops.push_back(Ops[0]);
6812     Ops.erase(Ops.begin());
6813     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6814     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6815                         Ops, "");
6816   }
6817   case NEON::BI__builtin_neon_vst3_lane_v:
6818   case NEON::BI__builtin_neon_vst3q_lane_v: {
6819     Ops.push_back(Ops[0]);
6820     Ops.erase(Ops.begin());
6821     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6822     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6823     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6824                         Ops, "");
6825   }
6826   case NEON::BI__builtin_neon_vst4_v:
6827   case NEON::BI__builtin_neon_vst4q_v: {
6828     Ops.push_back(Ops[0]);
6829     Ops.erase(Ops.begin());
6830     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6831     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6832                         Ops, "");
6833   }
6834   case NEON::BI__builtin_neon_vst4_lane_v:
6835   case NEON::BI__builtin_neon_vst4q_lane_v: {
6836     Ops.push_back(Ops[0]);
6837     Ops.erase(Ops.begin());
6838     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6839     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6840     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6841                         Ops, "");
6842   }
6843   case NEON::BI__builtin_neon_vtrn_v:
6844   case NEON::BI__builtin_neon_vtrnq_v: {
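         // Lower vtrn as two shufflevectors (the TRN1/TRN2 halves), each stored
         // to consecutive slots of the result pointer in Ops[0].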
6845     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6846     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6847     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6848     Value *SV = nullptr;
6849
6850     for (unsigned vi = 0; vi != 2; ++vi) {
6851       SmallVector<uint32_t, 16> Indices;
6852       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6853         Indices.push_back(i+vi);
6854         Indices.push_back(i+e+vi);
6855       }
6856       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6857       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6858       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6859     }
6860     return SV;
6861   }
6862   case NEON::BI__builtin_neon_vuzp_v:
6863   case NEON::BI__builtin_neon_vuzpq_v: {
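         // Lower vuzp as two shufflevectors gathering the even- and odd-indexed
         // elements (UZP1/UZP2), each stored to consecutive slots of Ops[0].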
6864     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6865     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6866     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6867     Value *SV = nullptr;
6868
6869     for (unsigned vi = 0; vi != 2; ++vi) {
6870       SmallVector<uint32_t, 16> Indices;
6871       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6872         Indices.push_back(2*i+vi);
6873
6874       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6875       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6876       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6877     }
6878     return SV;
6879   }
6880   case NEON::BI__builtin_neon_vzip_v:
6881   case NEON::BI__builtin_neon_vzipq_v: {
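         // Lower vzip as two shufflevectors interleaving the low and then the
         // high halves of the inputs (ZIP1/ZIP2), each stored to consecutive
         // slots of Ops[0].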
6882     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6883     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6884     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6885     Value *SV = nullptr;
6886
6887     for (unsigned vi = 0; vi != 2; ++vi) {
6888       SmallVector<uint32_t, 16> Indices;
6889       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6890         Indices.push_back((i + vi*e) >> 1);
6891         Indices.push_back(((i + vi*e) >> 1)+e);
6892       }
6893       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6894       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6895       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6896     }
6897     return SV;
6898   }
6899   case NEON::BI__builtin_neon_vqtbl1q_v: {
6900     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6901                         Ops, "vtbl1");
6902   }
6903   case NEON::BI__builtin_neon_vqtbl2q_v: {
6904     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6905                         Ops, "vtbl2");
6906   }
6907   case NEON::BI__builtin_neon_vqtbl3q_v: {
6908     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6909                         Ops, "vtbl3");
6910   }
6911   case NEON::BI__builtin_neon_vqtbl4q_v: {
6912     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6913                         Ops, "vtbl4");
6914   }
6915   case NEON::BI__builtin_neon_vqtbx1q_v: {
6916     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6917                         Ops, "vtbx1");
6918   }
6919   case NEON::BI__builtin_neon_vqtbx2q_v: {
6920     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6921                         Ops, "vtbx2");
6922   }
6923   case NEON::BI__builtin_neon_vqtbx3q_v: {
6924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6925                         Ops, "vtbx3");
6926   }
6927   case NEON::BI__builtin_neon_vqtbx4q_v: {
6928     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6929                         Ops, "vtbx4");
6930   }
6931   case NEON::BI__builtin_neon_vsqadd_v:
6932   case NEON::BI__builtin_neon_vsqaddq_v: {
6933     Int = Intrinsic::aarch64_neon_usqadd;
6934     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6935   }
6936   case NEON::BI__builtin_neon_vuqadd_v:
6937   case NEON::BI__builtin_neon_vuqaddq_v: {
6938     Int = Intrinsic::aarch64_neon_suqadd;
6939     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6940   }
6941   }
6942 }
6943
6944 llvm::Value *CodeGenFunction::
6945 BuildVector(ArrayRef<llvm::Value*> Ops) {
6946   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6947          "Not a power-of-two sized vector!");
6948   bool AllConstants = true;
6949   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6950     AllConstants &= isa<Constant>(Ops[i]);
6951
6952   // If this is a constant vector, create a ConstantVector.
6953   if (AllConstants) {
6954     SmallVector<llvm::Constant*, 16> CstOps;
6955     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6956       CstOps.push_back(cast<Constant>(Ops[i]));
6957     return llvm::ConstantVector::get(CstOps);
6958   }
6959
6960   // Otherwise, insertelement the values to build the vector.
6961   Value *Result =
6962     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6963
6964   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6965     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6966
6967   return Result;
6968 }
6969
6970 // Convert the mask from an integer type to a vector of i1.
6971 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6972                               unsigned NumElts) {
6973
6974   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6975                          cast<IntegerType>(Mask->getType())->getBitWidth());
6976   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6977
6978   // If we have fewer than 8 elements, the starting mask was an i8 and
6979   // we need to extract down to the right number of elements.
6980   if (NumElts < 8) {
6981     uint32_t Indices[4];
6982     for (unsigned i = 0; i != NumElts; ++i)
6983       Indices[i] = i;
6984     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6985                                              makeArrayRef(Indices, NumElts),
6986                                              "extract");
6987   }
6988   return MaskVec;
6989 }
6990
6991 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6992                                  SmallVectorImpl<Value *> &Ops,
6993                                  unsigned Align) {
6994   // Cast the pointer to the right type.
6995   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6996                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6997
6998   // If the mask is all ones, just emit a regular store.
6999   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7000     if (C->isAllOnesValue())
7001       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7002
7003   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7004                                    Ops[1]->getType()->getVectorNumElements());
7005
7006   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7007 }
7008
7009 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7010                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7011   // Cast the pointer to the right type.
7012   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7013                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7014
7015   // If the mask is all ones, just emit a regular load.
7016   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7017     if (C->isAllOnesValue())
7018       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7019
7020   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7021                                    Ops[1]->getType()->getVectorNumElements());
7022
7023   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7024 }
7025
7026 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7027                                         SmallVectorImpl<Value *> &Ops,
7028                                         llvm::Type *DstTy,
7029                                         unsigned SrcSizeInBits,
7030                                         unsigned Align) {
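       // The broadcast repeats the loaded subvector across the wider destination
       // type via a shuffle mask of the form <0..N-1, 0..N-1, ...>.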
7031   // Load the subvector.
7032   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7033
7034   // Create broadcast mask.
7035   unsigned NumDstElts = DstTy->getVectorNumElements();
7036   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7037
7038   SmallVector<uint32_t, 8> Mask;
7039   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7040     for (unsigned j = 0; j != NumSrcElts; ++j)
7041       Mask.push_back(j);
7042
7043   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7044 }
7045
7046 static Value *EmitX86Select(CodeGenFunction &CGF,
7047                             Value *Mask, Value *Op0, Value *Op1) {
7048
7049   // If the mask is all ones, just return the first argument.
7050   if (const auto *C = dyn_cast<Constant>(Mask))
7051     if (C->isAllOnesValue())
7052       return Op0;
7053
7054   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7055
7056   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7057 }
7058
7059 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7060                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7061   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7062   Value *Cmp;
7063
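       // CC is the 3-bit comparison predicate from the builtin's immediate:
       // 0=eq, 1=lt, 2=le, 3=false, 4=ne, 5=nlt (ge), 6=nle (gt), 7=true.
       // 3 and 7 fold to constants; the rest map onto an integer compare.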
7064   if (CC == 3) {
7065     Cmp = Constant::getNullValue(
7066                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7067   } else if (CC == 7) {
7068     Cmp = Constant::getAllOnesValue(
7069                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7070   } else {
7071     ICmpInst::Predicate Pred;
7072     switch (CC) {
7073     default: llvm_unreachable("Unknown condition code");
7074     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7075     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7076     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7077     case 4: Pred = ICmpInst::ICMP_NE;  break;
7078     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7079     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7080     }
7081     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7082   }
7083
7084   const auto *C = dyn_cast<Constant>(Ops.back());
7085   if (!C || !C->isAllOnesValue())
7086     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7087
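       // Pad the <NumElts x i1> result out to 8 elements with zeros taken from
       // the null vector so it can be bitcast to an integer mask below.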
7088   if (NumElts < 8) {
7089     uint32_t Indices[8];
7090     for (unsigned i = 0; i != NumElts; ++i)
7091       Indices[i] = i;
7092     for (unsigned i = NumElts; i != 8; ++i)
7093       Indices[i] = i % NumElts + NumElts;
7094     Cmp = CGF.Builder.CreateShuffleVector(
7095         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7096   }
7097   return CGF.Builder.CreateBitCast(Cmp,
7098                                    IntegerType::get(CGF.getLLVMContext(),
7099                                                     std::max(NumElts, 8U)));
7100 }
7101
7102 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7103                             ArrayRef<Value *> Ops) {
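       // Integer min/max is emitted as a compare plus select; the 4-operand
       // masked form then blends the result with the passthrough operand via
       // EmitX86Select.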
7104   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7105   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7106
7107   if (Ops.size() == 2)
7108     return Res;
7109
7110   assert(Ops.size() == 4);
7111   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7112 }
7113
7114 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7115                                            const CallExpr *E) {
7116   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7117       BuiltinID == X86::BI__builtin_ms_va_end)
7118     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7119                           BuiltinID == X86::BI__builtin_ms_va_start);
7120   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7121     // Lower this manually. We can't reliably determine whether or not any
7122     // given va_copy() is for a Win64 va_list from the calling convention
7123     // alone, because it's legal to do this from a System V ABI function.
7124     // With opaque pointer types, we won't have enough information in LLVM
7125     // IR to determine this from the argument types, either. Best to do it
7126     // now, while we have enough information.
7127     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7128     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7129
7130     llvm::Type *BPP = Int8PtrPtrTy;
7131
7132     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7133                        DestAddr.getAlignment());
7134     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7135                       SrcAddr.getAlignment());
7136
7137     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7138     return Builder.CreateStore(ArgPtr, DestAddr);
7139   }
7140
7141   SmallVector<Value*, 4> Ops;
7142
7143   // Find out if any arguments are required to be integer constant expressions.
7144   unsigned ICEArguments = 0;
7145   ASTContext::GetBuiltinTypeError Error;
7146   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7147   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7148
7149   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7150     // If this is a normal argument, just emit it as a scalar.
7151     if ((ICEArguments & (1 << i)) == 0) {
7152       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7153       continue;
7154     }
7155
7156     // If this is required to be a constant, constant fold it so that we know
7157     // that the generated intrinsic gets a ConstantInt.
7158     llvm::APSInt Result;
7159     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7160     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7161     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7162   }
7163
7164   // These exist so that the builtin that takes an immediate can be bounds
7165   // checked by clang to avoid passing bad immediates to the backend. Since
7166   // AVX has a larger immediate than SSE we would need separate builtins to
7167   // do the different bounds checking. Rather than create a clang-specific
7168   // SSE-only builtin, this implements eight separate builtins to match the
7169   // gcc implementation.
7170   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7171     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7172     llvm::Function *F = CGM.getIntrinsic(ID);
7173     return Builder.CreateCall(F, Ops);
7174   };
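  // For example, __builtin_ia32_cmpltss(a, b) becomes a call to
  // llvm.x86.sse.cmp.ss(a, b, 1).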
7175
7176   // For the vector forms of FP comparisons, translate the builtins directly to
7177   // IR.
7178   // TODO: The builtins could be removed if the SSE header files used vector
7179   // extension comparisons directly (vector ordered/unordered may need
7180   // additional support via __builtin_isnan()).
7181   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7182     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7183     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7184     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7185     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7186     return Builder.CreateBitCast(Sext, FPVecTy);
7187   };
7188
7189   switch (BuiltinID) {
7190   default: return nullptr;
7191   case X86::BI__builtin_cpu_supports: {
7192     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7193     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7194
7195     // TODO: When/if this becomes more than x86-specific, use a TargetInfo-based
7196     // mapping.
7197     // Processor features and mapping to processor feature value.
7198     enum X86Features {
7199       CMOV = 0,
7200       MMX,
7201       POPCNT,
7202       SSE,
7203       SSE2,
7204       SSE3,
7205       SSSE3,
7206       SSE4_1,
7207       SSE4_2,
7208       AVX,
7209       AVX2,
7210       SSE4_A,
7211       FMA4,
7212       XOP,
7213       FMA,
7214       AVX512F,
7215       BMI,
7216       BMI2,
7217       AES,
7218       PCLMUL,
7219       AVX512VL,
7220       AVX512BW,
7221       AVX512DQ,
7222       AVX512CD,
7223       AVX512ER,
7224       AVX512PF,
7225       AVX512VBMI,
7226       AVX512IFMA,
7227       MAX
7228     };
7229
7230     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
7231                               .Case("cmov", X86Features::CMOV)
7232                               .Case("mmx", X86Features::MMX)
7233                               .Case("popcnt", X86Features::POPCNT)
7234                               .Case("sse", X86Features::SSE)
7235                               .Case("sse2", X86Features::SSE2)
7236                               .Case("sse3", X86Features::SSE3)
7237                               .Case("ssse3", X86Features::SSSE3)
7238                               .Case("sse4.1", X86Features::SSE4_1)
7239                               .Case("sse4.2", X86Features::SSE4_2)
7240                               .Case("avx", X86Features::AVX)
7241                               .Case("avx2", X86Features::AVX2)
7242                               .Case("sse4a", X86Features::SSE4_A)
7243                               .Case("fma4", X86Features::FMA4)
7244                               .Case("xop", X86Features::XOP)
7245                               .Case("fma", X86Features::FMA)
7246                               .Case("avx512f", X86Features::AVX512F)
7247                               .Case("bmi", X86Features::BMI)
7248                               .Case("bmi2", X86Features::BMI2)
7249                               .Case("aes", X86Features::AES)
7250                               .Case("pclmul", X86Features::PCLMUL)
7251                               .Case("avx512vl", X86Features::AVX512VL)
7252                               .Case("avx512bw", X86Features::AVX512BW)
7253                               .Case("avx512dq", X86Features::AVX512DQ)
7254                               .Case("avx512cd", X86Features::AVX512CD)
7255                               .Case("avx512er", X86Features::AVX512ER)
7256                               .Case("avx512pf", X86Features::AVX512PF)
7257                               .Case("avx512vbmi", X86Features::AVX512VBMI)
7258                               .Case("avx512ifma", X86Features::AVX512IFMA)
7259                               .Default(X86Features::MAX);
7260     assert(Feature != X86Features::MAX && "Invalid feature!");
7261
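    // For example, __builtin_cpu_supports("avx2") is lowered to a load of
    // __cpu_model.__cpu_features[0] and a test of the AVX2 bit; __cpu_model is
    // defined and initialized by the libgcc/compiler-rt runtime.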
7262     // Matching the struct layout from the compiler-rt/libgcc structure that is
7263     // filled in:
7264     // unsigned int __cpu_vendor;
7265     // unsigned int __cpu_type;
7266     // unsigned int __cpu_subtype;
7267     // unsigned int __cpu_features[1];
7268     llvm::Type *STy = llvm::StructType::get(
7269         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
7270
7271     // Grab the global __cpu_model.
7272     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7273
7274     // Grab the first (0th) element from the field __cpu_features of the
7275     // global, which has the struct type STy.
7276     Value *Idxs[] = {
7277       ConstantInt::get(Int32Ty, 0),
7278       ConstantInt::get(Int32Ty, 3),
7279       ConstantInt::get(Int32Ty, 0)
7280     };
7281     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7282     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7283                                                 CharUnits::fromQuantity(4));
7284
7285     // Check the value of the bit corresponding to the feature requested.
7286     Value *Bitset = Builder.CreateAnd(
7287         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7288     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7289   }
7290   case X86::BI_mm_prefetch: {
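    // _mm_prefetch is emitted as llvm.prefetch with rw = 0 (read) and cache
    // type = 1 (data), passing the builtin's hint operand through as the
    // locality argument.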
7291     Value *Address = Ops[0];
7292     Value *RW = ConstantInt::get(Int32Ty, 0);
7293     Value *Locality = Ops[1];
7294     Value *Data = ConstantInt::get(Int32Ty, 1);
7295     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7296     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7297   }
7298   case X86::BI_mm_clflush: {
7299     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7300                               Ops[0]);
7301   }
7302   case X86::BI_mm_lfence: {
7303     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7304   }
7305   case X86::BI_mm_mfence: {
7306     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7307   }
7308   case X86::BI_mm_sfence: {
7309     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7310   }
7311   case X86::BI_mm_pause: {
7312     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7313   }
7314   case X86::BI__rdtsc: {
7315     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7316   }
7317   case X86::BI__builtin_ia32_undef128:
7318   case X86::BI__builtin_ia32_undef256:
7319   case X86::BI__builtin_ia32_undef512:
7320     return UndefValue::get(ConvertType(E->getType()));
7321   case X86::BI__builtin_ia32_vec_init_v8qi:
7322   case X86::BI__builtin_ia32_vec_init_v4hi:
7323   case X86::BI__builtin_ia32_vec_init_v2si:
7324     return Builder.CreateBitCast(BuildVector(Ops),
7325                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7326   case X86::BI__builtin_ia32_vec_ext_v2si:
7327     return Builder.CreateExtractElement(Ops[0],
7328                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7329   case X86::BI_mm_setcsr:
7330   case X86::BI__builtin_ia32_ldmxcsr: {
7331     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7332     Builder.CreateStore(Ops[0], Tmp);
7333     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7334                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7335   }
7336   case X86::BI_mm_getcsr:
7337   case X86::BI__builtin_ia32_stmxcsr: {
7338     Address Tmp = CreateMemTemp(E->getType());
7339     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7340                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7341     return Builder.CreateLoad(Tmp, "stmxcsr");
7342   }
7343   case X86::BI__builtin_ia32_xsave:
7344   case X86::BI__builtin_ia32_xsave64:
7345   case X86::BI__builtin_ia32_xrstor:
7346   case X86::BI__builtin_ia32_xrstor64:
7347   case X86::BI__builtin_ia32_xsaveopt:
7348   case X86::BI__builtin_ia32_xsaveopt64:
7349   case X86::BI__builtin_ia32_xrstors:
7350   case X86::BI__builtin_ia32_xrstors64:
7351   case X86::BI__builtin_ia32_xsavec:
7352   case X86::BI__builtin_ia32_xsavec64:
7353   case X86::BI__builtin_ia32_xsaves:
7354   case X86::BI__builtin_ia32_xsaves64: {
7355     Intrinsic::ID ID;
7356 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7357     case X86::BI__builtin_ia32_##NAME: \
7358       ID = Intrinsic::x86_##NAME; \
7359       break
7360     switch (BuiltinID) {
7361     default: llvm_unreachable("Unsupported intrinsic!");
7362     INTRINSIC_X86_XSAVE_ID(xsave);
7363     INTRINSIC_X86_XSAVE_ID(xsave64);
7364     INTRINSIC_X86_XSAVE_ID(xrstor);
7365     INTRINSIC_X86_XSAVE_ID(xrstor64);
7366     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7367     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7368     INTRINSIC_X86_XSAVE_ID(xrstors);
7369     INTRINSIC_X86_XSAVE_ID(xrstors64);
7370     INTRINSIC_X86_XSAVE_ID(xsavec);
7371     INTRINSIC_X86_XSAVE_ID(xsavec64);
7372     INTRINSIC_X86_XSAVE_ID(xsaves);
7373     INTRINSIC_X86_XSAVE_ID(xsaves64);
7374     }
7375 #undef INTRINSIC_X86_XSAVE_ID
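    // The llvm.x86.xsave*/xrstor* intrinsics take the feature mask as two i32
    // halves (matching the EDX:EAX register pair used by the instructions), so
    // split the 64-bit mask operand here.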
7376     Value *Mhi = Builder.CreateTrunc(
7377       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7378     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7379     Ops[1] = Mhi;
7380     Ops.push_back(Mlo);
7381     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7382   }
7383   case X86::BI__builtin_ia32_storedqudi128_mask:
7384   case X86::BI__builtin_ia32_storedqusi128_mask:
7385   case X86::BI__builtin_ia32_storedquhi128_mask:
7386   case X86::BI__builtin_ia32_storedquqi128_mask:
7387   case X86::BI__builtin_ia32_storeupd128_mask:
7388   case X86::BI__builtin_ia32_storeups128_mask:
7389   case X86::BI__builtin_ia32_storedqudi256_mask:
7390   case X86::BI__builtin_ia32_storedqusi256_mask:
7391   case X86::BI__builtin_ia32_storedquhi256_mask:
7392   case X86::BI__builtin_ia32_storedquqi256_mask:
7393   case X86::BI__builtin_ia32_storeupd256_mask:
7394   case X86::BI__builtin_ia32_storeups256_mask:
7395   case X86::BI__builtin_ia32_storedqudi512_mask:
7396   case X86::BI__builtin_ia32_storedqusi512_mask:
7397   case X86::BI__builtin_ia32_storedquhi512_mask:
7398   case X86::BI__builtin_ia32_storedquqi512_mask:
7399   case X86::BI__builtin_ia32_storeupd512_mask:
7400   case X86::BI__builtin_ia32_storeups512_mask:
7401     return EmitX86MaskedStore(*this, Ops, 1);
7402
7403   case X86::BI__builtin_ia32_storess128_mask:
7404   case X86::BI__builtin_ia32_storesd128_mask: {
7405     return EmitX86MaskedStore(*this, Ops, 16);
7406   }
7407   case X86::BI__builtin_ia32_movdqa32store128_mask:
7408   case X86::BI__builtin_ia32_movdqa64store128_mask:
7409   case X86::BI__builtin_ia32_storeaps128_mask:
7410   case X86::BI__builtin_ia32_storeapd128_mask:
7411   case X86::BI__builtin_ia32_movdqa32store256_mask:
7412   case X86::BI__builtin_ia32_movdqa64store256_mask:
7413   case X86::BI__builtin_ia32_storeaps256_mask:
7414   case X86::BI__builtin_ia32_storeapd256_mask:
7415   case X86::BI__builtin_ia32_movdqa32store512_mask:
7416   case X86::BI__builtin_ia32_movdqa64store512_mask:
7417   case X86::BI__builtin_ia32_storeaps512_mask:
7418   case X86::BI__builtin_ia32_storeapd512_mask: {
7419     unsigned Align =
7420       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7421     return EmitX86MaskedStore(*this, Ops, Align);
7422   }
7423   case X86::BI__builtin_ia32_loadups128_mask:
7424   case X86::BI__builtin_ia32_loadups256_mask:
7425   case X86::BI__builtin_ia32_loadups512_mask:
7426   case X86::BI__builtin_ia32_loadupd128_mask:
7427   case X86::BI__builtin_ia32_loadupd256_mask:
7428   case X86::BI__builtin_ia32_loadupd512_mask:
7429   case X86::BI__builtin_ia32_loaddquqi128_mask:
7430   case X86::BI__builtin_ia32_loaddquqi256_mask:
7431   case X86::BI__builtin_ia32_loaddquqi512_mask:
7432   case X86::BI__builtin_ia32_loaddquhi128_mask:
7433   case X86::BI__builtin_ia32_loaddquhi256_mask:
7434   case X86::BI__builtin_ia32_loaddquhi512_mask:
7435   case X86::BI__builtin_ia32_loaddqusi128_mask:
7436   case X86::BI__builtin_ia32_loaddqusi256_mask:
7437   case X86::BI__builtin_ia32_loaddqusi512_mask:
7438   case X86::BI__builtin_ia32_loaddqudi128_mask:
7439   case X86::BI__builtin_ia32_loaddqudi256_mask:
7440   case X86::BI__builtin_ia32_loaddqudi512_mask:
7441     return EmitX86MaskedLoad(*this, Ops, 1);
7442
7443   case X86::BI__builtin_ia32_loadss128_mask:
7444   case X86::BI__builtin_ia32_loadsd128_mask:
7445     return EmitX86MaskedLoad(*this, Ops, 16);
7446
7447   case X86::BI__builtin_ia32_loadaps128_mask:
7448   case X86::BI__builtin_ia32_loadaps256_mask:
7449   case X86::BI__builtin_ia32_loadaps512_mask:
7450   case X86::BI__builtin_ia32_loadapd128_mask:
7451   case X86::BI__builtin_ia32_loadapd256_mask:
7452   case X86::BI__builtin_ia32_loadapd512_mask:
7453   case X86::BI__builtin_ia32_movdqa32load128_mask:
7454   case X86::BI__builtin_ia32_movdqa32load256_mask:
7455   case X86::BI__builtin_ia32_movdqa32load512_mask:
7456   case X86::BI__builtin_ia32_movdqa64load128_mask:
7457   case X86::BI__builtin_ia32_movdqa64load256_mask:
7458   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7459     unsigned Align =
7460       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7461     return EmitX86MaskedLoad(*this, Ops, Align);
7462   }
7463
7464   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7465   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7466     llvm::Type *DstTy = ConvertType(E->getType());
7467     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7468   }
7469
7470   case X86::BI__builtin_ia32_storehps:
7471   case X86::BI__builtin_ia32_storelps: {
7472     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7473     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7474
7475     // cast val to v2i64
7476     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7477
7478     // extract (0, 1)
7479     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7480     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7481     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7482
7483     // cast pointer to i64 & store
7484     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7485     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7486   }
7487   case X86::BI__builtin_ia32_palignr128:
7488   case X86::BI__builtin_ia32_palignr256:
7489   case X86::BI__builtin_ia32_palignr512_mask: {
7490     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7491
7492     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7493     assert(NumElts % 16 == 0);
7494
7495     // If palignr is shifting the pair of vectors more than the size of two
7496     // lanes, emit zero.
7497     if (ShiftVal >= 32)
7498       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7499
7500     // If palignr is shifting the pair of input vectors more than one lane,
7501     // but less than two lanes, convert to shifting in zeroes.
7502     if (ShiftVal > 16) {
7503       ShiftVal -= 16;
7504       Ops[1] = Ops[0];
7505       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7506     }
7507
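    // For example, with ShiftVal == 4 on a 128-bit vector the indices become
    // {4..19}: elements 4-15 come from Ops[1] and elements 16-19 select the
    // first four elements of Ops[0] in the shuffle below.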
7508     uint32_t Indices[64];
7509     // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
7510     for (unsigned l = 0; l != NumElts; l += 16) {
7511       for (unsigned i = 0; i != 16; ++i) {
7512         unsigned Idx = ShiftVal + i;
7513         if (Idx >= 16)
7514           Idx += NumElts - 16; // End of lane, switch operand.
7515         Indices[l + i] = Idx + l;
7516       }
7517     }
7518
7519     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7520                                                makeArrayRef(Indices, NumElts),
7521                                                "palignr");
7522
7523     // If this isn't a masked builtin, just return the align operation.
7524     if (Ops.size() == 3)
7525       return Align;
7526
7527     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7528   }
7529
7530   case X86::BI__builtin_ia32_movnti:
7531   case X86::BI__builtin_ia32_movnti64:
7532   case X86::BI__builtin_ia32_movntsd:
7533   case X86::BI__builtin_ia32_movntss: {
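    // Tag the store with !nontemporal metadata (value 1) so the backend can
    // select a non-temporal store instruction where one is available.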
7534     llvm::MDNode *Node = llvm::MDNode::get(
7535         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7536
7537     Value *Ptr = Ops[0];
7538     Value *Src = Ops[1];
7539
7540     // Extract the 0'th element of the source vector.
7541     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7542         BuiltinID == X86::BI__builtin_ia32_movntss)
7543       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7544
7545     // Convert the type of the pointer to a pointer to the stored type.
7546     Value *BC = Builder.CreateBitCast(
7547         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7548
7549     // Unaligned nontemporal store of the scalar value.
7550     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7551     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7552     SI->setAlignment(1);
7553     return SI;
7554   }
7555
7556   case X86::BI__builtin_ia32_selectb_128:
7557   case X86::BI__builtin_ia32_selectb_256:
7558   case X86::BI__builtin_ia32_selectb_512:
7559   case X86::BI__builtin_ia32_selectw_128:
7560   case X86::BI__builtin_ia32_selectw_256:
7561   case X86::BI__builtin_ia32_selectw_512:
7562   case X86::BI__builtin_ia32_selectd_128:
7563   case X86::BI__builtin_ia32_selectd_256:
7564   case X86::BI__builtin_ia32_selectd_512:
7565   case X86::BI__builtin_ia32_selectq_128:
7566   case X86::BI__builtin_ia32_selectq_256:
7567   case X86::BI__builtin_ia32_selectq_512:
7568   case X86::BI__builtin_ia32_selectps_128:
7569   case X86::BI__builtin_ia32_selectps_256:
7570   case X86::BI__builtin_ia32_selectps_512:
7571   case X86::BI__builtin_ia32_selectpd_128:
7572   case X86::BI__builtin_ia32_selectpd_256:
7573   case X86::BI__builtin_ia32_selectpd_512:
7574     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7575   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7576   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7577   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7578   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7579   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7580   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7581   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7582   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7583   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7584   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7585   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7586   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7587     return EmitX86MaskedCompare(*this, 0, false, Ops);
7588   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7589   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7590   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7591   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7592   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7593   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7594   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7595   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7596   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7597   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7598   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7599   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7600     return EmitX86MaskedCompare(*this, 6, true, Ops);
7601   case X86::BI__builtin_ia32_cmpb128_mask:
7602   case X86::BI__builtin_ia32_cmpb256_mask:
7603   case X86::BI__builtin_ia32_cmpb512_mask:
7604   case X86::BI__builtin_ia32_cmpw128_mask:
7605   case X86::BI__builtin_ia32_cmpw256_mask:
7606   case X86::BI__builtin_ia32_cmpw512_mask:
7607   case X86::BI__builtin_ia32_cmpd128_mask:
7608   case X86::BI__builtin_ia32_cmpd256_mask:
7609   case X86::BI__builtin_ia32_cmpd512_mask:
7610   case X86::BI__builtin_ia32_cmpq128_mask:
7611   case X86::BI__builtin_ia32_cmpq256_mask:
7612   case X86::BI__builtin_ia32_cmpq512_mask: {
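    // The immediate encodes a vpcmp predicate: 0 = eq, 1 = lt, 2 = le,
    // 3 = false, 4 = ne, 5 = nlt (ge), 6 = nle (gt), 7 = true. Only the low
    // three bits are meaningful, hence the mask with 0x7.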
7613     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7614     return EmitX86MaskedCompare(*this, CC, true, Ops);
7615   }
7616   case X86::BI__builtin_ia32_ucmpb128_mask:
7617   case X86::BI__builtin_ia32_ucmpb256_mask:
7618   case X86::BI__builtin_ia32_ucmpb512_mask:
7619   case X86::BI__builtin_ia32_ucmpw128_mask:
7620   case X86::BI__builtin_ia32_ucmpw256_mask:
7621   case X86::BI__builtin_ia32_ucmpw512_mask:
7622   case X86::BI__builtin_ia32_ucmpd128_mask:
7623   case X86::BI__builtin_ia32_ucmpd256_mask:
7624   case X86::BI__builtin_ia32_ucmpd512_mask:
7625   case X86::BI__builtin_ia32_ucmpq128_mask:
7626   case X86::BI__builtin_ia32_ucmpq256_mask:
7627   case X86::BI__builtin_ia32_ucmpq512_mask: {
7628     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7629     return EmitX86MaskedCompare(*this, CC, false, Ops);
7630   }
7631
7632   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7633   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7634   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7635   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7636   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7637   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7638     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7639     return EmitX86Select(*this, Ops[2],
7640                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7641                          Ops[1]);
7642   }
7643
7644   case X86::BI__builtin_ia32_pmaxsb128:
7645   case X86::BI__builtin_ia32_pmaxsw128:
7646   case X86::BI__builtin_ia32_pmaxsd128:
7647   case X86::BI__builtin_ia32_pmaxsq128_mask:
7648   case X86::BI__builtin_ia32_pmaxsb256:
7649   case X86::BI__builtin_ia32_pmaxsw256:
7650   case X86::BI__builtin_ia32_pmaxsd256:
7651   case X86::BI__builtin_ia32_pmaxsq256_mask:
7652   case X86::BI__builtin_ia32_pmaxsb512_mask:
7653   case X86::BI__builtin_ia32_pmaxsw512_mask:
7654   case X86::BI__builtin_ia32_pmaxsd512_mask:
7655   case X86::BI__builtin_ia32_pmaxsq512_mask:
7656     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7657   case X86::BI__builtin_ia32_pmaxub128:
7658   case X86::BI__builtin_ia32_pmaxuw128:
7659   case X86::BI__builtin_ia32_pmaxud128:
7660   case X86::BI__builtin_ia32_pmaxuq128_mask:
7661   case X86::BI__builtin_ia32_pmaxub256:
7662   case X86::BI__builtin_ia32_pmaxuw256:
7663   case X86::BI__builtin_ia32_pmaxud256:
7664   case X86::BI__builtin_ia32_pmaxuq256_mask:
7665   case X86::BI__builtin_ia32_pmaxub512_mask:
7666   case X86::BI__builtin_ia32_pmaxuw512_mask:
7667   case X86::BI__builtin_ia32_pmaxud512_mask:
7668   case X86::BI__builtin_ia32_pmaxuq512_mask:
7669     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7670   case X86::BI__builtin_ia32_pminsb128:
7671   case X86::BI__builtin_ia32_pminsw128:
7672   case X86::BI__builtin_ia32_pminsd128:
7673   case X86::BI__builtin_ia32_pminsq128_mask:
7674   case X86::BI__builtin_ia32_pminsb256:
7675   case X86::BI__builtin_ia32_pminsw256:
7676   case X86::BI__builtin_ia32_pminsd256:
7677   case X86::BI__builtin_ia32_pminsq256_mask:
7678   case X86::BI__builtin_ia32_pminsb512_mask:
7679   case X86::BI__builtin_ia32_pminsw512_mask:
7680   case X86::BI__builtin_ia32_pminsd512_mask:
7681   case X86::BI__builtin_ia32_pminsq512_mask:
7682     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7683   case X86::BI__builtin_ia32_pminub128:
7684   case X86::BI__builtin_ia32_pminuw128:
7685   case X86::BI__builtin_ia32_pminud128:
7686   case X86::BI__builtin_ia32_pminuq128_mask:
7687   case X86::BI__builtin_ia32_pminub256:
7688   case X86::BI__builtin_ia32_pminuw256:
7689   case X86::BI__builtin_ia32_pminud256:
7690   case X86::BI__builtin_ia32_pminuq256_mask:
7691   case X86::BI__builtin_ia32_pminub512_mask:
7692   case X86::BI__builtin_ia32_pminuw512_mask:
7693   case X86::BI__builtin_ia32_pminud512_mask:
7694   case X86::BI__builtin_ia32_pminuq512_mask:
7695     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7696
7697   // 3DNow!
7698   case X86::BI__builtin_ia32_pswapdsf:
7699   case X86::BI__builtin_ia32_pswapdsi: {
7700     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7701     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7702     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7703     return Builder.CreateCall(F, Ops, "pswapd");
7704   }
7705   case X86::BI__builtin_ia32_rdrand16_step:
7706   case X86::BI__builtin_ia32_rdrand32_step:
7707   case X86::BI__builtin_ia32_rdrand64_step:
7708   case X86::BI__builtin_ia32_rdseed16_step:
7709   case X86::BI__builtin_ia32_rdseed32_step:
7710   case X86::BI__builtin_ia32_rdseed64_step: {
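    // Each of these intrinsics returns a {value, success flag} pair; the value
    // is stored through the pointer operand and the flag (1 if the hardware
    // produced a random value) is returned.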
7711     Intrinsic::ID ID;
7712     switch (BuiltinID) {
7713     default: llvm_unreachable("Unsupported intrinsic!");
7714     case X86::BI__builtin_ia32_rdrand16_step:
7715       ID = Intrinsic::x86_rdrand_16;
7716       break;
7717     case X86::BI__builtin_ia32_rdrand32_step:
7718       ID = Intrinsic::x86_rdrand_32;
7719       break;
7720     case X86::BI__builtin_ia32_rdrand64_step:
7721       ID = Intrinsic::x86_rdrand_64;
7722       break;
7723     case X86::BI__builtin_ia32_rdseed16_step:
7724       ID = Intrinsic::x86_rdseed_16;
7725       break;
7726     case X86::BI__builtin_ia32_rdseed32_step:
7727       ID = Intrinsic::x86_rdseed_32;
7728       break;
7729     case X86::BI__builtin_ia32_rdseed64_step:
7730       ID = Intrinsic::x86_rdseed_64;
7731       break;
7732     }
7733
7734     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7735     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7736                                       Ops[0]);
7737     return Builder.CreateExtractValue(Call, 1);
7738   }
7739
7740   // SSE packed comparison intrinsics
7741   case X86::BI__builtin_ia32_cmpeqps:
7742   case X86::BI__builtin_ia32_cmpeqpd:
7743     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7744   case X86::BI__builtin_ia32_cmpltps:
7745   case X86::BI__builtin_ia32_cmpltpd:
7746     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7747   case X86::BI__builtin_ia32_cmpleps:
7748   case X86::BI__builtin_ia32_cmplepd:
7749     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7750   case X86::BI__builtin_ia32_cmpunordps:
7751   case X86::BI__builtin_ia32_cmpunordpd:
7752     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7753   case X86::BI__builtin_ia32_cmpneqps:
7754   case X86::BI__builtin_ia32_cmpneqpd:
7755     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7756   case X86::BI__builtin_ia32_cmpnltps:
7757   case X86::BI__builtin_ia32_cmpnltpd:
7758     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7759   case X86::BI__builtin_ia32_cmpnleps:
7760   case X86::BI__builtin_ia32_cmpnlepd:
7761     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7762   case X86::BI__builtin_ia32_cmpordps:
7763   case X86::BI__builtin_ia32_cmpordpd:
7764     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7765   case X86::BI__builtin_ia32_cmpps:
7766   case X86::BI__builtin_ia32_cmpps256:
7767   case X86::BI__builtin_ia32_cmppd:
7768   case X86::BI__builtin_ia32_cmppd256: {
7769     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7770     // If this is one of the SSE immediates, we can use native IR.
7771     if (CC < 8) {
7772       FCmpInst::Predicate Pred;
7773       switch (CC) {
7774       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7775       case 1: Pred = FCmpInst::FCMP_OLT; break;
7776       case 2: Pred = FCmpInst::FCMP_OLE; break;
7777       case 3: Pred = FCmpInst::FCMP_UNO; break;
7778       case 4: Pred = FCmpInst::FCMP_UNE; break;
7779       case 5: Pred = FCmpInst::FCMP_UGE; break;
7780       case 6: Pred = FCmpInst::FCMP_UGT; break;
7781       case 7: Pred = FCmpInst::FCMP_ORD; break;
7782       }
7783       return getVectorFCmpIR(Pred);
7784     }
7785
7786     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7787     Intrinsic::ID ID;
7788     switch (BuiltinID) {
7789     default: llvm_unreachable("Unsupported intrinsic!");
7790     case X86::BI__builtin_ia32_cmpps:
7791       ID = Intrinsic::x86_sse_cmp_ps;
7792       break;
7793     case X86::BI__builtin_ia32_cmpps256:
7794       ID = Intrinsic::x86_avx_cmp_ps_256;
7795       break;
7796     case X86::BI__builtin_ia32_cmppd:
7797       ID = Intrinsic::x86_sse2_cmp_pd;
7798       break;
7799     case X86::BI__builtin_ia32_cmppd256:
7800       ID = Intrinsic::x86_avx_cmp_pd_256;
7801       break;
7802     }
7803
7804     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7805   }
7806
7807   // SSE scalar comparison intrinsics
7808   case X86::BI__builtin_ia32_cmpeqss:
7809     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7810   case X86::BI__builtin_ia32_cmpltss:
7811     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7812   case X86::BI__builtin_ia32_cmpless:
7813     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7814   case X86::BI__builtin_ia32_cmpunordss:
7815     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7816   case X86::BI__builtin_ia32_cmpneqss:
7817     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7818   case X86::BI__builtin_ia32_cmpnltss:
7819     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7820   case X86::BI__builtin_ia32_cmpnless:
7821     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7822   case X86::BI__builtin_ia32_cmpordss:
7823     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7824   case X86::BI__builtin_ia32_cmpeqsd:
7825     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7826   case X86::BI__builtin_ia32_cmpltsd:
7827     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7828   case X86::BI__builtin_ia32_cmplesd:
7829     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7830   case X86::BI__builtin_ia32_cmpunordsd:
7831     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7832   case X86::BI__builtin_ia32_cmpneqsd:
7833     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7834   case X86::BI__builtin_ia32_cmpnltsd:
7835     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7836   case X86::BI__builtin_ia32_cmpnlesd:
7837     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7838   case X86::BI__builtin_ia32_cmpordsd:
7839     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7840
7841   case X86::BI__emul:
7842   case X86::BI__emulu: {
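    // __emul/__emulu multiply two 32-bit operands into a full 64-bit product,
    // so widen both operands to i64 (sign- or zero-extending as appropriate)
    // before the multiply.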
7843     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7844     bool isSigned = (BuiltinID == X86::BI__emul);
7845     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7846     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7847     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7848   }
7849   case X86::BI__mulh:
7850   case X86::BI__umulh:
7851   case X86::BI_mul128:
7852   case X86::BI_umul128: {
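    // __mulh/__umulh return the high 64 bits of a 64x64->128-bit product;
    // _mul128/_umul128 additionally store the high half through the third
    // argument and return the low half. Do the multiply in i128 and split it.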
7853     llvm::Type *ResType = ConvertType(E->getType());
7854     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7855
7856     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7857     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7858     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7859
7860     Value *MulResult, *HigherBits;
7861     if (IsSigned) {
7862       MulResult = Builder.CreateNSWMul(LHS, RHS);
7863       HigherBits = Builder.CreateAShr(MulResult, 64);
7864     } else {
7865       MulResult = Builder.CreateNUWMul(LHS, RHS);
7866       HigherBits = Builder.CreateLShr(MulResult, 64);
7867     }
7868     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
7869
7870     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
7871       return HigherBits;
7872
7873     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
7874     Builder.CreateStore(HigherBits, HighBitsAddress);
7875     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
7876   }
7877
7878   case X86::BI__faststorefence: {
7879     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7880                                llvm::CrossThread);
7881   }
7882   case X86::BI_ReadWriteBarrier:
7883   case X86::BI_ReadBarrier:
7884   case X86::BI_WriteBarrier: {
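    // These barriers only need to prevent compiler reordering, so a
    // single-thread fence (no hardware fence instruction) is sufficient.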
7885     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7886                                llvm::SingleThread);
7887   }
7888   case X86::BI_BitScanForward:
7889   case X86::BI_BitScanForward64:
7890     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
7891   case X86::BI_BitScanReverse:
7892   case X86::BI_BitScanReverse64:
7893     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
7894
7895   case X86::BI_InterlockedAnd64:
7896     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
7897   case X86::BI_InterlockedExchange64:
7898     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
7899   case X86::BI_InterlockedExchangeAdd64:
7900     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
7901   case X86::BI_InterlockedExchangeSub64:
7902     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
7903   case X86::BI_InterlockedOr64:
7904     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
7905   case X86::BI_InterlockedXor64:
7906     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
7907   case X86::BI_InterlockedDecrement64:
7908     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
7909   case X86::BI_InterlockedIncrement64:
7910     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
7911
7912   case X86::BI_AddressOfReturnAddress: {
7913     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7914     return Builder.CreateCall(F);
7915   }
7916   case X86::BI__stosb: {
7917     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
7918     // instruction, but it will create a memset that won't be optimized away.
7919     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
7920   }
7921   }
7922 }
7923
7924
7925 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7926                                            const CallExpr *E) {
7927   SmallVector<Value*, 4> Ops;
7928
7929   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7930     Ops.push_back(EmitScalarExpr(E->getArg(i)));
7931
7932   Intrinsic::ID ID = Intrinsic::not_intrinsic;
7933
7934   switch (BuiltinID) {
7935   default: return nullptr;
7936
7937   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7938   // call __builtin_readcyclecounter.
7939   case PPC::BI__builtin_ppc_get_timebase:
7940     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7941
7942   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
7943   case PPC::BI__builtin_altivec_lvx:
7944   case PPC::BI__builtin_altivec_lvxl:
7945   case PPC::BI__builtin_altivec_lvebx:
7946   case PPC::BI__builtin_altivec_lvehx:
7947   case PPC::BI__builtin_altivec_lvewx:
7948   case PPC::BI__builtin_altivec_lvsl:
7949   case PPC::BI__builtin_altivec_lvsr:
7950   case PPC::BI__builtin_vsx_lxvd2x:
7951   case PPC::BI__builtin_vsx_lxvw4x:
7952   case PPC::BI__builtin_vsx_lxvd2x_be:
7953   case PPC::BI__builtin_vsx_lxvw4x_be:
7954   case PPC::BI__builtin_vsx_lxvl:
7955   case PPC::BI__builtin_vsx_lxvll:
7956   {
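    // lxvl/lxvll take (pointer, length) and keep their explicit length operand;
    // the remaining forms take (offset, pointer), so fold those into a single
    // byte address with a GEP and drop the now-redundant pointer operand.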
7957     if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
7958         BuiltinID == PPC::BI__builtin_vsx_lxvll) {
7959       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
7960     } else {
7961       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7962       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7963       Ops.pop_back();
7964     }
7965
7966     switch (BuiltinID) {
7967     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7968     case PPC::BI__builtin_altivec_lvx:
7969       ID = Intrinsic::ppc_altivec_lvx;
7970       break;
7971     case PPC::BI__builtin_altivec_lvxl:
7972       ID = Intrinsic::ppc_altivec_lvxl;
7973       break;
7974     case PPC::BI__builtin_altivec_lvebx:
7975       ID = Intrinsic::ppc_altivec_lvebx;
7976       break;
7977     case PPC::BI__builtin_altivec_lvehx:
7978       ID = Intrinsic::ppc_altivec_lvehx;
7979       break;
7980     case PPC::BI__builtin_altivec_lvewx:
7981       ID = Intrinsic::ppc_altivec_lvewx;
7982       break;
7983     case PPC::BI__builtin_altivec_lvsl:
7984       ID = Intrinsic::ppc_altivec_lvsl;
7985       break;
7986     case PPC::BI__builtin_altivec_lvsr:
7987       ID = Intrinsic::ppc_altivec_lvsr;
7988       break;
7989     case PPC::BI__builtin_vsx_lxvd2x:
7990       ID = Intrinsic::ppc_vsx_lxvd2x;
7991       break;
7992     case PPC::BI__builtin_vsx_lxvw4x:
7993       ID = Intrinsic::ppc_vsx_lxvw4x;
7994       break;
7995     case PPC::BI__builtin_vsx_lxvd2x_be:
7996       ID = Intrinsic::ppc_vsx_lxvd2x_be;
7997       break;
7998     case PPC::BI__builtin_vsx_lxvw4x_be:
7999       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8000       break;
8001     case PPC::BI__builtin_vsx_lxvl:
8002       ID = Intrinsic::ppc_vsx_lxvl;
8003       break;
8004     case PPC::BI__builtin_vsx_lxvll:
8005       ID = Intrinsic::ppc_vsx_lxvll;
8006       break;
8007     }
8008     llvm::Function *F = CGM.getIntrinsic(ID);
8009     return Builder.CreateCall(F, Ops, "");
8010   }
8011
8012   // vec_st, vec_xst_be
8013   case PPC::BI__builtin_altivec_stvx:
8014   case PPC::BI__builtin_altivec_stvxl:
8015   case PPC::BI__builtin_altivec_stvebx:
8016   case PPC::BI__builtin_altivec_stvehx:
8017   case PPC::BI__builtin_altivec_stvewx:
8018   case PPC::BI__builtin_vsx_stxvd2x:
8019   case PPC::BI__builtin_vsx_stxvw4x:
8020   case PPC::BI__builtin_vsx_stxvd2x_be:
8021   case PPC::BI__builtin_vsx_stxvw4x_be:
8022   case PPC::BI__builtin_vsx_stxvl:
8023   case PPC::BI__builtin_vsx_stxvll:
8024   {
8025     if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8026         BuiltinID == PPC::BI__builtin_vsx_stxvll) {
8027       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8028     } else {
8029       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8030       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8031       Ops.pop_back();
8032     }
8033
8034     switch (BuiltinID) {
8035     default: llvm_unreachable("Unsupported st intrinsic!");
8036     case PPC::BI__builtin_altivec_stvx:
8037       ID = Intrinsic::ppc_altivec_stvx;
8038       break;
8039     case PPC::BI__builtin_altivec_stvxl:
8040       ID = Intrinsic::ppc_altivec_stvxl;
8041       break;
8042     case PPC::BI__builtin_altivec_stvebx:
8043       ID = Intrinsic::ppc_altivec_stvebx;
8044       break;
8045     case PPC::BI__builtin_altivec_stvehx:
8046       ID = Intrinsic::ppc_altivec_stvehx;
8047       break;
8048     case PPC::BI__builtin_altivec_stvewx:
8049       ID = Intrinsic::ppc_altivec_stvewx;
8050       break;
8051     case PPC::BI__builtin_vsx_stxvd2x:
8052       ID = Intrinsic::ppc_vsx_stxvd2x;
8053       break;
8054     case PPC::BI__builtin_vsx_stxvw4x:
8055       ID = Intrinsic::ppc_vsx_stxvw4x;
8056       break;
8057     case PPC::BI__builtin_vsx_stxvd2x_be:
8058       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8059       break;
8060     case PPC::BI__builtin_vsx_stxvw4x_be:
8061       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8062       break;
8063     case PPC::BI__builtin_vsx_stxvl:
8064       ID = Intrinsic::ppc_vsx_stxvl;
8065       break;
8066     case PPC::BI__builtin_vsx_stxvll:
8067       ID = Intrinsic::ppc_vsx_stxvll;
8068       break;
8069     }
8070     llvm::Function *F = CGM.getIntrinsic(ID);
8071     return Builder.CreateCall(F, Ops, "");
8072   }
8073   // Square root
8074   case PPC::BI__builtin_vsx_xvsqrtsp:
8075   case PPC::BI__builtin_vsx_xvsqrtdp: {
8076     llvm::Type *ResultType = ConvertType(E->getType());
8077     Value *X = EmitScalarExpr(E->getArg(0));
8078     ID = Intrinsic::sqrt;
8079     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8080     return Builder.CreateCall(F, X);
8081   }
8082   // Count leading zeros
8083   case PPC::BI__builtin_altivec_vclzb:
8084   case PPC::BI__builtin_altivec_vclzh:
8085   case PPC::BI__builtin_altivec_vclzw:
8086   case PPC::BI__builtin_altivec_vclzd: {
8087     llvm::Type *ResultType = ConvertType(E->getType());
8088     Value *X = EmitScalarExpr(E->getArg(0));
8089     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8090     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8091     return Builder.CreateCall(F, {X, Undef});
8092   }
8093   case PPC::BI__builtin_altivec_vctzb:
8094   case PPC::BI__builtin_altivec_vctzh:
8095   case PPC::BI__builtin_altivec_vctzw:
8096   case PPC::BI__builtin_altivec_vctzd: {
8097     llvm::Type *ResultType = ConvertType(E->getType());
8098     Value *X = EmitScalarExpr(E->getArg(0));
8099     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8100     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8101     return Builder.CreateCall(F, {X, Undef});
8102   }
8103   case PPC::BI__builtin_altivec_vpopcntb:
8104   case PPC::BI__builtin_altivec_vpopcnth:
8105   case PPC::BI__builtin_altivec_vpopcntw:
8106   case PPC::BI__builtin_altivec_vpopcntd: {
8107     llvm::Type *ResultType = ConvertType(E->getType());
8108     Value *X = EmitScalarExpr(E->getArg(0));
8109     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8110     return Builder.CreateCall(F, X);
8111   }
8112   // Copy sign
8113   case PPC::BI__builtin_vsx_xvcpsgnsp:
8114   case PPC::BI__builtin_vsx_xvcpsgndp: {
8115     llvm::Type *ResultType = ConvertType(E->getType());
8116     Value *X = EmitScalarExpr(E->getArg(0));
8117     Value *Y = EmitScalarExpr(E->getArg(1));
8118     ID = Intrinsic::copysign;
8119     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8120     return Builder.CreateCall(F, {X, Y});
8121   }
8122   // Rounding/truncation
8123   case PPC::BI__builtin_vsx_xvrspip:
8124   case PPC::BI__builtin_vsx_xvrdpip:
8125   case PPC::BI__builtin_vsx_xvrdpim:
8126   case PPC::BI__builtin_vsx_xvrspim:
8127   case PPC::BI__builtin_vsx_xvrdpi:
8128   case PPC::BI__builtin_vsx_xvrspi:
8129   case PPC::BI__builtin_vsx_xvrdpic:
8130   case PPC::BI__builtin_vsx_xvrspic:
8131   case PPC::BI__builtin_vsx_xvrdpiz:
8132   case PPC::BI__builtin_vsx_xvrspiz: {
8133     llvm::Type *ResultType = ConvertType(E->getType());
8134     Value *X = EmitScalarExpr(E->getArg(0));
8135     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8136         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8137       ID = Intrinsic::floor;
8138     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8139              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8140       ID = Intrinsic::round;
8141     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8142              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8143       ID = Intrinsic::nearbyint;
8144     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8145              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8146       ID = Intrinsic::ceil;
8147     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8148              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8149       ID = Intrinsic::trunc;
8150     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8151     return Builder.CreateCall(F, X);
8152   }
8153
8154   // Absolute value
8155   case PPC::BI__builtin_vsx_xvabsdp:
8156   case PPC::BI__builtin_vsx_xvabssp: {
8157     llvm::Type *ResultType = ConvertType(E->getType());
8158     Value *X = EmitScalarExpr(E->getArg(0));
8159     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8160     return Builder.CreateCall(F, X);
8161   }
8162
8163   // FMA variations
8164   case PPC::BI__builtin_vsx_xvmaddadp:
8165   case PPC::BI__builtin_vsx_xvmaddasp:
8166   case PPC::BI__builtin_vsx_xvnmaddadp:
8167   case PPC::BI__builtin_vsx_xvnmaddasp:
8168   case PPC::BI__builtin_vsx_xvmsubadp:
8169   case PPC::BI__builtin_vsx_xvmsubasp:
8170   case PPC::BI__builtin_vsx_xvnmsubadp:
8171   case PPC::BI__builtin_vsx_xvnmsubasp: {
8172     llvm::Type *ResultType = ConvertType(E->getType());
8173     Value *X = EmitScalarExpr(E->getArg(0));
8174     Value *Y = EmitScalarExpr(E->getArg(1));
8175     Value *Z = EmitScalarExpr(E->getArg(2));
8176     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8177     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
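    // Map onto llvm.fma: madd = fma(X, Y, Z), msub = fma(X, Y, -Z),
    // nmadd = -fma(X, Y, Z), nmsub = -fma(X, Y, -Z); the negations are emitted
    // as fsubs from Zero.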
8178     switch (BuiltinID) {
8179       case PPC::BI__builtin_vsx_xvmaddadp:
8180       case PPC::BI__builtin_vsx_xvmaddasp:
8181         return Builder.CreateCall(F, {X, Y, Z});
8182       case PPC::BI__builtin_vsx_xvnmaddadp:
8183       case PPC::BI__builtin_vsx_xvnmaddasp:
8184         return Builder.CreateFSub(Zero,
8185                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8186       case PPC::BI__builtin_vsx_xvmsubadp:
8187       case PPC::BI__builtin_vsx_xvmsubasp:
8188         return Builder.CreateCall(F,
8189                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8190       case PPC::BI__builtin_vsx_xvnmsubadp:
8191       case PPC::BI__builtin_vsx_xvnmsubasp:
8192         Value *FsubRes =
8193           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8194         return Builder.CreateFSub(Zero, FsubRes, "sub");
8195     }
8196     llvm_unreachable("Unknown FMA operation");
8197     return nullptr; // Suppress no-return warning
8198   }
8199
8200   case PPC::BI__builtin_vsx_insertword: {
8201     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8202
8203     // The third argument is a compile-time constant int. It must be clamped
8204     // to the range [0, 12].
8205     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8206     assert(ArgCI &&
8207            "Third arg to xxinsertw intrinsic must be constant integer");
8208     const int64_t MaxIndex = 12;
8209     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8210
8211     // The builtin semantics don't exactly match the xxinsertw instruction's
8212     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8213     // word from the first argument, and inserts it in the second argument. The
8214     // instruction extracts the word from its second input register and inserts
8215     // it into its first input register, so swap the first and second arguments.
8216     std::swap(Ops[0], Ops[1]);
8217
8218     // Need to cast the second argument from a vector of unsigned int to a
8219     // vector of long long.
8220     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8221
8222     if (getTarget().isLittleEndian()) {
8223       // Create a shuffle mask of (1, 0)
8224       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8225                                    ConstantInt::get(Int32Ty, 0)
8226                                  };
8227       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8228
8229       // Reverse the double words in the vector we will extract from.
8230       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8231       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8232
8233       // Reverse the index.
8234       Index = MaxIndex - Index;
8235     }
8236
8237     // Intrinsic expects the first arg to be a vector of int.
8238     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8239     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8240     return Builder.CreateCall(F, Ops);
8241   }
8242
8243   case PPC::BI__builtin_vsx_extractuword: {
8244     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8245
8246     // Intrinsic expects the first argument to be a vector of doublewords.
8247     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8248
8249     // The second argument is a compile-time constant int that needs to
8250     // be clamped to the range [0, 12].
8251     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8252     assert(ArgCI &&
8253            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8254     const int64_t MaxIndex = 12;
8255     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8256
8257     if (getTarget().isLittleEndian()) {
8258       // Reverse the index.
8259       Index = MaxIndex - Index;
8260       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8261
8262       // Emit the call, then reverse the double words of the result vector.
8263       Value *Call = Builder.CreateCall(F, Ops);
8264
8265       // Create a shuffle mask of (1, 0)
8266       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8267                                    ConstantInt::get(Int32Ty, 0)
8268                                  };
8269       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8270
8271       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8272       return ShuffleCall;
8273     } else {
8274       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8275       return Builder.CreateCall(F, Ops);
8276     }
8277   }
8278   }
8279 }
8280
8281 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8282                                               const CallExpr *E) {
8283   switch (BuiltinID) {
8284   case AMDGPU::BI__builtin_amdgcn_div_scale:
8285   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8286     // Translate from the intrinsic's struct return to the builtin's out
8287     // argument.
8288
8289     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8290
8291     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8292     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8293     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8294
8295     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8296                                            X->getType());
8297
8298     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8299
8300     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8301     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8302
8303     llvm::Type *RealFlagType
8304       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8305
8306     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8307     Builder.CreateStore(FlagExt, FlagOutPtr);
8308     return Result;
8309   }
8310   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8311   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8312     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8313     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8314     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8315     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8316
8317     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8318                                       Src0->getType());
8319     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8320     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8321   }
8322
8323   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8324     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8325   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8326   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8327   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8328     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8329   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8330   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8331     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8332   case AMDGPU::BI__builtin_amdgcn_rcp:
8333   case AMDGPU::BI__builtin_amdgcn_rcpf:
8334   case AMDGPU::BI__builtin_amdgcn_rcph:
8335     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8336   case AMDGPU::BI__builtin_amdgcn_rsq:
8337   case AMDGPU::BI__builtin_amdgcn_rsqf:
8338   case AMDGPU::BI__builtin_amdgcn_rsqh:
8339     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8340   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8341   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8342     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8343   case AMDGPU::BI__builtin_amdgcn_sinf:
8344   case AMDGPU::BI__builtin_amdgcn_sinh:
8345     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8346   case AMDGPU::BI__builtin_amdgcn_cosf:
8347   case AMDGPU::BI__builtin_amdgcn_cosh:
8348     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8349   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8350     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8351   case AMDGPU::BI__builtin_amdgcn_ldexp:
8352   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8353   case AMDGPU::BI__builtin_amdgcn_ldexph:
8354     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8355   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8356   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8357   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8358     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8359   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8360   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8361     Value *Src0 = EmitScalarExpr(E->getArg(0));
8362     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8363                                 { Builder.getInt32Ty(), Src0->getType() });
8364     return Builder.CreateCall(F, Src0);
8365   }
8366   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8367     Value *Src0 = EmitScalarExpr(E->getArg(0));
8368     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8369                                 { Builder.getInt16Ty(), Src0->getType() });
8370     return Builder.CreateCall(F, Src0);
8371   }
8372   case AMDGPU::BI__builtin_amdgcn_fract:
8373   case AMDGPU::BI__builtin_amdgcn_fractf:
8374   case AMDGPU::BI__builtin_amdgcn_fracth:
8375     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8376   case AMDGPU::BI__builtin_amdgcn_lerp:
8377     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8378   case AMDGPU::BI__builtin_amdgcn_uicmp:
8379   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8380   case AMDGPU::BI__builtin_amdgcn_sicmp:
8381   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8382     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8383   case AMDGPU::BI__builtin_amdgcn_fcmp:
8384   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8385     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8386   case AMDGPU::BI__builtin_amdgcn_class:
8387   case AMDGPU::BI__builtin_amdgcn_classf:
8388   case AMDGPU::BI__builtin_amdgcn_classh:
8389     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8390
8391   case AMDGPU::BI__builtin_amdgcn_read_exec: {
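    // Read the 64-bit EXEC mask via llvm.read_register; the call is marked
    // convergent so it is not moved across control flow that could change the
    // set of active lanes.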
8392     CallInst *CI = cast<CallInst>(
8393       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
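    // The exec mask depends on which lanes are currently active, so mark the
    // read convergent to keep it from being moved across divergent control
    // flow.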
8394     CI->setConvergent();
8395     return CI;
8396   }
8397
8398   // amdgcn workitem
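  // emitRangedBuiltin presumably attaches range metadata to the call, so
  // optimizers may assume the id lies in [0, 1024), with 1024 taken as the
  // largest workgroup dimension here.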
8399   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8400     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8401   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8402     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8403   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8404     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8405
8406   // r600 intrinsics
8407   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8408   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8409     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8410   case AMDGPU::BI__builtin_r600_read_tidig_x:
8411     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8412   case AMDGPU::BI__builtin_r600_read_tidig_y:
8413     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8414   case AMDGPU::BI__builtin_r600_read_tidig_z:
8415     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8416   default:
8417     return nullptr;
8418   }
8419 }
8420
8421 /// Handle a SystemZ function in which the final argument is a pointer
8422 /// to an int that receives the post-instruction CC value.  At the LLVM level
8423 /// this is represented as a function that returns a {result, cc} pair.
8424 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8425                                          unsigned IntrinsicID,
8426                                          const CallExpr *E) {
8427   unsigned NumArgs = E->getNumArgs() - 1;
8428   SmallVector<Value *, 8> Args(NumArgs);
8429   for (unsigned I = 0; I < NumArgs; ++I)
8430     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8431   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8432   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8433   Value *Call = CGF.Builder.CreateCall(F, Args);
8434   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8435   CGF.Builder.CreateStore(CC, CCPtr);
8436   return CGF.Builder.CreateExtractValue(Call, 0);
8437 }
8438
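// A rough sketch of the lowering performed above, assuming a hypothetical
// builtin of the form 'T __builtin_s390_foo(args..., int *cc)' whose LLVM
// intrinsic returns a {T, i32} pair:
//
//   %pair = call { T, i32 } @llvm.s390.foo(args...)
//   %cc   = extractvalue { T, i32 } %pair, 1
//   store i32 %cc, i32* %ccptr          ; written through the last argument
//   %res  = extractvalue { T, i32 } %pair, 0   ; returned to the caller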
8439 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8440                                                const CallExpr *E) {
8441   switch (BuiltinID) {
8442   case SystemZ::BI__builtin_tbegin: {
8443     Value *TDB = EmitScalarExpr(E->getArg(0));
8444     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8445     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8446     return Builder.CreateCall(F, {TDB, Control});
8447   }
8448   case SystemZ::BI__builtin_tbegin_nofloat: {
8449     Value *TDB = EmitScalarExpr(E->getArg(0));
8450     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8451     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8452     return Builder.CreateCall(F, {TDB, Control});
8453   }
8454   case SystemZ::BI__builtin_tbeginc: {
8455     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8456     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8457     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8458     return Builder.CreateCall(F, {TDB, Control});
8459   }
8460   case SystemZ::BI__builtin_tabort: {
8461     Value *Data = EmitScalarExpr(E->getArg(0));
8462     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8463     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8464   }
8465   case SystemZ::BI__builtin_non_tx_store: {
8466     Value *Address = EmitScalarExpr(E->getArg(0));
8467     Value *Data = EmitScalarExpr(E->getArg(1));
8468     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8469     return Builder.CreateCall(F, {Data, Address});
8470   }
8471
8472   // Vector builtins.  Note that most vector builtins are mapped automatically
8473   // to target-specific LLVM intrinsics.  The ones handled specially here can
8474   // be represented via standard LLVM IR, which is preferable because it
8475   // enables common LLVM optimizations.
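  // For instance, the population-count builtins just below lower to the
  // generic llvm.ctpop intrinsic on the converted vector type instead of an
  // s390-specific intrinsic.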
8476
8477   case SystemZ::BI__builtin_s390_vpopctb:
8478   case SystemZ::BI__builtin_s390_vpopcth:
8479   case SystemZ::BI__builtin_s390_vpopctf:
8480   case SystemZ::BI__builtin_s390_vpopctg: {
8481     llvm::Type *ResultType = ConvertType(E->getType());
8482     Value *X = EmitScalarExpr(E->getArg(0));
8483     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8484     return Builder.CreateCall(F, X);
8485   }
8486
8487   case SystemZ::BI__builtin_s390_vclzb:
8488   case SystemZ::BI__builtin_s390_vclzh:
8489   case SystemZ::BI__builtin_s390_vclzf:
8490   case SystemZ::BI__builtin_s390_vclzg: {
8491     llvm::Type *ResultType = ConvertType(E->getType());
8492     Value *X = EmitScalarExpr(E->getArg(0));
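    // The i1 'false' below is ctlz's is_zero_undef flag: a zero input yields
    // the type's bit width instead of an undefined result.  The cttz case
    // below uses the same convention.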
8493     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8494     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8495     return Builder.CreateCall(F, {X, Undef});
8496   }
8497
8498   case SystemZ::BI__builtin_s390_vctzb:
8499   case SystemZ::BI__builtin_s390_vctzh:
8500   case SystemZ::BI__builtin_s390_vctzf:
8501   case SystemZ::BI__builtin_s390_vctzg: {
8502     llvm::Type *ResultType = ConvertType(E->getType());
8503     Value *X = EmitScalarExpr(E->getArg(0));
8504     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8505     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8506     return Builder.CreateCall(F, {X, Undef});
8507   }
8508
8509   case SystemZ::BI__builtin_s390_vfsqdb: {
8510     llvm::Type *ResultType = ConvertType(E->getType());
8511     Value *X = EmitScalarExpr(E->getArg(0));
8512     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8513     return Builder.CreateCall(F, X);
8514   }
8515   case SystemZ::BI__builtin_s390_vfmadb: {
8516     llvm::Type *ResultType = ConvertType(E->getType());
8517     Value *X = EmitScalarExpr(E->getArg(0));
8518     Value *Y = EmitScalarExpr(E->getArg(1));
8519     Value *Z = EmitScalarExpr(E->getArg(2));
8520     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8521     return Builder.CreateCall(F, {X, Y, Z});
8522   }
8523   case SystemZ::BI__builtin_s390_vfmsdb: {
8524     llvm::Type *ResultType = ConvertType(E->getType());
8525     Value *X = EmitScalarExpr(E->getArg(0));
8526     Value *Y = EmitScalarExpr(E->getArg(1));
8527     Value *Z = EmitScalarExpr(E->getArg(2));
8528     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8529     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8530     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8531   }
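  // Here and in vflndb below, negation is spelled as 'fsub -0.0, x';
  // getZeroValueForNegation is expected to return negative zero, which is
  // the canonical fneg form in this version of LLVM IR.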
8532   case SystemZ::BI__builtin_s390_vflpdb: {
8533     llvm::Type *ResultType = ConvertType(E->getType());
8534     Value *X = EmitScalarExpr(E->getArg(0));
8535     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8536     return Builder.CreateCall(F, X);
8537   }
8538   case SystemZ::BI__builtin_s390_vflndb: {
8539     llvm::Type *ResultType = ConvertType(E->getType());
8540     Value *X = EmitScalarExpr(E->getArg(0));
8541     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8542     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8543     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8544   }
8545   case SystemZ::BI__builtin_s390_vfidb: {
8546     llvm::Type *ResultType = ConvertType(E->getType());
8547     Value *X = EmitScalarExpr(E->getArg(0));
8548     // Constant-fold the M4 and M5 mask arguments.
8549     llvm::APSInt M4, M5;
8550     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8551     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8552     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8553     (void)IsConstM4; (void)IsConstM5;
8554     // Check whether this instance of vfidb can be represented via an LLVM
8555     // standard intrinsic.  We only support some combinations of M4 and M5.
8556     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8557     switch (M4.getZExtValue()) {
8558     default: break;
8559     case 0:  // IEEE-inexact exception allowed
8560       switch (M5.getZExtValue()) {
8561       default: break;
8562       case 0: ID = Intrinsic::rint; break;
8563       }
8564       break;
8565     case 4:  // IEEE-inexact exception suppressed
8566       switch (M5.getZExtValue()) {
8567       default: break;
8568       case 0: ID = Intrinsic::nearbyint; break;
8569       case 1: ID = Intrinsic::round; break;
8570       case 5: ID = Intrinsic::trunc; break;
8571       case 6: ID = Intrinsic::ceil; break;
8572       case 7: ID = Intrinsic::floor; break;
8573       }
8574       break;
8575     }
8576     if (ID != Intrinsic::not_intrinsic) {
8577       Function *F = CGM.getIntrinsic(ID, ResultType);
8578       return Builder.CreateCall(F, X);
8579     }
8580     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8581     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8582     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8583     return Builder.CreateCall(F, {X, M4Value, M5Value});
8584   }
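  // For example, a call such as __builtin_s390_vfidb(x, 4, 6) (inexact
  // suppressed, round toward +infinity) should be emitted above as an
  // llvm.ceil call on the vector double result type rather than as the
  // target-specific s390.vfidb intrinsic.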
8585
8586   // Vector intrinsics that output the post-instruction CC value.
8587
8588 #define INTRINSIC_WITH_CC(NAME) \
8589     case SystemZ::BI__builtin_##NAME: \
8590       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8591
8592   INTRINSIC_WITH_CC(s390_vpkshs);
8593   INTRINSIC_WITH_CC(s390_vpksfs);
8594   INTRINSIC_WITH_CC(s390_vpksgs);
8595
8596   INTRINSIC_WITH_CC(s390_vpklshs);
8597   INTRINSIC_WITH_CC(s390_vpklsfs);
8598   INTRINSIC_WITH_CC(s390_vpklsgs);
8599
8600   INTRINSIC_WITH_CC(s390_vceqbs);
8601   INTRINSIC_WITH_CC(s390_vceqhs);
8602   INTRINSIC_WITH_CC(s390_vceqfs);
8603   INTRINSIC_WITH_CC(s390_vceqgs);
8604
8605   INTRINSIC_WITH_CC(s390_vchbs);
8606   INTRINSIC_WITH_CC(s390_vchhs);
8607   INTRINSIC_WITH_CC(s390_vchfs);
8608   INTRINSIC_WITH_CC(s390_vchgs);
8609
8610   INTRINSIC_WITH_CC(s390_vchlbs);
8611   INTRINSIC_WITH_CC(s390_vchlhs);
8612   INTRINSIC_WITH_CC(s390_vchlfs);
8613   INTRINSIC_WITH_CC(s390_vchlgs);
8614
8615   INTRINSIC_WITH_CC(s390_vfaebs);
8616   INTRINSIC_WITH_CC(s390_vfaehs);
8617   INTRINSIC_WITH_CC(s390_vfaefs);
8618
8619   INTRINSIC_WITH_CC(s390_vfaezbs);
8620   INTRINSIC_WITH_CC(s390_vfaezhs);
8621   INTRINSIC_WITH_CC(s390_vfaezfs);
8622
8623   INTRINSIC_WITH_CC(s390_vfeebs);
8624   INTRINSIC_WITH_CC(s390_vfeehs);
8625   INTRINSIC_WITH_CC(s390_vfeefs);
8626
8627   INTRINSIC_WITH_CC(s390_vfeezbs);
8628   INTRINSIC_WITH_CC(s390_vfeezhs);
8629   INTRINSIC_WITH_CC(s390_vfeezfs);
8630
8631   INTRINSIC_WITH_CC(s390_vfenebs);
8632   INTRINSIC_WITH_CC(s390_vfenehs);
8633   INTRINSIC_WITH_CC(s390_vfenefs);
8634
8635   INTRINSIC_WITH_CC(s390_vfenezbs);
8636   INTRINSIC_WITH_CC(s390_vfenezhs);
8637   INTRINSIC_WITH_CC(s390_vfenezfs);
8638
8639   INTRINSIC_WITH_CC(s390_vistrbs);
8640   INTRINSIC_WITH_CC(s390_vistrhs);
8641   INTRINSIC_WITH_CC(s390_vistrfs);
8642
8643   INTRINSIC_WITH_CC(s390_vstrcbs);
8644   INTRINSIC_WITH_CC(s390_vstrchs);
8645   INTRINSIC_WITH_CC(s390_vstrcfs);
8646
8647   INTRINSIC_WITH_CC(s390_vstrczbs);
8648   INTRINSIC_WITH_CC(s390_vstrczhs);
8649   INTRINSIC_WITH_CC(s390_vstrczfs);
8650
8651   INTRINSIC_WITH_CC(s390_vfcedbs);
8652   INTRINSIC_WITH_CC(s390_vfchdbs);
8653   INTRINSIC_WITH_CC(s390_vfchedbs);
8654
8655   INTRINSIC_WITH_CC(s390_vftcidb);
8656
8657 #undef INTRINSIC_WITH_CC
8658
8659   default:
8660     return nullptr;
8661   }
8662 }
8663
8664 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8665                                              const CallExpr *E) {
8666   auto MakeLdg = [&](unsigned IntrinsicID) {
8667     Value *Ptr = EmitScalarExpr(E->getArg(0));
8668     AlignmentSource AlignSource;
8669     clang::CharUnits Align =
8670         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
8671     return Builder.CreateCall(
8672         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8673                                        Ptr->getType()}),
8674         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8675   };
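  // A rough sketch of what MakeLdg produces for, say, a 'const int *'
  // argument (intrinsic name mangling approximate):
  //
  //   %v = call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* %ptr, i32 4)
  //
  // i.e. the pointee type and pointer type are the overloaded types, and the
  // pointee's natural alignment is passed as an explicit i32 operand.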
8676   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8677     Value *Ptr = EmitScalarExpr(E->getArg(0));
8678     return Builder.CreateCall(
8679         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8680                                        Ptr->getType()}),
8681         {Ptr, EmitScalarExpr(E->getArg(1))});
8682   };
8683   switch (BuiltinID) {
8684   case NVPTX::BI__nvvm_atom_add_gen_i:
8685   case NVPTX::BI__nvvm_atom_add_gen_l:
8686   case NVPTX::BI__nvvm_atom_add_gen_ll:
8687     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8688
8689   case NVPTX::BI__nvvm_atom_sub_gen_i:
8690   case NVPTX::BI__nvvm_atom_sub_gen_l:
8691   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8692     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8693
8694   case NVPTX::BI__nvvm_atom_and_gen_i:
8695   case NVPTX::BI__nvvm_atom_and_gen_l:
8696   case NVPTX::BI__nvvm_atom_and_gen_ll:
8697     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8698
8699   case NVPTX::BI__nvvm_atom_or_gen_i:
8700   case NVPTX::BI__nvvm_atom_or_gen_l:
8701   case NVPTX::BI__nvvm_atom_or_gen_ll:
8702     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8703
8704   case NVPTX::BI__nvvm_atom_xor_gen_i:
8705   case NVPTX::BI__nvvm_atom_xor_gen_l:
8706   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8707     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8708
8709   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8710   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8711   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8712     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8713
8714   case NVPTX::BI__nvvm_atom_max_gen_i:
8715   case NVPTX::BI__nvvm_atom_max_gen_l:
8716   case NVPTX::BI__nvvm_atom_max_gen_ll:
8717     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8718
8719   case NVPTX::BI__nvvm_atom_max_gen_ui:
8720   case NVPTX::BI__nvvm_atom_max_gen_ul:
8721   case NVPTX::BI__nvvm_atom_max_gen_ull:
8722     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8723
8724   case NVPTX::BI__nvvm_atom_min_gen_i:
8725   case NVPTX::BI__nvvm_atom_min_gen_l:
8726   case NVPTX::BI__nvvm_atom_min_gen_ll:
8727     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8728
8729   case NVPTX::BI__nvvm_atom_min_gen_ui:
8730   case NVPTX::BI__nvvm_atom_min_gen_ul:
8731   case NVPTX::BI__nvvm_atom_min_gen_ull:
8732     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8733
8734   case NVPTX::BI__nvvm_atom_cas_gen_i:
8735   case NVPTX::BI__nvvm_atom_cas_gen_l:
8736   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8737     // __nvvm_atom_cas_gen_* should return the old value rather than the
8738     // success flag.
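    // (MakeAtomicCmpXchgValue with ReturnBool=false emits a cmpxchg and
    // returns element 0 of its {oldval, i1 success} result.)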
8739     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8740
8741   case NVPTX::BI__nvvm_atom_add_gen_f: {
8742     Value *Ptr = EmitScalarExpr(E->getArg(0));
8743     Value *Val = EmitScalarExpr(E->getArg(1));
8744     // atomicrmw only deals with integer arguments, so we need to use
8745     // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
8746     Value *FnALAF32 =
8747         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8748     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8749   }
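  // Roughly, for a generic 'float *' pointer the call above becomes
  // (name mangling approximate):
  //   %old = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %ptr, float %val)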
8750
8751   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8752     Value *Ptr = EmitScalarExpr(E->getArg(0));
8753     Value *Val = EmitScalarExpr(E->getArg(1));
8754     Value *FnALI32 =
8755         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8756     return Builder.CreateCall(FnALI32, {Ptr, Val});
8757   }
8758
8759   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8760     Value *Ptr = EmitScalarExpr(E->getArg(0));
8761     Value *Val = EmitScalarExpr(E->getArg(1));
8762     Value *FnALD32 =
8763         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8764     return Builder.CreateCall(FnALD32, {Ptr, Val});
8765   }
8766
8767   case NVPTX::BI__nvvm_ldg_c:
8768   case NVPTX::BI__nvvm_ldg_c2:
8769   case NVPTX::BI__nvvm_ldg_c4:
8770   case NVPTX::BI__nvvm_ldg_s:
8771   case NVPTX::BI__nvvm_ldg_s2:
8772   case NVPTX::BI__nvvm_ldg_s4:
8773   case NVPTX::BI__nvvm_ldg_i:
8774   case NVPTX::BI__nvvm_ldg_i2:
8775   case NVPTX::BI__nvvm_ldg_i4:
8776   case NVPTX::BI__nvvm_ldg_l:
8777   case NVPTX::BI__nvvm_ldg_ll:
8778   case NVPTX::BI__nvvm_ldg_ll2:
8779   case NVPTX::BI__nvvm_ldg_uc:
8780   case NVPTX::BI__nvvm_ldg_uc2:
8781   case NVPTX::BI__nvvm_ldg_uc4:
8782   case NVPTX::BI__nvvm_ldg_us:
8783   case NVPTX::BI__nvvm_ldg_us2:
8784   case NVPTX::BI__nvvm_ldg_us4:
8785   case NVPTX::BI__nvvm_ldg_ui:
8786   case NVPTX::BI__nvvm_ldg_ui2:
8787   case NVPTX::BI__nvvm_ldg_ui4:
8788   case NVPTX::BI__nvvm_ldg_ul:
8789   case NVPTX::BI__nvvm_ldg_ull:
8790   case NVPTX::BI__nvvm_ldg_ull2:
8791     // PTX Interoperability section 2.2: "For a vector with an even number of
8792     // elements, its alignment is set to number of elements times the alignment
8793     // of its member: n*alignof(t)."
8794     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8795   case NVPTX::BI__nvvm_ldg_f:
8796   case NVPTX::BI__nvvm_ldg_f2:
8797   case NVPTX::BI__nvvm_ldg_f4:
8798   case NVPTX::BI__nvvm_ldg_d:
8799   case NVPTX::BI__nvvm_ldg_d2:
8800     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8801
8802   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
8803   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
8804   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
8805     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
8806   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
8807   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
8808   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
8809     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
8810   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
8811   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
8812     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
8813   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
8814   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
8815     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
8816   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
8817   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
8818   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
8819     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
8820   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
8821   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
8822   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
8823     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
8824   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
8825   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
8826   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
8827   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
8828   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
8829   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
8830     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
8831   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
8832   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
8833   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
8834   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
8835   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
8836   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
8837     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
8838   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
8839   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
8840   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
8841   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
8842   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
8843   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
8844     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
8845   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
8846   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
8847   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
8848   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
8849   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
8850   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
8851     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
8852   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
8853     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
8854   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
8855     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
8856   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
8857     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
8858   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
8859     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
8860   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
8861   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
8862   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
8863     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
8864   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
8865   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
8866   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
8867     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
8868   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
8869   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
8870   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
8871     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
8872   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
8873   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
8874   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
8875     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
8876   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
8877   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
8878   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
8879     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
8880   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
8881   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
8882   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
8883     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
8884   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
8885   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
8886   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
8887     Value *Ptr = EmitScalarExpr(E->getArg(0));
8888     return Builder.CreateCall(
8889         CGM.getIntrinsic(
8890             Intrinsic::nvvm_atomic_cas_gen_i_cta,
8891             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8892         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8893   }
8894   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
8895   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
8896   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
8897     Value *Ptr = EmitScalarExpr(E->getArg(0));
8898     return Builder.CreateCall(
8899         CGM.getIntrinsic(
8900             Intrinsic::nvvm_atomic_cas_gen_i_sys,
8901             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8902         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8903   }
8904   default:
8905     return nullptr;
8906   }
8907 }
8908
8909 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8910                                                    const CallExpr *E) {
8911   switch (BuiltinID) {
8912   case WebAssembly::BI__builtin_wasm_current_memory: {
8913     llvm::Type *ResultType = ConvertType(E->getType());
8914     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8915     return Builder.CreateCall(Callee);
8916   }
8917   case WebAssembly::BI__builtin_wasm_grow_memory: {
8918     Value *X = EmitScalarExpr(E->getArg(0));
8919     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8920     return Builder.CreateCall(Callee, X);
8921   }
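  // Both builtins work in units of WebAssembly pages (64 KiB);
  // grow_memory is expected to return the previous size in pages, or -1 if
  // the request cannot be satisfied.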
8922
8923   default:
8924     return nullptr;
8925   }
8926 }