//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
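    // GetName returns the "__builtin_"-prefixed name; strlen("__builtin_")
    // is 10, so advancing by 10 characters yields the bare library name.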
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
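  // atomicrmw yields the value in memory before the operation; re-apply Op
  // so these *_and_fetch builtins return the post-operation value.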
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: llvm_unreachable("Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
                                                   false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->Evaluate(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
    Builder.CreateICmpSGE(ArgValue,
                          llvm::Constant::getNullValue(ArgValue->getType()),
                                                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));

    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only supports 0 and 2, so make sure we pass that along as
    // a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
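    // Types 0 and 1 request the maximum remaining object size, 2 and 3 the
    // minimum; bit 1 encodes that choice, which llvm.objectsize takes as i1.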
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
    return RValue::get(Builder.CreateCall2(F,
                                           EmitScalarExpr(E->getArg(0)),
                                           CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
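    // The last operand of llvm.prefetch selects the cache: 1 is a data
    // prefetch, 0 would be an instruction prefetch.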
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined)
      EmitBranch(getTrapBB());
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity;
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);

    return RValue::get(Address);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

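    // Slot 1 of the buffer is left for the eh.sjlj.setjmp intrinsic itself
    // to fill in (presumably the resume address); only slots 0 and 2 are
    // written here.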
    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                 llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    llvm::Type *ElLLVMTy =
      cast<llvm::PointerType>(Ptr->getType())->getElementType();
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ElLLVMTy), Ptr);
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(0);
  }

  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence: {
    llvm::SynchronizationScope Scope;
    if (BuiltinID == Builtin::BI__atomic_signal_fence)
      Scope = llvm::SingleThread;
    else
      Scope = llvm::CrossThread;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::Acquire, Scope);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::Release, Scope);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AcquireRelease, Scope);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
        break;
      }
      return RValue::get(0);
    }

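    // The ordering is not a compile-time constant, so switch on it at run
    // time and emit one fence per valid ordering; relaxed and invalid
    // orderings branch straight to the continuation block with no fence.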
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(0);
  }

    // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }

  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to intrinsic if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(Builder.CreateCall3(F, FirstArg,
                                              EmitScalarExpr(E->getArg(1)),
                                              EmitScalarExpr(E->getArg(2))));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially cast, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  }
  }

  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));

  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));

  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        llvm::APSInt Result;
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
        assert(IsConst && "Constant arg isn't actually constant?");
        (void)IsConst;
        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
      }

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args);
    QualType BuiltinRetType = E->getType();

    llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext());
    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }

  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}

static llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
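  // 'type' is the element-type code taken from the builtin's trailing
  // argument and 'q' selects a 128-bit quad register, doubling the element
  // count. The cases below presumably map 0/5 to i8 (int8/poly8), 1/6/7 to
  // i16 (int16/poly16/half), 2 to i32, 3 to i64, and 4 to f32.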
  switch (type) {
    default: break;
    case 0:
    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
    case 6:
    case 7:
    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
  }
  return 0;
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
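  // Splat lane C of V across all lanes with a shuffle whose mask repeats C.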
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  SmallVector<Constant*, 16> Indices(nElts, C);
  Value* SV = llvm::ConstantVector::get(Indices);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  return Builder.CreateCall(F, Ops, name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
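  // Broadcast the constant shift amount across a vector; when 'neg' is set
  // the amount is negated, matching the NEON convention of encoding right
  // shifts as negative left-shift counts.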
  ConstantInt *CI = cast<ConstantInt>(V);
  int SV = CI->getSExtValue();

  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
  return llvm::ConstantVector::get(CV);
}

/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer.  Skip over implicit
/// casts.
static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
  unsigned Align = 1;
  // Check if the type is a pointer.  The implicit cast operand might not be.
  while (Addr->getType()->isPointerType()) {
    QualType PtTy = Addr->getType()->getPointeeType();
    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
    if (NewA > Align)
      Align = NewA;

    // If the address is an implicit cast, repeat with the cast operand.
    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
      Addr = CastAddr->getSubExpr();
      continue;
    }
    break;
  }
  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
}

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    const FunctionDecl *FD = E->getDirectCallee();
    // Oddly, people sometimes write this call without args and gcc accepts
    // it; it's also marked as varargs in the description file.
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < E->getNumArgs(); i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");

1279     Value *Val0 = Builder.CreateExtractValue(Val, 1);
1280     Value *Val1 = Builder.CreateExtractValue(Val, 0);
1281     Val0 = Builder.CreateZExt(Val0, Int64Ty);
1282     Val1 = Builder.CreateZExt(Val1, Int64Ty);
1283
1284     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1285     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1286     return Builder.CreateOr(Val, Val1);
1287   }
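  // For illustration: the two i32 halves extracted above are zero-extended
  // and recombined as (hi << 32) | lo; the shift is marked nuw because the
  // zero-extension guarantees the high bits are already zero.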

  if (BuiltinID == ARM::BI__builtin_arm_strexd) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);

    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
  }

  SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  // vget_lane and vset_lane are not overloaded and do not have an extra
  // argument that specifies the vector type.
  switch (BuiltinID) {
  default: break;
  case ARM::BI__builtin_neon_vget_lane_i8:
  case ARM::BI__builtin_neon_vget_lane_i16:
  case ARM::BI__builtin_neon_vget_lane_i32:
  case ARM::BI__builtin_neon_vget_lane_i64:
  case ARM::BI__builtin_neon_vget_lane_f32:
  case ARM::BI__builtin_neon_vgetq_lane_i8:
  case ARM::BI__builtin_neon_vgetq_lane_i16:
  case ARM::BI__builtin_neon_vgetq_lane_i32:
  case ARM::BI__builtin_neon_vgetq_lane_i64:
  case ARM::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case ARM::BI__builtin_neon_vset_lane_i8:
  case ARM::BI__builtin_neon_vset_lane_i16:
  case ARM::BI__builtin_neon_vset_lane_i32:
  case ARM::BI__builtin_neon_vset_lane_i64:
  case ARM::BI__builtin_neon_vset_lane_f32:
  case ARM::BI__builtin_neon_vsetq_lane_i8:
  case ARM::BI__builtin_neon_vsetq_lane_i16:
  case ARM::BI__builtin_neon_vsetq_lane_i32:
  case ARM::BI__builtin_neon_vsetq_lane_i64:
  case ARM::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  }
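  // For illustration: a source-level call such as vgetq_lane_s32(v, 1)
  // therefore lowers to a plain
  //   %vget_lane = extractelement <4 x i32> %v, i32 1
  // with no NEON intrinsic involved.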

  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = llvm::Type::getFloatTy(getLLVMContext());
    else
      Ty = llvm::Type::getDoubleTy(getLLVMContext());

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  unsigned type = Result.getZExtValue();
  bool usgn = type & 0x08;
  bool quad = type & 0x10;
  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
  (void)poly;  // Only used in assert()s.
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default: return 0;
  case ARM::BI__builtin_neon_vabd_v:
  case ARM::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case ARM::BI__builtin_neon_vabs_v:
  case ARM::BI__builtin_neon_vabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
                        Ops, "vabs");
  case ARM::BI__builtin_neon_vaddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
                        Ops, "vaddhn");
  case ARM::BI__builtin_neon_vcale_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcage_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcaleq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcageq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcalt_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagt_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagtq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcls_v:
  case ARM::BI__builtin_neon_vclsq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
    return EmitNeonCall(F, Ops, "vcls");
  }
  case ARM::BI__builtin_neon_vclz_v:
  case ARM::BI__builtin_neon_vclzq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, Ty);
    return EmitNeonCall(F, Ops, "vclz");
  }
  case ARM::BI__builtin_neon_vcnt_v:
  case ARM::BI__builtin_neon_vcntq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, Ty);
    return EmitNeonCall(F, Ops, "vcnt");
  }
  case ARM::BI__builtin_neon_vcvt_f16_v: {
    assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_f16: {
    assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_v:
  case ARM::BI__builtin_neon_vcvtq_f32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(getLLVMContext(), 4, quad);
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_s32_v:
  case ARM::BI__builtin_neon_vcvt_u32_v:
  case ARM::BI__builtin_neon_vcvtq_s32_v:
  case ARM::BI__builtin_neon_vcvtq_u32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(getLLVMContext(), 4, quad));
    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_n_f32_v:
  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
    llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty };
    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vcvt_n_s32_v:
  case ARM::BI__builtin_neon_vcvt_n_u32_v:
  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
    llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) };
    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vext_v:
  case ARM::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
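  // For illustration: for vext with immediate 1 on a 4-element vector, the
  // mask built above is <1, 2, 3, 4>, i.e. a window of four consecutive
  // elements taken from the concatenation of the two inputs.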
  case ARM::BI__builtin_neon_vhadd_v:
  case ARM::BI__builtin_neon_vhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
  case ARM::BI__builtin_neon_vhsub_v:
  case ARM::BI__builtin_neon_vhsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
  case ARM::BI__builtin_neon_vld1_v:
  case ARM::BI__builtin_neon_vld1q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
                        Ops, "vld1");
  case ARM::BI__builtin_neon_vld1_lane_v:
  case ARM::BI__builtin_neon_vld1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  case ARM::BI__builtin_neon_vld1_dup_v:
  case ARM::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
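  // For illustration: vld1_dup is thus a scalar load, an insertelement into
  // lane 0 of an undef vector, and an EmitNeonSplat shuffle copying lane 0
  // to every element.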
  case ARM::BI__builtin_neon_vld2_v:
  case ARM::BI__builtin_neon_vld2q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_v:
  case ARM::BI__builtin_neon_vld3q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_v:
  case ARM::BI__builtin_neon_vld4q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_lane_v:
  case ARM::BI__builtin_neon_vld2q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_lane_v:
  case ARM::BI__builtin_neon_vld3q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_lane_v:
  case ARM::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_dup_v:
  case ARM::BI__builtin_neon_vld3_dup_v:
  case ARM::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case ARM::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case ARM::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case ARM::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      Function *F = CGM.getIntrinsic(Int, Ty);
      Value *Align = GetPointeeAlignment(*this, E->getArg(1));
      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case ARM::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case ARM::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case ARM::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // Splat lane 0 to all elts in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vmax_v:
  case ARM::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case ARM::BI__builtin_neon_vmin_v:
  case ARM::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case ARM::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case ARM::BI__builtin_neon_vmovn_v: {
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case ARM::BI__builtin_neon_vmul_v:
  case ARM::BI__builtin_neon_vmulq_v:
    assert(poly && "vmul builtin only supported for polynomial types");
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
                        Ops, "vmul");
  case ARM::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case ARM::BI__builtin_neon_vpadal_v:
  case ARM::BI__builtin_neon_vpadalq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
  }
  case ARM::BI__builtin_neon_vpadd_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
                        Ops, "vpadd");
  case ARM::BI__builtin_neon_vpaddl_v:
  case ARM::BI__builtin_neon_vpaddlq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case ARM::BI__builtin_neon_vpmax_v:
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case ARM::BI__builtin_neon_vpmin_v:
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case ARM::BI__builtin_neon_vqabs_v:
  case ARM::BI__builtin_neon_vqabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
                        Ops, "vqabs");
  case ARM::BI__builtin_neon_vqadd_v:
  case ARM::BI__builtin_neon_vqaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
  case ARM::BI__builtin_neon_vqdmlal_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
                        Ops, "vqdmlal");
  case ARM::BI__builtin_neon_vqdmlsl_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
                        Ops, "vqdmlsl");
  case ARM::BI__builtin_neon_vqdmulh_v:
  case ARM::BI__builtin_neon_vqdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
                        Ops, "vqdmulh");
  case ARM::BI__builtin_neon_vqdmull_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
                        Ops, "vqdmull");
  case ARM::BI__builtin_neon_vqmovn_v:
    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
  case ARM::BI__builtin_neon_vqmovun_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
                        Ops, "vqmovun");
  case ARM::BI__builtin_neon_vqneg_v:
  case ARM::BI__builtin_neon_vqnegq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
                        Ops, "vqneg");
  case ARM::BI__builtin_neon_vqrdmulh_v:
  case ARM::BI__builtin_neon_vqrdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
                        Ops, "vqrdmulh");
  case ARM::BI__builtin_neon_vqrshl_v:
  case ARM::BI__builtin_neon_vqrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
  case ARM::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqshl_v:
  case ARM::BI__builtin_neon_vqshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
  case ARM::BI__builtin_neon_vqshl_n_v:
  case ARM::BI__builtin_neon_vqshlq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case ARM::BI__builtin_neon_vqshlu_n_v:
  case ARM::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
                        Ops, "vqshlu", 1, false);
  case ARM::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqsub_v:
  case ARM::BI__builtin_neon_vqsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
  case ARM::BI__builtin_neon_vraddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
                        Ops, "vraddhn");
  case ARM::BI__builtin_neon_vrecpe_v:
  case ARM::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case ARM::BI__builtin_neon_vrecps_v:
  case ARM::BI__builtin_neon_vrecpsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
                        Ops, "vrecps");
  case ARM::BI__builtin_neon_vrhadd_v:
  case ARM::BI__builtin_neon_vrhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
  case ARM::BI__builtin_neon_vrshl_v:
  case ARM::BI__builtin_neon_vrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
  case ARM::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case ARM::BI__builtin_neon_vrshr_n_v:
  case ARM::BI__builtin_neon_vrshrq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
  case ARM::BI__builtin_neon_vrsqrte_v:
  case ARM::BI__builtin_neon_vrsqrteq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
                        Ops, "vrsqrte");
  case ARM::BI__builtin_neon_vrsqrts_v:
  case ARM::BI__builtin_neon_vrsqrtsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
                        Ops, "vrsqrts");
  case ARM::BI__builtin_neon_vrsra_n_v:
  case ARM::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case ARM::BI__builtin_neon_vrsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
                        Ops, "vrsubhn");
  case ARM::BI__builtin_neon_vshl_v:
  case ARM::BI__builtin_neon_vshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
  case ARM::BI__builtin_neon_vshll_n_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
  case ARM::BI__builtin_neon_vshl_n_v:
  case ARM::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
  case ARM::BI__builtin_neon_vshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
                        Ops, "vshrn_n", 1, true);
  case ARM::BI__builtin_neon_vshr_n_v:
  case ARM::BI__builtin_neon_vshrq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    if (usgn)
      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
    else
      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
  case ARM::BI__builtin_neon_vsri_n_v:
  case ARM::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
  case ARM::BI__builtin_neon_vsli_n_v:
  case ARM::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case ARM::BI__builtin_neon_vsra_n_v:
  case ARM::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
    if (usgn)
      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
    else
      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vst1_v:
  case ARM::BI__builtin_neon_vst1q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst1_lane_v:
  case ARM::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
  case ARM::BI__builtin_neon_vst2_v:
  case ARM::BI__builtin_neon_vst2q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst2_lane_v:
  case ARM::BI__builtin_neon_vst2q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_v:
  case ARM::BI__builtin_neon_vst3q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_lane_v:
  case ARM::BI__builtin_neon_vst3q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_v:
  case ARM::BI__builtin_neon_vst4q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_lane_v:
  case ARM::BI__builtin_neon_vst4q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
                        Ops, "vsubhn");
  case ARM::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case ARM::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case ARM::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case ARM::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case ARM::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case ARM::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case ARM::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case ARM::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  case ARM::BI__builtin_neon_vtst_v:
  case ARM::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case ARM::BI__builtin_neon_vtrn_v:
  case ARM::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
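  // For illustration: with 4 elements, the two vtrn masks built above are
  // <0, 4, 2, 6> and <1, 5, 3, 7>, and each shuffle result is stored to one
  // of the two consecutive result vectors reached through Ops[0].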
  case ARM::BI__builtin_neon_vuzp_v:
  case ARM::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vzip_v:
  case ARM::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
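  // For illustration: with 4 elements, vuzp uses the masks <0, 2, 4, 6> and
  // <1, 3, 5, 7> (even/odd deinterleave), while vzip uses <0, 4, 1, 5> and
  // <2, 6, 3, 7> (interleave of the two input halves).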
  }
}

llvm::Value *CodeGenFunction::
BuildVector(const SmallVectorImpl<llvm::Value*> &Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    std::vector<llvm::Constant*> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i],
               llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i));

  return Result;
}
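// For illustration: four constant i32 operands such as {1, 2, 3, 4} fold to
// the single constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, while any
// non-constant operand falls through to a chain of insertelement instructions.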

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return 0;
  case X86::BI__builtin_ia32_pslldi128:
  case X86::BI__builtin_ia32_psllqi128:
  case X86::BI__builtin_ia32_psllwi128:
  case X86::BI__builtin_ia32_psradi128:
  case X86::BI__builtin_ia32_psrawi128:
  case X86::BI__builtin_ia32_psrldi128:
  case X86::BI__builtin_ia32_psrlqi128:
  case X86::BI__builtin_ia32_psrlwi128: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
                                         Ops[1], Zero, "insert");
    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi128:
      name = "pslldi";
      ID = Intrinsic::x86_sse2_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi128:
      name = "psllqi";
      ID = Intrinsic::x86_sse2_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi128:
      name = "psllwi";
      ID = Intrinsic::x86_sse2_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi128:
      name = "psradi";
      ID = Intrinsic::x86_sse2_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi128:
      name = "psrawi";
      ID = Intrinsic::x86_sse2_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi128:
      name = "psrldi";
      ID = Intrinsic::x86_sse2_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi128:
      name = "psrlqi";
      ID = Intrinsic::x86_sse2_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi128:
      name = "psrlwi";
      ID = Intrinsic::x86_sse2_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
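  // For illustration: the scalar count is widened to i64 and placed in
  // element 0 of a <2 x i64> because the x86_sse2_psll/psra/psrl intrinsics
  // take the shift amount as a full vector operand rather than a scalar.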
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_pslldi:
  case X86::BI__builtin_ia32_psllqi:
  case X86::BI__builtin_ia32_psllwi:
  case X86::BI__builtin_ia32_psradi:
  case X86::BI__builtin_ia32_psrawi:
  case X86::BI__builtin_ia32_psrldi:
  case X86::BI__builtin_ia32_psrlqi:
  case X86::BI__builtin_ia32_psrlwi: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi:
      name = "pslldi";
      ID = Intrinsic::x86_mmx_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi:
      name = "psllqi";
      ID = Intrinsic::x86_mmx_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi:
      name = "psllwi";
      ID = Intrinsic::x86_mmx_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi:
      name = "psradi";
      ID = Intrinsic::x86_mmx_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi:
      name = "psrawi";
      ID = Intrinsic::x86_mmx_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi:
      name = "psrldi";
      ID = Intrinsic::x86_mmx_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi:
      name = "psrlqi";
      ID = Intrinsic::x86_mmx_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi:
      name = "psrlwi";
      ID = Intrinsic::x86_mmx_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
  case X86::BI__builtin_ia32_cmpps: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
    return Builder.CreateCall(F, Ops, "cmpps");
  }
  case X86::BI__builtin_ia32_cmpss: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
    return Builder.CreateCall(F, Ops, "cmpss");
  }
  case X86::BI__builtin_ia32_ldmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_cmppd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
    return Builder.CreateCall(F, Ops, "cmppd");
  }
  case X86::BI__builtin_ia32_cmpsd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
    return Builder.CreateCall(F, Ops, "cmpsd");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // cast val v2i64
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // extract (0, 1)
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // cast pointer to i64 & store
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 9 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      // The shift amount is in bits.
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
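  // For illustration: a palignr shift of 4 selects the 8 consecutive bytes
  // starting at offset 4 of the concatenated inputs, which the shuffle mask
  // <4, 5, ..., 11> built above expresses directly.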
  case X86::BI__builtin_ia32_palignr128: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      // The shift amount is in bits.
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_movntps:
  case X86::BI__builtin_ia32_movntpd:
  case X86::BI__builtin_ia32_movntdq:
  case X86::BI__builtin_ia32_movnti: {
    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
                                           Builder.getInt32(1));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(16);
    return SI;
  }
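  // For illustration: the !nontemporal metadata on the store is what later
  // allows the x86 backend to select the MOVNT* family of non-temporal
  // stores instead of ordinary stores.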
  // 3DNow!
  case X86::BI__builtin_ia32_pavgusb:
  case X86::BI__builtin_ia32_pf2id:
  case X86::BI__builtin_ia32_pfacc:
  case X86::BI__builtin_ia32_pfadd:
  case X86::BI__builtin_ia32_pfcmpeq:
  case X86::BI__builtin_ia32_pfcmpge:
  case X86::BI__builtin_ia32_pfcmpgt:
  case X86::BI__builtin_ia32_pfmax:
  case X86::BI__builtin_ia32_pfmin:
  case X86::BI__builtin_ia32_pfmul:
  case X86::BI__builtin_ia32_pfrcp:
  case X86::BI__builtin_ia32_pfrcpit1:
  case X86::BI__builtin_ia32_pfrcpit2:
  case X86::BI__builtin_ia32_pfrsqrt:
  case X86::BI__builtin_ia32_pfrsqit1:
  case X86::BI__builtin_ia32_pfrsqrtit1:
  case X86::BI__builtin_ia32_pfsub:
  case X86::BI__builtin_ia32_pfsubr:
  case X86::BI__builtin_ia32_pi2fd:
  case X86::BI__builtin_ia32_pmulhrw:
  case X86::BI__builtin_ia32_pf2iw:
  case X86::BI__builtin_ia32_pfnacc:
  case X86::BI__builtin_ia32_pfpnacc:
  case X86::BI__builtin_ia32_pi2fw:
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (BuiltinID) {
    case X86::BI__builtin_ia32_pavgusb:
      name = "pavgusb";
      ID = Intrinsic::x86_3dnow_pavgusb;
      break;
    case X86::BI__builtin_ia32_pf2id:
      name = "pf2id";
      ID = Intrinsic::x86_3dnow_pf2id;
      break;
    case X86::BI__builtin_ia32_pfacc:
      name = "pfacc";
      ID = Intrinsic::x86_3dnow_pfacc;
      break;
    case X86::BI__builtin_ia32_pfadd:
      name = "pfadd";
      ID = Intrinsic::x86_3dnow_pfadd;
      break;
    case X86::BI__builtin_ia32_pfcmpeq:
      name = "pfcmpeq";
      ID = Intrinsic::x86_3dnow_pfcmpeq;
      break;
    case X86::BI__builtin_ia32_pfcmpge:
      name = "pfcmpge";
      ID = Intrinsic::x86_3dnow_pfcmpge;
      break;
    case X86::BI__builtin_ia32_pfcmpgt:
      name = "pfcmpgt";
      ID = Intrinsic::x86_3dnow_pfcmpgt;
      break;
    case X86::BI__builtin_ia32_pfmax:
      name = "pfmax";
      ID = Intrinsic::x86_3dnow_pfmax;
      break;
    case X86::BI__builtin_ia32_pfmin:
      name = "pfmin";
      ID = Intrinsic::x86_3dnow_pfmin;
      break;
    case X86::BI__builtin_ia32_pfmul:
      name = "pfmul";
      ID = Intrinsic::x86_3dnow_pfmul;
      break;
    case X86::BI__builtin_ia32_pfrcp:
      name = "pfrcp";
      ID = Intrinsic::x86_3dnow_pfrcp;
      break;
    case X86::BI__builtin_ia32_pfrcpit1:
      name = "pfrcpit1";
      ID = Intrinsic::x86_3dnow_pfrcpit1;
      break;
    case X86::BI__builtin_ia32_pfrcpit2:
      name = "pfrcpit2";
      ID = Intrinsic::x86_3dnow_pfrcpit2;
      break;
    case X86::BI__builtin_ia32_pfrsqrt:
      name = "pfrsqrt";
      ID = Intrinsic::x86_3dnow_pfrsqrt;
      break;
    case X86::BI__builtin_ia32_pfrsqit1:
    case X86::BI__builtin_ia32_pfrsqrtit1:
      name = "pfrsqit1";
      ID = Intrinsic::x86_3dnow_pfrsqit1;
      break;
    case X86::BI__builtin_ia32_pfsub:
      name = "pfsub";
      ID = Intrinsic::x86_3dnow_pfsub;
      break;
    case X86::BI__builtin_ia32_pfsubr:
      name = "pfsubr";
      ID = Intrinsic::x86_3dnow_pfsubr;
      break;
    case X86::BI__builtin_ia32_pi2fd:
      name = "pi2fd";
      ID = Intrinsic::x86_3dnow_pi2fd;
      break;
    case X86::BI__builtin_ia32_pmulhrw:
      name = "pmulhrw";
      ID = Intrinsic::x86_3dnow_pmulhrw;
      break;
    case X86::BI__builtin_ia32_pf2iw:
      name = "pf2iw";
      ID = Intrinsic::x86_3dnowa_pf2iw;
      break;
    case X86::BI__builtin_ia32_pfnacc:
      name = "pfnacc";
      ID = Intrinsic::x86_3dnowa_pfnacc;
      break;
    case X86::BI__builtin_ia32_pfpnacc:
      name = "pfpnacc";
      ID = Intrinsic::x86_3dnowa_pfpnacc;
      break;
    case X86::BI__builtin_ia32_pi2fw:
      name = "pi2fw";
      ID = Intrinsic::x86_3dnowa_pi2fw;
      break;
    case X86::BI__builtin_ia32_pswapdsf:
    case X86::BI__builtin_ia32_pswapdsi:
      name = "pswapd";
      ID = Intrinsic::x86_3dnowa_pswapd;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
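  // For illustration: the GEP above folds the (offset, base) operand pair of
  // vec_ld and friends into the single i8* address expected by the
  // ppc_altivec_lv* intrinsics.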

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  }
  return 0;
}