]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
Copy head to stable/9 as part of 9.0-RELEASE release cycle.
[FreeBSD/stable/9.git] / contrib / llvm / tools / clang / lib / CodeGen / CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "TargetInfo.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "CGObjCRuntime.h"
18 #include "clang/Basic/TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Basic/TargetBuiltins.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/Target/TargetData.h"
25
26 using namespace clang;
27 using namespace CodeGen;
28 using namespace llvm;
29
30 static void EmitMemoryBarrier(CodeGenFunction &CGF,
31                               bool LoadLoad, bool LoadStore,
32                               bool StoreLoad, bool StoreStore,
33                               bool Device) {
34   Value *True = CGF.Builder.getTrue();
35   Value *False = CGF.Builder.getFalse();
36   Value *C[5] = { LoadLoad ? True : False,
37                   LoadStore ? True : False,
38                   StoreLoad ? True : False,
39                   StoreStore ? True : False,
40                   Device ? True : False };
41   CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier), C);
42 }
43
44 /// Emit the conversions required to turn the given value into an
45 /// integer of the given size.
46 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
47                         QualType T, const llvm::IntegerType *IntType) {
48   V = CGF.EmitToMemory(V, T);
49
50   if (V->getType()->isPointerTy())
51     return CGF.Builder.CreatePtrToInt(V, IntType);
52
53   assert(V->getType() == IntType);
54   return V;
55 }
56
57 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
58                           QualType T, const llvm::Type *ResultType) {
59   V = CGF.EmitFromMemory(V, T);
60
61   if (ResultType->isPointerTy())
62     return CGF.Builder.CreateIntToPtr(V, ResultType);
63
64   assert(V->getType() == ResultType);
65   return V;
66 }
67
68 // The atomic builtins are also full memory barriers. This is a utility for
69 // wrapping a call to the builtins with memory barriers.
70 static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
71                                   ArrayRef<Value *> Args) {
72   // FIXME: We need a target hook for whether this applies to device memory or
73   // not.
74   bool Device = true;
75
76   // Create barriers both before and after the call.
77   EmitMemoryBarrier(CGF, true, true, true, true, Device);
78   Value *Result = CGF.Builder.CreateCall(Fn, Args);
79   EmitMemoryBarrier(CGF, true, true, true, true, Device);
80   return Result;
81 }
82
83 /// Utility to insert an atomic instruction based on Instrinsic::ID
84 /// and the expression node.
85 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
86                                Intrinsic::ID Id, const CallExpr *E) {
87   QualType T = E->getType();
88   assert(E->getArg(0)->getType()->isPointerType());
89   assert(CGF.getContext().hasSameUnqualifiedType(T,
90                                   E->getArg(0)->getType()->getPointeeType()));
91   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
92
93   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
94   unsigned AddrSpace =
95     cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
96
97   llvm::IntegerType *IntType =
98     llvm::IntegerType::get(CGF.getLLVMContext(),
99                            CGF.getContext().getTypeSize(T));
100   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
101
102   llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
103   llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes);
104
105   llvm::Value *Args[2];
106   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
107   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
108   const llvm::Type *ValueType = Args[1]->getType();
109   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
110
111   llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args);
112   Result = EmitFromInt(CGF, Result, T, ValueType);
113   return RValue::get(Result);
114 }
115
116 /// Utility to insert an atomic instruction based Instrinsic::ID and
117 /// the expression node, where the return value is the result of the
118 /// operation.
119 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
120                                    Intrinsic::ID Id, const CallExpr *E,
121                                    Instruction::BinaryOps Op) {
122   QualType T = E->getType();
123   assert(E->getArg(0)->getType()->isPointerType());
124   assert(CGF.getContext().hasSameUnqualifiedType(T,
125                                   E->getArg(0)->getType()->getPointeeType()));
126   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
127
128   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
129   unsigned AddrSpace =
130     cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
131
132   llvm::IntegerType *IntType =
133     llvm::IntegerType::get(CGF.getLLVMContext(),
134                            CGF.getContext().getTypeSize(T));
135   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
136
137   llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
138   llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes);
139
140   llvm::Value *Args[2];
141   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
142   const llvm::Type *ValueType = Args[1]->getType();
143   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
144   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
145
146   llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args);
147   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
148   Result = EmitFromInt(CGF, Result, T, ValueType);
149   return RValue::get(Result);
150 }
151
152 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
153 /// which must be a scalar floating point type.
154 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
155   const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
156   assert(ValTyP && "isn't scalar fp type!");
157   
158   StringRef FnName;
159   switch (ValTyP->getKind()) {
160   default: assert(0 && "Isn't a scalar fp type!");
161   case BuiltinType::Float:      FnName = "fabsf"; break;
162   case BuiltinType::Double:     FnName = "fabs"; break;
163   case BuiltinType::LongDouble: FnName = "fabsl"; break;
164   }
165   
166   // The prototype is something that takes and returns whatever V's type is.
167   llvm::Type *ArgTys[] = { V->getType() };
168   llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), ArgTys,
169                                                    false);
170   llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
171
172   return CGF.Builder.CreateCall(Fn, V, "abs");
173 }
174
175 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
176                                         unsigned BuiltinID, const CallExpr *E) {
177   // See if we can constant fold this builtin.  If so, don't emit it at all.
178   Expr::EvalResult Result;
179   if (E->Evaluate(Result, CGM.getContext()) &&
180       !Result.hasSideEffects()) {
181     if (Result.Val.isInt())
182       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
183                                                 Result.Val.getInt()));
184     if (Result.Val.isFloat())
185       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
186                                                Result.Val.getFloat()));
187   }
188
189   switch (BuiltinID) {
190   default: break;  // Handle intrinsics and libm functions below.
191   case Builtin::BI__builtin___CFStringMakeConstantString:
192   case Builtin::BI__builtin___NSStringMakeConstantString:
193     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
194   case Builtin::BI__builtin_stdarg_start:
195   case Builtin::BI__builtin_va_start:
196   case Builtin::BI__builtin_va_end: {
197     Value *ArgValue = EmitVAListRef(E->getArg(0));
198     const llvm::Type *DestType = Int8PtrTy;
199     if (ArgValue->getType() != DestType)
200       ArgValue = Builder.CreateBitCast(ArgValue, DestType,
201                                        ArgValue->getName().data());
202
203     Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
204       Intrinsic::vaend : Intrinsic::vastart;
205     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
206   }
207   case Builtin::BI__builtin_va_copy: {
208     Value *DstPtr = EmitVAListRef(E->getArg(0));
209     Value *SrcPtr = EmitVAListRef(E->getArg(1));
210
211     const llvm::Type *Type = Int8PtrTy;
212
213     DstPtr = Builder.CreateBitCast(DstPtr, Type);
214     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
215     return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
216                                            DstPtr, SrcPtr));
217   }
218   case Builtin::BI__builtin_abs: {
219     Value *ArgValue = EmitScalarExpr(E->getArg(0));
220
221     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
222     Value *CmpResult =
223     Builder.CreateICmpSGE(ArgValue,
224                           llvm::Constant::getNullValue(ArgValue->getType()),
225                                                             "abscond");
226     Value *Result =
227       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
228
229     return RValue::get(Result);
230   }
231   case Builtin::BI__builtin_ctz:
232   case Builtin::BI__builtin_ctzl:
233   case Builtin::BI__builtin_ctzll: {
234     Value *ArgValue = EmitScalarExpr(E->getArg(0));
235
236     llvm::Type *ArgType = ArgValue->getType();
237     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
238
239     const llvm::Type *ResultType = ConvertType(E->getType());
240     Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
241     if (Result->getType() != ResultType)
242       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
243                                      "cast");
244     return RValue::get(Result);
245   }
246   case Builtin::BI__builtin_clz:
247   case Builtin::BI__builtin_clzl:
248   case Builtin::BI__builtin_clzll: {
249     Value *ArgValue = EmitScalarExpr(E->getArg(0));
250
251     llvm::Type *ArgType = ArgValue->getType();
252     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
253
254     const llvm::Type *ResultType = ConvertType(E->getType());
255     Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
256     if (Result->getType() != ResultType)
257       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
258                                      "cast");
259     return RValue::get(Result);
260   }
261   case Builtin::BI__builtin_ffs:
262   case Builtin::BI__builtin_ffsl:
263   case Builtin::BI__builtin_ffsll: {
264     // ffs(x) -> x ? cttz(x) + 1 : 0
265     Value *ArgValue = EmitScalarExpr(E->getArg(0));
266
267     llvm::Type *ArgType = ArgValue->getType();
268     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
269
270     const llvm::Type *ResultType = ConvertType(E->getType());
271     Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
272                                    llvm::ConstantInt::get(ArgType, 1), "tmp");
273     Value *Zero = llvm::Constant::getNullValue(ArgType);
274     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
275     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
276     if (Result->getType() != ResultType)
277       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
278                                      "cast");
279     return RValue::get(Result);
280   }
281   case Builtin::BI__builtin_parity:
282   case Builtin::BI__builtin_parityl:
283   case Builtin::BI__builtin_parityll: {
284     // parity(x) -> ctpop(x) & 1
285     Value *ArgValue = EmitScalarExpr(E->getArg(0));
286
287     llvm::Type *ArgType = ArgValue->getType();
288     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
289
290     const llvm::Type *ResultType = ConvertType(E->getType());
291     Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
292     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
293                                       "tmp");
294     if (Result->getType() != ResultType)
295       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
296                                      "cast");
297     return RValue::get(Result);
298   }
299   case Builtin::BI__builtin_popcount:
300   case Builtin::BI__builtin_popcountl:
301   case Builtin::BI__builtin_popcountll: {
302     Value *ArgValue = EmitScalarExpr(E->getArg(0));
303
304     llvm::Type *ArgType = ArgValue->getType();
305     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
306
307     const llvm::Type *ResultType = ConvertType(E->getType());
308     Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
309     if (Result->getType() != ResultType)
310       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
311                                      "cast");
312     return RValue::get(Result);
313   }
314   case Builtin::BI__builtin_expect: {
315     Value *ArgValue = EmitScalarExpr(E->getArg(0));
316     llvm::Type *ArgType = ArgValue->getType();
317
318     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
319     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
320
321     Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
322                                         "expval");
323     return RValue::get(Result);
324   }
325   case Builtin::BI__builtin_bswap32:
326   case Builtin::BI__builtin_bswap64: {
327     Value *ArgValue = EmitScalarExpr(E->getArg(0));
328     llvm::Type *ArgType = ArgValue->getType();
329     Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
330     return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
331   }
332   case Builtin::BI__builtin_object_size: {
333     // We pass this builtin onto the optimizer so that it can
334     // figure out the object size in more complex cases.
335     llvm::Type *ResType = ConvertType(E->getType());
336     
337     // LLVM only supports 0 and 2, make sure that we pass along that
338     // as a boolean.
339     Value *Ty = EmitScalarExpr(E->getArg(1));
340     ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
341     assert(CI);
342     uint64_t val = CI->getZExtValue();
343     CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);    
344     
345     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
346     return RValue::get(Builder.CreateCall2(F,
347                                            EmitScalarExpr(E->getArg(0)),
348                                            CI));
349   }
350   case Builtin::BI__builtin_prefetch: {
351     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
352     // FIXME: Technically these constants should of type 'int', yes?
353     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
354       llvm::ConstantInt::get(Int32Ty, 0);
355     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
356       llvm::ConstantInt::get(Int32Ty, 3);
357     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
358     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
359     return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
360   }
361   case Builtin::BI__builtin_trap: {
362     Value *F = CGM.getIntrinsic(Intrinsic::trap);
363     return RValue::get(Builder.CreateCall(F));
364   }
365   case Builtin::BI__builtin_unreachable: {
366     if (CatchUndefined)
367       EmitBranch(getTrapBB());
368     else
369       Builder.CreateUnreachable();
370
371     // We do need to preserve an insertion point.
372     EmitBlock(createBasicBlock("unreachable.cont"));
373
374     return RValue::get(0);
375   }
376       
377   case Builtin::BI__builtin_powi:
378   case Builtin::BI__builtin_powif:
379   case Builtin::BI__builtin_powil: {
380     Value *Base = EmitScalarExpr(E->getArg(0));
381     Value *Exponent = EmitScalarExpr(E->getArg(1));
382     llvm::Type *ArgType = Base->getType();
383     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
384     return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
385   }
386
387   case Builtin::BI__builtin_isgreater:
388   case Builtin::BI__builtin_isgreaterequal:
389   case Builtin::BI__builtin_isless:
390   case Builtin::BI__builtin_islessequal:
391   case Builtin::BI__builtin_islessgreater:
392   case Builtin::BI__builtin_isunordered: {
393     // Ordered comparisons: we know the arguments to these are matching scalar
394     // floating point values.
395     Value *LHS = EmitScalarExpr(E->getArg(0));
396     Value *RHS = EmitScalarExpr(E->getArg(1));
397
398     switch (BuiltinID) {
399     default: assert(0 && "Unknown ordered comparison");
400     case Builtin::BI__builtin_isgreater:
401       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
402       break;
403     case Builtin::BI__builtin_isgreaterequal:
404       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
405       break;
406     case Builtin::BI__builtin_isless:
407       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
408       break;
409     case Builtin::BI__builtin_islessequal:
410       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
411       break;
412     case Builtin::BI__builtin_islessgreater:
413       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
414       break;
415     case Builtin::BI__builtin_isunordered:
416       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
417       break;
418     }
419     // ZExt bool to int type.
420     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
421                                           "tmp"));
422   }
423   case Builtin::BI__builtin_isnan: {
424     Value *V = EmitScalarExpr(E->getArg(0));
425     V = Builder.CreateFCmpUNO(V, V, "cmp");
426     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
427   }
428   
429   case Builtin::BI__builtin_isinf: {
430     // isinf(x) --> fabs(x) == infinity
431     Value *V = EmitScalarExpr(E->getArg(0));
432     V = EmitFAbs(*this, V, E->getArg(0)->getType());
433     
434     V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
435     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
436   }
437       
438   // TODO: BI__builtin_isinf_sign
439   //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
440
441   case Builtin::BI__builtin_isnormal: {
442     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
443     Value *V = EmitScalarExpr(E->getArg(0));
444     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
445
446     Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
447     Value *IsLessThanInf =
448       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
449     APFloat Smallest = APFloat::getSmallestNormalized(
450                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
451     Value *IsNormal =
452       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
453                             "isnormal");
454     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
455     V = Builder.CreateAnd(V, IsNormal, "and");
456     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
457   }
458
459   case Builtin::BI__builtin_isfinite: {
460     // isfinite(x) --> x == x && fabs(x) != infinity; }
461     Value *V = EmitScalarExpr(E->getArg(0));
462     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
463     
464     Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
465     Value *IsNotInf =
466       Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
467     
468     V = Builder.CreateAnd(Eq, IsNotInf, "and");
469     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
470   }
471
472   case Builtin::BI__builtin_fpclassify: {
473     Value *V = EmitScalarExpr(E->getArg(5));
474     const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
475
476     // Create Result
477     BasicBlock *Begin = Builder.GetInsertBlock();
478     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
479     Builder.SetInsertPoint(End);
480     PHINode *Result =
481       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
482                         "fpclassify_result");
483
484     // if (V==0) return FP_ZERO
485     Builder.SetInsertPoint(Begin);
486     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
487                                           "iszero");
488     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
489     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
490     Builder.CreateCondBr(IsZero, End, NotZero);
491     Result->addIncoming(ZeroLiteral, Begin);
492
493     // if (V != V) return FP_NAN
494     Builder.SetInsertPoint(NotZero);
495     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
496     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
497     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
498     Builder.CreateCondBr(IsNan, End, NotNan);
499     Result->addIncoming(NanLiteral, NotZero);
500
501     // if (fabs(V) == infinity) return FP_INFINITY
502     Builder.SetInsertPoint(NotNan);
503     Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
504     Value *IsInf =
505       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
506                             "isinf");
507     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
508     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
509     Builder.CreateCondBr(IsInf, End, NotInf);
510     Result->addIncoming(InfLiteral, NotNan);
511
512     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
513     Builder.SetInsertPoint(NotInf);
514     APFloat Smallest = APFloat::getSmallestNormalized(
515         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
516     Value *IsNormal =
517       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
518                             "isnormal");
519     Value *NormalResult =
520       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
521                            EmitScalarExpr(E->getArg(3)));
522     Builder.CreateBr(End);
523     Result->addIncoming(NormalResult, NotInf);
524
525     // return Result
526     Builder.SetInsertPoint(End);
527     return RValue::get(Result);
528   }
529       
530   case Builtin::BIalloca:
531   case Builtin::BI__builtin_alloca: {
532     Value *Size = EmitScalarExpr(E->getArg(0));
533     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size, "tmp"));
534   }
535   case Builtin::BIbzero:
536   case Builtin::BI__builtin_bzero: {
537     Value *Address = EmitScalarExpr(E->getArg(0));
538     Value *SizeVal = EmitScalarExpr(E->getArg(1));
539     Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false);
540     return RValue::get(Address);
541   }
542   case Builtin::BImemcpy:
543   case Builtin::BI__builtin_memcpy: {
544     Value *Address = EmitScalarExpr(E->getArg(0));
545     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
546     Value *SizeVal = EmitScalarExpr(E->getArg(2));
547     Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false);
548     return RValue::get(Address);
549   }
550       
551   case Builtin::BI__builtin___memcpy_chk: {
552     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
553     if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
554         !E->getArg(3)->isEvaluatable(CGM.getContext()))
555       break;
556     llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
557     llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
558     if (Size.ugt(DstSize))
559       break;
560     Value *Dest = EmitScalarExpr(E->getArg(0));
561     Value *Src = EmitScalarExpr(E->getArg(1));
562     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
563     Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false);
564     return RValue::get(Dest);
565   }
566       
567   case Builtin::BI__builtin_objc_memmove_collectable: {
568     Value *Address = EmitScalarExpr(E->getArg(0));
569     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
570     Value *SizeVal = EmitScalarExpr(E->getArg(2));
571     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 
572                                                   Address, SrcAddr, SizeVal);
573     return RValue::get(Address);
574   }
575
576   case Builtin::BI__builtin___memmove_chk: {
577     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
578     if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
579         !E->getArg(3)->isEvaluatable(CGM.getContext()))
580       break;
581     llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
582     llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
583     if (Size.ugt(DstSize))
584       break;
585     Value *Dest = EmitScalarExpr(E->getArg(0));
586     Value *Src = EmitScalarExpr(E->getArg(1));
587     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
588     Builder.CreateMemMove(Dest, Src, SizeVal, 1, false);
589     return RValue::get(Dest);
590   }
591
592   case Builtin::BImemmove:
593   case Builtin::BI__builtin_memmove: {
594     Value *Address = EmitScalarExpr(E->getArg(0));
595     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
596     Value *SizeVal = EmitScalarExpr(E->getArg(2));
597     Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false);
598     return RValue::get(Address);
599   }
600   case Builtin::BImemset:
601   case Builtin::BI__builtin_memset: {
602     Value *Address = EmitScalarExpr(E->getArg(0));
603     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
604                                          Builder.getInt8Ty());
605     Value *SizeVal = EmitScalarExpr(E->getArg(2));
606     Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
607     return RValue::get(Address);
608   }
609   case Builtin::BI__builtin___memset_chk: {
610     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
611     if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
612         !E->getArg(3)->isEvaluatable(CGM.getContext()))
613       break;
614     llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
615     llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
616     if (Size.ugt(DstSize))
617       break;
618     Value *Address = EmitScalarExpr(E->getArg(0));
619     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
620                                          Builder.getInt8Ty());
621     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
622     Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
623     
624     return RValue::get(Address);
625   }
626   case Builtin::BI__builtin_dwarf_cfa: {
627     // The offset in bytes from the first argument to the CFA.
628     //
629     // Why on earth is this in the frontend?  Is there any reason at
630     // all that the backend can't reasonably determine this while
631     // lowering llvm.eh.dwarf.cfa()?
632     //
633     // TODO: If there's a satisfactory reason, add a target hook for
634     // this instead of hard-coding 0, which is correct for most targets.
635     int32_t Offset = 0;
636
637     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
638     return RValue::get(Builder.CreateCall(F, 
639                                       llvm::ConstantInt::get(Int32Ty, Offset)));
640   }
641   case Builtin::BI__builtin_return_address: {
642     Value *Depth = EmitScalarExpr(E->getArg(0));
643     Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
644     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
645     return RValue::get(Builder.CreateCall(F, Depth));
646   }
647   case Builtin::BI__builtin_frame_address: {
648     Value *Depth = EmitScalarExpr(E->getArg(0));
649     Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
650     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
651     return RValue::get(Builder.CreateCall(F, Depth));
652   }
653   case Builtin::BI__builtin_extract_return_addr: {
654     Value *Address = EmitScalarExpr(E->getArg(0));
655     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
656     return RValue::get(Result);
657   }
658   case Builtin::BI__builtin_frob_return_addr: {
659     Value *Address = EmitScalarExpr(E->getArg(0));
660     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
661     return RValue::get(Result);
662   }
663   case Builtin::BI__builtin_dwarf_sp_column: {
664     const llvm::IntegerType *Ty
665       = cast<llvm::IntegerType>(ConvertType(E->getType()));
666     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
667     if (Column == -1) {
668       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
669       return RValue::get(llvm::UndefValue::get(Ty));
670     }
671     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
672   }
673   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
674     Value *Address = EmitScalarExpr(E->getArg(0));
675     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
676       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
677     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
678   }
679   case Builtin::BI__builtin_eh_return: {
680     Value *Int = EmitScalarExpr(E->getArg(0));
681     Value *Ptr = EmitScalarExpr(E->getArg(1));
682
683     const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
684     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
685            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
686     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
687                                   ? Intrinsic::eh_return_i32
688                                   : Intrinsic::eh_return_i64);
689     Builder.CreateCall2(F, Int, Ptr);
690     Builder.CreateUnreachable();
691
692     // We do need to preserve an insertion point.
693     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
694
695     return RValue::get(0);
696   }
697   case Builtin::BI__builtin_unwind_init: {
698     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
699     return RValue::get(Builder.CreateCall(F));
700   }
701   case Builtin::BI__builtin_extend_pointer: {
702     // Extends a pointer to the size of an _Unwind_Word, which is
703     // uint64_t on all platforms.  Generally this gets poked into a
704     // register and eventually used as an address, so if the
705     // addressing registers are wider than pointers and the platform
706     // doesn't implicitly ignore high-order bits when doing
707     // addressing, we need to make sure we zext / sext based on
708     // the platform's expectations.
709     //
710     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
711
712     // Cast the pointer to intptr_t.
713     Value *Ptr = EmitScalarExpr(E->getArg(0));
714     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
715
716     // If that's 64 bits, we're done.
717     if (IntPtrTy->getBitWidth() == 64)
718       return RValue::get(Result);
719
720     // Otherwise, ask the codegen data what to do.
721     if (getTargetHooks().extendPointerWithSExt())
722       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
723     else
724       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
725   }
726   case Builtin::BI__builtin_setjmp: {
727     // Buffer is a void**.
728     Value *Buf = EmitScalarExpr(E->getArg(0));
729
730     // Store the frame pointer to the setjmp buffer.
731     Value *FrameAddr =
732       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
733                          ConstantInt::get(Int32Ty, 0));
734     Builder.CreateStore(FrameAddr, Buf);
735
736     // Store the stack pointer to the setjmp buffer.
737     Value *StackAddr =
738       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
739     Value *StackSaveSlot =
740       Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
741     Builder.CreateStore(StackAddr, StackSaveSlot);
742
743     // Call LLVM's EH setjmp, which is lightweight.
744     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
745     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
746     return RValue::get(Builder.CreateCall(F, Buf));
747   }
748   case Builtin::BI__builtin_longjmp: {
749     Value *Buf = EmitScalarExpr(E->getArg(0));
750     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
751
752     // Call LLVM's EH longjmp, which is lightweight.
753     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
754
755     // longjmp doesn't return; mark this as unreachable.
756     Builder.CreateUnreachable();
757
758     // We do need to preserve an insertion point.
759     EmitBlock(createBasicBlock("longjmp.cont"));
760
761     return RValue::get(0);
762   }
763   case Builtin::BI__sync_fetch_and_add:
764   case Builtin::BI__sync_fetch_and_sub:
765   case Builtin::BI__sync_fetch_and_or:
766   case Builtin::BI__sync_fetch_and_and:
767   case Builtin::BI__sync_fetch_and_xor:
768   case Builtin::BI__sync_add_and_fetch:
769   case Builtin::BI__sync_sub_and_fetch:
770   case Builtin::BI__sync_and_and_fetch:
771   case Builtin::BI__sync_or_and_fetch:
772   case Builtin::BI__sync_xor_and_fetch:
773   case Builtin::BI__sync_val_compare_and_swap:
774   case Builtin::BI__sync_bool_compare_and_swap:
775   case Builtin::BI__sync_lock_test_and_set:
776   case Builtin::BI__sync_lock_release:
777   case Builtin::BI__sync_swap:
778     assert(0 && "Shouldn't make it through sema");
779   case Builtin::BI__sync_fetch_and_add_1:
780   case Builtin::BI__sync_fetch_and_add_2:
781   case Builtin::BI__sync_fetch_and_add_4:
782   case Builtin::BI__sync_fetch_and_add_8:
783   case Builtin::BI__sync_fetch_and_add_16:
784     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
785   case Builtin::BI__sync_fetch_and_sub_1:
786   case Builtin::BI__sync_fetch_and_sub_2:
787   case Builtin::BI__sync_fetch_and_sub_4:
788   case Builtin::BI__sync_fetch_and_sub_8:
789   case Builtin::BI__sync_fetch_and_sub_16:
790     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
791   case Builtin::BI__sync_fetch_and_or_1:
792   case Builtin::BI__sync_fetch_and_or_2:
793   case Builtin::BI__sync_fetch_and_or_4:
794   case Builtin::BI__sync_fetch_and_or_8:
795   case Builtin::BI__sync_fetch_and_or_16:
796     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
797   case Builtin::BI__sync_fetch_and_and_1:
798   case Builtin::BI__sync_fetch_and_and_2:
799   case Builtin::BI__sync_fetch_and_and_4:
800   case Builtin::BI__sync_fetch_and_and_8:
801   case Builtin::BI__sync_fetch_and_and_16:
802     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
803   case Builtin::BI__sync_fetch_and_xor_1:
804   case Builtin::BI__sync_fetch_and_xor_2:
805   case Builtin::BI__sync_fetch_and_xor_4:
806   case Builtin::BI__sync_fetch_and_xor_8:
807   case Builtin::BI__sync_fetch_and_xor_16:
808     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);
809
810   // Clang extensions: not overloaded yet.
811   case Builtin::BI__sync_fetch_and_min:
812     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
813   case Builtin::BI__sync_fetch_and_max:
814     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
815   case Builtin::BI__sync_fetch_and_umin:
816     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
817   case Builtin::BI__sync_fetch_and_umax:
818     return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);
819
820   case Builtin::BI__sync_add_and_fetch_1:
821   case Builtin::BI__sync_add_and_fetch_2:
822   case Builtin::BI__sync_add_and_fetch_4:
823   case Builtin::BI__sync_add_and_fetch_8:
824   case Builtin::BI__sync_add_and_fetch_16:
825     return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
826                                 llvm::Instruction::Add);
827   case Builtin::BI__sync_sub_and_fetch_1:
828   case Builtin::BI__sync_sub_and_fetch_2:
829   case Builtin::BI__sync_sub_and_fetch_4:
830   case Builtin::BI__sync_sub_and_fetch_8:
831   case Builtin::BI__sync_sub_and_fetch_16:
832     return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
833                                 llvm::Instruction::Sub);
834   case Builtin::BI__sync_and_and_fetch_1:
835   case Builtin::BI__sync_and_and_fetch_2:
836   case Builtin::BI__sync_and_and_fetch_4:
837   case Builtin::BI__sync_and_and_fetch_8:
838   case Builtin::BI__sync_and_and_fetch_16:
839     return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
840                                 llvm::Instruction::And);
841   case Builtin::BI__sync_or_and_fetch_1:
842   case Builtin::BI__sync_or_and_fetch_2:
843   case Builtin::BI__sync_or_and_fetch_4:
844   case Builtin::BI__sync_or_and_fetch_8:
845   case Builtin::BI__sync_or_and_fetch_16:
846     return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
847                                 llvm::Instruction::Or);
848   case Builtin::BI__sync_xor_and_fetch_1:
849   case Builtin::BI__sync_xor_and_fetch_2:
850   case Builtin::BI__sync_xor_and_fetch_4:
851   case Builtin::BI__sync_xor_and_fetch_8:
852   case Builtin::BI__sync_xor_and_fetch_16:
853     return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
854                                 llvm::Instruction::Xor);
855
856   case Builtin::BI__sync_val_compare_and_swap_1:
857   case Builtin::BI__sync_val_compare_and_swap_2:
858   case Builtin::BI__sync_val_compare_and_swap_4:
859   case Builtin::BI__sync_val_compare_and_swap_8:
860   case Builtin::BI__sync_val_compare_and_swap_16: {
861     QualType T = E->getType();
862     llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
863     unsigned AddrSpace =
864       cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
865     
866     llvm::IntegerType *IntType =
867       llvm::IntegerType::get(getLLVMContext(),
868                              getContext().getTypeSize(T));
869     llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
870     llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
871     Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
872                                     IntrinsicTypes);
873
874     Value *Args[3];
875     Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
876     Args[1] = EmitScalarExpr(E->getArg(1));
877     const llvm::Type *ValueType = Args[1]->getType();
878     Args[1] = EmitToInt(*this, Args[1], T, IntType);
879     Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
880
881     Value *Result = EmitCallWithBarrier(*this, AtomF, Args);
882     Result = EmitFromInt(*this, Result, T, ValueType);
883     return RValue::get(Result);
884   }
885
886   case Builtin::BI__sync_bool_compare_and_swap_1:
887   case Builtin::BI__sync_bool_compare_and_swap_2:
888   case Builtin::BI__sync_bool_compare_and_swap_4:
889   case Builtin::BI__sync_bool_compare_and_swap_8:
890   case Builtin::BI__sync_bool_compare_and_swap_16: {
891     QualType T = E->getArg(1)->getType();
892     llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
893     unsigned AddrSpace =
894       cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
895     
896     llvm::IntegerType *IntType =
897       llvm::IntegerType::get(getLLVMContext(),
898                              getContext().getTypeSize(T));
899     llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
900     llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
901     Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
902                                     IntrinsicTypes);
903
904     Value *Args[3];
905     Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
906     Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
907     Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
908
909     Value *OldVal = Args[1];
910     Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args);
911     Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
912     // zext bool to int.
913     Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
914     return RValue::get(Result);
915   }
916
917   case Builtin::BI__sync_swap_1:
918   case Builtin::BI__sync_swap_2:
919   case Builtin::BI__sync_swap_4:
920   case Builtin::BI__sync_swap_8:
921   case Builtin::BI__sync_swap_16:
922     return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);
923
924   case Builtin::BI__sync_lock_test_and_set_1:
925   case Builtin::BI__sync_lock_test_and_set_2:
926   case Builtin::BI__sync_lock_test_and_set_4:
927   case Builtin::BI__sync_lock_test_and_set_8:
928   case Builtin::BI__sync_lock_test_and_set_16:
929     return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);
930
931   case Builtin::BI__sync_lock_release_1:
932   case Builtin::BI__sync_lock_release_2:
933   case Builtin::BI__sync_lock_release_4:
934   case Builtin::BI__sync_lock_release_8:
935   case Builtin::BI__sync_lock_release_16: {
936     Value *Ptr = EmitScalarExpr(E->getArg(0));
937     const llvm::Type *ElTy =
938       cast<llvm::PointerType>(Ptr->getType())->getElementType();
939     llvm::StoreInst *Store = 
940       Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
941     Store->setVolatile(true);
942     return RValue::get(0);
943   }
944
945   case Builtin::BI__sync_synchronize: {
946     // We assume like gcc appears to, that this only applies to cached memory.
947     EmitMemoryBarrier(*this, true, true, true, true, false);
948     return RValue::get(0);
949   }
950
951   case Builtin::BI__builtin_llvm_memory_barrier: {
952     Value *C[5] = {
953       EmitScalarExpr(E->getArg(0)),
954       EmitScalarExpr(E->getArg(1)),
955       EmitScalarExpr(E->getArg(2)),
956       EmitScalarExpr(E->getArg(3)),
957       EmitScalarExpr(E->getArg(4))
958     };
959     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C);
960     return RValue::get(0);
961   }
962       
963     // Library functions with special handling.
964   case Builtin::BIsqrt:
965   case Builtin::BIsqrtf:
966   case Builtin::BIsqrtl: {
967     // TODO: there is currently no set of optimizer flags
968     // sufficient for us to rewrite sqrt to @llvm.sqrt.
969     // -fmath-errno=0 is not good enough; we need finiteness.
970     // We could probably precondition the call with an ult
971     // against 0, but is that worth the complexity?
972     break;
973   }
974
975   case Builtin::BIpow:
976   case Builtin::BIpowf:
977   case Builtin::BIpowl: {
978     // Rewrite sqrt to intrinsic if allowed.
979     if (!FD->hasAttr<ConstAttr>())
980       break;
981     Value *Base = EmitScalarExpr(E->getArg(0));
982     Value *Exponent = EmitScalarExpr(E->getArg(1));
983     llvm::Type *ArgType = Base->getType();
984     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
985     return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
986   }
987
988   case Builtin::BIfma:
989   case Builtin::BIfmaf:
990   case Builtin::BIfmal:
991   case Builtin::BI__builtin_fma:
992   case Builtin::BI__builtin_fmaf:
993   case Builtin::BI__builtin_fmal: {
994     // Rewrite fma to intrinsic.
995     Value *FirstArg = EmitScalarExpr(E->getArg(0));
996     llvm::Type *ArgType = FirstArg->getType();
997     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
998     return RValue::get(Builder.CreateCall3(F, FirstArg,
999                                               EmitScalarExpr(E->getArg(1)),
1000                                               EmitScalarExpr(E->getArg(2)),
1001                                               "tmp"));
1002   }
1003
1004   case Builtin::BI__builtin_signbit:
1005   case Builtin::BI__builtin_signbitf:
1006   case Builtin::BI__builtin_signbitl: {
1007     LLVMContext &C = CGM.getLLVMContext();
1008
1009     Value *Arg = EmitScalarExpr(E->getArg(0));
1010     const llvm::Type *ArgTy = Arg->getType();
1011     if (ArgTy->isPPC_FP128Ty())
1012       break; // FIXME: I'm not sure what the right implementation is here.
1013     int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1014     const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1015     Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1016     Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1017     Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1018     return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1019   }
1020   }
1021
1022   // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
1023   // that function.
1024   if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
1025       getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1026     return EmitCall(E->getCallee()->getType(),
1027                     CGM.getBuiltinLibFunction(FD, BuiltinID),
1028                     ReturnValueSlot(), E->arg_begin(), E->arg_end(), FD);
1029
1030   // See if we have a target specific intrinsic.
1031   const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1032   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1033   if (const char *Prefix =
1034       llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
1035     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1036
1037   if (IntrinsicID != Intrinsic::not_intrinsic) {
1038     SmallVector<Value*, 16> Args;
1039
1040     // Find out if any arguments are required to be integer constant
1041     // expressions.
1042     unsigned ICEArguments = 0;
1043     ASTContext::GetBuiltinTypeError Error;
1044     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1045     assert(Error == ASTContext::GE_None && "Should not codegen an error");
1046
1047     Function *F = CGM.getIntrinsic(IntrinsicID);
1048     const llvm::FunctionType *FTy = F->getFunctionType();
1049
1050     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1051       Value *ArgValue;
1052       // If this is a normal argument, just emit it as a scalar.
1053       if ((ICEArguments & (1 << i)) == 0) {
1054         ArgValue = EmitScalarExpr(E->getArg(i));
1055       } else {
1056         // If this is required to be a constant, constant fold it so that we 
1057         // know that the generated intrinsic gets a ConstantInt.
1058         llvm::APSInt Result;
1059         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1060         assert(IsConst && "Constant arg isn't actually constant?");
1061         (void)IsConst;
1062         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1063       }
1064
1065       // If the intrinsic arg type is different from the builtin arg type
1066       // we need to do a bit cast.
1067       const llvm::Type *PTy = FTy->getParamType(i);
1068       if (PTy != ArgValue->getType()) {
1069         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1070                "Must be able to losslessly bit cast to param");
1071         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1072       }
1073
1074       Args.push_back(ArgValue);
1075     }
1076
1077     Value *V = Builder.CreateCall(F, Args);
1078     QualType BuiltinRetType = E->getType();
1079
1080     const llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext());
1081     if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);
1082
1083     if (RetTy != V->getType()) {
1084       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1085              "Must be able to losslessly bit cast result type");
1086       V = Builder.CreateBitCast(V, RetTy);
1087     }
1088
1089     return RValue::get(V);
1090   }
1091
1092   // See if we have a target specific builtin that needs to be lowered.
1093   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1094     return RValue::get(V);
1095
1096   ErrorUnsupported(E, "builtin function");
1097
1098   // Unknown builtin, for now just dump it out and return undef.
1099   if (hasAggregateLLVMType(E->getType()))
1100     return RValue::getAggregate(CreateMemTemp(E->getType()));
1101   return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1102 }
1103
1104 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1105                                               const CallExpr *E) {
1106   switch (Target.getTriple().getArch()) {
1107   case llvm::Triple::arm:
1108   case llvm::Triple::thumb:
1109     return EmitARMBuiltinExpr(BuiltinID, E);
1110   case llvm::Triple::x86:
1111   case llvm::Triple::x86_64:
1112     return EmitX86BuiltinExpr(BuiltinID, E);
1113   case llvm::Triple::ppc:
1114   case llvm::Triple::ppc64:
1115     return EmitPPCBuiltinExpr(BuiltinID, E);
1116   default:
1117     return 0;
1118   }
1119 }
1120
1121 static llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
1122   switch (type) {
1123     default: break;
1124     case 0: 
1125     case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
1126     case 6:
1127     case 7:
1128     case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
1129     case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
1130     case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
1131     case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
1132   };
1133   return 0;
1134 }
1135
1136 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1137   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1138   SmallVector<Constant*, 16> Indices(nElts, C);
1139   Value* SV = llvm::ConstantVector::get(Indices);
1140   return Builder.CreateShuffleVector(V, V, SV, "lane");
1141 }
1142
1143 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1144                                      const char *name,
1145                                      unsigned shift, bool rightshift) {
1146   unsigned j = 0;
1147   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1148        ai != ae; ++ai, ++j)
1149     if (shift > 0 && shift == j)
1150       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1151     else
1152       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1153
1154   return Builder.CreateCall(F, Ops, name);
1155 }
1156
1157 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty, 
1158                                             bool neg) {
1159   ConstantInt *CI = cast<ConstantInt>(V);
1160   int SV = CI->getSExtValue();
1161   
1162   const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1163   llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1164   SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
1165   return llvm::ConstantVector::get(CV);
1166 }
1167
1168 /// GetPointeeAlignment - Given an expression with a pointer type, find the
1169 /// alignment of the type referenced by the pointer.  Skip over implicit
1170 /// casts.
1171 static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
1172   unsigned Align = 1;
1173   // Check if the type is a pointer.  The implicit cast operand might not be.
1174   while (Addr->getType()->isPointerType()) {
1175     QualType PtTy = Addr->getType()->getPointeeType();
1176     unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
1177     if (NewA > Align)
1178       Align = NewA;
1179
1180     // If the address is an implicit cast, repeat with the cast operand.
1181     if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1182       Addr = CastAddr->getSubExpr();
1183       continue;
1184     }
1185     break;
1186   }
1187   return llvm::ConstantInt::get(CGF.Int32Ty, Align);
1188 }
1189
1190 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1191                                            const CallExpr *E) {
1192   if (BuiltinID == ARM::BI__clear_cache) {
1193     const FunctionDecl *FD = E->getDirectCallee();
1194     // Oddly people write this call without args on occasion and gcc accepts
1195     // it - it's also marked as varargs in the description file.
1196     llvm::SmallVector<Value*, 2> Ops;
1197     for (unsigned i = 0; i < E->getNumArgs(); i++)
1198       Ops.push_back(EmitScalarExpr(E->getArg(i)));
1199     const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1200     const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1201     llvm::StringRef Name = FD->getName();
1202     return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1203   }
1204
1205   if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
1206     Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1207
1208     Value *LdPtr = EmitScalarExpr(E->getArg(0));
1209     Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
1210
1211     Value *Val0 = Builder.CreateExtractValue(Val, 1);
1212     Value *Val1 = Builder.CreateExtractValue(Val, 0);
1213     Val0 = Builder.CreateZExt(Val0, Int64Ty);
1214     Val1 = Builder.CreateZExt(Val1, Int64Ty);
1215
1216     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1217     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1218     return Builder.CreateOr(Val, Val1);
1219   }
1220
1221   if (BuiltinID == ARM::BI__builtin_arm_strexd) {
1222     Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
1223     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
1224
1225     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1226     Value *Tmp = Builder.CreateAlloca(Int64Ty, One, "tmp");
1227     Value *Val = EmitScalarExpr(E->getArg(0));
1228     Builder.CreateStore(Val, Tmp);
1229
1230     Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
1231     Val = Builder.CreateLoad(LdPtr);
1232
1233     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
1234     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
1235     Value *StPtr = EmitScalarExpr(E->getArg(1));
1236     return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
1237   }
1238
1239   llvm::SmallVector<Value*, 4> Ops;
1240   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1241     Ops.push_back(EmitScalarExpr(E->getArg(i)));
1242
1243   llvm::APSInt Result;
1244   const Expr *Arg = E->getArg(E->getNumArgs()-1);
1245   if (!Arg->isIntegerConstantExpr(Result, getContext()))
1246     return 0;
1247
1248   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1249       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1250     // Determine the overloaded type of this builtin.
1251     llvm::Type *Ty;
1252     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1253       Ty = llvm::Type::getFloatTy(getLLVMContext());
1254     else
1255       Ty = llvm::Type::getDoubleTy(getLLVMContext());
1256     
1257     // Determine whether this is an unsigned conversion or not.
1258     bool usgn = Result.getZExtValue() == 1;
1259     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1260
1261     // Call the appropriate intrinsic.
1262     Function *F = CGM.getIntrinsic(Int, Ty);
1263     return Builder.CreateCall(F, Ops, "vcvtr");
1264   }
1265   
1266   // Determine the type of this overloaded NEON intrinsic.
1267   unsigned type = Result.getZExtValue();
1268   bool usgn = type & 0x08;
1269   bool quad = type & 0x10;
1270   bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
1271   (void)poly;  // Only used in assert()s.
1272   bool rightShift = false;
1273
1274   llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad);
1275   llvm::Type *Ty = VTy;
1276   if (!Ty)
1277     return 0;
1278
1279   unsigned Int;
1280   switch (BuiltinID) {
1281   default: return 0;
1282   case ARM::BI__builtin_neon_vabd_v:
1283   case ARM::BI__builtin_neon_vabdq_v:
1284     Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1285     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
1286   case ARM::BI__builtin_neon_vabs_v:
1287   case ARM::BI__builtin_neon_vabsq_v:
1288     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
1289                         Ops, "vabs");
1290   case ARM::BI__builtin_neon_vaddhn_v:
1291     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
1292                         Ops, "vaddhn");
1293   case ARM::BI__builtin_neon_vcale_v:
1294     std::swap(Ops[0], Ops[1]);
1295   case ARM::BI__builtin_neon_vcage_v: {
1296     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
1297     return EmitNeonCall(F, Ops, "vcage");
1298   }
1299   case ARM::BI__builtin_neon_vcaleq_v:
1300     std::swap(Ops[0], Ops[1]);
1301   case ARM::BI__builtin_neon_vcageq_v: {
1302     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
1303     return EmitNeonCall(F, Ops, "vcage");
1304   }
1305   case ARM::BI__builtin_neon_vcalt_v:
1306     std::swap(Ops[0], Ops[1]);
1307   case ARM::BI__builtin_neon_vcagt_v: {
1308     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
1309     return EmitNeonCall(F, Ops, "vcagt");
1310   }
1311   case ARM::BI__builtin_neon_vcaltq_v:
1312     std::swap(Ops[0], Ops[1]);
1313   case ARM::BI__builtin_neon_vcagtq_v: {
1314     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
1315     return EmitNeonCall(F, Ops, "vcagt");
1316   }
1317   case ARM::BI__builtin_neon_vcls_v:
1318   case ARM::BI__builtin_neon_vclsq_v: {
1319     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
1320     return EmitNeonCall(F, Ops, "vcls");
1321   }
1322   case ARM::BI__builtin_neon_vclz_v:
1323   case ARM::BI__builtin_neon_vclzq_v: {
1324     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, Ty);
1325     return EmitNeonCall(F, Ops, "vclz");
1326   }
1327   case ARM::BI__builtin_neon_vcnt_v:
1328   case ARM::BI__builtin_neon_vcntq_v: {
1329     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, Ty);
1330     return EmitNeonCall(F, Ops, "vcnt");
1331   }
1332   case ARM::BI__builtin_neon_vcvt_f16_v: {
1333     assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin");
1334     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
1335     return EmitNeonCall(F, Ops, "vcvt");
1336   }
1337   case ARM::BI__builtin_neon_vcvt_f32_f16: {
1338     assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin");
1339     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
1340     return EmitNeonCall(F, Ops, "vcvt");
1341   }
1342   case ARM::BI__builtin_neon_vcvt_f32_v:
1343   case ARM::BI__builtin_neon_vcvtq_f32_v: {
1344     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1345     Ty = GetNeonType(getLLVMContext(), 4, quad);
1346     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 
1347                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1348   }
1349   case ARM::BI__builtin_neon_vcvt_s32_v:
1350   case ARM::BI__builtin_neon_vcvt_u32_v:
1351   case ARM::BI__builtin_neon_vcvtq_s32_v:
1352   case ARM::BI__builtin_neon_vcvtq_u32_v: {
1353     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(getLLVMContext(), 4, quad));
1354     return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 
1355                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1356   }
1357   case ARM::BI__builtin_neon_vcvt_n_f32_v:
1358   case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1359     llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty };
1360     Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
1361     Function *F = CGM.getIntrinsic(Int, Tys);
1362     return EmitNeonCall(F, Ops, "vcvt_n");
1363   }
1364   case ARM::BI__builtin_neon_vcvt_n_s32_v:
1365   case ARM::BI__builtin_neon_vcvt_n_u32_v:
1366   case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1367   case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1368     llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) };
1369     Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
1370     Function *F = CGM.getIntrinsic(Int, Tys);
1371     return EmitNeonCall(F, Ops, "vcvt_n");
1372   }
1373   case ARM::BI__builtin_neon_vext_v:
1374   case ARM::BI__builtin_neon_vextq_v: {
1375     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1376     SmallVector<Constant*, 16> Indices;
1377     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1378       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1379     
1380     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1381     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1382     Value *SV = llvm::ConstantVector::get(Indices);
1383     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1384   }
1385   case ARM::BI__builtin_neon_vget_lane_i8:
1386   case ARM::BI__builtin_neon_vget_lane_i16:
1387   case ARM::BI__builtin_neon_vget_lane_i32:
1388   case ARM::BI__builtin_neon_vget_lane_i64:
1389   case ARM::BI__builtin_neon_vget_lane_f32:
1390   case ARM::BI__builtin_neon_vgetq_lane_i8:
1391   case ARM::BI__builtin_neon_vgetq_lane_i16:
1392   case ARM::BI__builtin_neon_vgetq_lane_i32:
1393   case ARM::BI__builtin_neon_vgetq_lane_i64:
1394   case ARM::BI__builtin_neon_vgetq_lane_f32:
1395     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1396                                         "vget_lane");
1397   case ARM::BI__builtin_neon_vhadd_v:
1398   case ARM::BI__builtin_neon_vhaddq_v:
1399     Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1400     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
1401   case ARM::BI__builtin_neon_vhsub_v:
1402   case ARM::BI__builtin_neon_vhsubq_v:
1403     Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1404     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
1405   case ARM::BI__builtin_neon_vld1_v:
1406   case ARM::BI__builtin_neon_vld1q_v:
1407     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1408     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
1409                         Ops, "vld1");
1410   case ARM::BI__builtin_neon_vld1_lane_v:
1411   case ARM::BI__builtin_neon_vld1q_lane_v:
1412     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1413     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1414     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1415     Ops[0] = Builder.CreateLoad(Ops[0]);
1416     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
1417   case ARM::BI__builtin_neon_vld1_dup_v:
1418   case ARM::BI__builtin_neon_vld1q_dup_v: {
1419     Value *V = UndefValue::get(Ty);
1420     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1421     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1422     Ops[0] = Builder.CreateLoad(Ops[0]);
1423     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1424     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1425     return EmitNeonSplat(Ops[0], CI);
1426   }
1427   case ARM::BI__builtin_neon_vld2_v:
1428   case ARM::BI__builtin_neon_vld2q_v: {
1429     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
1430     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1431     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1432     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1433     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1434     return Builder.CreateStore(Ops[1], Ops[0]);
1435   }
1436   case ARM::BI__builtin_neon_vld3_v:
1437   case ARM::BI__builtin_neon_vld3q_v: {
1438     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
1439     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1440     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1441     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1442     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1443     return Builder.CreateStore(Ops[1], Ops[0]);
1444   }
1445   case ARM::BI__builtin_neon_vld4_v:
1446   case ARM::BI__builtin_neon_vld4q_v: {
1447     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
1448     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1449     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1450     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1451     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1452     return Builder.CreateStore(Ops[1], Ops[0]);
1453   }
1454   case ARM::BI__builtin_neon_vld2_lane_v:
1455   case ARM::BI__builtin_neon_vld2q_lane_v: {
1456     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
1457     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1458     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1459     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1460     Ops[1] = Builder.CreateCall(F,
1461                                 ArrayRef<Value *>(Ops.begin() + 1, Ops.end()),
1462                                 "vld2_lane");
1463     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1464     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1465     return Builder.CreateStore(Ops[1], Ops[0]);
1466   }
1467   case ARM::BI__builtin_neon_vld3_lane_v:
1468   case ARM::BI__builtin_neon_vld3q_lane_v: {
1469     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
1470     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1471     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1472     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1473     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1474     Ops[1] = Builder.CreateCall(F,
1475                                 ArrayRef<Value *>(Ops.begin() + 1, Ops.end()),
1476                                 "vld3_lane");
1477     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1478     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1479     return Builder.CreateStore(Ops[1], Ops[0]);
1480   }
1481   case ARM::BI__builtin_neon_vld4_lane_v:
1482   case ARM::BI__builtin_neon_vld4q_lane_v: {
1483     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
1484     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1485     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1486     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1487     Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1488     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1489     Ops[1] = Builder.CreateCall(F,
1490                                 ArrayRef<Value *>(Ops.begin() + 1, Ops.end()),
1491                                 "vld3_lane");
1492     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1493     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1494     return Builder.CreateStore(Ops[1], Ops[0]);
1495   }
1496   case ARM::BI__builtin_neon_vld2_dup_v:
1497   case ARM::BI__builtin_neon_vld3_dup_v:
1498   case ARM::BI__builtin_neon_vld4_dup_v: {
1499     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
1500     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
1501       switch (BuiltinID) {
1502       case ARM::BI__builtin_neon_vld2_dup_v: 
1503         Int = Intrinsic::arm_neon_vld2; 
1504         break;
1505       case ARM::BI__builtin_neon_vld3_dup_v:
1506         Int = Intrinsic::arm_neon_vld2; 
1507         break;
1508       case ARM::BI__builtin_neon_vld4_dup_v:
1509         Int = Intrinsic::arm_neon_vld2; 
1510         break;
1511       default: assert(0 && "unknown vld_dup intrinsic?");
1512       }
1513       Function *F = CGM.getIntrinsic(Int, Ty);
1514       Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1515       Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
1516       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1517       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1518       return Builder.CreateStore(Ops[1], Ops[0]);
1519     }
1520     switch (BuiltinID) {
1521     case ARM::BI__builtin_neon_vld2_dup_v: 
1522       Int = Intrinsic::arm_neon_vld2lane; 
1523       break;
1524     case ARM::BI__builtin_neon_vld3_dup_v:
1525       Int = Intrinsic::arm_neon_vld2lane; 
1526       break;
1527     case ARM::BI__builtin_neon_vld4_dup_v:
1528       Int = Intrinsic::arm_neon_vld2lane; 
1529       break;
1530     default: assert(0 && "unknown vld_dup intrinsic?");
1531     }
1532     Function *F = CGM.getIntrinsic(Int, Ty);
1533     const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1534     
1535     SmallVector<Value*, 6> Args;
1536     Args.push_back(Ops[1]);
1537     Args.append(STy->getNumElements(), UndefValue::get(Ty));
1538
1539     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1540     Args.push_back(CI);
1541     Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1542     
1543     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
1544     // splat lane 0 to all elts in each vector of the result.
1545     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1546       Value *Val = Builder.CreateExtractValue(Ops[1], i);
1547       Value *Elt = Builder.CreateBitCast(Val, Ty);
1548       Elt = EmitNeonSplat(Elt, CI);
1549       Elt = Builder.CreateBitCast(Elt, Val->getType());
1550       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1551     }
1552     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1553     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1554     return Builder.CreateStore(Ops[1], Ops[0]);
1555   }
1556   case ARM::BI__builtin_neon_vmax_v:
1557   case ARM::BI__builtin_neon_vmaxq_v:
1558     Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1559     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
1560   case ARM::BI__builtin_neon_vmin_v:
1561   case ARM::BI__builtin_neon_vminq_v:
1562     Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1563     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
1564   case ARM::BI__builtin_neon_vmovl_v: {
1565     const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1566     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1567     if (usgn)
1568       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1569     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1570   }
1571   case ARM::BI__builtin_neon_vmovn_v: {
1572     const llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
1573     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1574     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1575   }
1576   case ARM::BI__builtin_neon_vmul_v:
1577   case ARM::BI__builtin_neon_vmulq_v:
1578     assert(poly && "vmul builtin only supported for polynomial types");
1579     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
1580                         Ops, "vmul");
1581   case ARM::BI__builtin_neon_vmull_v:
1582     Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1583     Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1584     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1585   case ARM::BI__builtin_neon_vpadal_v:
1586   case ARM::BI__builtin_neon_vpadalq_v: {
1587     Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1588     // The source operand type has twice as many elements of half the size.
1589     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1590     const llvm::Type *EltTy =
1591       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1592     llvm::Type *NarrowTy =
1593       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1594     llvm::Type *Tys[2] = { Ty, NarrowTy };
1595     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
1596   }
1597   case ARM::BI__builtin_neon_vpadd_v:
1598     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
1599                         Ops, "vpadd");
1600   case ARM::BI__builtin_neon_vpaddl_v:
1601   case ARM::BI__builtin_neon_vpaddlq_v: {
1602     Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1603     // The source operand type has twice as many elements of half the size.
1604     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1605     const llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1606     llvm::Type *NarrowTy =
1607       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1608     llvm::Type *Tys[2] = { Ty, NarrowTy };
1609     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1610   }
1611   case ARM::BI__builtin_neon_vpmax_v:
1612     Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1613     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
1614   case ARM::BI__builtin_neon_vpmin_v:
1615     Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1616     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
1617   case ARM::BI__builtin_neon_vqabs_v:
1618   case ARM::BI__builtin_neon_vqabsq_v:
1619     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
1620                         Ops, "vqabs");
1621   case ARM::BI__builtin_neon_vqadd_v:
1622   case ARM::BI__builtin_neon_vqaddq_v:
1623     Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1624     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
1625   case ARM::BI__builtin_neon_vqdmlal_v:
1626     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
1627                         Ops, "vqdmlal");
1628   case ARM::BI__builtin_neon_vqdmlsl_v:
1629     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
1630                         Ops, "vqdmlsl");
1631   case ARM::BI__builtin_neon_vqdmulh_v:
1632   case ARM::BI__builtin_neon_vqdmulhq_v:
1633     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
1634                         Ops, "vqdmulh");
1635   case ARM::BI__builtin_neon_vqdmull_v:
1636     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
1637                         Ops, "vqdmull");
1638   case ARM::BI__builtin_neon_vqmovn_v:
1639     Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1640     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
1641   case ARM::BI__builtin_neon_vqmovun_v:
1642     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
1643                         Ops, "vqdmull");
1644   case ARM::BI__builtin_neon_vqneg_v:
1645   case ARM::BI__builtin_neon_vqnegq_v:
1646     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
1647                         Ops, "vqneg");
1648   case ARM::BI__builtin_neon_vqrdmulh_v:
1649   case ARM::BI__builtin_neon_vqrdmulhq_v:
1650     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
1651                         Ops, "vqrdmulh");
1652   case ARM::BI__builtin_neon_vqrshl_v:
1653   case ARM::BI__builtin_neon_vqrshlq_v:
1654     Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1655     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
1656   case ARM::BI__builtin_neon_vqrshrn_n_v:
1657     Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1658     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
1659                         1, true);
1660   case ARM::BI__builtin_neon_vqrshrun_n_v:
1661     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
1662                         Ops, "vqrshrun_n", 1, true);
1663   case ARM::BI__builtin_neon_vqshl_v:
1664   case ARM::BI__builtin_neon_vqshlq_v:
1665     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1666     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
1667   case ARM::BI__builtin_neon_vqshl_n_v:
1668   case ARM::BI__builtin_neon_vqshlq_n_v:
1669     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1670     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1671                         1, false);
1672   case ARM::BI__builtin_neon_vqshlu_n_v:
1673   case ARM::BI__builtin_neon_vqshluq_n_v:
1674     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
1675                         Ops, "vqshlu", 1, false);
1676   case ARM::BI__builtin_neon_vqshrn_n_v:
1677     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1678     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
1679                         1, true);
1680   case ARM::BI__builtin_neon_vqshrun_n_v:
1681     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
1682                         Ops, "vqshrun_n", 1, true);
1683   case ARM::BI__builtin_neon_vqsub_v:
1684   case ARM::BI__builtin_neon_vqsubq_v:
1685     Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1686     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
1687   case ARM::BI__builtin_neon_vraddhn_v:
1688     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
1689                         Ops, "vraddhn");
1690   case ARM::BI__builtin_neon_vrecpe_v:
1691   case ARM::BI__builtin_neon_vrecpeq_v:
1692     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
1693                         Ops, "vrecpe");
1694   case ARM::BI__builtin_neon_vrecps_v:
1695   case ARM::BI__builtin_neon_vrecpsq_v:
1696     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
1697                         Ops, "vrecps");
1698   case ARM::BI__builtin_neon_vrhadd_v:
1699   case ARM::BI__builtin_neon_vrhaddq_v:
1700     Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1701     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
1702   case ARM::BI__builtin_neon_vrshl_v:
1703   case ARM::BI__builtin_neon_vrshlq_v:
1704     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1705     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
1706   case ARM::BI__builtin_neon_vrshrn_n_v:
1707     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
1708                         Ops, "vrshrn_n", 1, true);
1709   case ARM::BI__builtin_neon_vrshr_n_v:
1710   case ARM::BI__builtin_neon_vrshrq_n_v:
1711     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1712     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
1713   case ARM::BI__builtin_neon_vrsqrte_v:
1714   case ARM::BI__builtin_neon_vrsqrteq_v:
1715     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
1716                         Ops, "vrsqrte");
1717   case ARM::BI__builtin_neon_vrsqrts_v:
1718   case ARM::BI__builtin_neon_vrsqrtsq_v:
1719     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
1720                         Ops, "vrsqrts");
1721   case ARM::BI__builtin_neon_vrsra_n_v:
1722   case ARM::BI__builtin_neon_vrsraq_n_v:
1723     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1724     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1725     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1726     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1727     Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 
1728     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1729   case ARM::BI__builtin_neon_vrsubhn_v:
1730     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
1731                         Ops, "vrsubhn");
1732   case ARM::BI__builtin_neon_vset_lane_i8:
1733   case ARM::BI__builtin_neon_vset_lane_i16:
1734   case ARM::BI__builtin_neon_vset_lane_i32:
1735   case ARM::BI__builtin_neon_vset_lane_i64:
1736   case ARM::BI__builtin_neon_vset_lane_f32:
1737   case ARM::BI__builtin_neon_vsetq_lane_i8:
1738   case ARM::BI__builtin_neon_vsetq_lane_i16:
1739   case ARM::BI__builtin_neon_vsetq_lane_i32:
1740   case ARM::BI__builtin_neon_vsetq_lane_i64:
1741   case ARM::BI__builtin_neon_vsetq_lane_f32:
1742     Ops.push_back(EmitScalarExpr(E->getArg(2)));
1743     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1744   case ARM::BI__builtin_neon_vshl_v:
1745   case ARM::BI__builtin_neon_vshlq_v:
1746     Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1747     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
1748   case ARM::BI__builtin_neon_vshll_n_v:
1749     Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1750     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
1751   case ARM::BI__builtin_neon_vshl_n_v:
1752   case ARM::BI__builtin_neon_vshlq_n_v:
1753     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1754     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
1755   case ARM::BI__builtin_neon_vshrn_n_v:
1756     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
1757                         Ops, "vshrn_n", 1, true);
1758   case ARM::BI__builtin_neon_vshr_n_v:
1759   case ARM::BI__builtin_neon_vshrq_n_v:
1760     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1761     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1762     if (usgn)
1763       return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1764     else
1765       return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
1766   case ARM::BI__builtin_neon_vsri_n_v:
1767   case ARM::BI__builtin_neon_vsriq_n_v:
1768     rightShift = true;
1769   case ARM::BI__builtin_neon_vsli_n_v:
1770   case ARM::BI__builtin_neon_vsliq_n_v:
1771     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
1772     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
1773                         Ops, "vsli_n");
1774   case ARM::BI__builtin_neon_vsra_n_v:
1775   case ARM::BI__builtin_neon_vsraq_n_v:
1776     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1777     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1778     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1779     if (usgn)
1780       Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1781     else
1782       Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1783     return Builder.CreateAdd(Ops[0], Ops[1]);
1784   case ARM::BI__builtin_neon_vst1_v:
1785   case ARM::BI__builtin_neon_vst1q_v:
1786     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1787     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
1788                         Ops, "");
1789   case ARM::BI__builtin_neon_vst1_lane_v:
1790   case ARM::BI__builtin_neon_vst1q_lane_v:
1791     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1792     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1793     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1794     return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1795   case ARM::BI__builtin_neon_vst2_v:
1796   case ARM::BI__builtin_neon_vst2q_v:
1797     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1798     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
1799                         Ops, "");
1800   case ARM::BI__builtin_neon_vst2_lane_v:
1801   case ARM::BI__builtin_neon_vst2q_lane_v:
1802     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1803     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
1804                         Ops, "");
1805   case ARM::BI__builtin_neon_vst3_v:
1806   case ARM::BI__builtin_neon_vst3q_v:
1807     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1808     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
1809                         Ops, "");
1810   case ARM::BI__builtin_neon_vst3_lane_v:
1811   case ARM::BI__builtin_neon_vst3q_lane_v:
1812     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1813     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
1814                         Ops, "");
1815   case ARM::BI__builtin_neon_vst4_v:
1816   case ARM::BI__builtin_neon_vst4q_v:
1817     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1818     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
1819                         Ops, "");
1820   case ARM::BI__builtin_neon_vst4_lane_v:
1821   case ARM::BI__builtin_neon_vst4q_lane_v:
1822     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1823     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
1824                         Ops, "");
1825   case ARM::BI__builtin_neon_vsubhn_v:
1826     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
1827                         Ops, "vsubhn");
1828   case ARM::BI__builtin_neon_vtbl1_v:
1829     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1830                         Ops, "vtbl1");
1831   case ARM::BI__builtin_neon_vtbl2_v:
1832     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1833                         Ops, "vtbl2");
1834   case ARM::BI__builtin_neon_vtbl3_v:
1835     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1836                         Ops, "vtbl3");
1837   case ARM::BI__builtin_neon_vtbl4_v:
1838     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1839                         Ops, "vtbl4");
1840   case ARM::BI__builtin_neon_vtbx1_v:
1841     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1842                         Ops, "vtbx1");
1843   case ARM::BI__builtin_neon_vtbx2_v:
1844     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1845                         Ops, "vtbx2");
1846   case ARM::BI__builtin_neon_vtbx3_v:
1847     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1848                         Ops, "vtbx3");
1849   case ARM::BI__builtin_neon_vtbx4_v:
1850     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1851                         Ops, "vtbx4");
1852   case ARM::BI__builtin_neon_vtst_v:
1853   case ARM::BI__builtin_neon_vtstq_v: {
1854     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1855     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1856     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1857     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 
1858                                 ConstantAggregateZero::get(Ty));
1859     return Builder.CreateSExt(Ops[0], Ty, "vtst");
1860   }
1861   case ARM::BI__builtin_neon_vtrn_v:
1862   case ARM::BI__builtin_neon_vtrnq_v: {
1863     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1864     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1865     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1866     Value *SV = 0;
1867
1868     for (unsigned vi = 0; vi != 2; ++vi) {
1869       SmallVector<Constant*, 16> Indices;
1870       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1871         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1872         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1873       }
1874       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1875       SV = llvm::ConstantVector::get(Indices);
1876       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1877       SV = Builder.CreateStore(SV, Addr);
1878     }
1879     return SV;
1880   }
1881   case ARM::BI__builtin_neon_vuzp_v:
1882   case ARM::BI__builtin_neon_vuzpq_v: {
1883     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1884     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1885     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1886     Value *SV = 0;
1887     
1888     for (unsigned vi = 0; vi != 2; ++vi) {
1889       SmallVector<Constant*, 16> Indices;
1890       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1891         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1892
1893       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1894       SV = llvm::ConstantVector::get(Indices);
1895       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1896       SV = Builder.CreateStore(SV, Addr);
1897     }
1898     return SV;
1899   }
1900   case ARM::BI__builtin_neon_vzip_v: 
1901   case ARM::BI__builtin_neon_vzipq_v: {
1902     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1903     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1904     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1905     Value *SV = 0;
1906     
1907     for (unsigned vi = 0; vi != 2; ++vi) {
1908       SmallVector<Constant*, 16> Indices;
1909       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1910         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1911         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1912       }
1913       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1914       SV = llvm::ConstantVector::get(Indices);
1915       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
1916       SV = Builder.CreateStore(SV, Addr);
1917     }
1918     return SV;
1919   }
1920   }
1921 }
1922
1923 llvm::Value *CodeGenFunction::
1924 BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) {
1925   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
1926          "Not a power-of-two sized vector!");
1927   bool AllConstants = true;
1928   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
1929     AllConstants &= isa<Constant>(Ops[i]);
1930
1931   // If this is a constant vector, create a ConstantVector.
1932   if (AllConstants) {
1933     std::vector<llvm::Constant*> CstOps;
1934     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1935       CstOps.push_back(cast<Constant>(Ops[i]));
1936     return llvm::ConstantVector::get(CstOps);
1937   }
1938
1939   // Otherwise, insertelement the values to build the vector.
1940   Value *Result =
1941     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
1942
1943   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1944     Result = Builder.CreateInsertElement(Result, Ops[i],
1945                llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i));
1946
1947   return Result;
1948 }
1949
1950 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1951                                            const CallExpr *E) {
1952   llvm::SmallVector<Value*, 4> Ops;
1953
1954   // Find out if any arguments are required to be integer constant expressions.
1955   unsigned ICEArguments = 0;
1956   ASTContext::GetBuiltinTypeError Error;
1957   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1958   assert(Error == ASTContext::GE_None && "Should not codegen an error");
1959
1960   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
1961     // If this is a normal argument, just emit it as a scalar.
1962     if ((ICEArguments & (1 << i)) == 0) {
1963       Ops.push_back(EmitScalarExpr(E->getArg(i)));
1964       continue;
1965     }
1966
1967     // If this is required to be a constant, constant fold it so that we know
1968     // that the generated intrinsic gets a ConstantInt.
1969     llvm::APSInt Result;
1970     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
1971     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
1972     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
1973   }
1974
1975   switch (BuiltinID) {
1976   default: return 0;
1977   case X86::BI__builtin_ia32_pslldi128:
1978   case X86::BI__builtin_ia32_psllqi128:
1979   case X86::BI__builtin_ia32_psllwi128:
1980   case X86::BI__builtin_ia32_psradi128:
1981   case X86::BI__builtin_ia32_psrawi128:
1982   case X86::BI__builtin_ia32_psrldi128:
1983   case X86::BI__builtin_ia32_psrlqi128:
1984   case X86::BI__builtin_ia32_psrlwi128: {
1985     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1986     const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
1987     llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
1988     Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
1989                                          Ops[1], Zero, "insert");
1990     Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
1991     const char *name = 0;
1992     Intrinsic::ID ID = Intrinsic::not_intrinsic;
1993
1994     switch (BuiltinID) {
1995     default: assert(0 && "Unsupported shift intrinsic!");
1996     case X86::BI__builtin_ia32_pslldi128:
1997       name = "pslldi";
1998       ID = Intrinsic::x86_sse2_psll_d;
1999       break;
2000     case X86::BI__builtin_ia32_psllqi128:
2001       name = "psllqi";
2002       ID = Intrinsic::x86_sse2_psll_q;
2003       break;
2004     case X86::BI__builtin_ia32_psllwi128:
2005       name = "psllwi";
2006       ID = Intrinsic::x86_sse2_psll_w;
2007       break;
2008     case X86::BI__builtin_ia32_psradi128:
2009       name = "psradi";
2010       ID = Intrinsic::x86_sse2_psra_d;
2011       break;
2012     case X86::BI__builtin_ia32_psrawi128:
2013       name = "psrawi";
2014       ID = Intrinsic::x86_sse2_psra_w;
2015       break;
2016     case X86::BI__builtin_ia32_psrldi128:
2017       name = "psrldi";
2018       ID = Intrinsic::x86_sse2_psrl_d;
2019       break;
2020     case X86::BI__builtin_ia32_psrlqi128:
2021       name = "psrlqi";
2022       ID = Intrinsic::x86_sse2_psrl_q;
2023       break;
2024     case X86::BI__builtin_ia32_psrlwi128:
2025       name = "psrlwi";
2026       ID = Intrinsic::x86_sse2_psrl_w;
2027       break;
2028     }
2029     llvm::Function *F = CGM.getIntrinsic(ID);
2030     return Builder.CreateCall(F, Ops, name);
2031   }
2032   case X86::BI__builtin_ia32_vec_init_v8qi:
2033   case X86::BI__builtin_ia32_vec_init_v4hi:
2034   case X86::BI__builtin_ia32_vec_init_v2si:
2035     return Builder.CreateBitCast(BuildVector(Ops),
2036                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
2037   case X86::BI__builtin_ia32_vec_ext_v2si:
2038     return Builder.CreateExtractElement(Ops[0],
2039                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
2040   case X86::BI__builtin_ia32_pslldi:
2041   case X86::BI__builtin_ia32_psllqi:
2042   case X86::BI__builtin_ia32_psllwi:
2043   case X86::BI__builtin_ia32_psradi:
2044   case X86::BI__builtin_ia32_psrawi:
2045   case X86::BI__builtin_ia32_psrldi:
2046   case X86::BI__builtin_ia32_psrlqi:
2047   case X86::BI__builtin_ia32_psrlwi: {
2048     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
2049     const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
2050     Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
2051     const char *name = 0;
2052     Intrinsic::ID ID = Intrinsic::not_intrinsic;
2053
2054     switch (BuiltinID) {
2055     default: assert(0 && "Unsupported shift intrinsic!");
2056     case X86::BI__builtin_ia32_pslldi:
2057       name = "pslldi";
2058       ID = Intrinsic::x86_mmx_psll_d;
2059       break;
2060     case X86::BI__builtin_ia32_psllqi:
2061       name = "psllqi";
2062       ID = Intrinsic::x86_mmx_psll_q;
2063       break;
2064     case X86::BI__builtin_ia32_psllwi:
2065       name = "psllwi";
2066       ID = Intrinsic::x86_mmx_psll_w;
2067       break;
2068     case X86::BI__builtin_ia32_psradi:
2069       name = "psradi";
2070       ID = Intrinsic::x86_mmx_psra_d;
2071       break;
2072     case X86::BI__builtin_ia32_psrawi:
2073       name = "psrawi";
2074       ID = Intrinsic::x86_mmx_psra_w;
2075       break;
2076     case X86::BI__builtin_ia32_psrldi:
2077       name = "psrldi";
2078       ID = Intrinsic::x86_mmx_psrl_d;
2079       break;
2080     case X86::BI__builtin_ia32_psrlqi:
2081       name = "psrlqi";
2082       ID = Intrinsic::x86_mmx_psrl_q;
2083       break;
2084     case X86::BI__builtin_ia32_psrlwi:
2085       name = "psrlwi";
2086       ID = Intrinsic::x86_mmx_psrl_w;
2087       break;
2088     }
2089     llvm::Function *F = CGM.getIntrinsic(ID);
2090     return Builder.CreateCall(F, Ops, name);
2091   }
2092   case X86::BI__builtin_ia32_cmpps: {
2093     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
2094     return Builder.CreateCall(F, Ops, "cmpps");
2095   }
2096   case X86::BI__builtin_ia32_cmpss: {
2097     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
2098     return Builder.CreateCall(F, Ops, "cmpss");
2099   }
2100   case X86::BI__builtin_ia32_ldmxcsr: {
2101     const llvm::Type *PtrTy = Int8PtrTy;
2102     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2103     Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2104     Builder.CreateStore(Ops[0], Tmp);
2105     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2106                               Builder.CreateBitCast(Tmp, PtrTy));
2107   }
2108   case X86::BI__builtin_ia32_stmxcsr: {
2109     const llvm::Type *PtrTy = Int8PtrTy;
2110     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2111     Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2112     One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2113                              Builder.CreateBitCast(Tmp, PtrTy));
2114     return Builder.CreateLoad(Tmp, "stmxcsr");
2115   }
2116   case X86::BI__builtin_ia32_cmppd: {
2117     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
2118     return Builder.CreateCall(F, Ops, "cmppd");
2119   }
2120   case X86::BI__builtin_ia32_cmpsd: {
2121     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
2122     return Builder.CreateCall(F, Ops, "cmpsd");
2123   }
2124   case X86::BI__builtin_ia32_storehps:
2125   case X86::BI__builtin_ia32_storelps: {
2126     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2127     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2128
2129     // cast val v2i64
2130     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2131
2132     // extract (0, 1)
2133     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2134     llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2135     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2136
2137     // cast pointer to i64 & store
2138     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2139     return Builder.CreateStore(Ops[1], Ops[0]);
2140   }
2141   case X86::BI__builtin_ia32_palignr: {
2142     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2143     
2144     // If palignr is shifting the pair of input vectors less than 9 bytes,
2145     // emit a shuffle instruction.
2146     if (shiftVal <= 8) {
2147       llvm::SmallVector<llvm::Constant*, 8> Indices;
2148       for (unsigned i = 0; i != 8; ++i)
2149         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2150       
2151       Value* SV = llvm::ConstantVector::get(Indices);
2152       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2153     }
2154     
2155     // If palignr is shifting the pair of input vectors more than 8 but less
2156     // than 16 bytes, emit a logical right shift of the destination.
2157     if (shiftVal < 16) {
2158       // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2159       const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2160       
2161       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2162       Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2163       
2164       // create i32 constant
2165       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2166       return Builder.CreateCall(F, ArrayRef<Value *>(&Ops[0], 2), "palignr");
2167     }
2168     
2169     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2170     return llvm::Constant::getNullValue(ConvertType(E->getType()));
2171   }
2172   case X86::BI__builtin_ia32_palignr128: {
2173     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2174     
2175     // If palignr is shifting the pair of input vectors less than 17 bytes,
2176     // emit a shuffle instruction.
2177     if (shiftVal <= 16) {
2178       llvm::SmallVector<llvm::Constant*, 16> Indices;
2179       for (unsigned i = 0; i != 16; ++i)
2180         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2181       
2182       Value* SV = llvm::ConstantVector::get(Indices);
2183       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2184     }
2185     
2186     // If palignr is shifting the pair of input vectors more than 16 but less
2187     // than 32 bytes, emit a logical right shift of the destination.
2188     if (shiftVal < 32) {
2189       const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2190       
2191       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2192       Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2193       
2194       // create i32 constant
2195       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2196       return Builder.CreateCall(F, ArrayRef<Value *>(&Ops[0], 2), "palignr");
2197     }
2198     
2199     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2200     return llvm::Constant::getNullValue(ConvertType(E->getType()));
2201   }
2202   case X86::BI__builtin_ia32_movntps:
2203   case X86::BI__builtin_ia32_movntpd:
2204   case X86::BI__builtin_ia32_movntdq:
2205   case X86::BI__builtin_ia32_movnti: {
2206     llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
2207                                            Builder.getInt32(1));
2208
2209     // Convert the type of the pointer to a pointer to the stored type.
2210     Value *BC = Builder.CreateBitCast(Ops[0],
2211                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
2212                                       "cast");
2213     StoreInst *SI = Builder.CreateStore(Ops[1], BC);
2214     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
2215     SI->setAlignment(16);
2216     return SI;
2217   }
2218   // 3DNow!
2219   case X86::BI__builtin_ia32_pavgusb:
2220   case X86::BI__builtin_ia32_pf2id:
2221   case X86::BI__builtin_ia32_pfacc:
2222   case X86::BI__builtin_ia32_pfadd:
2223   case X86::BI__builtin_ia32_pfcmpeq:
2224   case X86::BI__builtin_ia32_pfcmpge:
2225   case X86::BI__builtin_ia32_pfcmpgt:
2226   case X86::BI__builtin_ia32_pfmax:
2227   case X86::BI__builtin_ia32_pfmin:
2228   case X86::BI__builtin_ia32_pfmul:
2229   case X86::BI__builtin_ia32_pfrcp:
2230   case X86::BI__builtin_ia32_pfrcpit1:
2231   case X86::BI__builtin_ia32_pfrcpit2:
2232   case X86::BI__builtin_ia32_pfrsqrt:
2233   case X86::BI__builtin_ia32_pfrsqit1:
2234   case X86::BI__builtin_ia32_pfrsqrtit1:
2235   case X86::BI__builtin_ia32_pfsub:
2236   case X86::BI__builtin_ia32_pfsubr:
2237   case X86::BI__builtin_ia32_pi2fd:
2238   case X86::BI__builtin_ia32_pmulhrw:
2239   case X86::BI__builtin_ia32_pf2iw:
2240   case X86::BI__builtin_ia32_pfnacc:
2241   case X86::BI__builtin_ia32_pfpnacc:
2242   case X86::BI__builtin_ia32_pi2fw:
2243   case X86::BI__builtin_ia32_pswapdsf:
2244   case X86::BI__builtin_ia32_pswapdsi: {
2245     const char *name = 0;
2246     Intrinsic::ID ID = Intrinsic::not_intrinsic;
2247     switch(BuiltinID) {
2248     case X86::BI__builtin_ia32_pavgusb:
2249       name = "pavgusb";
2250       ID = Intrinsic::x86_3dnow_pavgusb;
2251       break;
2252     case X86::BI__builtin_ia32_pf2id:
2253       name = "pf2id";
2254       ID = Intrinsic::x86_3dnow_pf2id;
2255       break;
2256     case X86::BI__builtin_ia32_pfacc:
2257       name = "pfacc";
2258       ID = Intrinsic::x86_3dnow_pfacc;
2259       break;
2260     case X86::BI__builtin_ia32_pfadd:
2261       name = "pfadd";
2262       ID = Intrinsic::x86_3dnow_pfadd;
2263       break;
2264     case X86::BI__builtin_ia32_pfcmpeq:
2265       name = "pfcmpeq";
2266       ID = Intrinsic::x86_3dnow_pfcmpeq;
2267       break;
2268     case X86::BI__builtin_ia32_pfcmpge:
2269       name = "pfcmpge";
2270       ID = Intrinsic::x86_3dnow_pfcmpge;
2271       break;
2272     case X86::BI__builtin_ia32_pfcmpgt:
2273       name = "pfcmpgt";
2274       ID = Intrinsic::x86_3dnow_pfcmpgt;
2275       break;
2276     case X86::BI__builtin_ia32_pfmax:
2277       name = "pfmax";
2278       ID = Intrinsic::x86_3dnow_pfmax;
2279       break;
2280     case X86::BI__builtin_ia32_pfmin:
2281       name = "pfmin";
2282       ID = Intrinsic::x86_3dnow_pfmin;
2283       break;
2284     case X86::BI__builtin_ia32_pfmul:
2285       name = "pfmul";
2286       ID = Intrinsic::x86_3dnow_pfmul;
2287       break;
2288     case X86::BI__builtin_ia32_pfrcp:
2289       name = "pfrcp";
2290       ID = Intrinsic::x86_3dnow_pfrcp;
2291       break;
2292     case X86::BI__builtin_ia32_pfrcpit1:
2293       name = "pfrcpit1";
2294       ID = Intrinsic::x86_3dnow_pfrcpit1;
2295       break;
2296     case X86::BI__builtin_ia32_pfrcpit2:
2297       name = "pfrcpit2";
2298       ID = Intrinsic::x86_3dnow_pfrcpit2;
2299       break;
2300     case X86::BI__builtin_ia32_pfrsqrt:
2301       name = "pfrsqrt";
2302       ID = Intrinsic::x86_3dnow_pfrsqrt;
2303       break;
2304     case X86::BI__builtin_ia32_pfrsqit1:
2305     case X86::BI__builtin_ia32_pfrsqrtit1:
2306       name = "pfrsqit1";
2307       ID = Intrinsic::x86_3dnow_pfrsqit1;
2308       break;
2309     case X86::BI__builtin_ia32_pfsub:
2310       name = "pfsub";
2311       ID = Intrinsic::x86_3dnow_pfsub;
2312       break;
2313     case X86::BI__builtin_ia32_pfsubr:
2314       name = "pfsubr";
2315       ID = Intrinsic::x86_3dnow_pfsubr;
2316       break;
2317     case X86::BI__builtin_ia32_pi2fd:
2318       name = "pi2fd";
2319       ID = Intrinsic::x86_3dnow_pi2fd;
2320       break;
2321     case X86::BI__builtin_ia32_pmulhrw:
2322       name = "pmulhrw";
2323       ID = Intrinsic::x86_3dnow_pmulhrw;
2324       break;
2325     case X86::BI__builtin_ia32_pf2iw:
2326       name = "pf2iw";
2327       ID = Intrinsic::x86_3dnowa_pf2iw;
2328       break;
2329     case X86::BI__builtin_ia32_pfnacc:
2330       name = "pfnacc";
2331       ID = Intrinsic::x86_3dnowa_pfnacc;
2332       break;
2333     case X86::BI__builtin_ia32_pfpnacc:
2334       name = "pfpnacc";
2335       ID = Intrinsic::x86_3dnowa_pfpnacc;
2336       break;
2337     case X86::BI__builtin_ia32_pi2fw:
2338       name = "pi2fw";
2339       ID = Intrinsic::x86_3dnowa_pi2fw;
2340       break;
2341     case X86::BI__builtin_ia32_pswapdsf:
2342     case X86::BI__builtin_ia32_pswapdsi:
2343       name = "pswapd";
2344       ID = Intrinsic::x86_3dnowa_pswapd;
2345       break;
2346     }
2347     llvm::Function *F = CGM.getIntrinsic(ID);
2348     return Builder.CreateCall(F, Ops, name);
2349   }
2350   }
2351 }
2352
2353 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2354                                            const CallExpr *E) {
2355   llvm::SmallVector<Value*, 4> Ops;
2356
2357   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2358     Ops.push_back(EmitScalarExpr(E->getArg(i)));
2359
2360   Intrinsic::ID ID = Intrinsic::not_intrinsic;
2361
2362   switch (BuiltinID) {
2363   default: return 0;
2364
2365   // vec_ld, vec_lvsl, vec_lvsr
2366   case PPC::BI__builtin_altivec_lvx:
2367   case PPC::BI__builtin_altivec_lvxl:
2368   case PPC::BI__builtin_altivec_lvebx:
2369   case PPC::BI__builtin_altivec_lvehx:
2370   case PPC::BI__builtin_altivec_lvewx:
2371   case PPC::BI__builtin_altivec_lvsl:
2372   case PPC::BI__builtin_altivec_lvsr:
2373   {
2374     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
2375
2376     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
2377     Ops.pop_back();
2378
2379     switch (BuiltinID) {
2380     default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
2381     case PPC::BI__builtin_altivec_lvx:
2382       ID = Intrinsic::ppc_altivec_lvx;
2383       break;
2384     case PPC::BI__builtin_altivec_lvxl:
2385       ID = Intrinsic::ppc_altivec_lvxl;
2386       break;
2387     case PPC::BI__builtin_altivec_lvebx:
2388       ID = Intrinsic::ppc_altivec_lvebx;
2389       break;
2390     case PPC::BI__builtin_altivec_lvehx:
2391       ID = Intrinsic::ppc_altivec_lvehx;
2392       break;
2393     case PPC::BI__builtin_altivec_lvewx:
2394       ID = Intrinsic::ppc_altivec_lvewx;
2395       break;
2396     case PPC::BI__builtin_altivec_lvsl:
2397       ID = Intrinsic::ppc_altivec_lvsl;
2398       break;
2399     case PPC::BI__builtin_altivec_lvsr:
2400       ID = Intrinsic::ppc_altivec_lvsr;
2401       break;
2402     }
2403     llvm::Function *F = CGM.getIntrinsic(ID);
2404     return Builder.CreateCall(F, Ops, "");
2405   }
2406
2407   // vec_st
2408   case PPC::BI__builtin_altivec_stvx:
2409   case PPC::BI__builtin_altivec_stvxl:
2410   case PPC::BI__builtin_altivec_stvebx:
2411   case PPC::BI__builtin_altivec_stvehx:
2412   case PPC::BI__builtin_altivec_stvewx:
2413   {
2414     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
2415     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
2416     Ops.pop_back();
2417
2418     switch (BuiltinID) {
2419     default: assert(0 && "Unsupported st intrinsic!");
2420     case PPC::BI__builtin_altivec_stvx:
2421       ID = Intrinsic::ppc_altivec_stvx;
2422       break;
2423     case PPC::BI__builtin_altivec_stvxl:
2424       ID = Intrinsic::ppc_altivec_stvxl;
2425       break;
2426     case PPC::BI__builtin_altivec_stvebx:
2427       ID = Intrinsic::ppc_altivec_stvebx;
2428       break;
2429     case PPC::BI__builtin_altivec_stvehx:
2430       ID = Intrinsic::ppc_altivec_stvehx;
2431       break;
2432     case PPC::BI__builtin_altivec_stvewx:
2433       ID = Intrinsic::ppc_altivec_stvewx;
2434       break;
2435     }
2436     llvm::Function *F = CGM.getIntrinsic(ID);
2437     return Builder.CreateCall(F, Ops, "");
2438   }
2439   }
2440   return 0;
2441 }