//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/DataLayout.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm;
/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
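// For illustration: GetName() on BI__builtin_fabsf returns "__builtin_fabsf",
// and the "+ 10" above simply skips the 10-character "__builtin_" prefix,
// yielding the library name "fabsf".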
/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
    CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
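// A sketch of what EmitBinaryAtomic produces for __sync_fetch_and_add(&i, 1)
// on a 32-bit 'int' (assuming address space 0):
//   %old = atomicrmw add i32* %i, i32 1 seq_cst
// The pre-operation value %old is the builtin's result.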
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
    CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
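// Sketch for the *_and_fetch forms: __sync_add_and_fetch(&i, 1) emits the
// same atomicrmw as above and then re-applies the operation to produce the
// post-operation value:
//   %old = atomicrmw add i32* %i, i32 1 seq_cst
//   %new = add i32 %old, 1        ; the builtin's result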
/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: llvm_unreachable("Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
                                                   false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}
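// For example, a 'float' argument produces 'call float @fabsf(float %v)',
// and a 'long double' argument a call to fabsl at the corresponding type.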
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }
  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
                                                       Builder.getTrue()),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));

    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
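// Sketch for __builtin_expect(x, 1) with 'long' x on a 64-bit target:
//   %expval = call i64 @llvm.expect.i64(i64 %x, i64 1)
// The optimizer consumes the hint and then folds the call away to %x.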
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
  }
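// E.g. __builtin_object_size(p, 2) passes (2 & 0x2) >> 1 == 1 as the i1
// "minimum" flag, roughly:
//   %size = call i64 @llvm.objectsize.i64(i8* %p, i1 true)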
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__debugbreak: {
    Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (getLangOpts().SanitizeUnreachable)
      EmitCheck(Builder.getFalse(), "builtin_unreachable",
                EmitCheckSourceLocation(E->getExprLoc()),
                llvm::ArrayRef<llvm::Value *>());
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }
  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity;
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
                         Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
      EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
      EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }
  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
      EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
      EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
      EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend? Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms. Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
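// Buffer layout sketch (as assumed by the SjLj lowering): treating Buf as an
// array of pointer-sized slots, slot 0 receives the frame pointer and slot 2
// the stack pointer above; the llvm.eh.sjlj.setjmp lowering fills in the
// remaining state (e.g. the resume address) itself.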
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }
  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                 llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }
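// Sketch for __sync_bool_compare_and_swap(&i, old, new) on a 32-bit 'int':
//   %prev = cmpxchg i32* %i, i32 %old, i32 %new seq_cst
//   %ok   = icmp eq i32 %prev, %old
//   %res  = zext i1 %ok to i32
// (In this LLVM version cmpxchg yields the original memory value directly.)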
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(0);
  }
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // ordering.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(0);
  }
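// i.e. __sync_synchronize() lowers to a single 'fence seq_cst' instruction.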
  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
                                             FunctionType::ExtInfo(),
                                             RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }
  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
      PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = 0;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Monotonic);
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Acquire);
        break;
      case 3:  // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Release);
        break;
      case 4:  // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::AcquireRelease);
        break;
      case 5:  // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
      llvm::Monotonic, llvm::Acquire, llvm::Release,
      llvm::AcquireRelease, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }
  case Builtin::BI__atomic_clear: {
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
      PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::Monotonic);
        break;
      case 3:  // memory_order_release
        Store->setOrdering(llvm::Release);
        break;
      case 5:  // memory_order_seq_cst
        Store->setOrdering(llvm::SequentiallyConsistent);
        break;
      }
      return RValue::get(0);
    }
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(0);
  }
  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    llvm::SynchronizationScope Scope;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      Scope = llvm::SingleThread;
    else
      Scope = llvm::CrossThread;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::Acquire, Scope);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::Release, Scope);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AcquireRelease, Scope);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
        break;
      }
      return RValue::get(0);
    }
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(0);
  }
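// For example, a constant __ATOMIC_ACQUIRE (2) argument collapses to a single
// 'fence acquire' (with singlethread scope for the signal-fence variants); a
// non-constant ordering gets the switch-over-orderings CFG built above.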
    // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }
  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to intrinsic if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }
  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(Builder.CreateCall3(F, FirstArg,
                                           EmitScalarExpr(E->getArg(1)),
                                           EmitScalarExpr(E->getArg(2))));
  }
  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
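// Sketch for __builtin_signbit(double x): the value is bitcast to an integer
// of the same width and the sign bit is tested with a signed compare:
//   %bits = bitcast double %x to i64
//   %sign = icmp slt i64 %bits, 0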
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially casted, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  }
  case Builtin::BI__noop:
    return RValue::get(0);
  }
  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));

  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
        llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        llvm::APSInt Result;
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
        assert(IsConst && "Constant arg isn't actually constant?");
        (void)IsConst;
        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
      }

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }
    Value *V = Builder.CreateCall(F, Args);
    QualType BuiltinRetType = E->getType();

    llvm::Type *RetTy = VoidTy;
    if (!BuiltinRetType->isVoidType())
      RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }
  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
                                     NeonTypeFlags TypeFlags) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
  case NeonTypeFlags::Float16:
    return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
  case NeonTypeFlags::Int32:
    return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
  case NeonTypeFlags::Int64:
    return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
  case NeonTypeFlags::Float32:
    return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
  }
  llvm_unreachable("Invalid NeonTypeFlags element type!");
}
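// E.g. an Int32 element type maps to <2 x i32> (a 64-bit D register) and,
// with the quad flag set, to <4 x i32> (a 128-bit Q register); the
// '<< IsQuad' shifts above double the element count accordingly.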
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  return Builder.CreateCall(F, Ops, name);
}
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();

  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
}
/// EmitPointerWithAlignment - Given an expression with a pointer type, emit
/// the value and find the alignment of the type referenced by the pointer.
/// Skip over implicit casts.
std::pair<llvm::Value*, unsigned>
CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
  assert(Addr->getType()->isPointerType());
  Addr = Addr->IgnoreParens();
  if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
    if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
        ICE->getSubExpr()->getType()->isPointerType()) {
      std::pair<llvm::Value*, unsigned> Ptr =
        EmitPointerWithAlignment(ICE->getSubExpr());
      Ptr.first = Builder.CreateBitCast(Ptr.first,
                                        ConvertType(Addr->getType()));
      return Ptr;
    } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
      LValue LV = EmitLValue(ICE->getSubExpr());
      unsigned Align = LV.getAlignment().getQuantity();
      if (!Align) {
        // FIXME: Once LValues are fixed to always set alignment,
        // cease this use of getTypeAlignInChars.
        QualType PtTy = ICE->getSubExpr()->getType();
        if (!PtTy->isIncompleteType())
          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
        else
          Align = 1;
      }
      return std::make_pair(LV.getAddress(), Align);
    }
  }
  if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
    if (UO->getOpcode() == UO_AddrOf) {
      LValue LV = EmitLValue(UO->getSubExpr());
      unsigned Align = LV.getAlignment().getQuantity();
      if (!Align) {
        // FIXME: Once LValues are fixed to always set alignment,
        // cease this use of getTypeAlignInChars.
        QualType PtTy = UO->getSubExpr()->getType();
        if (!PtTy->isIncompleteType())
          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
        else
          Align = 1;
      }
      return std::make_pair(LV.getAddress(), Align);
    }
  }

  unsigned Align = 1;
  QualType PtTy = Addr->getType()->getPointeeType();
  if (!PtTy->isIncompleteType())
    Align = getContext().getTypeAlignInChars(PtTy).getQuantity();

  return std::make_pair(EmitScalarExpr(Addr), Align);
}
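// Callers (the memcpy/memset-style cases above and the NEON load/store cases
// below) use the returned (pointer, alignment) pair to parameterize memory
// intrinsics instead of assuming the type's ABI alignment.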
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    const FunctionDecl *FD = E->getDirectCallee();
    // Oddly, people occasionally write this call without arguments and GCC
    // accepts it - it's also marked as varargs in the description file.
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < E->getNumArgs(); i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }
1546 if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
1547 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1549 Value *LdPtr = EmitScalarExpr(E->getArg(0));
1550 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
1552 Value *Val0 = Builder.CreateExtractValue(Val, 1);
1553 Value *Val1 = Builder.CreateExtractValue(Val, 0);
1554 Val0 = Builder.CreateZExt(Val0, Int64Ty);
1555 Val1 = Builder.CreateZExt(Val1, Int64Ty);
1557 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1558 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1559 return Builder.CreateOr(Val, Val1);
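
  // Rough shape of the IR emitted for __builtin_arm_ldrexd (illustrative):
  //   %val = call { i32, i32 } @llvm.arm.ldrexd(i8* %ptr)
  //   %hi  = extractvalue { i32, i32 } %val, 1
  //   %lo  = extractvalue { i32, i32 } %val, 0
  // followed by zexts of both halves to i64, a left shift of the high half
  // by 32, and an or to form the 64-bit result.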
  if (BuiltinID == ARM::BI__builtin_arm_strexd) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);

    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
  }
  SmallVector<Value*, 4> Ops;
  llvm::Value *Align = 0;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case ARM::BI__builtin_neon_vld1_v:
      case ARM::BI__builtin_neon_vld1q_v:
      case ARM::BI__builtin_neon_vld1q_lane_v:
      case ARM::BI__builtin_neon_vld1_lane_v:
      case ARM::BI__builtin_neon_vld1_dup_v:
      case ARM::BI__builtin_neon_vld1q_dup_v:
      case ARM::BI__builtin_neon_vst1_v:
      case ARM::BI__builtin_neon_vst1q_v:
      case ARM::BI__builtin_neon_vst1q_lane_v:
      case ARM::BI__builtin_neon_vst1_lane_v:
      case ARM::BI__builtin_neon_vst2_v:
      case ARM::BI__builtin_neon_vst2q_v:
      case ARM::BI__builtin_neon_vst2_lane_v:
      case ARM::BI__builtin_neon_vst2q_lane_v:
      case ARM::BI__builtin_neon_vst3_v:
      case ARM::BI__builtin_neon_vst3q_v:
      case ARM::BI__builtin_neon_vst3_lane_v:
      case ARM::BI__builtin_neon_vst3q_lane_v:
      case ARM::BI__builtin_neon_vst4_v:
      case ARM::BI__builtin_neon_vst4q_v:
      case ARM::BI__builtin_neon_vst4_lane_v:
      case ARM::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
          EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case ARM::BI__builtin_neon_vld2_v:
      case ARM::BI__builtin_neon_vld2q_v:
      case ARM::BI__builtin_neon_vld3_v:
      case ARM::BI__builtin_neon_vld3q_v:
      case ARM::BI__builtin_neon_vld4_v:
      case ARM::BI__builtin_neon_vld4q_v:
      case ARM::BI__builtin_neon_vld2_lane_v:
      case ARM::BI__builtin_neon_vld2q_lane_v:
      case ARM::BI__builtin_neon_vld3_lane_v:
      case ARM::BI__builtin_neon_vld3q_lane_v:
      case ARM::BI__builtin_neon_vld4_lane_v:
      case ARM::BI__builtin_neon_vld4q_lane_v:
      case ARM::BI__builtin_neon_vld2_dup_v:
      case ARM::BI__builtin_neon_vld3_dup_v:
      case ARM::BI__builtin_neon_vld4_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
          EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }

  // vget_lane and vset_lane are not overloaded and do not have an extra
  // argument that specifies the vector type.
  switch (BuiltinID) {
  default: break;
  case ARM::BI__builtin_neon_vget_lane_i8:
  case ARM::BI__builtin_neon_vget_lane_i16:
  case ARM::BI__builtin_neon_vget_lane_i32:
  case ARM::BI__builtin_neon_vget_lane_i64:
  case ARM::BI__builtin_neon_vget_lane_f32:
  case ARM::BI__builtin_neon_vgetq_lane_i8:
  case ARM::BI__builtin_neon_vgetq_lane_i16:
  case ARM::BI__builtin_neon_vgetq_lane_i32:
  case ARM::BI__builtin_neon_vgetq_lane_i64:
  case ARM::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case ARM::BI__builtin_neon_vset_lane_i8:
  case ARM::BI__builtin_neon_vset_lane_i16:
  case ARM::BI__builtin_neon_vset_lane_i32:
  case ARM::BI__builtin_neon_vset_lane_i64:
  case ARM::BI__builtin_neon_vset_lane_f32:
  case ARM::BI__builtin_neon_vsetq_lane_i8:
  case ARM::BI__builtin_neon_vsetq_lane_i16:
  case ARM::BI__builtin_neon_vsetq_lane_i32:
  case ARM::BI__builtin_neon_vsetq_lane_i64:
  case ARM::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  }
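
  // Illustrative IR for the lane accessors above (added sketch):
  // vgetq_lane_f32(v, 1) becomes an extractelement and
  // vsetq_lane_f32(x, v, 1) an insertelement:
  //   %r = extractelement <4 x float> %v, i32 1
  //   %w = insertelement <4 x float> %v, float %x, i32 1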
  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default: return 0;
  case ARM::BI__builtin_neon_vbsl_v:
  case ARM::BI__builtin_neon_vbslq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
                        Ops, "vbsl");
  case ARM::BI__builtin_neon_vabd_v:
  case ARM::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case ARM::BI__builtin_neon_vabs_v:
  case ARM::BI__builtin_neon_vabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
                        Ops, "vabs");
  case ARM::BI__builtin_neon_vaddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
                        Ops, "vaddhn");
  case ARM::BI__builtin_neon_vcale_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcage_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcaleq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcageq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcalt_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagt_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagtq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcls_v:
  case ARM::BI__builtin_neon_vclsq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
    return EmitNeonCall(F, Ops, "vcls");
  }
  case ARM::BI__builtin_neon_vclz_v:
  case ARM::BI__builtin_neon_vclzq_v: {
    // Generate target-independent intrinsic; also need to add second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
    Ops.push_back(Builder.getInt1(Target.isCLZForZeroUndef()));
    return EmitNeonCall(F, Ops, "vclz");
  }
  case ARM::BI__builtin_neon_vcnt_v:
  case ARM::BI__builtin_neon_vcntq_v: {
    // Generate target-independent intrinsic.
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
    return EmitNeonCall(F, Ops, "vctpop");
  }
  case ARM::BI__builtin_neon_vcvt_f16_v: {
    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
           "unexpected vcvt_f16_v builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_f16: {
    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
           "unexpected vcvt_f32_f16 builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_v:
  case ARM::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case ARM::BI__builtin_neon_vcvt_s32_v:
  case ARM::BI__builtin_neon_vcvt_u32_v:
  case ARM::BI__builtin_neon_vcvtq_s32_v:
  case ARM::BI__builtin_neon_vcvtq_u32_v: {
    llvm::Type *FloatTy =
      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_n_f32_v:
  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
    llvm::Type *FloatTy =
      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
    llvm::Type *Tys[2] = { FloatTy, Ty };
    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
               : Intrinsic::arm_neon_vcvtfxs2fp;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vcvt_n_s32_v:
  case ARM::BI__builtin_neon_vcvt_n_u32_v:
  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
    llvm::Type *FloatTy =
      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
    llvm::Type *Tys[2] = { Ty, FloatTy };
    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
               : Intrinsic::arm_neon_vcvtfp2fxs;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vext_v:
  case ARM::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case ARM::BI__builtin_neon_vhadd_v:
  case ARM::BI__builtin_neon_vhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
  case ARM::BI__builtin_neon_vhsub_v:
  case ARM::BI__builtin_neon_vhsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
  case ARM::BI__builtin_neon_vld1_v:
  case ARM::BI__builtin_neon_vld1q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
                        Ops, "vld1");
  case ARM::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
      // Combine them.
      SmallVector<Constant*, 2> Indices;
      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
      SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
    }
    // fall through
  case ARM::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case ARM::BI__builtin_neon_vld1_dup_v:
  case ARM::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
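
  // Illustrative IR for vld1_dup (added sketch): a scalar load followed by
  // an insert into lane 0 and a splat shuffle, e.g. for vld1_dup_u32:
  //   %v = load i32* %p, align 4
  //   %t = insertelement <2 x i32> undef, i32 %v, i32 0
  //   %r = shufflevector <2 x i32> %t, <2 x i32> %t, <2 x i32> zeroinitializer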
  case ARM::BI__builtin_neon_vld2_v:
  case ARM::BI__builtin_neon_vld2q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_v:
  case ARM::BI__builtin_neon_vld3q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_v:
  case ARM::BI__builtin_neon_vld4q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_lane_v:
  case ARM::BI__builtin_neon_vld2q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops.push_back(Align);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_lane_v:
  case ARM::BI__builtin_neon_vld3q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops.push_back(Align);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_lane_v:
  case ARM::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
    Ops.push_back(Align);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_dup_v:
  case ARM::BI__builtin_neon_vld3_dup_v:
  case ARM::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case ARM::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case ARM::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case ARM::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      Function *F = CGM.getIntrinsic(Int, Ty);
      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case ARM::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case ARM::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case ARM::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(Align);

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // Splat lane 0 to all elements in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
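
  // Note on the vld_dup lowering above (added): there is no dedicated
  // "load and duplicate" intrinsic for the structured loads, so the code
  // loads lane 0 with a vldN-lane intrinsic into otherwise-undef vectors,
  // then broadcasts lane 0 across each returned vector via EmitNeonSplat.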
  case ARM::BI__builtin_neon_vmax_v:
  case ARM::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case ARM::BI__builtin_neon_vmin_v:
  case ARM::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case ARM::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case ARM::BI__builtin_neon_vmovn_v: {
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case ARM::BI__builtin_neon_vmul_v:
  case ARM::BI__builtin_neon_vmulq_v:
    assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
                        Ops, "vmul");
  case ARM::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case ARM::BI__builtin_neon_vfma_v:
  case ARM::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
  }
  case ARM::BI__builtin_neon_vpadal_v:
  case ARM::BI__builtin_neon_vpadalq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
  }
  case ARM::BI__builtin_neon_vpadd_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
                        Ops, "vpadd");
  case ARM::BI__builtin_neon_vpaddl_v:
  case ARM::BI__builtin_neon_vpaddlq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case ARM::BI__builtin_neon_vpmax_v:
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case ARM::BI__builtin_neon_vpmin_v:
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case ARM::BI__builtin_neon_vqabs_v:
  case ARM::BI__builtin_neon_vqabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
                        Ops, "vqabs");
  case ARM::BI__builtin_neon_vqadd_v:
  case ARM::BI__builtin_neon_vqaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
  case ARM::BI__builtin_neon_vqdmlal_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
                        Ops, "vqdmlal");
  case ARM::BI__builtin_neon_vqdmlsl_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
                        Ops, "vqdmlsl");
  case ARM::BI__builtin_neon_vqdmulh_v:
  case ARM::BI__builtin_neon_vqdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
                        Ops, "vqdmulh");
  case ARM::BI__builtin_neon_vqdmull_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
                        Ops, "vqdmull");
  case ARM::BI__builtin_neon_vqmovn_v:
    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
  case ARM::BI__builtin_neon_vqmovun_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
                        Ops, "vqmovun");
  case ARM::BI__builtin_neon_vqneg_v:
  case ARM::BI__builtin_neon_vqnegq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
                        Ops, "vqneg");
  case ARM::BI__builtin_neon_vqrdmulh_v:
  case ARM::BI__builtin_neon_vqrdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
                        Ops, "vqrdmulh");
  case ARM::BI__builtin_neon_vqrshl_v:
  case ARM::BI__builtin_neon_vqrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
  case ARM::BI__builtin_neon_vqrshrn_n_v:
    Int =
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqshl_v:
  case ARM::BI__builtin_neon_vqshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
  case ARM::BI__builtin_neon_vqshl_n_v:
  case ARM::BI__builtin_neon_vqshlq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case ARM::BI__builtin_neon_vqshlu_n_v:
  case ARM::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
                        Ops, "vqshlu", 1, false);
  case ARM::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqsub_v:
  case ARM::BI__builtin_neon_vqsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
  case ARM::BI__builtin_neon_vraddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
                        Ops, "vraddhn");
  case ARM::BI__builtin_neon_vrecpe_v:
  case ARM::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case ARM::BI__builtin_neon_vrecps_v:
  case ARM::BI__builtin_neon_vrecpsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
                        Ops, "vrecps");
  case ARM::BI__builtin_neon_vrhadd_v:
  case ARM::BI__builtin_neon_vrhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
  case ARM::BI__builtin_neon_vrshl_v:
  case ARM::BI__builtin_neon_vrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
  case ARM::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case ARM::BI__builtin_neon_vrshr_n_v:
  case ARM::BI__builtin_neon_vrshrq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
  case ARM::BI__builtin_neon_vrsqrte_v:
  case ARM::BI__builtin_neon_vrsqrteq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
                        Ops, "vrsqrte");
  case ARM::BI__builtin_neon_vrsqrts_v:
  case ARM::BI__builtin_neon_vrsqrtsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
                        Ops, "vrsqrts");
  case ARM::BI__builtin_neon_vrsra_n_v:
  case ARM::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case ARM::BI__builtin_neon_vrsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
                        Ops, "vrsubhn");
  case ARM::BI__builtin_neon_vshl_v:
  case ARM::BI__builtin_neon_vshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
  case ARM::BI__builtin_neon_vshll_n_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
  case ARM::BI__builtin_neon_vshl_n_v:
  case ARM::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case ARM::BI__builtin_neon_vshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
                        Ops, "vshrn_n", 1, true);
  case ARM::BI__builtin_neon_vshr_n_v:
  case ARM::BI__builtin_neon_vshrq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    if (usgn)
      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
    else
      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
  case ARM::BI__builtin_neon_vsri_n_v:
  case ARM::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
  case ARM::BI__builtin_neon_vsli_n_v:
  case ARM::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case ARM::BI__builtin_neon_vsra_n_v:
  case ARM::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
    if (usgn)
      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
    else
      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vst1_v:
  case ARM::BI__builtin_neon_vst1q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = Align;
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Ops[1]->getType()), Ops);
    }
    // fall through
  case ARM::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    StoreInst *St = Builder.CreateStore(Ops[1],
                                        Builder.CreateBitCast(Ops[0], Ty));
    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return St;
  }
  case ARM::BI__builtin_neon_vst2_v:
  case ARM::BI__builtin_neon_vst2q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst2_lane_v:
  case ARM::BI__builtin_neon_vst2q_lane_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_v:
  case ARM::BI__builtin_neon_vst3q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_lane_v:
  case ARM::BI__builtin_neon_vst3q_lane_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_v:
  case ARM::BI__builtin_neon_vst4q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_lane_v:
  case ARM::BI__builtin_neon_vst4q_lane_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
                        Ops, "");
  case ARM::BI__builtin_neon_vsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
                        Ops, "vsubhn");
  case ARM::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case ARM::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case ARM::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case ARM::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case ARM::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case ARM::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case ARM::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case ARM::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  case ARM::BI__builtin_neon_vtst_v:
  case ARM::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
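
  // The vtst lowering above needs no dedicated intrinsic; for a <8 x i8>
  // input it emits plain IR along these lines (illustrative sketch):
  //   %and = and <8 x i8> %a, %b
  //   %cmp = icmp ne <8 x i8> %and, zeroinitializer
  //   %res = sext <8 x i1> %cmp to <8 x i8>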
  case ARM::BI__builtin_neon_vtrn_v:
  case ARM::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vuzp_v:
  case ARM::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vzip_v:
  case ARM::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }
}
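
// Shuffle masks produced above, for a 4-element vector (worked example,
// added): vtrn stores <0,4,2,6> then <1,5,3,7>, vuzp stores <0,2,4,6> then
// <1,3,5,7>, and vzip stores <0,4,1,5> then <2,6,3,7>, writing each result
// to consecutive slots of the sret pointer in Ops[0].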
llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}
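
// Usage sketch (added; mirrors the MMX vec_init cases below): given four
// i16 operands, BuildVector returns a single ConstantVector when every
// operand folds to a Constant, and otherwise a chain of insertelement
// instructions into an undef <4 x i16>.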
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return 0;
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_ldmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast the value to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or element 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 9 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      // Emit the shift intrinsic with the amount expressed in bits.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
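
  // Worked example (added): for __builtin_ia32_palignr with shiftVal == 3,
  // the mask built above is <3,4,5,6,7,8,9,10>, selecting a byte window
  // from the 16-element concatenation of Ops[1] (elements 0-7) and Ops[0]
  // (elements 8-15).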
  case X86::BI__builtin_ia32_palignr128: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // Emit the shift intrinsic with the amount expressed in bits.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr256: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 32> Indices;
      // 256-bit palignr operates on 128-bit lanes so we need to handle that.
      for (unsigned l = 0; l != 2; ++l) {
        unsigned LaneStart = l * 16;
        unsigned LaneEnd = (l+1) * 16;
        for (unsigned i = 0; i != 16; ++i) {
          unsigned Idx = shiftVal + i + LaneStart;
          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
        }
      }

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // Emit the shift intrinsic with the amount expressed in bits.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_movntps:
  case X86::BI__builtin_ia32_movntps256:
  case X86::BI__builtin_ia32_movntpd:
  case X86::BI__builtin_ia32_movntpd256:
  case X86::BI__builtin_ia32_movntdq:
  case X86::BI__builtin_ia32_movntdq256:
  case X86::BI__builtin_ia32_movnti: {
    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
                                           Builder.getInt32(1));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(16);
    return SI;
  }
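
  // The non-temporal builtins above lower to an ordinary store tagged with
  // !nontemporal metadata rather than a target intrinsic; roughly
  // (illustrative sketch):
  //   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0
  //   !0 = metadata !{i32 1}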
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch(BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pswapdsf:
    case X86::BI__builtin_ia32_pswapdsi:
      name = "pswapd";
      ID = Intrinsic::x86_3dnowa_pswapd;
      break;
    }
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  }
}
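
// The rdrand*_step lowering above returns the random value through the
// pointer argument and yields the success flag as the call result; the IR
// is roughly (illustrative sketch):
//   %pair = call { i16, i32 } @llvm.x86.rdrand.16()
//   ; extractvalue %pair, 0 is stored through the pointer operand
//   %ok = extractvalue { i16, i32 } %pair, 1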
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st, vec_stl, vec_ste
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  }
}