1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2 // The LLVM Compiler Infrastructure
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
7 //===----------------------------------------------------------------------===//
9 // This file implements the SPUTargetLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "SPUISelLowering.h"
14 #include "SPUTargetMachine.h"
15 #include "SPUFrameLowering.h"
16 #include "SPUMachineFunction.h"
17 #include "llvm/Constants.h"
18 #include "llvm/Function.h"
19 #include "llvm/Intrinsics.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/Type.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/Target/TargetOptions.h"
30 #include "llvm/ADT/VectorExtras.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Support/raw_ostream.h"
39 // Used in getTargetNodeName() below
// NOTE(review): mutable file-scope map, lazily populated on first call to
// getTargetNodeName(). That fill-in is not guarded, so it is not thread-safe
// -- confirm getTargetNodeName() is only reached single-threaded.
41 std::map<unsigned, const char *> node_names;
43 // Byte offset of the preferred slot (counted from the MSB)
// Returns the byte offset, counted from the most-significant byte of a
// 16-byte SPU register, at which the "preferred slot" for the given value
// type lives: i1 and i8 sit at byte 3, i16 at byte 2. Wider types
// presumably fall through to a default offset of 0 -- the declaration and
// return of `retval` are on lines elided from this view; TODO confirm.
44 int prefslotOffset(EVT VT) {
46   if (VT==MVT::i1) retval=3;
47   if (VT==MVT::i8) retval=3;
48   if (VT==MVT::i16) retval=2;
53 //! Expand a library call into an actual call DAG node
56  This code is taken from SelectionDAGLegalize, since it is not exposed as
57  part of the LLVM SelectionDAG API.
// Builds an argument list from Op's operands, resolves the libcall symbol
// for LC, and lowers it to a call via TLI.LowerCallTo. Returns the call's
// result value (CallInfo.first); the output chain (CallInfo.second) is
// dropped. NOTE(review): the `Hi` out-parameter is never written in the
// visible code -- confirm whether callers rely on it.
61 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
62               bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
63   // The input chain to this libcall is the entry node of the function.
64   // Legalizing the call will automatically add the previous call to the
66   SDValue InChain = DAG.getEntryNode();
68   TargetLowering::ArgListTy Args;
69   TargetLowering::ArgListEntry Entry;
// Each operand of Op becomes one call argument; sign/zero-extension of the
// argument is chosen by the isSigned flag.
70   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
71     EVT ArgVT = Op.getOperand(i).getValueType();
72     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
73     Entry.Node = Op.getOperand(i);
75     Entry.isSExt = isSigned;
76     Entry.isZExt = !isSigned;
77     Args.push_back(Entry);
79   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
82   // Splice the libcall in wherever FindInputOutputChains tells us to.
84     Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
85   std::pair<SDValue, SDValue> CallInfo =
// isSigned/!isSigned select sext vs. zext treatment of the return value.
86     TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
87                     0, TLI.getLibcallCallingConv(LC), false,
88                     /*isReturnValueUsed=*/true,
89                     Callee, Args, DAG, Op.getDebugLoc());
91   return CallInfo.first;
// Constructor: configures all Cell SPU-specific lowering state -- register
// classes, load/store legalization actions, per-type operation actions,
// libcall names, target DAG combines, and scheduling preference.
95 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
96   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
99   // Use _setjmp/_longjmp instead of setjmp/longjmp.
100   setUseUnderscoreSetJmp(true);
101   setUseUnderscoreLongJmp(true);
103   // Set RTLIB libcall names as used by SPU:
104   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
106   // Set up the SPU's register classes:
107   addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
108   addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
109   addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
110   addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
111   addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
112   addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
113   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
115   // SPU has no sign or zero extended loads for i1, i8, i16:
116   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
117   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
118   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
120   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
121   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
123   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
124   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
125   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
126   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
128   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
130   // SPU constant load actions are custom lowered:
131   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
132   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
134   // SPU's loads and stores have to be custom lowered:
135   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
137     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
139     setOperationAction(ISD::LOAD,   VT, Custom);
140     setOperationAction(ISD::STORE,  VT, Custom);
141     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
142     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
143     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
// Truncating stores to any narrower integer type are expanded.
145     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
146       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
147       setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the floating-point types.
151   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
153     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
155     setOperationAction(ISD::LOAD,   VT, Custom);
156     setOperationAction(ISD::STORE,  VT, Custom);
158     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
159       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
160       setTruncStoreAction(VT, StoreVT, Expand);
164   // Expand the jumptable branches
165   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
166   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
168   // Custom lower SELECT_CC for most cases, but expand by default
169   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
170   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
171   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
172   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
173   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
175   // SPU has no intrinsics for these particular operations:
176   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
177   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
179   // SPU has no division/remainder instructions
180   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
181   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
182   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
183   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
184   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
185   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
186   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
187   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
188   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
189   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
190   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
191   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
192   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
193   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
194   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
195   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
196   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
197   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
198   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
199   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
200   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
201   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
202   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
203   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
204   setOperationAction(ISD::SREM,    MVT::i128, Expand);
205   setOperationAction(ISD::UREM,    MVT::i128, Expand);
206   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
207   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
208   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
209   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
211   // We don't support sin/cos/sqrt/fmod
212   setOperationAction(ISD::FSIN , MVT::f64, Expand);
213   setOperationAction(ISD::FCOS , MVT::f64, Expand);
214   setOperationAction(ISD::FREM , MVT::f64, Expand);
215   setOperationAction(ISD::FSIN , MVT::f32, Expand);
216   setOperationAction(ISD::FCOS , MVT::f32, Expand);
217   setOperationAction(ISD::FREM , MVT::f32, Expand);
219   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
221   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
222   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
224   setOperationAction(ISD::FMA, MVT::f64, Expand);
225   setOperationAction(ISD::FMA, MVT::f32, Expand);
227   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
228   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
230   // SPU can do rotate right and left, so legalize it... but customize for i8
231   // because instructions don't exist.
233   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
235   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
236   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
237   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
239   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
240   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
241   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
243   // SPU has no native version of shift left/right for i8
244   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
245   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
246   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
248   // Make these operations legal and handle them during instruction selection:
249   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
250   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
251   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
253   // Custom lower i8, i32 and i64 multiplications
// NOTE(review): the comment above claims i32 and i64 are custom lowered,
// but they are marked Legal here (handled during instruction selection?).
// Confirm which is intended.
254   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
255   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
256   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
258   // Expand double-width multiplication
259   // FIXME: It would probably be reasonable to support some of these operations
260   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
261   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
262   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
263   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
264   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
265   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
266   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
267   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
268   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
269   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
270   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
271   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
272   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
273   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
274   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
275   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
277   // Need to custom handle (some) common i8, i64 math ops
278   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
279   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
280   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
281   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
283   // SPU does not have BSWAP. It does have i32 support CTLZ.
284   // CTPOP has to be custom lowered.
285   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
286   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
288   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
289   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
290   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
291   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
292   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
294   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
295   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
296   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
297   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
298   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
300   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
301   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
302   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
303   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
304   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
306   // SPU has a version of select that implements (a&~c)|(b&c), just like
307   // select ought to work:
308   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
309   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
310   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
311   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
313   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
314   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
315   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
316   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
317   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
319   // Custom lower i128 -> i64 truncates
320   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
322   // Custom lower i32/i64 -> i128 sign extend
323   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
325   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
326   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
327   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
328   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
329   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
330   // to expand to a libcall, hence the custom lowering:
331   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
332   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
333   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
334   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
335   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
336   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
338   // FDIV on SPU requires custom lowering
339   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
341   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
342   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
343   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
344   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
345   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
346   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
347   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
348   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
349   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
351   setOperationAction(ISD::BITCAST, MVT::i32, Legal);
352   setOperationAction(ISD::BITCAST, MVT::f32, Legal);
353   setOperationAction(ISD::BITCAST, MVT::i64, Legal);
354   setOperationAction(ISD::BITCAST, MVT::f64, Legal);
356   // We cannot sextinreg(i1).  Expand to shifts.
357   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
359   // We want to legalize GlobalAddress and ConstantPool nodes into the
360   // appropriate instructions to materialize the address.
361   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
363     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
365     setOperationAction(ISD::GlobalAddress,  VT, Custom);
366     setOperationAction(ISD::ConstantPool,   VT, Custom);
367     setOperationAction(ISD::JumpTable,      VT, Custom);
370   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
371   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
373   // Use the default implementation.
374   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
375   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
376   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
377   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
378   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
379   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
380   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
382   // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these two settings override the earlier FP_TO_SINT/i64
// "Expand" (line 333) and duplicate the SINT_TO_FP/i64 "Custom" (line 348);
// the last call wins. Confirm the intended final actions.
383   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
384   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
386   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): overrides the earlier FP_TO_UINT/i32 "Custom" (line 332).
387   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
389   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
390   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
392   // First set operation action for all vector types to expand. Then we
393   // will selectively turn on ones that can be effectively codegen'd.
394   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
395   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
396   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
397   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
398   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
399   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
401   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
402        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
403     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
405     // Set operation actions to legal types only.
406     if (!isTypeLegal(VT)) continue;
408     // add/sub are legal for all supported vector VT's.
409     setOperationAction(ISD::ADD,     VT, Legal);
410     setOperationAction(ISD::SUB,     VT, Legal);
411     // mul has to be custom lowered.
// NOTE(review): comment/code mismatch -- the action set here is Legal,
// not Custom. Confirm which is intended.
412     setOperationAction(ISD::MUL,     VT, Legal);
414     setOperationAction(ISD::AND,     VT, Legal);
415     setOperationAction(ISD::OR,      VT, Legal);
416     setOperationAction(ISD::XOR,     VT, Legal);
417     setOperationAction(ISD::LOAD,    VT, Custom);
418     setOperationAction(ISD::SELECT,  VT, Legal);
419     setOperationAction(ISD::STORE,   VT, Custom);
421     // These operations need to be expanded:
422     setOperationAction(ISD::SDIV,    VT, Expand);
423     setOperationAction(ISD::SREM,    VT, Expand);
424     setOperationAction(ISD::UDIV,    VT, Expand);
425     setOperationAction(ISD::UREM,    VT, Expand);
427     // Custom lower build_vector, constant pool spills, insert and
428     // extract vector elements:
429     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
430     setOperationAction(ISD::ConstantPool, VT, Custom);
431     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
432     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
433     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
434     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
437   setOperationAction(ISD::AND, MVT::v16i8, Custom);
438   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
439   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
440   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
442   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
444   setBooleanContents(ZeroOrNegativeOneBooleanContent);
445   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
447   setStackPointerRegisterToSaveRestore(SPU::R1);
449   // We have target-specific dag combine patterns for the following nodes:
450   setTargetDAGCombine(ISD::ADD);
451   setTargetDAGCombine(ISD::ZERO_EXTEND);
452   setTargetDAGCombine(ISD::SIGN_EXTEND);
453   setTargetDAGCombine(ISD::ANY_EXTEND);
// Minimum function alignment of 2^3 = 8 bytes.
455   setMinFunctionAlignment(3);
457   computeRegisterProperties();
459   // Set pre-RA register scheduler default to BURR, which produces slightly
460   // better code than the default (could also be TDRR, but TargetLowering.h
461   // needs a mod to support that model):
462   setSchedulingPreference(Sched::RegPressure);
// Returns a printable name for an SPU-specific DAG node opcode, or a null
// pointer when the opcode is unknown. The name table is a file-scope map
// filled in lazily on the first call.
// NOTE(review): the lazy fill-in of the global `node_names` map is not
// synchronized -- confirm this is never reached from multiple threads.
466 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
468   if (node_names.empty()) {
469     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
470     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
471     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
472     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
473     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
474     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
475     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
476     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
477     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
478     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
479     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
480     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
481     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
482     node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
483     node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
484     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
485     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
486     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
487     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
488             "SPUISD::ROTBYTES_LEFT_BITS";
489     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
490     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
491     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
492     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
493     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Null (0) is returned for opcodes that are not in the table.
496   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
498   return ((i != node_names.end()) ? i->second : 0);
501 //===----------------------------------------------------------------------===//
502 // Return the Cell SPU's SETCC result type
503 //===----------------------------------------------------------------------===//
// Picks the integer type used to carry a SETCC (comparison) result for the
// given operand type: i8 or i16 for the narrower operand types seen below;
// the remaining cases of the switch are on lines not visible here -- TODO
// confirm the default (presumably i32, per the comment).
505 EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
506   // i8, i16 and i32 are valid SETCC result types
507   MVT::SimpleValueType retval;
509   switch(VT.getSimpleVT().SimpleTy){
512       retval = MVT::i8; break;
514       retval = MVT::i16; break;
522 //===----------------------------------------------------------------------===//
523 // Calling convention code:
524 //===----------------------------------------------------------------------===//
526 #include "SPUGenCallingConv.inc"
528 //===----------------------------------------------------------------------===//
529 // LowerOperation implementation
530 //===----------------------------------------------------------------------===//
532 /// Custom lower loads for CellSPU
534  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
535  within a 16-byte block, we have to rotate to extract the requested element.
537  For extending loads, we also want to ensure that the following sequence is
538  emitted, e.g. for MVT::f32 extending load to MVT::f64:
542 %2  v16i8,ch = rotate %1
543 %3  v4f32, ch = bitconvert %2
544 %4  f32 = vec2prefslot %3
545 %5  f64 = fp_extend %4
549 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
550   LoadSDNode *LN = cast<LoadSDNode>(Op);
551   SDValue the_chain = LN->getChain();
552   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
553   EVT InVT = LN->getMemoryVT();
554   EVT OutVT = Op.getValueType();
555   ISD::LoadExtType ExtType = LN->getExtensionType();
556   unsigned alignment = LN->getAlignment();
// Byte offset of the preferred slot for the loaded type (see
// prefslotOffset above); used below to compute rotation amounts.
557   int pso = prefslotOffset(InVT);
558   DebugLoc dl = Op.getDebugLoc();
// A scalar memory type is widened to the 128-bit vector type that fills
// one SPU register (e.g. i32 -> v4i32).
559   EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
560                                                   (128 / InVT.getSizeInBits()));
563   assert( LN->getAddressingMode() == ISD::UNINDEXED
564           && "we should get only UNINDEXED adresses");
565   // clean aligned loads can be selected as-is
566   if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
569   // Get pointerinfos to the memory chunk(s) that contain the data to load
// Round the pointer-info offset down to the enclosing 16-byte chunk; the
// chunk at +16 covers a possible straddle across the boundary.
570   uint64_t mpi_offset = LN->getPointerInfo().Offset;
571   mpi_offset -= mpi_offset%16;
572   MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
573   MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
576   SDValue basePtr = LN->getBasePtr();
579   if ((alignment%16) == 0) {
582     // Special cases for a known aligned load to simplify the base pointer
583     // and the rotation amount:
584     if (basePtr.getOpcode() == ISD::ADD
585         && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
586       // Known offset into basePtr
587       int64_t offset = CN->getSExtValue();
// Rotate amount: distance from the in-quadword byte offset to the
// preferred slot.
588       int64_t rotamt = int64_t((offset & 0xf) - pso);
593       rotate = DAG.getConstant(rotamt, MVT::i16);
595       // Simplify the base pointer for this case:
596       basePtr = basePtr.getOperand(0);
597       if ((offset & ~0xf) > 0) {
598         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
600                               DAG.getConstant((offset & ~0xf), PtrVT));
602     } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
603                || (basePtr.getOpcode() == SPUISD::IndirectAddr
604                    && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
605                    && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
606       // Plain aligned a-form address: rotate into preferred slot
607       // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
608       int64_t rotamt = -pso;
611       rotate = DAG.getConstant(rotamt, MVT::i16);
613       // Offset the rotate amount by the basePtr and the preferred slot
615       int64_t rotamt = -pso;
618       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
620                            DAG.getConstant(rotamt, PtrVT));
623     // Unaligned load: must be more pessimistic about addressing modes:
624     if (basePtr.getOpcode() == ISD::ADD) {
625       MachineFunction &MF = DAG.getMachineFunction();
626       MachineRegisterInfo &RegInfo = MF.getRegInfo();
627       unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
630       SDValue Op0 = basePtr.getOperand(0);
631       SDValue Op1 = basePtr.getOperand(1);
633       if (isa<ConstantSDNode>(Op1)) {
634         // Convert the (add <ptr>, <const>) to an indirect address contained
635         // in a register. Note that this is done because we need to avoid
636         // creating a 0(reg) d-form address due to the SPU's block loads.
637         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
638         the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
639         basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
641         // Convert the (add <arg1>, <arg2>) to an indirect address, which
642         // will likely be lowered as a reg(reg) x-form address.
643         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
646       basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
648                             DAG.getConstant(0, PtrVT));
651     // Offset the rotate amount by the basePtr and the preferred slot
653     rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
655                          DAG.getConstant(-pso, PtrVT));
658   // Do the load as a i128 to allow possible shifting
659   SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
661                             LN->isVolatile(), LN->isNonTemporal(), 16);
663   // When the size is not greater than alignment we get all data with just
665   if (alignment >= InVT.getSizeInBits()/8) {
667     the_chain = low.getValue(1);
669     // Rotate into the preferred slot:
670     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
671                          low.getValue(0), rotate);
673     // Convert the loaded v16i8 vector to the appropriate vector type
674     // specified by the operand:
// NOTE(review): this local vecVT shadows the identically-computed vecVT
// declared at function entry (line 559).
675     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
676                                  InVT, (128 / InVT.getSizeInBits()));
677     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
678                          DAG.getNode(ISD::BITCAST, dl, vecVT, result));
680   // When alignment is less than the size, we might need (known only at
681   // run-time) two loads
682   // TODO: if the memory address is composed only from constants, we have
683   // extra knowledge, and might avoid the second load
685     // storage position offset from lower 16 byte aligned memory chunk
686     SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
687                                   basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
688     // get a registerfull of ones. (this implementation is a workaround: LLVM
689     // cannot handle 128 bit signed int constants)
690     SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
691     ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
693     SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
694                                DAG.getNode(ISD::ADD, dl, PtrVT,
696                                            DAG.getConstant(16, PtrVT)),
698                                LN->isVolatile(), LN->isNonTemporal(), 16);
700     the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
703     // Shift the (possible) high part right to compensate the misalignment.
704     // if there is no highpart (i.e. value is i64 and offset is 4), this
705     // will zero out the high value.
706     high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
707                                      DAG.getNode(ISD::SUB, dl, MVT::i32,
708                                                  DAG.getConstant( 16, MVT::i32),
712     // Shift the low similarly
713     // TODO: add SPUISD::SHL_BYTES
714     low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
716     // Merge the two parts
717     result = DAG.getNode(ISD::BITCAST, dl, vecVT,
718                          DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
720     if (!InVT.isVector()) {
721       result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
725   // Handle extending loads by extending the scalar result:
726   if (ExtType == ISD::SEXTLOAD) {
727     result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
728   } else if (ExtType == ISD::ZEXTLOAD) {
729     result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
730   } else if (ExtType == ISD::EXTLOAD) {
731     unsigned NewOpc = ISD::ANY_EXTEND;
733     if (OutVT.isFloatingPoint())
734       NewOpc = ISD::FP_EXTEND;
736     result = DAG.getNode(NewOpc, dl, OutVT, result);
// Wrap the final value in an LDRESULT node so both the loaded value and
// the output chain are returned to the caller.
739   SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
740   SDValue retops[2] = {
745   result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
746                        retops, sizeof(retops) / sizeof(retops[0]));
750 /// Custom lower stores for CellSPU
752 All CellSPU stores are aligned to 16-byte boundaries, so for elements
753 within a 16-byte block, we have to generate a shuffle to insert the
754 requested element into its place, then store the resulting block.
757 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
758 StoreSDNode *SN = cast<StoreSDNode>(Op);
759 SDValue Value = SN->getValue();
760 EVT VT = Value.getValueType();
761 EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
762 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
763 DebugLoc dl = Op.getDebugLoc();
764 unsigned alignment = SN->getAlignment();
766 EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
767 (128 / StVT.getSizeInBits()));
768 // Get pointerinfos to the memory chunk(s) that contain the data to load
769 uint64_t mpi_offset = SN->getPointerInfo().Offset;
770 mpi_offset -= mpi_offset%16;
771 MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
772 MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
776 assert( SN->getAddressingMode() == ISD::UNINDEXED
777 && "we should get only UNINDEXED adresses");
778 // clean aligned loads can be selected as-is
779 if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
782 SDValue alignLoadVec;
783 SDValue basePtr = SN->getBasePtr();
784 SDValue the_chain = SN->getChain();
785 SDValue insertEltOffs;
787 if ((alignment%16) == 0) {
789 // Special cases for a known aligned load to simplify the base pointer
790 // and insertion byte:
791 if (basePtr.getOpcode() == ISD::ADD
792 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
793 // Known offset into basePtr
794 int64_t offset = CN->getSExtValue();
796 // Simplify the base pointer for this case:
797 basePtr = basePtr.getOperand(0);
798 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
800 DAG.getConstant((offset & 0xf), PtrVT));
802 if ((offset & ~0xf) > 0) {
803 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
805 DAG.getConstant((offset & ~0xf), PtrVT));
808 // Otherwise, assume it's at byte 0 of basePtr
809 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
811 DAG.getConstant(0, PtrVT));
812 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
814 DAG.getConstant(0, PtrVT));
817 // Unaligned load: must be more pessimistic about addressing modes:
818 if (basePtr.getOpcode() == ISD::ADD) {
819 MachineFunction &MF = DAG.getMachineFunction();
820 MachineRegisterInfo &RegInfo = MF.getRegInfo();
821 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
824 SDValue Op0 = basePtr.getOperand(0);
825 SDValue Op1 = basePtr.getOperand(1);
827 if (isa<ConstantSDNode>(Op1)) {
828 // Convert the (add <ptr>, <const>) to an indirect address contained
829 // in a register. Note that this is done because we need to avoid
830 // creating a 0(reg) d-form address due to the SPU's block loads.
831 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
832 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
833 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
835 // Convert the (add <arg1>, <arg2>) to an indirect address, which
836 // will likely be lowered as a reg(reg) x-form address.
837 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
840 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
842 DAG.getConstant(0, PtrVT));
845 // Insertion point is solely determined by basePtr's contents
846 insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
848 DAG.getConstant(0, PtrVT));
851 // Load the lower part of the memory to which to store.
852 SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
853 lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
855 // if we don't need to store over the 16 byte boundary, one store suffices
856 if (alignment >= StVT.getSizeInBits()/8) {
858 the_chain = low.getValue(1);
860 LoadSDNode *LN = cast<LoadSDNode>(low);
861 SDValue theValue = SN->getValue();
864 && (theValue.getOpcode() == ISD::AssertZext
865 || theValue.getOpcode() == ISD::AssertSext)) {
866 // Drill down and get the value for zero- and sign-extended
868 theValue = theValue.getOperand(0);
871 // If the base pointer is already a D-form address, then just create
872 // a new D-form address with a slot offset and the original base pointer.
873 // Otherwise generate a D-form address with the slot offset relative
874 // to the stack pointer, which is always aligned.
876 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
877 errs() << "CellSPU LowerSTORE: basePtr = ";
878 basePtr.getNode()->dump(&DAG);
883 SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
885 SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
888 result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
890 DAG.getNode(ISD::BITCAST, dl,
891 MVT::v4i32, insertEltOp));
893 result = DAG.getStore(the_chain, dl, result, basePtr,
895 LN->isVolatile(), LN->isNonTemporal(),
899 // do the store when it might cross the 16 byte memory access boundary.
901 // TODO issue a warning if SN->isVolatile()== true? This is likely not
902 // what the user wanted.
904 // address offset from the nearest lower 16-byte aligned address
905 SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
907 DAG.getConstant(0xf, MVT::i32));
909 SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
910 DAG.getConstant( 16, MVT::i32),
912 // 16 - sizeof(Value)
913 SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
914 DAG.getConstant( 16, MVT::i32),
915 DAG.getConstant( VT.getSizeInBits()/8,
917 // get a registerfull of ones
918 SDValue ones = DAG.getConstant(-1, MVT::v4i32);
919 ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
921 // Create the 128 bit masks that have ones where the data to store is
923 SDValue lowmask, himask;
924 // if the value to store doesn't fill up an entire 128 bits, zero
925 // out the last bits of the mask so that only the value we want to store
927 // this is e.g. in the case of store i32, align 2
929 Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
930 lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
931 lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
933 Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
934 Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
939 Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
941 // this will zero, if there are no data that goes to the high quad
942 himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
944 lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
947 // Load in the old data and zero out the parts that will be overwritten with
948 // the new data to store.
949 SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
950 DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
951 DAG.getConstant( 16, PtrVT)),
953 SN->isVolatile(), SN->isNonTemporal(), 16);
954 the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
957 low = DAG.getNode(ISD::AND, dl, MVT::i128,
958 DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
959 DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
960 hi = DAG.getNode(ISD::AND, dl, MVT::i128,
961 DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
962 DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
964 // Shift the Value to store into place. rlow contains the parts that go to
965 // the lower memory chunk, rhi has the parts that go to the upper one.
966 SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
967 rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
968 SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
971 // Merge the old data and the new data and store the results
972 // Need to convert vectors here to integer as 'OR'ing floats assert
973 rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
974 DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
975 DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
976 rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
977 DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
978 DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
980 low = DAG.getStore(the_chain, dl, rlow, basePtr,
982 SN->isVolatile(), SN->isNonTemporal(), 16);
983 hi = DAG.getStore(the_chain, dl, rhi,
984 DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
985 DAG.getConstant( 16, PtrVT)),
987 SN->isVolatile(), SN->isNonTemporal(), 16);
988 result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
995 //! Generate the address of a constant pool entry.
// Lowers a ConstantPoolSDNode to an SPU address node. Only the static
// relocation model is handled: small-memory code emits an A-form
// (absolute) address; "large memory" code splits the address into
// Hi/Lo halves combined through SPUISD::IndirectAddr.
997 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
998 EVT PtrVT = Op.getValueType();
999 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1000 const Constant *C = CP->getConstVal();
1001 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
1002 SDValue Zero = DAG.getConstant(0, PtrVT);
1003 const TargetMachine &TM = DAG.getTarget();
1004 // FIXME there is no actual debug info here
1005 DebugLoc dl = Op.getDebugLoc();
1007 if (TM.getRelocationModel() == Reloc::Static) {
1008 if (!ST->usingLargeMem()) {
1009 // Just return the SDValue with the constant pool address in it.
1010 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
// Large-memory mode: build the address from its high and low halves.
1012 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
1013 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
1014 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
// Non-static relocation models are not supported on SPU.
1018 llvm_unreachable("LowerConstantPool: Relocation model other than static"
1023 //! Alternate entry point for generating the address of a constant pool entry
// Thin public wrapper: forwards to the file-static ::LowerConstantPool
// using the subtarget pulled from the given target machine.
1025 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
1026 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Lowers a JumpTableSDNode to an SPU address node. Mirrors
// LowerConstantPool: static relocation model only, A-form address for
// small memory, Hi/Lo + IndirectAddr for large memory.
1030 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1031 EVT PtrVT = Op.getValueType();
1032 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1033 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1034 SDValue Zero = DAG.getConstant(0, PtrVT);
1035 const TargetMachine &TM = DAG.getTarget();
1036 // FIXME there is no actual debug info here
1037 DebugLoc dl = Op.getDebugLoc();
1039 if (TM.getRelocationModel() == Reloc::Static) {
1040 if (!ST->usingLargeMem()) {
// Small memory: the jump table is directly addressable (A-form).
1041 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
// Large-memory mode: combine high and low address halves.
1043 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
1044 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
1045 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
// Non-static relocation models are not supported on SPU.
1049 llvm_unreachable("LowerJumpTable: Relocation model other than static"
// Lowers a GlobalAddressSDNode (including its constant offset) to an
// SPU address node. Same scheme as LowerConstantPool/LowerJumpTable,
// except failure is reported with report_fatal_error rather than
// llvm_unreachable.
1055 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1056 EVT PtrVT = Op.getValueType();
1057 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1058 const GlobalValue *GV = GSDN->getGlobal();
// Fold the node's offset into the target global address.
1059 SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
1060 PtrVT, GSDN->getOffset());
1061 const TargetMachine &TM = DAG.getTarget();
1062 SDValue Zero = DAG.getConstant(0, PtrVT);
1063 // FIXME there is no actual debug info here
1064 DebugLoc dl = Op.getDebugLoc();
1066 if (TM.getRelocationModel() == Reloc::Static) {
1067 if (!ST->usingLargeMem()) {
1068 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
// Large-memory mode: combine high and low address halves.
1070 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
1071 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
1072 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
// Non-static relocation models are not supported on SPU.
1075 report_fatal_error("LowerGlobalAddress: Relocation model other than static"
1083 //! Custom lower double precision floating point constants
// For an f64 ConstantFP: reinterpret the IEEE-754 bit pattern as an
// i64, splat it into a v2i64 BUILD_VECTOR, bitcast to v2f64, and
// extract the preferred slot. Other value types fall through
// (handling for them, if any, is outside this visible region).
1085 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
1086 EVT VT = Op.getValueType();
1087 // FIXME there is no actual debug info here
1088 DebugLoc dl = Op.getDebugLoc();
1090 if (VT == MVT::f64) {
1091 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1094 "LowerConstantFP: Node is not ConstantFPSDNode");
// Reinterpret the double as its raw 64-bit pattern.
1096 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1097 SDValue T = DAG.getConstant(dbits, MVT::i64);
1098 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1099 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1100 DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
// Lowers incoming formal arguments: register-assigned args are copied
// out of their physical registers via fresh virtual registers; the
// remainder are loaded from fixed stack slots. For varargs, every
// remaining argument register (R3..R79) is spilled to the stack so
// va_arg can walk them as memory.
1107 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1108 CallingConv::ID CallConv, bool isVarArg,
1109 const SmallVectorImpl<ISD::InputArg>
1111 DebugLoc dl, SelectionDAG &DAG,
1112 SmallVectorImpl<SDValue> &InVals)
1115 MachineFunction &MF = DAG.getMachineFunction();
1116 MachineFrameInfo *MFI = MF.getFrameInfo();
1117 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1118 SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
// Stack args start just past the minimal frame (linkage area).
1120 unsigned ArgOffset = SPUFrameLowering::minStackSize();
1121 unsigned ArgRegIdx = 0;
1122 unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
1124 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Run the calling-convention analysis to assign each arg a location.
1126 SmallVector<CCValAssign, 16> ArgLocs;
1127 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1128 getTargetMachine(), ArgLocs, *DAG.getContext());
1129 // FIXME: allow for other calling conventions
1130 CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1132 // Add DAG nodes to load the arguments or copy them out of registers.
1133 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1134 EVT ObjectVT = Ins[ArgNo].VT;
1135 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1137 CCValAssign &VA = ArgLocs[ArgNo];
1139 if (VA.isRegLoc()) {
// Pick the register class matching the argument's value type.
1140 const TargetRegisterClass *ArgRegClass;
1142 switch (ObjectVT.getSimpleVT().SimpleTy) {
1144 report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1145 Twine(ObjectVT.getEVTString()));
1147 ArgRegClass = &SPU::R8CRegClass;
1150 ArgRegClass = &SPU::R16CRegClass;
1153 ArgRegClass = &SPU::R32CRegClass;
1156 ArgRegClass = &SPU::R64CRegClass;
1159 ArgRegClass = &SPU::GPRCRegClass;
1162 ArgRegClass = &SPU::R32FPRegClass;
1165 ArgRegClass = &SPU::R64FPRegClass;
1173 ArgRegClass = &SPU::VECREGRegClass;
// Mark the physical arg register live-in, bound to a new vreg,
// and read the value out of it.
1177 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1178 RegInfo.addLiveIn(VA.getLocReg(), VReg);
1179 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1182 // We need to load the argument to a virtual register if we determined
1183 // above that we ran out of physical registers of the appropriate type
1184 // or we're forced to do vararg
1185 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1186 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1187 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
1189 ArgOffset += StackSlotSize;
1192 InVals.push_back(ArgVal);
// Re-chain off the value just produced (copy or load).
1194 Chain = ArgVal.getOperand(0);
1199 // FIXME: we should be able to query the argument registers from
1200 // tablegen generated code.
1201 static const unsigned ArgRegs[] = {
1202 SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
1203 SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1204 SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1205 SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1206 SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1207 SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1208 SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1209 SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1210 SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1211 SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1212 SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1214 // size of ArgRegs array
1215 unsigned NumArgRegs = 77;
1217 // We will spill (79-3)+1 registers to the stack
1218 SmallVector<SDValue, 79-3+1> MemOps;
1220 // Create the frame slot
// Spill each remaining argument register as a full 16-byte vector
// so varargs can be read back from contiguous stack slots.
1221 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1222 FuncInfo->setVarArgsFrameIndex(
1223 MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1224 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1225 unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
1226 SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1227 SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
1229 Chain = Store.getOperand(0);
1230 MemOps.push_back(Store);
1232 // Increment address by stack slot size for the next stored argument
1233 ArgOffset += StackSlotSize;
// Tie the spill stores together so they all stay in the chain.
1235 if (!MemOps.empty())
1236 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1237 &MemOps[0], MemOps.size())
1243 /// isLSAAddress - Return the immediate to use if the specified
1244 /// value is representable as a LSA address.
// Returns null unless Op is a constant, 4-byte aligned, whose address
// fits in 18 bits (sign-extended from the low 18). On success, returns
// the word-offset constant (address >> 2) as an i32 node.
1245 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1246 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1249 int Addr = C->getZExtValue();
1250 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1251 (Addr << 14 >> 14) != Addr)
1252 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word offset: drop the two implicit zero bits.
1254 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lowers an outgoing call: assigns arguments to registers/stack via
// CCC_SPU, emits the CALLSEQ_START/END bracket, rewrites the callee
// into the appropriate SPU address form (PC-relative, A-form, or
// indirect depending on memory model and symbol kind), emits the
// SPUISD::CALL node, and copies return values out of their registers.
1258 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1259 CallingConv::ID CallConv, bool isVarArg,
1261 const SmallVectorImpl<ISD::OutputArg> &Outs,
1262 const SmallVectorImpl<SDValue> &OutVals,
1263 const SmallVectorImpl<ISD::InputArg> &Ins,
1264 DebugLoc dl, SelectionDAG &DAG,
1265 SmallVectorImpl<SDValue> &InVals) const {
1266 // CellSPU target does not yet support tail call optimization.
1269 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1270 unsigned NumOps = Outs.size();
1271 unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
// Run the calling-convention analysis for the outgoing arguments.
1273 SmallVector<CCValAssign, 16> ArgLocs;
1274 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1275 getTargetMachine(), ArgLocs, *DAG.getContext());
1276 // FIXME: allow for other calling conventions
1277 CCInfo.AnalyzeCallOperands(Outs, CCC_SPU)
1279 const unsigned NumArgRegs = ArgLocs.size();
1282 // Handy pointer type
1283 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1285 // Set up a copy of the stack pointer for use loading and storing any
1286 // arguments that may not fit in the registers available for argument
1288 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1290 // Figure out which arguments are going to go in registers, and which in
1292 unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
1293 unsigned ArgRegIdx = 0;
1295 // Keep track of registers passing arguments
1296 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1297 // And the arguments passed on the stack
1298 SmallVector<SDValue, 8> MemOpChains;
1300 for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1301 SDValue Arg = OutVals[ArgRegIdx];
1302 CCValAssign &VA = ArgLocs[ArgRegIdx];
1304 // PtrOff will be used to store the current argument to the stack if a
1305 // register cannot be found for it.
1306 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1307 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1309 switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1310 default: llvm_unreachable("Unexpected ValueType for argument!");
// In a register: record the (physreg, value) pair for the copy
// chain below. Otherwise: store to the computed stack offset.
1324 if (ArgRegIdx != NumArgRegs) {
1325 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1327 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
1328 MachinePointerInfo(),
1330 ArgOffset += StackSlotSize;
1336 // Accumulate how many bytes are to be pushed on the stack, including the
1337 // linkage area, and parameter passing area. According to the SPU ABI,
1338 // we minimally need space for [LR] and [SP].
1339 unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
1341 // Insert a call sequence start
1342 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1345 if (!MemOpChains.empty()) {
1346 // Adjust the stack pointer for the stack arguments.
1347 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1348 &MemOpChains[0], MemOpChains.size());
1351 // Build a sequence of copy-to-reg nodes chained together with token chain
1352 // and flag operands which copy the outgoing args into the appropriate regs.
1354 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1355 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1356 RegsToPass[i].second, InFlag);
1357 InFlag = Chain.getValue(1);
1360 SmallVector<SDValue, 8> Ops;
1361 unsigned CallOpc = SPUISD::CALL;
1363 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1364 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1365 // node so that legalize doesn't hack it.
1366 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1367 const GlobalValue *GV = G->getGlobal();
1368 EVT CalleeVT = Callee.getValueType();
1369 SDValue Zero = DAG.getConstant(0, PtrVT);
1370 SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1372 if (!ST->usingLargeMem()) {
1373 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1374 // style calls, otherwise, external symbols are BRASL calls. This assumes
1375 // that declared/defined symbols are in the same compilation unit and can
1376 // be reached through PC-relative jumps.
1379 // This may be an unsafe assumption for JIT and really large compilation
1381 if (GV->isDeclaration()) {
1382 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1384 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1387 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1389 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1391 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1392 EVT CalleeVT = Callee.getValueType();
1393 SDValue Zero = DAG.getConstant(0, PtrVT);
1394 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1395 Callee.getValueType());
1397 if (!ST->usingLargeMem()) {
1398 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1400 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1402 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1403 // If this is an absolute destination address that appears to be a legal
1404 // local store address, use the munged value.
1405 Callee = SDValue(Dest, 0);
1408 Ops.push_back(Chain);
1409 Ops.push_back(Callee);
1411 // Add argument registers to the end of the list so that they are known live
1413 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1414 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1415 RegsToPass[i].second.getValueType()));
1417 if (InFlag.getNode())
1418 Ops.push_back(InFlag);
1419 // Returns a chain and a flag for retval copy to use.
1420 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
1421 &Ops[0], Ops.size());
1422 InFlag = Chain.getValue(1);
// Close the call sequence; the glue links the retval copies below.
1424 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1425 DAG.getIntPtrConstant(0, true), InFlag);
1427 InFlag = Chain.getValue(1);
1429 // If the function returns void, just return the chain.
1433 // Now handle the return value(s)
1434 SmallVector<CCValAssign, 16> RVLocs;
1435 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1436 getTargetMachine(), RVLocs, *DAG.getContext());
1437 CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
1440 // If the call has results, copy the values out of the ret val registers.
1441 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1442 CCValAssign VA = RVLocs[i];
1444 SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1446 Chain = Val.getValue(1);
1447 InFlag = Val.getValue(2);
1448 InVals.push_back(Val);
// Lowers a function return: analyzes return locations via RetCC_SPU,
// registers them as live-outs (first return only), copies each result
// into its assigned physical register, and emits SPUISD::RET_FLAG
// (glued to the last copy when there are results).
1455 SPUTargetLowering::LowerReturn(SDValue Chain,
1456 CallingConv::ID CallConv, bool isVarArg,
1457 const SmallVectorImpl<ISD::OutputArg> &Outs,
1458 const SmallVectorImpl<SDValue> &OutVals,
1459 DebugLoc dl, SelectionDAG &DAG) const {
1461 SmallVector<CCValAssign, 16> RVLocs;
1462 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1463 getTargetMachine(), RVLocs, *DAG.getContext());
1464 CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1466 // If this is the first return lowered for this function, add the regs to the
1467 // liveout set for the function.
1468 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1469 for (unsigned i = 0; i != RVLocs.size(); ++i)
1470 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1475 // Copy the result values into the output registers.
1476 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1477 CCValAssign &VA = RVLocs[i];
1478 assert(VA.isRegLoc() && "Can only return in registers!");
1479 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
// Keep the glue so the copies stay adjacent to the return.
1481 Flag = Chain.getValue(1);
// With results: glue the return to the last copy. Without: plain return.
1485 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1487 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1491 //===----------------------------------------------------------------------===//
1492 // Vector related lowering:
1493 //===----------------------------------------------------------------------===//
// getVecImm - If the given build_vector has exactly one distinct
// non-undef operand and that operand is a ConstantSDNode, return it;
// helper for the SPU::get_vec_* splat-immediate matchers below.
1495 static ConstantSDNode *
1496 getVecImm(SDNode *N) {
1497 SDValue OpVal(0, 0);
1499 // Check to see if this buildvec has a single non-undef value in its elements.
1500 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1501 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1502 if (OpVal.getNode() == 0)
1503 OpVal = N->getOperand(i);
// Two differing non-undef operands: not a splat.
1504 else if (OpVal != N->getOperand(i))
1508 if (OpVal.getNode() != 0) {
1509 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1517 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1518 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// Returns the splat value as a target constant when it fits in
// unsigned 18 bits; otherwise no value is produced here.
1520 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1522 if (ConstantSDNode *CN = getVecImm(N)) {
1523 uint64_t Value = CN->getZExtValue();
1524 if (ValueType == MVT::i64) {
// For i64 splats, look at the two 32-bit halves; only one half is
// tested below once the halves are known to agree.
1525 uint64_t UValue = CN->getZExtValue();
1526 uint32_t upper = uint32_t(UValue >> 32);
1527 uint32_t lower = uint32_t(UValue);
1530 Value = Value >> 32;
// 0x3ffff == maximum unsigned 18-bit value.
1532 if (Value <= 0x3ffff)
1533 return DAG.getTargetConstant(Value, ValueType);
1539 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1540 /// and the value fits into a signed 16-bit constant, and if so, return the
// Returns the splat value as a target constant when it fits in
// signed 16 bits.
1542 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1544 if (ConstantSDNode *CN = getVecImm(N)) {
1545 int64_t Value = CN->getSExtValue();
1546 if (ValueType == MVT::i64) {
// For i64 splats, reduce to one 32-bit half of the value.
1547 uint64_t UValue = CN->getZExtValue();
1548 uint32_t upper = uint32_t(UValue >> 32);
1549 uint32_t lower = uint32_t(UValue);
1552 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1554 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1555 return DAG.getTargetConstant(Value, ValueType);
1562 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1563 /// and the value fits into a signed 10-bit constant, and if so, return the
// Returns the splat value as a target constant when it fits in
// signed 10 bits (isInt<10>).
1565 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1567 if (ConstantSDNode *CN = getVecImm(N)) {
1568 int64_t Value = CN->getSExtValue();
1569 if (ValueType == MVT::i64) {
// For i64 splats, reduce to one 32-bit half of the value.
1570 uint64_t UValue = CN->getZExtValue();
1571 uint32_t upper = uint32_t(UValue >> 32);
1572 uint32_t lower = uint32_t(UValue);
1575 Value = Value >> 32;
1577 if (isInt<10>(Value))
1578 return DAG.getTargetConstant(Value, ValueType);
1584 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1585 /// and the value fits into a signed 8-bit constant, and if so, return the
1588 /// @note: The incoming vector is v16i8 because that's the only way we can load
1589 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// For i16: accept only when the high byte mirrors the low byte (a
// byte splat widened to 16 bits) and return the low byte. For i8:
// accept when the value already fits in one byte.
1591 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1593 if (ConstantSDNode *CN = getVecImm(N)) {
1594 int Value = (int) CN->getZExtValue();
1595 if (ValueType == MVT::i16
1596 && Value <= 0xffff /* truncated from uint64_t */
1597 && ((short) Value >> 8) == ((short) Value & 0xff))
1598 return DAG.getTargetConstant(Value & 0xff, ValueType);
1599 else if (ValueType == MVT::i8
1600 && (Value & 0xff) == Value)
1601 return DAG.getTargetConstant(Value, ValueType);
1607 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1608 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splat values whose low 16 bits are zero (ILHU loads the
// upper halfword); returns the value shifted down by 16.
1610 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1612 if (ConstantSDNode *CN = getVecImm(N)) {
1613 uint64_t Value = CN->getZExtValue();
1614 if ((ValueType == MVT::i32
1615 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1616 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1617 return DAG.getTargetConstant(Value >> 16, ValueType);
1623 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value (if any) as an i32 target constant.
1624 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1625 if (ConstantSDNode *CN = getVecImm(N)) {
1626 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1632 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// Returns the splat value (if any) as an i64 target constant.
// NOTE(review): the value is narrowed through (unsigned) before being
// emitted as i64, discarding the upper 32 bits — confirm intentional.
1633 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1634 if (ConstantSDNode *CN = getVecImm(N)) {
1635 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1641 //! Lower a BUILD_VECTOR instruction creatively:
// Handles constant-splat BUILD_VECTORs only: uses
// BuildVectorSDNode::isConstantSplat to detect the splat, then
// materializes it per vector type. Float vectors are built as integer
// splats and bitcast; v16i8 is widened to a v8i16 splat; v2i64/v2f64
// are delegated to SPU::LowerV2I64Splat.
1643 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1644 EVT VT = Op.getValueType();
1645 EVT EltVT = VT.getVectorElementType();
1646 DebugLoc dl = Op.getDebugLoc();
1647 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1648 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1649 unsigned minSplatBits = EltVT.getSizeInBits();
// Sub-16-bit elements are raised to a 16-bit minimum splat size
// (the 8-bit case is expanded to 16 bits in the v16i8 arm below).
1651 if (minSplatBits < 16)
1654 APInt APSplatBits, APSplatUndef;
1655 unsigned SplatBitSize;
1658 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1659 HasAnyUndefs, minSplatBits)
1660 || minSplatBits < SplatBitSize)
1661 return SDValue(); // Wasn't a constant vector or splat exceeded min
1663 uint64_t SplatBits = APSplatBits.getZExtValue();
1665 switch (VT.getSimpleVT().SimpleTy) {
1667 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1668 Twine(VT.getEVTString()));
1671 uint32_t Value32 = uint32_t(SplatBits);
1672 assert(SplatBitSize == 32
1673 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1674 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1675 SDValue T = DAG.getConstant(Value32, MVT::i32);
1676 return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
1677 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1681 uint64_t f64val = uint64_t(SplatBits);
1682 assert(SplatBitSize == 64
1683 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1684 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1685 SDValue T = DAG.getConstant(f64val, MVT::i64);
1686 return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
1687 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1691 // 8-bit constants have to be expanded to 16-bits
1692 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1693 SmallVector<SDValue, 8> Ops;
1695 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1696 return DAG.getNode(ISD::BITCAST, dl, VT,
1697 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1700 unsigned short Value16 = SplatBits;
1701 SDValue T = DAG.getConstant(Value16, EltVT);
1702 SmallVector<SDValue, 8> Ops;
1705 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1708 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1709 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
// 64-bit element splats need the special-case lowering below.
1712 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
// LowerV2I64Splat - Materialize a v2i64 splat of SplatVal.
//  * If the two 32-bit halves are equal: emit a v4i32 splat and
//    bitcast to OpVT (matchable by IL/ILA and friends).
//  * If both halves are "special" patterns (0, 0xffffffff,
//    0x80000000): emit a plain BUILD_VECTOR (lowered via constant pool).
//  * Otherwise: splat each non-special half into its own v4i32 vector
//    and combine them with SHUFB using a computed byte-shuffle mask.
1722 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1724 uint32_t upper = uint32_t(SplatVal >> 32);
1725 uint32_t lower = uint32_t(SplatVal);
1727 if (upper == lower) {
1728 // Magic constant that can be matched by IL, ILA, et. al.
1729 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1730 return DAG.getNode(ISD::BITCAST, dl, OpVT,
1731 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732 Val, Val, Val, Val));
1734 bool upper_special, lower_special;
1736 // NOTE: This code creates common-case shuffle masks that can be easily
1737 // detected as common expressions. It is not attempting to create highly
1738 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1740 // Detect if the upper or lower half is a special shuffle mask pattern:
1741 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1742 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1744 // Both upper and lower are special, lower to a constant pool load:
1745 if (lower_special && upper_special) {
1746 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1747 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1748 SplatValCN, SplatValCN);
1753 SmallVector<SDValue, 16> ShufBytes;
1756 // Create lower vector if not a special pattern
1757 if (!lower_special) {
1758 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1759 LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1760 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1761 LO32C, LO32C, LO32C, LO32C));
1764 // Create upper vector if not a special pattern
1765 if (!upper_special) {
1766 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1767 HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1768 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1769 HI32C, HI32C, HI32C, HI32C));
1772 // If either upper or lower are special, then the two input operands are
1773 // the same (basically, one of them is a "don't care")
// Build the 16 shuffle-control bytes, one 32-bit word (i) at a time,
// byte (j) within each word. Even words take the upper half, odd
// words the lower half of the 64-bit splat element.
1779 for (int i = 0; i < 4; ++i) {
1781 for (int j = 0; j < 4; ++j) {
1783 bool process_upper, process_lower;
1785 process_upper = (upper_special && (i & 1) == 0);
1786 process_lower = (lower_special && (i & 1) == 1);
1788 if (process_upper || process_lower) {
// Special halves are encoded directly as shufb control bytes
// that generate 0x00, 0xff, or 0x80 without an input operand.
1789 if ((process_upper && upper == 0)
1790 || (process_lower && lower == 0))
1792 else if ((process_upper && upper == 0xffffffff)
1793 || (process_lower && lower == 0xffffffff))
1795 else if ((process_upper && upper == 0x80000000)
1796 || (process_lower && lower == 0x80000000))
1797 val |= (j == 0 ? 0xe0 : 0x80);
// Non-special halves select the corresponding source byte.
1799 val |= i * 4 + j + ((i & 1) * 16);
1802 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1805 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1806 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1807 &ShufBytes[0], ShufBytes.size()));
1811 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1812 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1813 /// permutation vector, V3, is monotonically increasing with one "exception"
1814 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1815 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1816 /// In either case, the net result is going to eventually invoke SHUFB to
1817 /// permute/shuffle the bytes from V1 and V2.
1819 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1820 /// control word for byte/halfword/word insertion. This takes care of a single
1821 /// element move from V2 into V1.
1823 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1824 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1825 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1826 SDValue V1 = Op.getOperand(0);
1827 SDValue V2 = Op.getOperand(1);
1828 DebugLoc dl = Op.getDebugLoc();
1830 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1832 // If we have a single element being moved from V1 to V2, this can be handled
1833 // using the C*[DX] compute mask instructions, but the vector elements have
1834 // to be monotonically increasing with one exception element, and the source
1835 // slot of the element to move must be the same as the destination.
1836 EVT VecVT = V1.getValueType();
1837 EVT EltVT = VecVT.getVectorElementType();
1838 unsigned EltsFromV2 = 0;
1839 unsigned V2EltOffset = 0;
1840 unsigned V2EltIdx0 = 0;
1841 unsigned CurrElt = 0;
1842 unsigned MaxElts = VecVT.getVectorNumElements();
1843 unsigned PrevElt = 0;
1844 bool monotonic = true;
1847 EVT maskVT; // which of the c?d instructions to use
1849 if (EltVT == MVT::i8) {
1851 maskVT = MVT::v16i8;
1852 } else if (EltVT == MVT::i16) {
1854 maskVT = MVT::v8i16;
1855 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1857 maskVT = MVT::v4i32;
1858 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1860 maskVT = MVT::v2i64;
1862 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1864 for (unsigned i = 0; i != MaxElts; ++i) {
1865 if (SVN->getMaskElt(i) < 0)
1868 unsigned SrcElt = SVN->getMaskElt(i);
1871 if (SrcElt >= V2EltIdx0) {
1872 // TODO: optimize for the monotonic case when several consecutive
1873 // elements are taken form V2. Do we ever get such a case?
1874 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1875 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
1879 } else if (CurrElt != SrcElt) {
1887 if (PrevElt > 0 && SrcElt < MaxElts) {
1888 if ((PrevElt == SrcElt - 1)
1889 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1894 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
1895 // First time or after a "wrap around"
1899 // This isn't a rotation, takes elements from vector 2
1905 if (EltsFromV2 == 1 && monotonic) {
1906 // Compute mask and shuffle
1907 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1909 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1910 // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1911 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1912 DAG.getRegister(SPU::R1, PtrVT),
1913 DAG.getConstant(V2EltOffset, MVT::i32));
1914 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1917 // Use shuffle mask in SHUFB synthetic instruction:
1918 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1920 } else if (rotate) {
1923 rotamt *= EltVT.getSizeInBits()/8;
1924 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1925 V1, DAG.getConstant(rotamt, MVT::i16));
1927 // Convert the SHUFFLE_VECTOR mask's input element units to the
1929 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1931 SmallVector<SDValue, 16> ResultMask;
1932 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1933 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1935 for (unsigned j = 0; j < BytesPerElement; ++j)
1936 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1938 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1939 &ResultMask[0], ResultMask.size());
1940 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes a splatted BUILD_VECTOR
// (which later simplifies to a vector register load); any other scalar is
// moved into the preferred slot via PREFSLOT2VEC, leaving other slots as
// don't-care.
1944 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1945 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1946 DebugLoc dl = Op.getDebugLoc();
1948 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1949 // For a constant, build the appropriate constant vector, which will
1950 // eventually simplify to a vector register load.
1952 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1953 SmallVector<SDValue, 16> ConstVecValues;
1957 // Create a constant vector:
// Each case fills the 128-bit register: element count times element width.
1958 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1959 default: llvm_unreachable("Unexpected constant value type in "
1960 "LowerSCALAR_TO_VECTOR");
1961 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1962 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1963 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1964 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1965 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1966 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// NOTE(review): the splat value is built with getConstant from the node's
// zero-extended integer bits even for the f32/f64 cases — confirm the FP
// vector cases only reach here with integer-typed Constant nodes.
1969 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1970 for (size_t j = 0; j < n_copies; ++j)
1971 ConstVecValues.push_back(CValue);
1973 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1974 &ConstVecValues[0], ConstVecValues.size());
1976 // Otherwise, copy the value from one register to another:
1977 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1978 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
// Promote the scalar into the vector's preferred slot (second operand is a
// duplicate; PREFSLOT2VEC takes the value twice).
1985 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
// Lower EXTRACT_VECTOR_ELT. Constant indices are handled by building a SHUFB
// mask that moves the requested element into the register's preferred slot;
// variable indices rotate the element to byte 0 with SHL_BYTES and then
// replicate it across the register before reading the preferred slot.
1992 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1993 EVT VT = Op.getValueType();
1994 SDValue N = Op.getOperand(0);
1995 SDValue Elt = Op.getOperand(1);
1996 DebugLoc dl = Op.getDebugLoc();
1999 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2000 // Constant argument:
2001 int EltNo = (int) C->getZExtValue();
// Range-check the constant index against the element count for each type.
2004 if (VT == MVT::i8 && EltNo >= 16)
2005 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2006 else if (VT == MVT::i16 && EltNo >= 8)
2007 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
// NOTE(review): the two messages below are off by one — for i32 the last
// valid slot is 3 (message should read "> 3") and for i64 it is 1
// (message should read "> 1"). The conditions themselves are correct.
2008 else if (VT == MVT::i32 && EltNo >= 4)
2009 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2010 else if (VT == MVT::i64 && EltNo >= 2)
2011 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2013 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2014 // i32 and i64: Element 0 is the preferred slot
2015 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
2018 // Need to generate shuffle mask and extract:
2019 int prefslot_begin = -1, prefslot_end = -1;
2020 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range depends on the scalar width (see prefslotOffset).
2022 switch (VT.getSimpleVT().SimpleTy) {
2024 assert(false && "Invalid value type!");
2026 prefslot_begin = prefslot_end = 3;
2030 prefslot_begin = 2; prefslot_end = 3;
2035 prefslot_begin = 0; prefslot_end = 3;
2040 prefslot_begin = 0; prefslot_end = 7;
2045 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2046 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shuffle pattern that places the selected element's bytes
// into the preferred slot and repeats the pattern for the remaining bytes.
2048 unsigned int ShufBytes[16] = {
2049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2051 for (int i = 0; i < 16; ++i) {
2052 // zero fill upper part of preferred slot, don't care about the
2054 unsigned int mask_val;
2055 if (i <= prefslot_end) {
2057 ((i < prefslot_begin)
2059 : elt_byte + (i - prefslot_begin));
2061 ShufBytes[i] = mask_val;
// Past the preferred slot: repeat the established pattern.
2063 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 byte selectors into four i32 words for the v4i32 mask vector.
2066 SDValue ShufMask[4];
2067 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2068 unsigned bidx = i * 4;
2069 unsigned int bits = ((ShufBytes[bidx] << 24) |
2070 (ShufBytes[bidx+1] << 16) |
2071 (ShufBytes[bidx+2] << 8) |
2073 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2076 SDValue ShufMaskVec =
2077 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2078 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then read it out.
2080 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2081 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2082 N, N, ShufMaskVec));
2084 // Variable index: Rotate the requested element into slot 0, then replicate
2085 // slot 0 across the vector
2086 EVT VecVT = N.getValueType();
2087 if (!VecVT.isSimple() || !VecVT.isVector()) {
2088 report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2092 // Make life easier by making sure the index is zero-extended to i32
2093 if (Elt.getValueType() != MVT::i32)
2094 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2096 // Scale the index to a bit/byte shift quantity
// 16 bytes / element-count gives bytes-per-element; its log2 is the shift
// needed to turn an element index into a byte offset.
2098 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2099 unsigned scaleShift = scaleFactor.logBase2();
2102 if (scaleShift > 0) {
2103 // Scale the shift factor:
2104 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2105 DAG.getConstant(scaleShift, MVT::i32));
// Byte-shift the vector left so the requested element lands at byte 0.
2108 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
2110 // Replicate the bytes starting at byte 0 across the entire vector (for
2111 // consistency with the notion of a unified register set)
// Each case builds the SHUFB replication pattern for one element width.
// NOTE(review): "varable" in the message below is a typo for "variable"
// (runtime string, left untouched here).
2114 switch (VT.getSimpleVT().SimpleTy) {
2116 report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2120 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2121 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2122 factor, factor, factor, factor);
2126 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2127 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2128 factor, factor, factor, factor);
2133 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2134 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2135 factor, factor, factor, factor);
2140 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2141 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2142 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2143 loFactor, hiFactor, loFactor, hiFactor);
// Broadcast byte 0's element everywhere, then read the preferred slot.
2148 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2149 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2150 vecShift, vecShift, replicate));
// Lower INSERT_VECTOR_ELT by materializing a SHUFFLE_MASK (c?d-style control
// word) addressed off $sp at the insertion byte offset, then SHUFB-merging
// the scalar (promoted via SCALAR_TO_VECTOR) into the target vector.
2156 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2157 SDValue VecOp = Op.getOperand(0);
2158 SDValue ValOp = Op.getOperand(1);
2159 SDValue IdxOp = Op.getOperand(2);
2160 DebugLoc dl = Op.getDebugLoc();
2161 EVT VT = Op.getValueType();
2162 EVT eltVT = ValOp.getValueType();
2164 // use 0 when the lane to insert to is 'undef'
2166 if (IdxOp.getOpcode() != ISD::UNDEF) {
2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
// Byte offset of the destination lane within the 128-bit register.
2169 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2172 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2173 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2174 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2175 DAG.getRegister(SPU::R1, PtrVT),
2176 DAG.getConstant(Offset, PtrVT));
2177 // widen the mask when dealing with half vectors
2178 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2179 128/ VT.getVectorElementType().getSizeInBits());
2180 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
// SHUFB merges the promoted scalar into the vector under the computed mask.
2183 DAG.getNode(SPUISD::SHUFB, dl, VT,
2184 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2186 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
// Lower i8 arithmetic by promoting the operands to i16, performing the
// operation at i16, and truncating back to i8 — the SPU has no native
// 8-bit ALU forms for these operations. Opc is the ISD opcode to emit at
// the i16 width.
2191 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2192 const TargetLowering &TLI)
2194 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2195 DebugLoc dl = Op.getDebugLoc();
2196 EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
2198 assert(Op.getValueType() == MVT::i8);
2201 llvm_unreachable("Unhandled i8 math operator");
2205 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2207 SDValue N1 = Op.getOperand(1);
2208 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2209 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2210 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2211 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2216 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2218 SDValue N1 = Op.getOperand(1);
2219 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2220 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2221 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2222 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Rotate section (case label elided in this view — presumably ISD::ROTL;
// confirm against the full file): zero-extend the value and normalize the
// rotate amount to the shift-amount type.
2226 SDValue N1 = Op.getOperand(1);
2227 EVT N1VT = N1.getValueType();
2229 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2230 if (!N1VT.bitsEq(ShiftVT)) {
2231 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2234 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2237 // Replicate lower 8-bits into upper 8:
// Duplicating the low byte makes the i16 rotate behave like an i8 rotate:
// bits rotated out of the low byte re-enter from the copy above it.
2239 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2240 DAG.getNode(ISD::SHL, dl, MVT::i16,
2241 N0, DAG.getConstant(8, MVT::i32)));
2243 // Truncate back down to i8
2244 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2245 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
// Logical-shift section (case label elided): zero-extend the value so the
// vacated high bits are zero, normalize the shift amount, shift at i16.
2249 SDValue N1 = Op.getOperand(1);
2250 EVT N1VT = N1.getValueType();
2252 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2253 if (!N1VT.bitsEq(ShiftVT)) {
2254 unsigned N1Opc = ISD::ZERO_EXTEND;
2256 if (N1.getValueType().bitsGT(ShiftVT))
2257 N1Opc = ISD::TRUNCATE;
2259 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2262 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2263 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Arithmetic-shift section (case label elided): sign-extend the value so
// the sign bit propagates through the i16 shift.
2266 SDValue N1 = Op.getOperand(1);
2267 EVT N1VT = N1.getValueType();
2269 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2270 if (!N1VT.bitsEq(ShiftVT)) {
2271 unsigned N1Opc = ISD::SIGN_EXTEND;
2273 if (N1VT.bitsGT(ShiftVT))
2274 N1Opc = ISD::TRUNCATE;
2275 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2278 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2279 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
// Final section (case label elided — presumably ISD::MUL): sign-extend
// both operands and multiply at i16; the low 8 bits are width-agnostic.
2282 SDValue N1 = Op.getOperand(1);
2284 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2285 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2286 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2287 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2295 //! Lower byte immediate operations for v16i8 vectors:
// Normalize the operands of a v16i8 AND/OR/XOR so that when one side is a
// constant splat, it is rewritten as a 16-way splat of its low byte — the
// form the byte-immediate instructions (ANDBI/ORBI/XORBI) can select.
2297 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2300 EVT VT = Op.getValueType();
2301 DebugLoc dl = Op.getDebugLoc();
// Assume operand 0 is the constant vector; swap below if it is not.
2303 ConstVec = Op.getOperand(0);
2304 Arg = Op.getOperand(1);
2305 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
// Look through a BITCAST wrapping the constant vector.
2306 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2307 ConstVec = ConstVec.getOperand(0);
// Otherwise try the other operand as the constant side.
2309 ConstVec = Op.getOperand(1);
2310 Arg = Op.getOperand(0);
2311 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2312 ConstVec = ConstVec.getOperand(0);
2317 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2318 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2319 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2321 APInt APSplatBits, APSplatUndef;
2322 unsigned SplatBitSize;
2324 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
// Only rewrite when the whole vector is a splat of at least element width.
2326 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2327 HasAnyUndefs, minSplatBits)
2328 && minSplatBits <= SplatBitSize) {
2329 uint64_t SplatBits = APSplatBits.getZExtValue();
// Re-splat the low byte 16 times so the immediate form can match.
2330 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2332 SmallVector<SDValue, 16> tcVec;
2333 tcVec.assign(16, tc);
2334 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2335 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2339 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2340 // lowered. Return the operation, rather than a null SDValue.
2344 //! Custom lowering for CTPOP (count population)
2346 Custom lowering code that counts the number ones in the input
2347 operand. SPU has such an instruction, but it counts the number of
2348 ones per byte, which then have to be accumulated.
// Lower CTPOP using the SPU CNTB instruction, which counts ones per byte.
// The per-byte counts are then accumulated by shifting and adding: i8 needs
// no accumulation, i16 folds two bytes, i32 folds four bytes in two rounds.
2350 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2351 EVT VT = Op.getValueType();
// Build the full-register vector type holding 128/size copies of VT.
2352 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2353 VT, (128 / VT.getSizeInBits()));
2354 DebugLoc dl = Op.getDebugLoc();
2356 switch (VT.getSimpleVT().SimpleTy) {
2358 assert(false && "Invalid value type!");
// i8 case: a single CNTB already yields the popcount of the byte.
2360 SDValue N = Op.getOperand(0);
2361 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2363 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2364 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2366 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
// i16 case: CNTB, then add the high byte's count to the low byte's count
// (shift right 8, add, mask to 4 bits — max popcount of 16 bits is 16).
2370 MachineFunction &MF = DAG.getMachineFunction();
2371 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2373 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2375 SDValue N = Op.getOperand(0);
2376 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2377 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2378 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2380 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2381 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2383 // CNTB_result becomes the chain to which all of the virtual registers
2384 // CNTB_reg, SUM1_reg become associated:
2385 SDValue CNTB_result =
2386 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
// Pin the CNTB result in a virtual register so it can be read twice.
2388 SDValue CNTB_rescopy =
2389 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2391 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2393 return DAG.getNode(ISD::AND, dl, MVT::i16,
2394 DAG.getNode(ISD::ADD, dl, MVT::i16,
2395 DAG.getNode(ISD::SRL, dl, MVT::i16,
// i32 case: two accumulation rounds — fold 16-bit halves, then bytes —
// then mask to 8 bits (max popcount of 32 bits is 32).
2402 MachineFunction &MF = DAG.getMachineFunction();
2403 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2405 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2408 SDValue N = Op.getOperand(0);
2409 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2410 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2411 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2412 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2414 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2415 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2417 // CNTB_result becomes the chain to which all of the virtual registers
2418 // CNTB_reg, SUM1_reg become associated:
2419 SDValue CNTB_result =
2420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2422 SDValue CNTB_rescopy =
2423 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
// Round 1: add the upper 16 bits' byte counts into the lower 16 bits.
2426 DAG.getNode(ISD::SRL, dl, MVT::i32,
2427 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2431 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2432 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2434 SDValue Sum1_rescopy =
2435 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
// Round 2: add the remaining upper byte into the low byte.
2438 DAG.getNode(ISD::SRL, dl, MVT::i32,
2439 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2442 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2443 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2445 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2455 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2457 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2458 All conversions to i64 are expanded to a libcall.
// Lower FP_TO_SINT/FP_TO_UINT: f32->i32 passes through untouched; f64->i32
// and any conversion producing i64 is expanded to the matching runtime
// library call via ExpandLibCall.
2460 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2461 const SPUTargetLowering &TLI) {
2462 EVT OpVT = Op.getValueType();
2463 SDValue Op0 = Op.getOperand(0);
2464 EVT Op0VT = Op0.getValueType();
2466 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2467 || OpVT == MVT::i64) {
2468 // Convert f32 / f64 to i32 / i64 via libcall.
// Pick the signed or unsigned conversion routine from the opcode.
2470 (Op.getOpcode() == ISD::FP_TO_SINT)
2471 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2472 : RTLIB::getFPTOUINT(Op0VT, OpVT);
// NOTE(review): "Unexpectd" below is a typo for "Unexpected" in the
// assert message (string left untouched here).
2473 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2475 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2481 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2483 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2484 All conversions from i64 are expanded to a libcall.
// Lower SINT_TO_FP/UINT_TO_FP: i32->f32 passes through untouched; i32->f64
// and any conversion from i64 is expanded to the matching runtime library
// call via ExpandLibCall. Mirrors LowerFP_TO_INT above.
2486 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2487 const SPUTargetLowering &TLI) {
2488 EVT OpVT = Op.getValueType();
2489 SDValue Op0 = Op.getOperand(0);
2490 EVT Op0VT = Op0.getValueType();
2492 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2493 || Op0VT == MVT::i64) {
2494 // Convert i32, i64 to f64 via libcall:
// Pick the signed or unsigned conversion routine from the opcode.
2496 (Op.getOpcode() == ISD::SINT_TO_FP)
2497 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2498 : RTLIB::getUINTTOFP(Op0VT, OpVT);
// NOTE(review): "Unexpectd" below is a typo for "Unexpected" in the
// assert message (string left untouched here).
2499 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2501 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2507 //! Lower ISD::SETCC
2509 This handles MVT::f64 (double floating point) condition lowering
// Lower an f64 SETCC without hardware double-compare support. Strategy:
// bitcast the doubles to i64, convert the IEEE sign-magnitude encoding to
// two's complement so an ordinary integer compare gives the right ordering,
// then AND in NaN checks for the ordered predicates. SETO/SETUO are handled
// up front since they only inspect the lhs.
2511 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2512 const TargetLowering &TLI) {
2513 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2514 DebugLoc dl = Op.getDebugLoc();
2515 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2517 SDValue lhs = Op.getOperand(0);
2518 SDValue rhs = Op.getOperand(1);
2519 EVT lhsVT = lhs.getValueType();
// NOTE(review): "MVT::64" in the message below is a typo for "MVT::f64"
// (runtime string, left untouched here).
2520 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2522 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2523 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2524 EVT IntVT(MVT::i64);
2526 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2527 // selected to a NOP:
2528 SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
// High 32 bits of the lhs (sign + exponent + mantissa top).
2530 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2531 DAG.getNode(ISD::SRL, dl, IntVT,
2532 i64lhs, DAG.getConstant(32, MVT::i32)));
// Same with the sign bit cleared, for NaN detection below.
2533 SDValue lhsHi32abs =
2534 DAG.getNode(ISD::AND, dl, MVT::i32,
2535 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2537 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2539 // SETO and SETUO only use the lhs operand:
2540 if (CC->get() == ISD::SETO) {
2541 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
// XOR against all-ones inverts the unordered test.
2543 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2544 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2545 DAG.getSetCC(dl, ccResultVT,
2546 lhs, DAG.getConstantFP(0.0, lhsVT),
2548 DAG.getConstant(ccResultAllOnes, ccResultVT));
2549 } else if (CC->get() == ISD::SETUO) {
2550 // Evaluates to true if Op0 is [SQ]NaN
// NaN iff |hi32| >= 0x7ff00000 with a nonzero mantissa overall; the two
// integer compares below encode that test.
2551 return DAG.getNode(ISD::AND, dl, ccResultVT,
2552 DAG.getSetCC(dl, ccResultVT,
2554 DAG.getConstant(0x7ff00000, MVT::i32),
2556 DAG.getSetCC(dl, ccResultVT,
2558 DAG.getConstant(0, MVT::i32),
2562 SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
2564 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2565 DAG.getNode(ISD::SRL, dl, IntVT,
2566 i64rhs, DAG.getConstant(32, MVT::i32)));
2568 // If a value is negative, subtract from the sign magnitude constant:
2569 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2571 // Convert the sign-magnitude representation into 2's complement:
// Select mask is the sign bit smeared across the word (arithmetic shift).
2572 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2573 lhsHi32, DAG.getConstant(31, MVT::i32));
2574 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2576 DAG.getNode(ISD::SELECT, dl, IntVT,
2577 lhsSelectMask, lhsSignMag2TC, i64lhs);
2579 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2580 rhsHi32, DAG.getConstant(31, MVT::i32));
2581 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2583 DAG.getNode(ISD::SELECT, dl, IntVT,
2584 rhsSelectMask, rhsSignMag2TC, i64rhs);
// Map each FP predicate to the equivalent integer predicate on the
// two's-complement images.
2588 switch (CC->get()) {
2591 compareOp = ISD::SETEQ; break;
2594 compareOp = ISD::SETGT; break;
2597 compareOp = ISD::SETGE; break;
2600 compareOp = ISD::SETLT; break;
2603 compareOp = ISD::SETLE; break;
2606 compareOp = ISD::SETNE; break;
2608 report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2612 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2613 (ISD::CondCode) compareOp);
// Bit 3 of the condition code distinguishes ordered from unordered
// predicates; ordered compares must additionally reject NaN operands.
2615 if ((CC->get() & 0x8) == 0) {
2616 // Ordered comparison:
2617 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2618 lhs, DAG.getConstantFP(0.0, MVT::f64),
2620 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2621 rhs, DAG.getConstantFP(0.0, MVT::f64),
2623 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2625 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2631 //! Lower ISD::SELECT_CC
2633 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2636 \note Need to revisit this in the future: if the code path through the true
2637 and false value computations is longer than the latency of a branch (6
2638 cycles), then it would be more advantageous to branch and insert a new basic
2639 block and branch on the condition. However, this code does not make that
2640 assumption, given the simplistic uses so far.
2643 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2644 const TargetLowering &TLI) {
2645 EVT VT = Op.getValueType();
2646 SDValue lhs = Op.getOperand(0);
2647 SDValue rhs = Op.getOperand(1);
2648 SDValue trueval = Op.getOperand(2);
2649 SDValue falseval = Op.getOperand(3);
2650 SDValue condition = Op.getOperand(4);
2651 DebugLoc dl = Op.getDebugLoc();
2653 // NOTE: SELB's arguments: $rA, $rB, $mask
2655 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2656 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2657 // condition was true and 0s where the condition was false. Hence, the
2658 // arguments to SELB get reversed.
2660 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2661 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2662 // with another "cannot select select_cc" assert:
// Materialize the comparison as an explicit SETCC producing the bit mask,
// then feed it to SELB with falseval/trueval swapped (see note above).
2664 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2665 TLI.getSetCCResultType(Op.getValueType()),
2666 lhs, rhs, condition);
2667 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2670 //! Custom lower ISD::TRUNCATE
// Lower TRUNCATE. Only the i128 -> i64 case is custom lowered: a SHUFB with
// a constant mask moves the least significant doubleword of the quadword
// into the preferred slot. All other truncations are left for the default
// expansion.
2671 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2673 // Type to truncate to
2674 EVT VT = Op.getValueType();
2675 MVT simpleVT = VT.getSimpleVT();
2676 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2677 VT, (128 / VT.getSizeInBits()));
2678 DebugLoc dl = Op.getDebugLoc();
2680 // Type to truncate from
2681 SDValue Op0 = Op.getOperand(0);
2682 EVT Op0VT = Op0.getValueType();
2684 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2685 // Create shuffle mask, least significant doubleword of quadword
// Byte selectors 0x08..0x0f pick bytes 8-15 (the low doubleword, since
// the SPU register layout is big-endian).
2686 unsigned maskHigh = 0x08090a0b;
2687 unsigned maskLow = 0x0c0d0e0f;
2688 // Use a shuffle to perform the truncation
2689 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2690 DAG.getConstant(maskHigh, MVT::i32),
2691 DAG.getConstant(maskLow, MVT::i32),
2692 DAG.getConstant(maskHigh, MVT::i32),
2693 DAG.getConstant(maskLow, MVT::i32));
2695 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2696 Op0, Op0, shufMask);
// Read the shuffled doubleword out of the preferred slot.
2698 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2701 return SDValue(); // Leave the truncate unmolested
2705 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2706 * algorithm is to duplicate the sign bit using rotmai to generate at
2707 * least one byte full of sign bits. Then propagate the "sign-byte" into
2708 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2710 * @param Op The sext operand
2711 * @param DAG The current DAG
2712 * @return The SDValue with the entire instruction sequence
2714 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2716 DebugLoc dl = Op.getDebugLoc();
2718 // Type to extend to
2719 MVT OpVT = Op.getValueType().getSimpleVT();
2721 // Type to extend from
2722 SDValue Op0 = Op.getOperand(0);
2723 MVT Op0VT = Op0.getValueType().getSimpleVT();
2725 // extend i8 & i16 via i32
// Narrow sources are first widened to i32 with a plain SIGN_EXTEND so the
// shuffle-based path below only has to handle i32 and i64 inputs.
2726 if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
2727 Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
2731 // The type to extend to needs to be a i128 and
2732 // the type to extend from needs to be i64 or i32.
2733 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2734 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2737 // Create shuffle mask
// 0x10 selects a byte from the second SHUFB operand (the sign-bit vector);
// 0x00-0x07 select the original value's bytes from the first operand.
2738 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2739 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2740 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2741 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2742 DAG.getConstant(mask1, MVT::i32),
2743 DAG.getConstant(mask1, MVT::i32),
2744 DAG.getConstant(mask2, MVT::i32),
2745 DAG.getConstant(mask3, MVT::i32));
2747 // Word wise arithmetic right shift to generate at least one byte
2748 // that contains sign bits.
2749 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2750 SDValue sraVal = DAG.getNode(ISD::SRA,
2753 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2754 DAG.getConstant(31, MVT::i32));
2756 // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
// COPY_TO_REGCLASS into GPRC reinterprets the value without generating code.
2757 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2759 DAG.getTargetConstant(
2760 SPU::GPRCRegClass.getID(),
2762 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2763 // and the input value into the lower 64 bits.
2764 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2765 extended, sraVal, shufMask);
2766 return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
2769 //! Custom (target-specific) lowering entry point
2771 This is where LLVM's DAG selection process calls to do target-specific
2775 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2777 unsigned Opc = (unsigned) Op.getOpcode();
2778 EVT VT = Op.getValueType();
// Unhandled opcodes are a backend bug: dump diagnostics and abort.
2783 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2784 errs() << "Op.getOpcode() = " << Opc << "\n";
2785 errs() << "*Op.getNode():\n";
2786 Op.getNode()->dump();
2788 llvm_unreachable(0);
// Memory operations:
2794 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2796 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
// Address/constant materialization:
2797 case ISD::ConstantPool:
2798 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2799 case ISD::GlobalAddress:
2800 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2801 case ISD::JumpTable:
2802 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2803 case ISD::ConstantFP:
2804 return LowerConstantFP(Op, DAG);
2806 // i8, i64 math ops:
2815 return LowerI8Math(Op, DAG, Opc, *this);
// FP <-> int conversions (libcall expansion where unsupported):
2819 case ISD::FP_TO_SINT:
2820 case ISD::FP_TO_UINT:
2821 return LowerFP_TO_INT(Op, DAG, *this);
2823 case ISD::SINT_TO_FP:
2824 case ISD::UINT_TO_FP:
2825 return LowerINT_TO_FP(Op, DAG, *this);
2827 // Vector-related lowering.
2828 case ISD::BUILD_VECTOR:
2829 return LowerBUILD_VECTOR(Op, DAG);
2830 case ISD::SCALAR_TO_VECTOR:
2831 return LowerSCALAR_TO_VECTOR(Op, DAG);
2832 case ISD::VECTOR_SHUFFLE:
2833 return LowerVECTOR_SHUFFLE(Op, DAG);
2834 case ISD::EXTRACT_VECTOR_ELT:
2835 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2836 case ISD::INSERT_VECTOR_ELT:
2837 return LowerINSERT_VECTOR_ELT(Op, DAG);
2839 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2843 return LowerByteImmed(Op, DAG);
2845 // Vector and i8 multiply:
2848 return LowerI8Math(Op, DAG, Opc, *this);
2851 return LowerCTPOP(Op, DAG);
2853 case ISD::SELECT_CC:
2854 return LowerSELECT_CC(Op, DAG, *this);
2857 return LowerSETCC(Op, DAG, *this);
2860 return LowerTRUNCATE(Op, DAG);
2862 case ISD::SIGN_EXTEND:
2863 return LowerSIGN_EXTEND(Op, DAG);
// Custom result-type legalization hook. No node results are currently
// replaced for SPU: unknown opcodes print diagnostics (debug aid) and fall
// through, leaving Results empty so the node is returned unchanged.
2869 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2870 SmallVectorImpl<SDValue>&Results,
2871 SelectionDAG &DAG) const
2874 unsigned Opc = (unsigned) N->getOpcode();
2875 EVT OpVT = N->getValueType(0);
2879 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2880 errs() << "Op.getOpcode() = " << Opc << "\n";
2881 errs() << "*Op.getNode():\n";
2889 /* Otherwise, return unchanged */
2892 //===----------------------------------------------------------------------===//
2893 // Target Optimization Hooks
2894 //===----------------------------------------------------------------------===//
//! Hook called by the generic DAG combiner for SPU-specific nodes.
//
// Visible combines in this excerpt: folding constants into SPUindirect
// address arithmetic, dropping redundant extends of a prefslot extract,
// simplifying (SPUindirect (SPUaform ...), 0) and (SPUindirect (add ...), 0),
// killing shift/rotate-by-zero, and collapsing PREFSLOT2VEC/VEC2PREFSLOT
// round trips.  An empty Result leaves the node unchanged.
//
// NOTE(review): several case labels, closing braces and debug `errs()`
// prefixes were elided from this excerpt; the layout below follows the
// visible lines only.
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
    // ISD::ADD combine: fold constants into SPUindirect address arithmetic.
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      // (normalization swap elided in this excerpt)
      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)
#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
            << "With: (SPUindirect <arg>, <arg>)\n";
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          // Both offsets are constants: fold them into a single offset.
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
            << "), " << CN0->getSExtValue() << ")\n"
            << "With: (SPUindirect <arg>, "
            << combinedConst << ")\n";
          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        errs() << "\nWith: ";
        Op0.getNode()->dump(&DAG);
  case SPUISD::IndirectAddr: {
    // A-form addresses need no indirection when not using large memory.
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
            << "With: (SPUindirect <arg>, <arg>)\n";
          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
  case SPUISD::SHL_BITS:
  case SPUISD::SHL_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
  case SPUISD::PREFSLOT2VEC: {
    // Collapse prefslot insert/extract round trips.
    switch (Op0.getOpcode()) {
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      Result = Op0.getOperand(0);
  // Otherwise, return unchanged.
  if (Result.getNode()) {
    // Trace the replacement when debugging is enabled.
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
3081 //===----------------------------------------------------------------------===//
3082 // Inline Assembly Support
3083 //===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.  The recognized single-letter
/// constraints classify as C_RegisterClass; everything else defers to the
/// generic TargetLowering implementation.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
      // (recognized constraint letters elided in this excerpt)
      return C_RegisterClass;
  return TargetLowering::getConstraintType(ConstraintLetter);
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
SPUTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
  // Look at the constraint type.
  switch (*constraint) {
    // Default: defer to the generic implementation.
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    //FIXME: Seems like the supported constraint letters were just copied
    // from PPC, as the following doesn't correspond to the GCC docs.
    // I'm leaving it so until someone adds the corresponding lowering support.
    // Register-class constraint letters get register weight.
    // (case labels elided in this excerpt)
    weight = CW_Register;
/// Map an inline-asm register-constraint letter (together with the operand's
/// value type) onto a concrete SPU register class.  Unrecognized constraints
/// fall through to the generic TargetLowering handler.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
      // (case labels and VT checks partially elided in this excerpt)
      return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
      return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      return std::make_pair(0U, SPU::GPRCRegisterClass);
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
//! Compute used/known bits for a SPU operand
//
// For the SPU target nodes listed below no known-zero/known-one bits are
// derived in the visible code (their case bodies were elided in this
// excerpt).
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
  // Width of a uint64_t in bits, for mask manipulation.
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
// ComputeNumSignBitsForTargetNode - Compute the number of known sign bits
// for SPU target nodes.  The visible case returns the full bit width for
// non-{i8,i16,i32} types; the enclosing case label was elided from this
// excerpt.
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
    EVT VT = Op.getValueType();

    // NOTE(review): the branch for i8/i16/i32 was elided in this excerpt.
    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
    return VT.getSizeInBits();
// LowerAsmOperandForConstraint - Lower an inline-asm operand for the given
// constraint; currently a pure delegation to the generic TargetLowering
// implementation.
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                std::string &Constraint,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
  // SPU's addresses are 256K:
  // NOTE(review): both bounds are exclusive, so -(1 << 18) and
  // (1 << 18) - 1 are themselves rejected — confirm that is intended.
  return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload of isLegalAddressImmediate.  (Body elided in this
// excerpt.)
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
// isOffsetFoldingLegal - Whether a constant offset may be folded into a
// GlobalAddress node.  (Return statement elided in this excerpt.)
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
3240 // can we compare to Imm without writing it into a register?
3241 bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3242 //ceqi, cgti, etc. all take s10 operand
3243 return isInt<10>(Imm);
3247 SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3250 // A-form: 18bit absolute address.
3251 if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
3254 // D-form: reg + 14bit offset
3255 if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
3259 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)