contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp

   1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief This is the parent TargetLowering class for hardware code gen
  12 /// targets.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AMDGPUISelLowering.h"
  17 #include "AMDGPU.h"
  18 #include "AMDGPUFrameLowering.h"
  19 #include "AMDGPURegisterInfo.h"
  20 #include "AMDGPUSubtarget.h"
  21 #include "AMDILIntrinsicInfo.h"
  22 #include "R600MachineFunctionInfo.h"
  23 #include "SIMachineFunctionInfo.h"
  24 #include "llvm/CodeGen/CallingConvLower.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  29 #include "llvm/IR/DataLayout.h"
  30
  31 using namespace llvm;
  32 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
  33                       CCValAssign::LocInfo LocInfo,
  34                       ISD::ArgFlagsTy ArgFlags, CCState &State) {
  35   unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
  36     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  37
  38   return true;
  39 }
  40
  41 #include "AMDGPUGenCallingConv.inc"
  42
  43 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  44   TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  45
  46   // Initialize target lowering borrowed from AMDIL
  47   InitAMDILLowering();
  48
  49   // We need to custom lower some of the intrinsics
  50   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  51
  52   // Library functions.  These default to Expand, but we have instructions
  53   // for them.
  54   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
  55   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  56   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
  57   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
  58   setOperationAction(ISD::FABS,   MVT::f32, Legal);
  59   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  60   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  61   setOperationAction(ISD::FROUND, MVT::f32, Legal);
  62
  63   // The hardware supports ROTR, but not ROTL
  64   setOperationAction(ISD::ROTL, MVT::i32, Expand);
  65
  66   // Lower floating point store/load to integer store/load to reduce the number
  67   // of patterns in tablegen.
  68   setOperationAction(ISD::STORE, MVT::f32, Promote);
  69   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
  70
  71   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  72   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
  73
  74   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  75   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
  76
  77   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  78   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
  79
  80   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  81   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
  82
  83   setOperationAction(ISD::STORE, MVT::f64, Promote);
  84   AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
  85
  86   // Custom lowering of vector stores is required for local address space
  87   // stores.
  88   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  89   // XXX: Native v2i32 local address space stores are possible, but not
  90   // currently implemented.
  91   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  92
  93   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  94   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  95   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  96   // XXX: This can be change to Custom, once ExpandVectorStores can
  97   // handle 64-bit stores.
  98   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
  99
 100   setOperationAction(ISD::LOAD, MVT::f32, Promote);
 101   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
 102
 103   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
 104   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
 105
 106   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
 107   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 108
 109   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
 110   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
 111
 112   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
 113   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
 114
 115   setOperationAction(ISD::LOAD, MVT::f64, Promote);
 116   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
 117
 118   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
 119   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
 120   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
 121   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
 122
 123   setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
 124   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
 125   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
 126   setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
 127   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
 128   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
 129   setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
 130   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
 131   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
 132   setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
 133   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
 134   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
 135
 136   setOperationAction(ISD::BR_CC, MVT::i1, Expand);
 137
 138   setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
 139   setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
 140
 141   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 142
 143   setOperationAction(ISD::MUL, MVT::i64, Expand);
 144
 145   setOperationAction(ISD::UDIV, MVT::i32, Expand);
 146   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
 147   setOperationAction(ISD::UREM, MVT::i32, Expand);
 148   setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
 149   setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
 150
 151   static const MVT::SimpleValueType IntTypes[] = {
 152     MVT::v2i32, MVT::v4i32
 153   };
 154   const size_t NumIntTypes = array_lengthof(IntTypes);
 155
 156   for (unsigned int x  = 0; x < NumIntTypes; ++x) {
 157     MVT::SimpleValueType VT = IntTypes[x];
 158     //Expand the following operations for the current type by default
 159     setOperationAction(ISD::ADD,  VT, Expand);
 160     setOperationAction(ISD::AND,  VT, Expand);
 161     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
 162     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
 163     setOperationAction(ISD::MUL,  VT, Expand);
 164     setOperationAction(ISD::OR,   VT, Expand);
 165     setOperationAction(ISD::SHL,  VT, Expand);
 166     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
 167     setOperationAction(ISD::SRL,  VT, Expand);
 168     setOperationAction(ISD::SRA,  VT, Expand);
 169     setOperationAction(ISD::SUB,  VT, Expand);
 170     setOperationAction(ISD::UDIV, VT, Expand);
 171     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
 172     setOperationAction(ISD::UREM, VT, Expand);
 173     setOperationAction(ISD::VSELECT, VT, Expand);
 174     setOperationAction(ISD::XOR,  VT, Expand);
 175   }
 176
 177   static const MVT::SimpleValueType FloatTypes[] = {
 178     MVT::v2f32, MVT::v4f32
 179   };
 180   const size_t NumFloatTypes = array_lengthof(FloatTypes);
 181
 182   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
 183     MVT::SimpleValueType VT = FloatTypes[x];
 184     setOperationAction(ISD::FABS, VT, Expand);
 185     setOperationAction(ISD::FADD, VT, Expand);
 186     setOperationAction(ISD::FDIV, VT, Expand);
 187     setOperationAction(ISD::FFLOOR, VT, Expand);
 188     setOperationAction(ISD::FMUL, VT, Expand);
 189     setOperationAction(ISD::FRINT, VT, Expand);
 190     setOperationAction(ISD::FSQRT, VT, Expand);
 191     setOperationAction(ISD::FSUB, VT, Expand);
 192   }
 193 }
 194
 195 //===----------------------------------------------------------------------===//
 196 // Target Information
 197 //===----------------------------------------------------------------------===//
 198
 199 MVT AMDGPUTargetLowering::getVectorIdxTy() const {
 200   return MVT::i32;
 201 }
 202
 203 bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
 204                                                    EVT CastTy) const {
 205   if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
 206     return true;
 207
 208   unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
 209   unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
 210
 211   return ((LScalarSize <= CastScalarSize) ||
 212           (CastScalarSize >= 32) ||
 213           (LScalarSize < 32));
 214 }
 215
 216 //===---------------------------------------------------------------------===//
 217 // Target Properties
 218 //===---------------------------------------------------------------------===//
 219
 220 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
 221   assert(VT.isFloatingPoint());
 222   return VT == MVT::f32;
 223 }
 224
 225 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
 226   assert(VT.isFloatingPoint());
 227   return VT == MVT::f32;
 228 }
 229
 230 //===---------------------------------------------------------------------===//
 231 // TargetLowering Callbacks
 232 //===---------------------------------------------------------------------===//
 233
 234 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
 235                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
 236
 237   State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
 238 }
 239
 240 SDValue AMDGPUTargetLowering::LowerReturn(
 241                                      SDValue Chain,
 242                                      CallingConv::ID CallConv,
 243                                      bool isVarArg,
 244                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
 245                                      const SmallVectorImpl<SDValue> &OutVals,
 246                                      SDLoc DL, SelectionDAG &DAG) const {
 247   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
 248 }
 249
 250 //===---------------------------------------------------------------------===//
 251 // Target specific lowering
 252 //===---------------------------------------------------------------------===//
 253
 254 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 255     const {
 256   switch (Op.getOpcode()) {
 257   default:
 258     Op.getNode()->dump();
 259     assert(0 && "Custom lowering code for this"
 260         "instruction is not implemented yet!");
 261     break;
 262   // AMDIL DAG lowering
 263   case ISD::SDIV: return LowerSDIV(Op, DAG);
 264   case ISD::SREM: return LowerSREM(Op, DAG);
 265   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
 266   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
 267   // AMDGPU DAG lowering
 268   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
 269   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
 270   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
 271   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
 272   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
 273   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
 274   }
 275   return Op;
 276 }
 277
 278 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
 279                                                  SDValue Op,
 280                                                  SelectionDAG &DAG) const {
 281
 282   const DataLayout *TD = getTargetMachine().getDataLayout();
 283   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
 284
 285   assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
 286   // XXX: What does the value of G->getOffset() mean?
 287   assert(G->getOffset() == 0 &&
 288          "Do not know what to do with an non-zero offset");
 289
 290   const GlobalValue *GV = G->getGlobal();
 291
 292   unsigned Offset;
 293   if (MFI->LocalMemoryObjects.count(GV) == 0) {
 294     uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
 295     Offset = MFI->LDSSize;
 296     MFI->LocalMemoryObjects[GV] = Offset;
 297     // XXX: Account for alignment?
 298     MFI->LDSSize += Size;
 299   } else {
 300     Offset = MFI->LocalMemoryObjects[GV];
 301   }
 302
 303   return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
 304 }
 305
 306 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
 307                                          SmallVectorImpl<SDValue> &Args,
 308                                          unsigned Start,
 309                                          unsigned Count) const {
 310   EVT VT = Op.getValueType();
 311   for (unsigned i = Start, e = Start + Count; i != e; ++i) {
 312     Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
 313                                VT.getVectorElementType(),
 314                                Op, DAG.getConstant(i, MVT::i32)));
 315   }
 316 }
 317
 318 SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
 319                                                   SelectionDAG &DAG) const {
 320   SmallVector<SDValue, 8> Args;
 321   SDValue A = Op.getOperand(0);
 322   SDValue B = Op.getOperand(1);
 323
 324   ExtractVectorElements(A, DAG, Args, 0,
 325                         A.getValueType().getVectorNumElements());
 326   ExtractVectorElements(B, DAG, Args, 0,
 327                         B.getValueType().getVectorNumElements());
 328
 329   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
 330                      &Args[0], Args.size());
 331 }
 332
 333 SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
 334                                                      SelectionDAG &DAG) const {
 335
 336   SmallVector<SDValue, 8> Args;
 337   EVT VT = Op.getValueType();
 338   unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 339   ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
 340                         VT.getVectorNumElements());
 341
 342   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
 343                      &Args[0], Args.size());
 344 }
 345
 346 SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
 347                                               SelectionDAG &DAG) const {
 348
 349   MachineFunction &MF = DAG.getMachineFunction();
 350   const AMDGPUFrameLowering *TFL =
 351    static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
 352
 353   FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
 354   assert(FIN);
 355
 356   unsigned FrameIndex = FIN->getIndex();
 357   unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
 358   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
 359                          Op.getValueType());
 360 }
 361
 362 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 363     SelectionDAG &DAG) const {
 364   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 365   SDLoc DL(Op);
 366   EVT VT = Op.getValueType();
 367
 368   switch (IntrinsicID) {
 369     default: return Op;
 370     case AMDGPUIntrinsic::AMDIL_abs:
 371       return LowerIntrinsicIABS(Op, DAG);
 372     case AMDGPUIntrinsic::AMDIL_exp:
 373       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
 374     case AMDGPUIntrinsic::AMDGPU_lrp:
 375       return LowerIntrinsicLRP(Op, DAG);
 376     case AMDGPUIntrinsic::AMDIL_fraction:
 377       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
 378     case AMDGPUIntrinsic::AMDIL_max:
 379       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
 380                                                   Op.getOperand(2));
 381     case AMDGPUIntrinsic::AMDGPU_imax:
 382       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
 383                                                   Op.getOperand(2));
 384     case AMDGPUIntrinsic::AMDGPU_umax:
 385       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
 386                                                   Op.getOperand(2));
 387     case AMDGPUIntrinsic::AMDIL_min:
 388       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
 389                                                   Op.getOperand(2));
 390     case AMDGPUIntrinsic::AMDGPU_imin:
 391       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
 392                                                   Op.getOperand(2));
 393     case AMDGPUIntrinsic::AMDGPU_umin:
 394       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
 395                                                   Op.getOperand(2));
 396     case AMDGPUIntrinsic::AMDIL_round_nearest:
 397       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
 398   }
 399 }
 400
 401 ///IABS(a) = SMAX(sub(0, a), a)
 402 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
 403     SelectionDAG &DAG) const {
 404
 405   SDLoc DL(Op);
 406   EVT VT = Op.getValueType();
 407   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 408                                               Op.getOperand(1));
 409
 410   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
 411 }
 412
 413 /// Linear Interpolation
 414 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
 415 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 416     SelectionDAG &DAG) const {
 417   SDLoc DL(Op);
 418   EVT VT = Op.getValueType();
 419   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
 420                                 DAG.getConstantFP(1.0f, MVT::f32),
 421                                 Op.getOperand(1));
 422   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 423                                                     Op.getOperand(3));
 424   return DAG.getNode(ISD::FADD, DL, VT,
 425       DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
 426       OneSubAC);
 427 }
 428
 429 /// \brief Generate Min/Max node
 430 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
 431     SelectionDAG &DAG) const {
 432   SDLoc DL(Op);
 433   EVT VT = Op.getValueType();
 434
 435   SDValue LHS = Op.getOperand(0);
 436   SDValue RHS = Op.getOperand(1);
 437   SDValue True = Op.getOperand(2);
 438   SDValue False = Op.getOperand(3);
 439   SDValue CC = Op.getOperand(4);
 440
 441   if (VT != MVT::f32 ||
 442       !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
 443     return SDValue();
 444   }
 445
 446   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 447   switch (CCOpcode) {
 448   case ISD::SETOEQ:
 449   case ISD::SETONE:
 450   case ISD::SETUNE:
 451   case ISD::SETNE:
 452   case ISD::SETUEQ:
 453   case ISD::SETEQ:
 454   case ISD::SETFALSE:
 455   case ISD::SETFALSE2:
 456   case ISD::SETTRUE:
 457   case ISD::SETTRUE2:
 458   case ISD::SETUO:
 459   case ISD::SETO:
 460     assert(0 && "Operation should already be optimised !");
 461   case ISD::SETULE:
 462   case ISD::SETULT:
 463   case ISD::SETOLE:
 464   case ISD::SETOLT:
 465   case ISD::SETLE:
 466   case ISD::SETLT: {
 467     if (LHS == True)
 468       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 469     else
 470       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 471   }
 472   case ISD::SETGT:
 473   case ISD::SETGE:
 474   case ISD::SETUGE:
 475   case ISD::SETOGE:
 476   case ISD::SETUGT:
 477   case ISD::SETOGT: {
 478     if (LHS == True)
 479       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 480     else
 481       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 482   }
 483   case ISD::SETCC_INVALID:
 484     assert(0 && "Invalid setcc condcode !");
 485   }
 486   return Op;
 487 }
 488
 489 SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
 490                                               SelectionDAG &DAG) const {
 491   LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
 492   EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
 493   EVT EltVT = Op.getValueType().getVectorElementType();
 494   EVT PtrVT = Load->getBasePtr().getValueType();
 495   unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
 496   SmallVector<SDValue, 8> Loads;
 497   SDLoc SL(Op);
 498
 499   for (unsigned i = 0, e = NumElts; i != e; ++i) {
 500     SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
 501                     DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
 502     Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
 503                         Load->getChain(), Ptr,
 504                         MachinePointerInfo(Load->getMemOperand()->getValue()),
 505                         MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
 506                         Load->getAlignment()));
 507   }
 508   return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
 509                      Loads.size());
 510 }
 511
 512 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
 513                                                SelectionDAG &DAG) const {
 514   StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
 515   EVT MemVT = Store->getMemoryVT();
 516   unsigned MemBits = MemVT.getSizeInBits();
 517
 518   // Byte stores are really expensive, so if possible, try to pack
 519   // 32-bit vector truncatating store into an i32 store.
 520   // XXX: We could also handle optimize other vector bitwidths
 521   if (!MemVT.isVector() || MemBits > 32) {
 522     return SDValue();
 523   }
 524
 525   SDLoc DL(Op);
 526   const SDValue &Value = Store->getValue();
 527   EVT VT = Value.getValueType();
 528   const SDValue &Ptr = Store->getBasePtr();
 529   EVT MemEltVT = MemVT.getVectorElementType();
 530   unsigned MemEltBits = MemEltVT.getSizeInBits();
 531   unsigned MemNumElements = MemVT.getVectorNumElements();
 532   EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
 533   SDValue Mask;
 534   switch(MemEltBits) {
 535   case 8:
 536     Mask = DAG.getConstant(0xFF, PackedVT);
 537     break;
 538   case 16:
 539     Mask = DAG.getConstant(0xFFFF, PackedVT);
 540     break;
 541   default:
 542     llvm_unreachable("Cannot lower this vector store");
 543   }
 544   SDValue PackedValue;
 545   for (unsigned i = 0; i < MemNumElements; ++i) {
 546     EVT ElemVT = VT.getVectorElementType();
 547     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
 548                               DAG.getConstant(i, MVT::i32));
 549     Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
 550     Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
 551     SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
 552     Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
 553     if (i == 0) {
 554       PackedValue = Elt;
 555     } else {
 556       PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
 557     }
 558   }
 559   return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
 560                       MachinePointerInfo(Store->getMemOperand()->getValue()),
 561                       Store->isVolatile(),  Store->isNonTemporal(),
 562                       Store->getAlignment());
 563 }
 564
 565 SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
 566                                             SelectionDAG &DAG) const {
 567   StoreSDNode *Store = cast<StoreSDNode>(Op);
 568   EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
 569   EVT EltVT = Store->getValue().getValueType().getVectorElementType();
 570   EVT PtrVT = Store->getBasePtr().getValueType();
 571   unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
 572   SDLoc SL(Op);
 573
 574   SmallVector<SDValue, 8> Chains;
 575
 576   for (unsigned i = 0, e = NumElts; i != e; ++i) {
 577     SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
 578                               Store->getValue(), DAG.getConstant(i, MVT::i32));
 579     SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
 580                               Store->getBasePtr(),
 581                             DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
 582                                             PtrVT));
 583     Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
 584                          MachinePointerInfo(Store->getMemOperand()->getValue()),
 585                          MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
 586                          Store->getAlignment()));
 587   }
 588   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
 589 }
 590
 591 SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 592   SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
 593   if (Result.getNode()) {
 594     return Result;
 595   }
 596
 597   StoreSDNode *Store = cast<StoreSDNode>(Op);
 598   if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
 599        Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
 600       Store->getValue().getValueType().isVector()) {
 601     return SplitVectorStore(Op, DAG);
 602   }
 603   return SDValue();
 604 }
 605
 606 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
 607     SelectionDAG &DAG) const {
 608   SDLoc DL(Op);
 609   EVT VT = Op.getValueType();
 610
 611   SDValue Num = Op.getOperand(0);
 612   SDValue Den = Op.getOperand(1);
 613
 614   SmallVector<SDValue, 8> Results;
 615
 616   // RCP =  URECIP(Den) = 2^32 / Den + e
 617   // e is rounding error.
 618   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
 619
 620   // RCP_LO = umulo(RCP, Den) */
 621   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
 622
 623   // RCP_HI = mulhu (RCP, Den) */
 624   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
 625
 626   // NEG_RCP_LO = -RCP_LO
 627   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 628                                                      RCP_LO);
 629
 630   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
 631   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 632                                            NEG_RCP_LO, RCP_LO,
 633                                            ISD::SETEQ);
 634   // Calculate the rounding error from the URECIP instruction
 635   // E = mulhu(ABS_RCP_LO, RCP)
 636   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
 637
 638   // RCP_A_E = RCP + E
 639   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
 640
 641   // RCP_S_E = RCP - E
 642   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
 643
 644   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
 645   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 646                                      RCP_A_E, RCP_S_E,
 647                                      ISD::SETEQ);
 648   // Quotient = mulhu(Tmp0, Num)
 649   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
 650
 651   // Num_S_Remainder = Quotient * Den
 652   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
 653
 654   // Remainder = Num - Num_S_Remainder
 655   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
 656
 657   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
 658   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
 659                                                  DAG.getConstant(-1, VT),
 660                                                  DAG.getConstant(0, VT),
 661                                                  ISD::SETUGE);
 662   // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
 663   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
 664                                                   Num_S_Remainder,
 665                                                   DAG.getConstant(-1, VT),
 666                                                   DAG.getConstant(0, VT),
 667                                                   ISD::SETUGE);
 668   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
 669   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
 670                                                Remainder_GE_Zero);
 671
 672   // Calculate Division result:
 673
 674   // Quotient_A_One = Quotient + 1
 675   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
 676                                                          DAG.getConstant(1, VT));
 677
 678   // Quotient_S_One = Quotient - 1
 679   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
 680                                                          DAG.getConstant(1, VT));
 681
 682   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
 683   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 684                                      Quotient, Quotient_A_One, ISD::SETEQ);
 685
 686   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
 687   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 688                             Quotient_S_One, Div, ISD::SETEQ);
 689
 690   // Calculate Rem result:
 691
 692   // Remainder_S_Den = Remainder - Den
 693   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
 694
 695   // Remainder_A_Den = Remainder + Den
 696   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
 697
 698   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
 699   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 700                                     Remainder, Remainder_S_Den, ISD::SETEQ);
 701
 702   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
 703   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 704                             Remainder_A_Den, Rem, ISD::SETEQ);
 705   SDValue Ops[2];
 706   Ops[0] = Div;
 707   Ops[1] = Rem;
 708   return DAG.getMergeValues(Ops, 2, DL);
 709 }
 710
 711 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
 712                                                SelectionDAG &DAG) const {
 713   SDValue S0 = Op.getOperand(0);
 714   SDLoc DL(Op);
 715   if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
 716     return SDValue();
 717
 718   // f32 uint_to_fp i64
 719   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
 720                            DAG.getConstant(0, MVT::i32));
 721   SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
 722   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
 723                            DAG.getConstant(1, MVT::i32));
 724   SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
 725   FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
 726                         DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
 727   return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
 728
 729 }
 730
 731 //===----------------------------------------------------------------------===//
 732 // Helper functions
 733 //===----------------------------------------------------------------------===//
 734
 735 void AMDGPUTargetLowering::getOriginalFunctionArgs(
 736                                SelectionDAG &DAG,
 737                                const Function *F,
 738                                const SmallVectorImpl<ISD::InputArg> &Ins,
 739                                SmallVectorImpl<ISD::InputArg> &OrigIns) const {
 740
 741   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
 742     if (Ins[i].ArgVT == Ins[i].VT) {
 743       OrigIns.push_back(Ins[i]);
 744       continue;
 745     }
 746
 747     EVT VT;
 748     if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
 749       // Vector has been split into scalars.
 750       VT = Ins[i].ArgVT.getVectorElementType();
 751     } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
 752                Ins[i].ArgVT.getVectorElementType() !=
 753                Ins[i].VT.getVectorElementType()) {
 754       // Vector elements have been promoted
 755       VT = Ins[i].ArgVT;
 756     } else {
 757       // Vector has been spilt into smaller vectors.
 758       VT = Ins[i].VT;
 759     }
 760
 761     ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
 762                       Ins[i].OrigArgIndex, Ins[i].PartOffset);
 763     OrigIns.push_back(Arg);
 764   }
 765 }
 766
 767 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
 768   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 769     return CFP->isExactlyValue(1.0);
 770   }
 771   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 772     return C->isAllOnesValue();
 773   }
 774   return false;
 775 }
 776
 777 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
 778   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 779     return CFP->getValueAPF().isZero();
 780   }
 781   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 782     return C->isNullValue();
 783   }
 784   return false;
 785 }
 786
 787 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
 788                                                   const TargetRegisterClass *RC,
 789                                                    unsigned Reg, EVT VT) const {
 790   MachineFunction &MF = DAG.getMachineFunction();
 791   MachineRegisterInfo &MRI = MF.getRegInfo();
 792   unsigned VirtualRegister;
 793   if (!MRI.isLiveIn(Reg)) {
 794     VirtualRegister = MRI.createVirtualRegister(RC);
 795     MRI.addLiveIn(Reg, VirtualRegister);
 796   } else {
 797     VirtualRegister = MRI.getLiveInVirtReg(Reg);
 798   }
 799   return DAG.getRegister(VirtualRegister, VT);
 800 }
 801
 802 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
 803
 804 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
 805   switch (Opcode) {
 806   default: return 0;
 807   // AMDIL DAG nodes
 808   NODE_NAME_CASE(CALL);
 809   NODE_NAME_CASE(UMUL);
 810   NODE_NAME_CASE(DIV_INF);
 811   NODE_NAME_CASE(RET_FLAG);
 812   NODE_NAME_CASE(BRANCH_COND);
 813
 814   // AMDGPU DAG nodes
 815   NODE_NAME_CASE(DWORDADDR)
 816   NODE_NAME_CASE(FRACT)
 817   NODE_NAME_CASE(FMAX)
 818   NODE_NAME_CASE(SMAX)
 819   NODE_NAME_CASE(UMAX)
 820   NODE_NAME_CASE(FMIN)
 821   NODE_NAME_CASE(SMIN)
 822   NODE_NAME_CASE(UMIN)
 823   NODE_NAME_CASE(URECIP)
 824   NODE_NAME_CASE(EXPORT)
 825   NODE_NAME_CASE(CONST_ADDRESS)
 826   NODE_NAME_CASE(REGISTER_LOAD)
 827   NODE_NAME_CASE(REGISTER_STORE)
 828   NODE_NAME_CASE(LOAD_CONSTANT)
 829   NODE_NAME_CASE(LOAD_INPUT)
 830   NODE_NAME_CASE(SAMPLE)
 831   NODE_NAME_CASE(SAMPLEB)
 832   NODE_NAME_CASE(SAMPLED)
 833   NODE_NAME_CASE(SAMPLEL)
 834   NODE_NAME_CASE(STORE_MSKOR)
 835   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
 836   }
 837 }