contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief TargetLowering functions borrowed from AMDIL.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPUISelLowering.h"
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDGPUSubtarget.h"
  18 #include "AMDILIntrinsicInfo.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/CodeGen/PseudoSourceValue.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/CodeGen/SelectionDAGNodes.h"
  24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/DerivedTypes.h"
  27 #include "llvm/IR/Instructions.h"
  28 #include "llvm/IR/Intrinsics.h"
  29 #include "llvm/Support/raw_ostream.h"
  30 #include "llvm/Target/TargetInstrInfo.h"
  31 #include "llvm/Target/TargetOptions.h"
  32
  33 using namespace llvm;
  34 //===----------------------------------------------------------------------===//
  35 // TargetLowering Implementation Help Functions End
  36 //===----------------------------------------------------------------------===//
  37
  38 //===----------------------------------------------------------------------===//
  39 // TargetLowering Class Implementation Begins
  40 //===----------------------------------------------------------------------===//
  41 void AMDGPUTargetLowering::InitAMDILLowering() {
  42   static const int types[] = {
  43     (int)MVT::i8,
  44     (int)MVT::i16,
  45     (int)MVT::i32,
  46     (int)MVT::f32,
  47     (int)MVT::f64,
  48     (int)MVT::i64,
  49     (int)MVT::v2i8,
  50     (int)MVT::v4i8,
  51     (int)MVT::v2i16,
  52     (int)MVT::v4i16,
  53     (int)MVT::v4f32,
  54     (int)MVT::v4i32,
  55     (int)MVT::v2f32,
  56     (int)MVT::v2i32,
  57     (int)MVT::v2f64,
  58     (int)MVT::v2i64
  59   };
  60
  61   static const int IntTypes[] = {
  62     (int)MVT::i8,
  63     (int)MVT::i16,
  64     (int)MVT::i32,
  65     (int)MVT::i64
  66   };
  67
  68   static const int FloatTypes[] = {
  69     (int)MVT::f32,
  70     (int)MVT::f64
  71   };
  72
  73   static const int VectorTypes[] = {
  74     (int)MVT::v2i8,
  75     (int)MVT::v4i8,
  76     (int)MVT::v2i16,
  77     (int)MVT::v4i16,
  78     (int)MVT::v4f32,
  79     (int)MVT::v4i32,
  80     (int)MVT::v2f32,
  81     (int)MVT::v2i32,
  82     (int)MVT::v2f64,
  83     (int)MVT::v2i64
  84   };
  85   const size_t NumTypes = array_lengthof(types);
  86   const size_t NumFloatTypes = array_lengthof(FloatTypes);
  87   const size_t NumIntTypes = array_lengthof(IntTypes);
  88   const size_t NumVectorTypes = array_lengthof(VectorTypes);
  89
  90   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  91   // These are the current register classes that are
  92   // supported
  93
  94   for (unsigned int x  = 0; x < NumTypes; ++x) {
  95     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
  96
  97     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
  98     // We cannot sextinreg, expand to shifts
  99     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
 100     setOperationAction(ISD::SUBE, VT, Expand);
 101     setOperationAction(ISD::SUBC, VT, Expand);
 102     setOperationAction(ISD::ADDE, VT, Expand);
 103     setOperationAction(ISD::ADDC, VT, Expand);
 104     setOperationAction(ISD::BRCOND, VT, Custom);
 105     setOperationAction(ISD::BR_JT, VT, Expand);
 106     setOperationAction(ISD::BRIND, VT, Expand);
 107     // TODO: Implement custom UREM/SREM routines
 108     setOperationAction(ISD::SREM, VT, Expand);
 109     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 110     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 111     if (VT != MVT::i64 && VT != MVT::v2i64) {
 112       setOperationAction(ISD::SDIV, VT, Custom);
 113     }
 114   }
 115   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
 116     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 117
 118     // IL does not have these operations for floating point types
 119     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 120     setOperationAction(ISD::SETOLT, VT, Expand);
 121     setOperationAction(ISD::SETOGE, VT, Expand);
 122     setOperationAction(ISD::SETOGT, VT, Expand);
 123     setOperationAction(ISD::SETOLE, VT, Expand);
 124     setOperationAction(ISD::SETULT, VT, Expand);
 125     setOperationAction(ISD::SETUGE, VT, Expand);
 126     setOperationAction(ISD::SETUGT, VT, Expand);
 127     setOperationAction(ISD::SETULE, VT, Expand);
 128   }
 129
 130   for (unsigned int x = 0; x < NumIntTypes; ++x) {
 131     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 132
 133     // GPU also does not have divrem function for signed or unsigned
 134     setOperationAction(ISD::SDIVREM, VT, Expand);
 135
 136     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 137     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 138     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 139
 140     setOperationAction(ISD::BSWAP, VT, Expand);
 141
 142     // GPU doesn't have any counting operators
 143     setOperationAction(ISD::CTPOP, VT, Expand);
 144     setOperationAction(ISD::CTTZ, VT, Expand);
 145     setOperationAction(ISD::CTLZ, VT, Expand);
 146   }
 147
 148   for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
 149     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 150
 151     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 152     setOperationAction(ISD::SDIVREM, VT, Expand);
 153     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 154     // setOperationAction(ISD::VSETCC, VT, Expand);
 155     setOperationAction(ISD::SELECT_CC, VT, Expand);
 156
 157   }
 158   setOperationAction(ISD::MULHU, MVT::i64, Expand);
 159   setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 160   setOperationAction(ISD::MULHS, MVT::i64, Expand);
 161   setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 162   setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 163   setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 164   setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 165   setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 166   setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 167   setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 168   setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 169   setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 170   if (STM.hasHWFP64()) {
 171     // we support loading/storing v2f64 but not operations on the type
 172     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 173     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 174     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 175     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 176     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 177     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 178     // We want to expand vector conversions into their scalar
 179     // counterparts.
 180     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 181     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 182     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 183     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 184     setOperationAction(ISD::FABS, MVT::f64, Expand);
 185     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 186   }
 187   // TODO: Fix the UDIV24 algorithm so it works for these
 188   // types correctly. This needs vector comparisons
 189   // for this to work correctly.
 190   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 191   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 192   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 193   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
 195   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 196   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 197   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 198   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 199   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 200   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 201   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 202
 203
 204   // Use the default implementation.
 205   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 206   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 207
 208   setSchedulingPreference(Sched::RegPressure);
 209   setPow2DivIsCheap(false);
 210   setSelectIsExpensive(true);
 211   setJumpIsExpensive(true);
 212
 213   MaxStoresPerMemcpy  = 4096;
 214   MaxStoresPerMemmove = 4096;
 215   MaxStoresPerMemset  = 4096;
 216
 217 }
 218
 219 bool
 220 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 221     const CallInst &I, unsigned Intrinsic) const {
 222   return false;
 223 }
 224
 225 // The backend supports 32 and 64 bit floating point immediates
 226 bool
 227 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 228   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 229       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 230     return true;
 231   } else {
 232     return false;
 233   }
 234 }
 235
 236 bool
 237 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
 238   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 239       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 240     return false;
 241   } else {
 242     return true;
 243   }
 244 }
 245
 246
 247 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 248 // be zero. Op is expected to be a target specific node. Used by DAG
 249 // combiner.
 250
 251 void
 252 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
 253     const SDValue Op,
 254     APInt &KnownZero,
 255     APInt &KnownOne,
 256     const SelectionDAG &DAG,
 257     unsigned Depth) const {
 258   APInt KnownZero2;
 259   APInt KnownOne2;
 260   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 261   switch (Op.getOpcode()) {
 262     default: break;
 263     case ISD::SELECT_CC:
 264              DAG.ComputeMaskedBits(
 265                  Op.getOperand(1),
 266                  KnownZero,
 267                  KnownOne,
 268                  Depth + 1
 269                  );
 270              DAG.ComputeMaskedBits(
 271                  Op.getOperand(0),
 272                  KnownZero2,
 273                  KnownOne2
 274                  );
 275              assert((KnownZero & KnownOne) == 0
 276                  && "Bits known to be one AND zero?");
 277              assert((KnownZero2 & KnownOne2) == 0
 278                  && "Bits known to be one AND zero?");
 279              // Only known if known in both the LHS and RHS
 280              KnownOne &= KnownOne2;
 281              KnownZero &= KnownZero2;
 282              break;
 283   };
 284 }
 285
 286 //===----------------------------------------------------------------------===//
 287 //                           Other Lowering Hooks
 288 //===----------------------------------------------------------------------===//
 289
 290 SDValue
 291 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
 292   EVT OVT = Op.getValueType();
 293   SDValue DST;
 294   if (OVT.getScalarType() == MVT::i64) {
 295     DST = LowerSDIV64(Op, DAG);
 296   } else if (OVT.getScalarType() == MVT::i32) {
 297     DST = LowerSDIV32(Op, DAG);
 298   } else if (OVT.getScalarType() == MVT::i16
 299       || OVT.getScalarType() == MVT::i8) {
 300     DST = LowerSDIV24(Op, DAG);
 301   } else {
 302     DST = SDValue(Op.getNode(), 0);
 303   }
 304   return DST;
 305 }
 306
 307 SDValue
 308 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
 309   EVT OVT = Op.getValueType();
 310   SDValue DST;
 311   if (OVT.getScalarType() == MVT::i64) {
 312     DST = LowerSREM64(Op, DAG);
 313   } else if (OVT.getScalarType() == MVT::i32) {
 314     DST = LowerSREM32(Op, DAG);
 315   } else if (OVT.getScalarType() == MVT::i16) {
 316     DST = LowerSREM16(Op, DAG);
 317   } else if (OVT.getScalarType() == MVT::i8) {
 318     DST = LowerSREM8(Op, DAG);
 319   } else {
 320     DST = SDValue(Op.getNode(), 0);
 321   }
 322   return DST;
 323 }
 324
 325 SDValue
 326 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
 327   SDValue Data = Op.getOperand(0);
 328   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
 329   SDLoc DL(Op);
 330   EVT DVT = Data.getValueType();
 331   EVT BVT = BaseType->getVT();
 332   unsigned baseBits = BVT.getScalarType().getSizeInBits();
 333   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
 334   unsigned shiftBits = srcBits - baseBits;
 335   if (srcBits < 32) {
 336     // If the op is less than 32 bits, then it needs to extend to 32bits
 337     // so it can properly keep the upper bits valid.
 338     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
 339     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
 340     shiftBits = 32 - baseBits;
 341     DVT = IVT;
 342   }
 343   SDValue Shift = DAG.getConstant(shiftBits, DVT);
 344   // Shift left by 'Shift' bits.
 345   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
 346   // Signed shift Right by 'Shift' bits.
 347   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
 348   if (srcBits < 32) {
 349     // Once the sign extension is done, the op needs to be converted to
 350     // its original type.
 351     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
 352   }
 353   return Data;
 354 }
 355 EVT
 356 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
 357   int iSize = (size * numEle);
 358   int vEle = (iSize >> ((size == 64) ? 6 : 5));
 359   if (!vEle) {
 360     vEle = 1;
 361   }
 362   if (size == 64) {
 363     if (vEle == 1) {
 364       return EVT(MVT::i64);
 365     } else {
 366       return EVT(MVT::getVectorVT(MVT::i64, vEle));
 367     }
 368   } else {
 369     if (vEle == 1) {
 370       return EVT(MVT::i32);
 371     } else {
 372       return EVT(MVT::getVectorVT(MVT::i32, vEle));
 373     }
 374   }
 375 }
 376
 377 SDValue
 378 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 379   SDValue Chain = Op.getOperand(0);
 380   SDValue Cond  = Op.getOperand(1);
 381   SDValue Jump  = Op.getOperand(2);
 382   SDValue Result;
 383   Result = DAG.getNode(
 384       AMDGPUISD::BRANCH_COND,
 385       SDLoc(Op),
 386       Op.getValueType(),
 387       Chain, Jump, Cond);
 388   return Result;
 389 }
 390
 391 SDValue
 392 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
 393   SDLoc DL(Op);
 394   EVT OVT = Op.getValueType();
 395   SDValue LHS = Op.getOperand(0);
 396   SDValue RHS = Op.getOperand(1);
 397   MVT INTTY;
 398   MVT FLTTY;
 399   if (!OVT.isVector()) {
 400     INTTY = MVT::i32;
 401     FLTTY = MVT::f32;
 402   } else if (OVT.getVectorNumElements() == 2) {
 403     INTTY = MVT::v2i32;
 404     FLTTY = MVT::v2f32;
 405   } else if (OVT.getVectorNumElements() == 4) {
 406     INTTY = MVT::v4i32;
 407     FLTTY = MVT::v4f32;
 408   }
 409   unsigned bitsize = OVT.getScalarType().getSizeInBits();
 410   // char|short jq = ia ^ ib;
 411   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
 412
 413   // jq = jq >> (bitsize - 2)
 414   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
 415
 416   // jq = jq | 0x1
 417   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
 418
 419   // jq = (int)jq
 420   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
 421
 422   // int ia = (int)LHS;
 423   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
 424
 425   // int ib, (int)RHS;
 426   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
 427
 428   // float fa = (float)ia;
 429   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
 430
 431   // float fb = (float)ib;
 432   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
 433
 434   // float fq = native_divide(fa, fb);
 435   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
 436
 437   // fq = trunc(fq);
 438   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
 439
 440   // float fqneg = -fq;
 441   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 442
 443   // float fr = mad(fqneg, fb, fa);
 444   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
 445       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 446
 447   // int iq = (int)fq;
 448   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
 449
 450   // fr = fabs(fr);
 451   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
 452
 453   // fb = fabs(fb);
 454   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
 455
 456   // int cv = fr >= fb;
 457   SDValue cv;
 458   if (INTTY == MVT::i32) {
 459     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 460   } else {
 461     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 462   }
 463   // jq = (cv ? jq : 0);
 464   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
 465       DAG.getConstant(0, OVT));
 466   // dst = iq + jq;
 467   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
 468   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
 469   return iq;
 470 }
 471
 472 SDValue
 473 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
 474   SDLoc DL(Op);
 475   EVT OVT = Op.getValueType();
 476   SDValue LHS = Op.getOperand(0);
 477   SDValue RHS = Op.getOperand(1);
 478   // The LowerSDIV32 function generates equivalent to the following IL.
 479   // mov r0, LHS
 480   // mov r1, RHS
 481   // ilt r10, r0, 0
 482   // ilt r11, r1, 0
 483   // iadd r0, r0, r10
 484   // iadd r1, r1, r11
 485   // ixor r0, r0, r10
 486   // ixor r1, r1, r11
 487   // udiv r0, r0, r1
 488   // ixor r10, r10, r11
 489   // iadd r0, r0, r10
 490   // ixor DST, r0, r10
 491
 492   // mov r0, LHS
 493   SDValue r0 = LHS;
 494
 495   // mov r1, RHS
 496   SDValue r1 = RHS;
 497
 498   // ilt r10, r0, 0
 499   SDValue r10 = DAG.getSelectCC(DL,
 500       r0, DAG.getConstant(0, OVT),
 501       DAG.getConstant(-1, MVT::i32),
 502       DAG.getConstant(0, MVT::i32),
 503       ISD::SETLT);
 504
 505   // ilt r11, r1, 0
 506   SDValue r11 = DAG.getSelectCC(DL,
 507       r1, DAG.getConstant(0, OVT),
 508       DAG.getConstant(-1, MVT::i32),
 509       DAG.getConstant(0, MVT::i32),
 510       ISD::SETLT);
 511
 512   // iadd r0, r0, r10
 513   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 514
 515   // iadd r1, r1, r11
 516   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 517
 518   // ixor r0, r0, r10
 519   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 520
 521   // ixor r1, r1, r11
 522   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 523
 524   // udiv r0, r0, r1
 525   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
 526
 527   // ixor r10, r10, r11
 528   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
 529
 530   // iadd r0, r0, r10
 531   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 532
 533   // ixor DST, r0, r10
 534   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 535   return DST;
 536 }
 537
 538 SDValue
 539 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
 540   return SDValue(Op.getNode(), 0);
 541 }
 542
 543 SDValue
 544 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
 545   SDLoc DL(Op);
 546   EVT OVT = Op.getValueType();
 547   MVT INTTY = MVT::i32;
 548   if (OVT == MVT::v2i8) {
 549     INTTY = MVT::v2i32;
 550   } else if (OVT == MVT::v4i8) {
 551     INTTY = MVT::v4i32;
 552   }
 553   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 554   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 555   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 556   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 557   return LHS;
 558 }
 559
 560 SDValue
 561 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
 562   SDLoc DL(Op);
 563   EVT OVT = Op.getValueType();
 564   MVT INTTY = MVT::i32;
 565   if (OVT == MVT::v2i16) {
 566     INTTY = MVT::v2i32;
 567   } else if (OVT == MVT::v4i16) {
 568     INTTY = MVT::v4i32;
 569   }
 570   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 571   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 572   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 573   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 574   return LHS;
 575 }
 576
 577 SDValue
 578 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
 579   SDLoc DL(Op);
 580   EVT OVT = Op.getValueType();
 581   SDValue LHS = Op.getOperand(0);
 582   SDValue RHS = Op.getOperand(1);
 583   // The LowerSREM32 function generates equivalent to the following IL.
 584   // mov r0, LHS
 585   // mov r1, RHS
 586   // ilt r10, r0, 0
 587   // ilt r11, r1, 0
 588   // iadd r0, r0, r10
 589   // iadd r1, r1, r11
 590   // ixor r0, r0, r10
 591   // ixor r1, r1, r11
 592   // udiv r20, r0, r1
 593   // umul r20, r20, r1
 594   // sub r0, r0, r20
 595   // iadd r0, r0, r10
 596   // ixor DST, r0, r10
 597
 598   // mov r0, LHS
 599   SDValue r0 = LHS;
 600
 601   // mov r1, RHS
 602   SDValue r1 = RHS;
 603
 604   // ilt r10, r0, 0
 605   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
 606
 607   // ilt r11, r1, 0
 608   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
 609
 610   // iadd r0, r0, r10
 611   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 612
 613   // iadd r1, r1, r11
 614   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 615
 616   // ixor r0, r0, r10
 617   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 618
 619   // ixor r1, r1, r11
 620   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 621
 622   // udiv r20, r0, r1
 623   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
 624
 625   // umul r20, r20, r1
 626   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
 627
 628   // sub r0, r0, r20
 629   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
 630
 631   // iadd r0, r0, r10
 632   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 633
 634   // ixor DST, r0, r10
 635   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 636   return DST;
 637 }
 638
 639 SDValue
 640 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
 641   return SDValue(Op.getNode(), 0);
 642 }