//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

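// GET_INSTRINFO_CTOR_DTOR pulls the TableGen-generated constructor and
// destructor definitions for AMDGPUGenInstrInfo into this file;
// GET_INSTRMAP_INFO additionally pulls in the instruction mapping tables
// (including getMCOpcodeGen, used below).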
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
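// The two -1 arguments are the call frame setup/destroy opcodes expected by
// the TargetInstrInfo base class; -1 indicates that this target defines no
// call frame pseudo instructions.
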
// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16
// will be clustered as expected. It should really split into two batches of
// 16 stores.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to deal with saying
// whether loads from different address spaces should be clustered, and
// whether loads which might cause bank conflicts should be.
//
// This might be deprecated, so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row, and the offsets are within 64
  // bytes of each other, then schedule them together.
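  //
  // For example, with NumLoads <= 16, a second load 48 bytes past the first
  // is clustered (48 < 64), while one 80 bytes past it is not.
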
  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

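// Note: a MIMG instruction returns up to four result channels, selected by
// its dmask operand. This helper maps an opcode to the variant that writes
// only the requested number of channels, using the TableGen'd
// AMDGPU::getMaskedMIMGOp table; a count of 4 (or any unexpected value)
// falls through to the original opcode.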
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1
};
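// The TableGen'd opcode table behind getMCOpcodeGen below is indexed by
// these values, so renumbering them would silently select wrong encodings.
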
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
}
}

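// (getMCOpcodeGen and enum Subtarget both come from AMDGPUGenInstrInfo.inc,
// included above under GET_INSTRMAP_INFO, which is why the cast is done here
// rather than in a header.)
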
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

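// Map a pseudo opcode to the real MC opcode for the current subtarget's
// encoding family. Returns the opcode unchanged if it is already a native
// instruction, or -1 if the pseudo has no encoding in this generation.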
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}