//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

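// GET_INSTRINFO_CTOR_DTOR pulls the TableGen-generated constructor and
// destructor definitions for AMDGPUGenInstrInfo into this file;
// GET_INSTRMAP_INFO additionally pulls in the instruction mapping tables
// (including getMCOpcodeGen, used below).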
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
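// The two -1 arguments are the call frame setup/destroy opcodes expected by
// the TargetInstrInfo base class; -1 indicates that this target defines no
// call frame pseudo instructions.
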
// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16
// will be clustered as expected. It should really split into two batches of
// 16 stores.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to deal with saying
// whether loads from different address spaces should be clustered, and
// whether loads which might cause bank conflicts should be.
//
// This might be deprecated, so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row, and the offsets are within 64
  // bytes of each other, then schedule them together.
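  //
  // For example, with NumLoads <= 16, a second load 48 bytes past the first
  // is clustered (48 < 64), while one 80 bytes past it is not.
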
  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

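// Note: a MIMG instruction returns up to four result channels, selected by
// its dmask operand. This helper maps an opcode to the variant that writes
// only the requested number of channels, using the TableGen'd
// AMDGPU::getMaskedMIMGOp table; a count of 4 (or any unexpected value)
// falls through to the original opcode.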
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1
};
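// The TableGen'd opcode table behind getMCOpcodeGen below is indexed by
// these values, so renumbering them would silently select wrong encodings.
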
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
}
}

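// (getMCOpcodeGen and enum Subtarget both come from AMDGPUGenInstrInfo.inc,
// included above under GET_INSTRMAP_INFO, which is why the cast is done here
// rather than in a header.)
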
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

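// Map a pseudo opcode to the real MC opcode for the current subtarget's
// encoding family. Returns the opcode unchanged if it is already a native
// instruction, or -1 if the pseudo has no encoding in this generation.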
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}