contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td

   1 //===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 //===----------------------------------------------------------------------===//
  11 //  Declarations that describe the SI registers
  12 //===----------------------------------------------------------------------===//
  13 class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
  14   DwarfRegNum<[!cast<int>(HWEncoding)]> {
       // Common base class for the registers defined in this file.
       //   n      - name string forwarded to Register<n>.
       //   regIdx - value stored in HWEncoding (defaults to 0).
       // The DWARF register number list is derived from HWEncoding via
       // !cast<int>.
  15   let Namespace = "AMDGPU";
  16
  17   // This is not yet the complete register encoding. An additional
  18   // bit is set for VGPRs.
  19   let HWEncoding = regIdx;
  20 }
  21
  22 // Special Registers
  23 def VCC_LO : SIReg<"vcc_lo", 106>;
  24 def VCC_HI : SIReg<"vcc_hi", 107>;
  25
  26 // VCC for 64-bit instructions
     // Pairs VCC_LO (sub0) with VCC_HI (sub1), reuses VCC_LO's hardware
     // encoding (106), and is declared a DWARF alias of VCC_LO.
  27 def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
  28           DwarfRegAlias<VCC_LO> {
  29   let Namespace = "AMDGPU";
  30   let SubRegIndices = [sub0, sub1];
  31   let HWEncoding = 106;
  32 }
  33
  34 def EXEC_LO : SIReg<"exec_lo", 126>;
  35 def EXEC_HI : SIReg<"exec_hi", 127>;
  36
     // 64-bit EXEC: EXEC_LO is sub0, EXEC_HI is sub1. It reuses EXEC_LO's
     // hardware encoding (126) and is declared a DWARF alias of EXEC_LO.
     // NOTE(review): the name string here is upper-case ("EXEC") while the
     // sibling 64-bit registers use lower-case names ("vcc", "tba", "tma") --
     // confirm whether that difference is intentional.
  37 def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
  38            DwarfRegAlias<EXEC_LO> {
  39   let Namespace = "AMDGPU";
  40   let SubRegIndices = [sub0, sub1];
  41   let HWEncoding = 126;
  42 }
  43
  44 def SCC : SIReg<"scc", 253>;
     // SCC (above) is the only member of SCC_CLASS. M0 (below) is the one
     // register that SReg_32 adds on top of SReg_32_XM0.
  45 def M0 : SIReg <"m0", 124>;
  46
  47 // Trap handler registers
  48 def TBA_LO : SIReg<"tba_lo", 108>;
  49 def TBA_HI : SIReg<"tba_hi", 109>;
  50
     // 64-bit TBA: TBA_LO is sub0, TBA_HI is sub1. It reuses TBA_LO's hardware
     // encoding (108) and is declared a DWARF alias of TBA_LO.
  51 def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
  52           DwarfRegAlias<TBA_LO> {
  53   let Namespace = "AMDGPU";
  54   let SubRegIndices = [sub0, sub1];
  55   let HWEncoding = 108;
  56 }
  57
  58 def TMA_LO : SIReg<"tma_lo", 110>;
  59 def TMA_HI : SIReg<"tma_hi", 111>;
  60
     // 64-bit TMA: TMA_LO is sub0, TMA_HI is sub1. It reuses TMA_LO's hardware
     // encoding (110) and is declared a DWARF alias of TMA_LO.
  61 def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
  62           DwarfRegAlias<TMA_LO> {
  63   let Namespace = "AMDGPU";
  64   let SubRegIndices = [sub0, sub1];
  65   let HWEncoding = 110;
  66 }
  67
  68 def TTMP0 : SIReg <"ttmp0", 112>;
     // TTMP1..TTMP11 continue with consecutive encodings 113..123. All twelve
     // registers are collected by the TTMP_32 register class via
     // (sequence "TTMP%u", 0, 11), so the def names must keep that pattern.
  69 def TTMP1 : SIReg <"ttmp1", 113>;
  70 def TTMP2 : SIReg <"ttmp2", 114>;
  71 def TTMP3 : SIReg <"ttmp3", 115>;
  72 def TTMP4 : SIReg <"ttmp4", 116>;
  73 def TTMP5 : SIReg <"ttmp5", 117>;
  74 def TTMP6 : SIReg <"ttmp6", 118>;
  75 def TTMP7 : SIReg <"ttmp7", 119>;
  76 def TTMP8 : SIReg <"ttmp8", 120>;
  77 def TTMP9 : SIReg <"ttmp9", 121>;
  78 def TTMP10 : SIReg <"ttmp10", 122>;
  79 def TTMP11 : SIReg <"ttmp11", 123>;
  80
  81 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
       // Each instantiation defines three registers:
       //   <prefix>_ci - name n, hardware encoding ci_e
       //   <prefix>_vi - name n, hardware encoding vi_e
       //   <prefix>    - empty name string, hardware encoding 0
       // NOTE(review): "ci"/"vi" presumably denote two hardware generations
       // with different encodings, and the unsuffixed record presumably acts
       // as a generation-independent placeholder -- confirm against the users
       // of these registers.
  82   def _ci : SIReg<n, ci_e>;
  83   def _vi : SIReg<n, vi_e>;
  84   def "" : SIReg<"", 0>;
  85 }
  86
  87 class FlatReg <Register lo, Register hi, bits<16> encoding> :
  88     RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
  89     DwarfRegAlias<lo> {
       // 64-bit register named "flat_scratch".
       //   lo       - sub0 half; also the register this one is a DWARF alias of
       //   hi       - sub1 half
       //   encoding - value stored in HWEncoding
  90   let Namespace = "AMDGPU";
  91   let SubRegIndices = [sub0, sub1];
  92   let HWEncoding = encoding;
  93 }
  94
  95 defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
  96 defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
  97
     // Each defm above yields a _ci, a _vi and an unsuffixed half. The three
     // FlatReg records below pair the matching halves: the _ci and _vi
     // variants take the encoding of their low half (104 and 102), and the
     // unsuffixed FLAT_SCR uses encoding 0.
  98 def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
  99 def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
 100 def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
 101
 102 // SGPR registers
     // Defines SGPR0..SGPR103. Each register's name string is "SGPR<Index>"
     // and its HWEncoding equals Index.
 103 foreach Index = 0-103 in {
 104   def SGPR#Index : SIReg <"SGPR"#Index, Index>;
 105 }
 106
 107 // VGPR registers
     // Defines VGPR0..VGPR255. HWEncoding starts as Index (through SIReg) and
     // then has bit 8 forced to 1, which is the extra VGPR bit mentioned in
     // the SIReg class.
 108 foreach Index = 0-255 in {
 109   def VGPR#Index : SIReg <"VGPR"#Index, Index> {
 110     let HWEncoding{8} = 1;
 111   }
 112 }
 113
 114 //===----------------------------------------------------------------------===//
 115 //  Groupings using register classes and tuples
 116 //===----------------------------------------------------------------------===//
 117
 118 def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
       // Register class holding only SCC, typed i1, with alignment 1.
       // It is excluded from register allocation (isAllocatable = 0).
       // Target.td documents a negative CopyCost as "copying is extremely
       // expensive or impossible".
 119   let CopyCost = -1;
 120   let isAllocatable = 0;
 121 }
 122
 123 // TODO: Do we need to set DwarfRegAlias on register tuples?
 124
 125 // SGPR 32-bit registers
     // Contains SGPR0..SGPR103 in index order, typed i32/f32, alignment 32.
     // The tuple definitions that follow rely on this ordering when they
     // apply shl/decimate to SGPR_32.
 126 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
 127                             (add (sequence "SGPR%u", 0, 103))> {
 128   let AllocationPriority = 1;
 129 }
 130
 131 // SGPR 64-bit registers
     // Pairs of consecutive SGPRs that start on an even index:
     //   sub0 = every 2nd register starting at SGPR0
     //   sub1 = every 2nd register starting at SGPR1 (list shifted by one)
 132 def SGPR_64Regs : RegisterTuples<[sub0, sub1],
 133                              [(add (decimate SGPR_32, 2)),
 134                               (add (decimate (shl SGPR_32, 1), 2))]>;
 135
 136 // SGPR 128-bit registers
     // Groups of four consecutive SGPRs that start on a multiple of four:
     // sub<k> = every 4th register of SGPR_32 after dropping its first k
     // entries.
 137 def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
 138                               [(add (decimate SGPR_32, 4)),
 139                                (add (decimate (shl SGPR_32, 1), 4)),
 140                                (add (decimate (shl SGPR_32, 2), 4)),
 141                                (add (decimate (shl SGPR_32, 3), 4))]>;
 142
 143 // SGPR 256-bit registers
     // Groups of eight consecutive SGPRs. The decimate step is 4, so a tuple
     // may start at any multiple of four (not only multiples of eight), and
     // neighbouring tuples overlap.
     // NOTE(review): the higher sub-register lists are shorter than the sub0
     // list; the tuple count presumably follows the shortest list -- confirm
     // against the RegisterTuples documentation.
 144 def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
 145                               [(add (decimate SGPR_32, 4)),
 146                                (add (decimate (shl SGPR_32, 1), 4)),
 147                                (add (decimate (shl SGPR_32, 2), 4)),
 148                                (add (decimate (shl SGPR_32, 3), 4)),
 149                                (add (decimate (shl SGPR_32, 4), 4)),
 150                                (add (decimate (shl SGPR_32, 5), 4)),
 151                                (add (decimate (shl SGPR_32, 6), 4)),
 152                                (add (decimate (shl SGPR_32, 7), 4))]>;
 153
 154 // SGPR 512-bit registers
     // Groups of sixteen consecutive SGPRs. As with SGPR_256, the decimate
     // step is 4, so tuples start at every multiple of four and overlap.
     // NOTE(review): the sub-register lists have different lengths; the tuple
     // count presumably follows the shortest list -- confirm against the
     // RegisterTuples documentation.
 155 def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
 156                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
 157                               [(add (decimate SGPR_32, 4)),
 158                                (add (decimate (shl SGPR_32, 1), 4)),
 159                                (add (decimate (shl SGPR_32, 2), 4)),
 160                                (add (decimate (shl SGPR_32, 3), 4)),
 161                                (add (decimate (shl SGPR_32, 4), 4)),
 162                                (add (decimate (shl SGPR_32, 5), 4)),
 163                                (add (decimate (shl SGPR_32, 6), 4)),
 164                                (add (decimate (shl SGPR_32, 7), 4)),
 165                                (add (decimate (shl SGPR_32, 8), 4)),
 166                                (add (decimate (shl SGPR_32, 9), 4)),
 167                                (add (decimate (shl SGPR_32, 10), 4)),
 168                                (add (decimate (shl SGPR_32, 11), 4)),
 169                                (add (decimate (shl SGPR_32, 12), 4)),
 170                                (add (decimate (shl SGPR_32, 13), 4)),
 171                                (add (decimate (shl SGPR_32, 14), 4)),
 172                                (add (decimate (shl SGPR_32, 15), 4))]>;
 173
 174 // Trap handler TMP 32-bit registers
     // Contains TTMP0..TTMP11 in index order, typed i32/f32, alignment 32.
     // The class is excluded from register allocation.
 175 def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
 176                             (add (sequence "TTMP%u", 0, 11))> {
 177   let isAllocatable = 0;
 178 }
 179
 180 // Trap handler TMP 64-bit registers
     // Pairs of consecutive TTMP registers that start on an even index
     // (sub0 = TTMP0, TTMP2, ...; sub1 = TTMP1, TTMP3, ...).
 181 def TTMP_64Regs : RegisterTuples<[sub0, sub1],
 182                              [(add (decimate TTMP_32, 2)),
 183                               (add (decimate (shl TTMP_32, 1), 2))]>;
 184
 185 // Trap handler TMP 128-bit registers
     // Groups of four consecutive TTMP registers that start on a multiple of
     // four (sub<k> = every 4th register after dropping the first k entries).
 186 def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
 187                               [(add (decimate TTMP_32, 4)),
 188                                (add (decimate (shl TTMP_32, 1), 4)),
 189                                (add (decimate (shl TTMP_32, 2), 4)),
 190                                (add (decimate (shl TTMP_32, 3), 4))]>;
 191
 192 // VGPR 32-bit registers
     // Contains VGPR0..VGPR255 in index order, typed i32/f32, alignment 32.
     // The VGPR tuple definitions that follow rely on this ordering when they
     // apply trunc/shl to VGPR_32.
 193 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
 194                             (add (sequence "VGPR%u", 0, 255))> {
 195   let AllocationPriority = 1;
 196 }
 197
 198 // VGPR 64-bit registers
     // Every pair of consecutive VGPRs, with no alignment restriction:
     //   sub0 = first 255 registers (VGPR0..VGPR254)
     //   sub1 = list shifted by one  (VGPR1..VGPR255)
 199 def VGPR_64 : RegisterTuples<[sub0, sub1],
 200                              [(add (trunc VGPR_32, 255)),
 201                               (add (shl VGPR_32, 1))]>;
 202
 203 // VGPR 96-bit registers
     // Every run of three consecutive VGPRs. sub0 is limited to the first 254
     // registers (VGPR0..VGPR253) so that sub2 never runs past VGPR255.
 204 def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
 205                              [(add (trunc VGPR_32, 254)),
 206                               (add (shl VGPR_32, 1)),
 207                               (add (shl VGPR_32, 2))]>;
 208
 209 // VGPR 128-bit registers
     // Every run of four consecutive VGPRs. sub0 is limited to the first 253
     // registers (VGPR0..VGPR252) so that sub3 never runs past VGPR255.
 210 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
 211                               [(add (trunc VGPR_32, 253)),
 212                                (add (shl VGPR_32, 1)),
 213                                (add (shl VGPR_32, 2)),
 214                                (add (shl VGPR_32, 3))]>;
 215
 216 // VGPR 256-bit registers
     // Every run of eight consecutive VGPRs. sub0 is limited to the first 249
     // registers (VGPR0..VGPR248) so that sub7 never runs past VGPR255.
 217 def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
 218                               [(add (trunc VGPR_32, 249)),
 219                                (add (shl VGPR_32, 1)),
 220                                (add (shl VGPR_32, 2)),
 221                                (add (shl VGPR_32, 3)),
 222                                (add (shl VGPR_32, 4)),
 223                                (add (shl VGPR_32, 5)),
 224                                (add (shl VGPR_32, 6)),
 225                                (add (shl VGPR_32, 7))]>;
 226
 227 // VGPR 512-bit registers
     // Every run of sixteen consecutive VGPRs. sub0 is limited to the first
     // 241 registers (VGPR0..VGPR240) so that sub15 never runs past VGPR255.
 228 def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
 229                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
 230                               [(add (trunc VGPR_32, 241)),
 231                                (add (shl VGPR_32, 1)),
 232                                (add (shl VGPR_32, 2)),
 233                                (add (shl VGPR_32, 3)),
 234                                (add (shl VGPR_32, 4)),
 235                                (add (shl VGPR_32, 5)),
 236                                (add (shl VGPR_32, 6)),
 237                                (add (shl VGPR_32, 7)),
 238                                (add (shl VGPR_32, 8)),
 239                                (add (shl VGPR_32, 9)),
 240                                (add (shl VGPR_32, 10)),
 241                                (add (shl VGPR_32, 11)),
 242                                (add (shl VGPR_32, 12)),
 243                                (add (shl VGPR_32, 13)),
 244                                (add (shl VGPR_32, 14)),
 245                                (add (shl VGPR_32, 15))]>;
 246
 247 //===----------------------------------------------------------------------===//
 248 //  Register classes used as source and destination
 249 //===----------------------------------------------------------------------===//
 250
 251 class RegImmMatcher<string name> : AsmOperandClass {
       // Assembly-parser operand class for the operand definitions in this
       // file that set ParserMatchClass.
       //   name - stored as the operand class Name.
       // NOTE(review): "addRegOrImmOperands" is not defined in this file; it is
       // presumably a method of the target's asm-parser operand type -- confirm.
 252   let Name = name;
 253   let RenderMethod = "addRegOrImmOperands";
 254 }
 255
 256 // Subset of SReg_32 without M0 for SMRD instructions and alike.
 257 // See comments in SIInstructions.td for more info.
     // Members: all of SGPR_32, the VCC/EXEC/FLAT_SCR halves, all of TTMP_32,
     // and the TMA/TBA halves. Typed i32/f32, alignment 32.
 258 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
 259   (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
 260    TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
 261   let AllocationPriority = 1;
 262 }
 263
 264 // Register class for all scalar registers (SGPRs + Special Registers)
     // Defined as SReg_32_XM0 plus M0, so SReg_32_XM0 is a strict subset.
 265 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
 266   (add SReg_32_XM0, M0)> {
 267   let AllocationPriority = 1;
 268 }
 269
 270 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
       // Register class over the SGPR pairs generated by SGPR_64Regs; typed
       // v2i32/i64/f64, alignment 32.
 271   let AllocationPriority = 2;
 272 }
 273
 274 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
       // Register class over the TTMP pairs generated by TTMP_64Regs. Like
       // TTMP_32, it is excluded from register allocation.
 275   let isAllocatable = 0;
 276 }
 277
 278 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
 279   (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
       // All 64-bit scalar registers: the SGPR pairs, the 64-bit special
       // registers (VCC, EXEC, FLAT_SCR, TBA, TMA) and the TTMP pairs. Unlike
       // SGPR_64 and TTMP_64, the type list also includes i1.
 280   let AllocationPriority = 2;
 281 }
 282
 283 // Requires 2 s_mov_b64 to copy
     // The let below applies CopyCost = 2 to all three 128-bit scalar classes
     // defined inside the braces.
 284 let CopyCost = 2 in {
 285
     // SGPR quads generated by SGPR_128Regs.
 286 def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
 287   let AllocationPriority = 4;
 288 }
 289
     // TTMP quads generated by TTMP_128Regs; excluded from register allocation.
 290 def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
 291   let isAllocatable = 0;
 292 }
 293
     // Union of SGPR_128 and TTMP_128.
 294 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
 295   let AllocationPriority = 4;
 296 }
 297
 298 } // End CopyCost = 2
 299
 300 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
       // Register class over the 8-SGPR tuples generated by SGPR_256.
 301   // Requires 4 s_mov_b64 to copy
 302   let CopyCost = 4;
 303   let AllocationPriority = 5;
 304 }
 305
 306 def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
       // Register class over the 16-SGPR tuples generated by SGPR_512.
 307   // Requires 8 s_mov_b64 to copy
 308   let CopyCost = 8;
 309   let AllocationPriority = 6;
 310 }
 311
 312 // Register class for all vector registers (VGPRs + Interpolation Registers)
 313 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
       // Register class over the VGPR pairs generated by VGPR_64.
 314   // Requires 2 v_mov_b32 to copy
 315   let CopyCost = 2;
 316   let AllocationPriority = 2;
 317 }
 318
 319 def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
       // Register class over the VGPR triples generated by VGPR_96. The type
       // list is [untyped], so the 96-bit size is stated explicitly.
 320   let Size = 96;
 321
 322   // Requires 3 v_mov_b32 to copy
 323   let CopyCost = 3;
 324   let AllocationPriority = 3;
 325 }
 326
 327 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
       // Register class over the VGPR quads generated by VGPR_128.
 328   // Requires 4 v_mov_b32 to copy
 329   let CopyCost = 4;
 330   let AllocationPriority = 4;
 331 }
 332
 333 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
       // Register class over the 8-VGPR tuples generated by VGPR_256.
       // NOTE(review): by analogy with VReg_64/96/128, the cost of 8 presumably
       // means "requires 8 v_mov_b32 to copy" -- confirm.
 334   let CopyCost = 8;
 335   let AllocationPriority = 5;
 336 }
 337
 338 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
       // Register class over the 16-VGPR tuples generated by VGPR_512.
       // NOTE(review): by analogy with VReg_64/96/128, the cost of 16
       // presumably means "requires 16 v_mov_b32 to copy" -- confirm.
 339   let CopyCost = 16;
 340   let AllocationPriority = 6;
 341 }
 342
 343 def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
       // i1-typed register class whose members are the 32-bit VGPRs. Size is
       // pinned to 32 bits explicitly; the only value type listed is i1.
 344   let Size = 32;
 345 }
 346
 347 class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
       // Register operand over class rc, tagged with operand type
       // AMDGPU::OPERAND_REG_IMM32. Per the SSrc_* section header in this file,
       // such operands accept a register or a 32-bit immediate.
 348   let OperandNamespace = "AMDGPU";
 349   let OperandType = "OPERAND_REG_IMM32";
 350 }
 351
 352 class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
       // Register operand over class rc, tagged with operand type
       // AMDGPU::OPERAND_REG_INLINE_C. Per the SCSrc_* section header in this
       // file, such operands accept a register or an inline constant.
 353   let OperandNamespace = "AMDGPU";
 354   let OperandType = "OPERAND_REG_INLINE_C";
 355 }
 356
 357 //===----------------------------------------------------------------------===//
 358 //  SSrc_* Operands with an SGPR or a 32-bit immediate
 359 //===----------------------------------------------------------------------===//
 360
 361 def SSrc_32 : RegImmOperand<SReg_32> {
       // 32-bit scalar source over SReg_32; operand namespace and type are
       // inherited from RegImmOperand.
 362   let ParserMatchClass = RegImmMatcher<"SSrc32">;
 363 }
 364
 365 def SSrc_64 : RegImmOperand<SReg_64> {
       // 64-bit scalar source over SReg_64; operand namespace and type are
       // inherited from RegImmOperand.
 366   let ParserMatchClass = RegImmMatcher<"SSrc64">;
 367 }
 368
 369 //===----------------------------------------------------------------------===//
 370 //  SCSrc_* Operands with an SGPR or an inline constant
 371 //===----------------------------------------------------------------------===//
 372
 373 def SCSrc_32 : RegInlineOperand<SReg_32> {
       // 32-bit scalar source over SReg_32; operand namespace and type
       // (OPERAND_REG_INLINE_C) are inherited from RegInlineOperand.
 374   let ParserMatchClass = RegImmMatcher<"SCSrc32">;
 375 }
 376
 377 //===----------------------------------------------------------------------===//
 378 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 379 //===----------------------------------------------------------------------===//
 380
 381 def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
     // ^ VS_32: every register of VGPR_32 followed by every register of
     //   SReg_32, typed i32/f32.
 382
     // VS_64: every register of VReg_64 followed by every register of SReg_64,
     // typed i64/f64, with CopyCost 2.
 383 def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
 384   let CopyCost = 2;
 385 }
 386
 387 def VSrc_32 : RegImmOperand<VS_32> {
       // 32-bit source operand over VS_32 (VGPR_32 plus SReg_32).
       // OperandNamespace ("AMDGPU") and OperandType ("OPERAND_REG_IMM32") now
       // come from RegImmOperand, the same way SSrc_32 and SSrc_64 obtain them,
       // instead of being repeated here. The resulting field values are
       // unchanged.
 390   let ParserMatchClass = RegImmMatcher<"VSrc32">;
 391 }
 392
 393 def VSrc_64 : RegImmOperand<VS_64> {
       // 64-bit source operand over VS_64 (VReg_64 plus SReg_64).
       // OperandNamespace ("AMDGPU") and OperandType ("OPERAND_REG_IMM32") now
       // come from RegImmOperand, the same way SSrc_32 and SSrc_64 obtain them,
       // instead of being repeated here. The resulting field values are
       // unchanged.
 396   let ParserMatchClass = RegImmMatcher<"VSrc64">;
 397 }
 398
 399 //===----------------------------------------------------------------------===//
 400 //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
 401 //===----------------------------------------------------------------------===//
 402
 403 def VCSrc_32 : RegInlineOperand<VS_32> {
       // 32-bit source operand over VS_32 (VGPR_32 plus SReg_32).
       // OperandNamespace ("AMDGPU") and OperandType ("OPERAND_REG_INLINE_C")
       // now come from RegInlineOperand, the same way SCSrc_32 obtains them,
       // instead of being repeated here. The resulting field values are
       // unchanged.
 406   let ParserMatchClass = RegImmMatcher<"VCSrc32">;
 407 }
 408
 409 def VCSrc_64 : RegInlineOperand<VS_64> {
       // 64-bit source operand over VS_64 (VReg_64 plus SReg_64).
       // OperandNamespace ("AMDGPU") and OperandType ("OPERAND_REG_INLINE_C")
       // now come from RegInlineOperand, the same way SCSrc_32 obtains them,
       // instead of being repeated here. The resulting field values are
       // unchanged.
 412   let ParserMatchClass = RegImmMatcher<"VCSrc64">;
 413 }
 414
 415 //===----------------------------------------------------------------------===//
 416 //  SCSrc_* Operands with an SGPR or an inline constant
 417 //===----------------------------------------------------------------------===//
 418
 419 def SCSrc_64 : RegInlineOperand<SReg_64> {
       // 64-bit scalar source operand over SReg_64; the 64-bit counterpart of
       // SCSrc_32. OperandNamespace ("AMDGPU") and OperandType
       // ("OPERAND_REG_INLINE_C") now come from RegInlineOperand, exactly as
       // for SCSrc_32, instead of being repeated here. The resulting field
       // values are unchanged.
 422   let ParserMatchClass = RegImmMatcher<"SCSrc64">;
 423 }