contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp

   1 //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 /// This file implements the targeting of the MachineLegalizer class for
  11 /// AArch64.
  12 /// \todo This should be generated by TableGen.
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AArch64LegalizerInfo.h"
  16 #include "AArch64Subtarget.h"
  17 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  18 #include "llvm/CodeGen/MachineInstr.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/CodeGen/TargetOpcodes.h"
  21 #include "llvm/CodeGen/ValueTypes.h"
  22 #include "llvm/IR/DerivedTypes.h"
  23 #include "llvm/IR/Type.h"
  24
  25 using namespace llvm;
  26 using namespace LegalizeActions;
  27 using namespace LegalityPredicates;
  28
  29 AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  30   using namespace TargetOpcode;
  31   const LLT p0 = LLT::pointer(0, 64);
  32   const LLT s1 = LLT::scalar(1);
  33   const LLT s8 = LLT::scalar(8);
  34   const LLT s16 = LLT::scalar(16);
  35   const LLT s32 = LLT::scalar(32);
  36   const LLT s64 = LLT::scalar(64);
  37   const LLT s128 = LLT::scalar(128);
  38   const LLT s256 = LLT::scalar(256);
  39   const LLT s512 = LLT::scalar(512);
  40   const LLT v16s8 = LLT::vector(16, 8);
  41   const LLT v8s8 = LLT::vector(8, 8);
  42   const LLT v4s8 = LLT::vector(4, 8);
  43   const LLT v8s16 = LLT::vector(8, 16);
  44   const LLT v4s16 = LLT::vector(4, 16);
  45   const LLT v2s16 = LLT::vector(2, 16);
  46   const LLT v2s32 = LLT::vector(2, 32);
  47   const LLT v4s32 = LLT::vector(4, 32);
  48   const LLT v2s64 = LLT::vector(2, 64);
  49
  50   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
  51       .legalFor({p0, s1, s8, s16, s32, s64})
  52       .clampScalar(0, s1, s64)
  53       .widenScalarToNextPow2(0, 8);
  54
  55   getActionDefinitionsBuilder(G_PHI)
  56       .legalFor({p0, s16, s32, s64})
  57       .clampScalar(0, s16, s64)
  58       .widenScalarToNextPow2(0);
  59
  60   getActionDefinitionsBuilder(G_BSWAP)
  61       .legalFor({s32, s64})
  62       .clampScalar(0, s16, s64)
  63       .widenScalarToNextPow2(0);
  64
  65   getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
  66       .legalFor({s32, s64, v2s32, v4s32, v2s64})
  67       .clampScalar(0, s32, s64)
  68       .widenScalarToNextPow2(0)
  69       .clampNumElements(0, v2s32, v4s32)
  70       .clampNumElements(0, v2s64, v2s64)
  71       .moreElementsToNextPow2(0);
  72
  73   getActionDefinitionsBuilder(G_GEP)
  74       .legalFor({{p0, s64}})
  75       .clampScalar(1, s64, s64);
  76
  77   getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
  78
  79   getActionDefinitionsBuilder({G_LSHR, G_ASHR, G_SDIV, G_UDIV})
  80       .legalFor({s32, s64})
  81       .clampScalar(0, s32, s64)
  82       .widenScalarToNextPow2(0);
  83
  84   getActionDefinitionsBuilder({G_SREM, G_UREM})
  85       .lowerFor({s1, s8, s16, s32, s64});
  86
  87   getActionDefinitionsBuilder({G_SMULO, G_UMULO})
  88       .lowerFor({{s64, s1}});
  89
  90   getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
  91
  92   getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO})
  93       .legalFor({{s32, s1}, {s64, s1}});
  94
  95   getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
  96       .legalFor({s32, s64});
  97
  98   getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
  99
 100   getActionDefinitionsBuilder(G_INSERT)
 101       .unsupportedIf([=](const LegalityQuery &Query) {
 102         return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
 103       })
 104       .legalIf([=](const LegalityQuery &Query) {
 105         const LLT &Ty0 = Query.Types[0];
 106         const LLT &Ty1 = Query.Types[1];
 107         if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
 108           return false;
 109         return isPowerOf2_32(Ty1.getSizeInBits()) &&
 110                (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
 111       })
 112       .clampScalar(0, s32, s64)
 113       .widenScalarToNextPow2(0)
 114       .maxScalarIf(typeInSet(0, {s32}), 1, s16)
 115       .maxScalarIf(typeInSet(0, {s64}), 1, s32)
 116       .widenScalarToNextPow2(1);
 117
 118   getActionDefinitionsBuilder(G_EXTRACT)
 119       .unsupportedIf([=](const LegalityQuery &Query) {
 120         return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
 121       })
 122       .legalIf([=](const LegalityQuery &Query) {
 123         const LLT &Ty0 = Query.Types[0];
 124         const LLT &Ty1 = Query.Types[1];
 125         if (Ty1 != s32 && Ty1 != s64)
 126           return false;
 127         if (Ty1 == p0)
 128           return true;
 129         return isPowerOf2_32(Ty0.getSizeInBits()) &&
 130                (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
 131       })
 132       .clampScalar(1, s32, s64)
 133       .widenScalarToNextPow2(1)
 134       .maxScalarIf(typeInSet(1, {s32}), 0, s16)
 135       .maxScalarIf(typeInSet(1, {s64}), 0, s32)
 136       .widenScalarToNextPow2(0);
 137
 138   getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
 139       .legalForTypesWithMemSize({{s32, p0, 8},
 140                                  {s32, p0, 16},
 141                                  {s32, p0, 32},
 142                                  {s64, p0, 64},
 143                                  {p0, p0, 64},
 144                                  {v2s32, p0, 64}})
 145       .clampScalar(0, s32, s64)
 146       .widenScalarToNextPow2(0)
 147       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
 148       //       how to do that yet.
 149       .unsupportedIfMemSizeNotPow2()
 150       // Lower anything left over into G_*EXT and G_LOAD
 151       .lower();
 152
 153   getActionDefinitionsBuilder(G_LOAD)
 154       .legalForTypesWithMemSize({{s8, p0, 8},
 155                                  {s16, p0, 16},
 156                                  {s32, p0, 32},
 157                                  {s64, p0, 64},
 158                                  {p0, p0, 64},
 159                                  {v2s32, p0, 64}})
 160       // These extends are also legal
 161       .legalForTypesWithMemSize({{s32, p0, 8},
 162                                  {s32, p0, 16}})
 163       .clampScalar(0, s8, s64)
 164       .widenScalarToNextPow2(0)
 165       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
 166       //       how to do that yet.
 167       .unsupportedIfMemSizeNotPow2()
 168       // Lower any any-extending loads left into G_ANYEXT and G_LOAD
 169       .lowerIf([=](const LegalityQuery &Query) {
 170         return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
 171       })
 172       .clampNumElements(0, v2s32, v2s32);
 173
 174   getActionDefinitionsBuilder(G_STORE)
 175       .legalForTypesWithMemSize({{s8, p0, 8},
 176                                  {s16, p0, 16},
 177                                  {s32, p0, 32},
 178                                  {s64, p0, 64},
 179                                  {p0, p0, 64},
 180                                  {v2s32, p0, 64}})
 181       .clampScalar(0, s8, s64)
 182       .widenScalarToNextPow2(0)
 183       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
 184       //       how to do that yet.
 185       .unsupportedIfMemSizeNotPow2()
 186       .lowerIf([=](const LegalityQuery &Query) {
 187         return Query.Types[0].isScalar() &&
 188                Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
 189       })
 190       .clampNumElements(0, v2s32, v2s32);
 191
 192   // Constants
 193   getActionDefinitionsBuilder(G_CONSTANT)
 194       .legalFor({p0, s32, s64})
 195       .clampScalar(0, s32, s64)
 196       .widenScalarToNextPow2(0);
 197   getActionDefinitionsBuilder(G_FCONSTANT)
 198       .legalFor({s32, s64})
 199       .clampScalar(0, s32, s64);
 200
 201   getActionDefinitionsBuilder(G_ICMP)
 202       .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
 203       .clampScalar(0, s32, s32)
 204       .clampScalar(1, s32, s64)
 205       .widenScalarToNextPow2(1);
 206
 207   getActionDefinitionsBuilder(G_FCMP)
 208       .legalFor({{s32, s32}, {s32, s64}})
 209       .clampScalar(0, s32, s32)
 210       .clampScalar(1, s32, s64)
 211       .widenScalarToNextPow2(1);
 212
 213   // Extensions
 214   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
 215       .legalForCartesianProduct({s8, s16, s32, s64}, {s1, s8, s16, s32});
 216
 217   // FP conversions
 218   getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
 219       {{s16, s32}, {s16, s64}, {s32, s64}});
 220   getActionDefinitionsBuilder(G_FPEXT).legalFor(
 221       {{s32, s16}, {s64, s16}, {s64, s32}});
 222
 223   // Conversions
 224   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
 225       .legalForCartesianProduct({s32, s64})
 226       .clampScalar(0, s32, s64)
 227       .widenScalarToNextPow2(0)
 228       .clampScalar(1, s32, s64)
 229       .widenScalarToNextPow2(1);
 230
 231   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
 232       .legalForCartesianProduct({s32, s64})
 233       .clampScalar(1, s32, s64)
 234       .widenScalarToNextPow2(1)
 235       .clampScalar(0, s32, s64)
 236       .widenScalarToNextPow2(0);
 237
 238   // Control-flow
 239   getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
 240   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 241
 242   // Select
 243   getActionDefinitionsBuilder(G_SELECT)
 244       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
 245       .clampScalar(0, s32, s64)
 246       .widenScalarToNextPow2(0);
 247
 248   // Pointer-handling
 249   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
 250   getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
 251
 252   getActionDefinitionsBuilder(G_PTRTOINT)
 253       .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
 254       .maxScalar(0, s64)
 255       .widenScalarToNextPow2(0, /*Min*/ 8);
 256
 257   getActionDefinitionsBuilder(G_INTTOPTR)
 258       .unsupportedIf([&](const LegalityQuery &Query) {
 259         return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
 260       })
 261       .legalFor({{p0, s64}});
 262
 263   // Casts for 32 and 64-bit width type are just copies.
 264   // Same for 128-bit width type, except they are on the FPR bank.
 265   getActionDefinitionsBuilder(G_BITCAST)
 266       // FIXME: This is wrong since G_BITCAST is not allowed to change the
 267       // number of bits but it's what the previous code described and fixing
 268       // it breaks tests.
 269       .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
 270                                  v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
 271
 272   getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
 273
 274   // va_list must be a pointer, but most sized types are pretty easy to handle
 275   // as the destination.
 276   getActionDefinitionsBuilder(G_VAARG)
 277       .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
 278       .clampScalar(0, s8, s64)
 279       .widenScalarToNextPow2(0, /*Min*/ 8);
 280
 281   if (ST.hasLSE()) {
 282     getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
 283         .lowerIf(all(
 284             typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
 285             atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
 286
 287     getActionDefinitionsBuilder(
 288         {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
 289          G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
 290          G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
 291         .legalIf(all(
 292             typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
 293             atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
 294   }
 295
 296   getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
 297
 298   // Merge/Unmerge
 299   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
 300     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
 301     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
 302
 303     auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
 304       const LLT &Ty = Query.Types[TypeIdx];
 305       if (Ty.isVector()) {
 306         const LLT &EltTy = Ty.getElementType();
 307         if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
 308           return true;
 309         if (!isPowerOf2_32(EltTy.getSizeInBits()))
 310           return true;
 311       }
 312       return false;
 313     };
 314     auto scalarize =
 315         [](const LegalityQuery &Query, unsigned TypeIdx) {
 316           const LLT &Ty = Query.Types[TypeIdx];
 317           return std::make_pair(TypeIdx, Ty.getElementType());
 318         };
 319
 320     // FIXME: This rule is horrible, but specifies the same as what we had
 321     // before with the particularly strange definitions removed (e.g.
 322     // s8 = G_MERGE_VALUES s32, s32).
 323     // Part of the complexity comes from these ops being extremely flexible. For
 324     // example, you can build/decompose vectors with it, concatenate vectors,
 325     // etc. and in addition to this you can also bitcast with it at the same
 326     // time. We've been considering breaking it up into multiple ops to make it
 327     // more manageable throughout the backend.
 328     getActionDefinitionsBuilder(Op)
 329         // Break up vectors with weird elements into scalars
 330         .fewerElementsIf(
 331             [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
 332             [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
 333         .fewerElementsIf(
 334             [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
 335             [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
 336         // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
 337         // or 384.
 338         .clampScalar(BigTyIdx, s8, s512)
 339         .widenScalarIf(
 340             [=](const LegalityQuery &Query) {
 341               const LLT &Ty = Query.Types[BigTyIdx];
 342               return !isPowerOf2_32(Ty.getSizeInBits()) &&
 343                      Ty.getSizeInBits() % 64 != 0;
 344             },
 345             [=](const LegalityQuery &Query) {
 346               // Pick the next power of 2, or a multiple of 64 over 128.
 347               // Whichever is smaller.
 348               const LLT &Ty = Query.Types[BigTyIdx];
 349               unsigned NewSizeInBits = 1
 350                                        << Log2_32_Ceil(Ty.getSizeInBits() + 1);
 351               if (NewSizeInBits >= 256) {
 352                 unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
 353                 if (RoundedTo < NewSizeInBits)
 354                   NewSizeInBits = RoundedTo;
 355               }
 356               return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
 357             })
 358         // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
 359         // worth considering the multiples of 64 since 2*192 and 2*384 are not
 360         // valid.
 361         .clampScalar(LitTyIdx, s8, s256)
 362         .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
 363         // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
 364         // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
 365         // At this point it's simple enough to accept the legal types.
 366         .legalIf([=](const LegalityQuery &Query) {
 367           const LLT &BigTy = Query.Types[BigTyIdx];
 368           const LLT &LitTy = Query.Types[LitTyIdx];
 369           if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
 370             return false;
 371           if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
 372             return false;
 373           return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
 374         })
 375         // Any vectors left are the wrong size. Scalarize them.
 376         .fewerElementsIf([](const LegalityQuery &Query) { return true; },
 377                          [](const LegalityQuery &Query) {
 378                            return std::make_pair(
 379                                0, Query.Types[0].getElementType());
 380                          })
 381         .fewerElementsIf([](const LegalityQuery &Query) { return true; },
 382                          [](const LegalityQuery &Query) {
 383                            return std::make_pair(
 384                                1, Query.Types[1].getElementType());
 385                          });
 386   }
 387
 388   computeTables();
 389   verify(*ST.getInstrInfo());
 390 }
 391
 392 bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
 393                                           MachineRegisterInfo &MRI,
 394                                           MachineIRBuilder &MIRBuilder) const {
 395   switch (MI.getOpcode()) {
 396   default:
 397     // No idea what to do.
 398     return false;
 399   case TargetOpcode::G_VAARG:
 400     return legalizeVaArg(MI, MRI, MIRBuilder);
 401   }
 402
 403   llvm_unreachable("expected switch to return");
 404 }
 405
 406 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
 407                                          MachineRegisterInfo &MRI,
 408                                          MachineIRBuilder &MIRBuilder) const {
 409   MIRBuilder.setInstr(MI);
 410   MachineFunction &MF = MIRBuilder.getMF();
 411   unsigned Align = MI.getOperand(2).getImm();
 412   unsigned Dst = MI.getOperand(0).getReg();
 413   unsigned ListPtr = MI.getOperand(1).getReg();
 414
 415   LLT PtrTy = MRI.getType(ListPtr);
 416   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
 417
 418   const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
 419   unsigned List = MRI.createGenericVirtualRegister(PtrTy);
 420   MIRBuilder.buildLoad(
 421       List, ListPtr,
 422       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
 423                                PtrSize, /* Align = */ PtrSize));
 424
 425   unsigned DstPtr;
 426   if (Align > PtrSize) {
 427     // Realign the list to the actual required alignment.
 428     auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
 429
 430     unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
 431     MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
 432
 433     DstPtr = MRI.createGenericVirtualRegister(PtrTy);
 434     MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
 435   } else
 436     DstPtr = List;
 437
 438   uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
 439   MIRBuilder.buildLoad(
 440       Dst, DstPtr,
 441       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
 442                                ValSize, std::max(Align, PtrSize)));
 443
 444   unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
 445   MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
 446
 447   unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
 448   MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
 449
 450   MIRBuilder.buildStore(
 451       NewList, ListPtr,
 452       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
 453                                PtrSize, /* Align = */ PtrSize));
 454
 455   MI.eraseFromParent();
 456   return true;
 457 }