contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp

   1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the ARMSelectionDAGInfo class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARMTargetMachine.h"
  15 #include "llvm/CodeGen/SelectionDAG.h"
  16 #include "llvm/IR/DerivedTypes.h"
  17 using namespace llvm;
  18
  19 #define DEBUG_TYPE "arm-selectiondag-info"
  20
  21 // Emit, if possible, a specialized version of the given Libcall. Typically this
  22 // means selecting the appropriately aligned version, but we also convert memset
  23 // of 0 into memclr.
  24 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
  25     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
  26     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
  27   const ARMSubtarget &Subtarget =
  28       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  29   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
  30
  31   // Only use a specialized AEABI function if the default version of this
  32   // Libcall is an AEABI function.
  33   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
  34     return SDValue();
  35
  36   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
  37   // able to translate memset to memclr and use the value to index the function
  38   // name array.
  39   enum {
  40     AEABI_MEMCPY = 0,
  41     AEABI_MEMMOVE,
  42     AEABI_MEMSET,
  43     AEABI_MEMCLR
  44   } AEABILibcall;
  45   switch (LC) {
  46   case RTLIB::MEMCPY:
  47     AEABILibcall = AEABI_MEMCPY;
  48     break;
  49   case RTLIB::MEMMOVE:
  50     AEABILibcall = AEABI_MEMMOVE;
  51     break;
  52   case RTLIB::MEMSET:
  53     AEABILibcall = AEABI_MEMSET;
  54     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
  55       if (ConstantSrc->getZExtValue() == 0)
  56         AEABILibcall = AEABI_MEMCLR;
  57     break;
  58   default:
  59     return SDValue();
  60   }
  61
  62   // Choose the most-aligned libcall variant that we can
  63   enum {
  64     ALIGN1 = 0,
  65     ALIGN4,
  66     ALIGN8
  67   } AlignVariant;
  68   if ((Align & 7) == 0)
  69     AlignVariant = ALIGN8;
  70   else if ((Align & 3) == 0)
  71     AlignVariant = ALIGN4;
  72   else
  73     AlignVariant = ALIGN1;
  74
  75   TargetLowering::ArgListTy Args;
  76   TargetLowering::ArgListEntry Entry;
  77   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  78   Entry.Node = Dst;
  79   Args.push_back(Entry);
  80   if (AEABILibcall == AEABI_MEMCLR) {
  81     Entry.Node = Size;
  82     Args.push_back(Entry);
  83   } else if (AEABILibcall == AEABI_MEMSET) {
  84     // Adjust parameters for memset, EABI uses format (ptr, size, value),
  85     // GNU library uses (ptr, value, size)
  86     // See RTABI section 4.3.4
  87     Entry.Node = Size;
  88     Args.push_back(Entry);
  89
  90     // Extend or truncate the argument to be an i32 value for the call.
  91     if (Src.getValueType().bitsGT(MVT::i32))
  92       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
  93     else if (Src.getValueType().bitsLT(MVT::i32))
  94       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
  95
  96     Entry.Node = Src;
  97     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
  98     Entry.IsSExt = false;
  99     Args.push_back(Entry);
 100   } else {
 101     Entry.Node = Src;
 102     Args.push_back(Entry);
 103
 104     Entry.Node = Size;
 105     Args.push_back(Entry);
 106   }
 107
 108   char const *FunctionNames[4][3] = {
 109     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
 110     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
 111     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
 112     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
 113   };
 114   TargetLowering::CallLoweringInfo CLI(DAG);
 115   CLI.setDebugLoc(dl)
 116       .setChain(Chain)
 117       .setLibCallee(
 118           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
 119           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
 120                                 TLI->getPointerTy(DAG.getDataLayout())),
 121           std::move(Args))
 122       .setDiscardResult();
 123   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
 124
 125   return CallResult.second;
 126 }
 127
 128 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
 129     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 130     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
 131     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
 132   const ARMSubtarget &Subtarget =
 133       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
 134   // Do repeated 4-byte loads and stores. To be improved.
 135   // This requires 4-byte alignment.
 136   if ((Align & 3) != 0)
 137     return SDValue();
 138   // This requires the copy size to be a constant, preferably
 139   // within a subtarget-specific limit.
 140   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
 141   if (!ConstantSize)
 142     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 143                                   RTLIB::MEMCPY);
 144   uint64_t SizeVal = ConstantSize->getZExtValue();
 145   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
 146     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 147                                   RTLIB::MEMCPY);
 148
 149   unsigned BytesLeft = SizeVal & 3;
 150   unsigned NumMemOps = SizeVal >> 2;
 151   unsigned EmittedNumMemOps = 0;
 152   EVT VT = MVT::i32;
 153   unsigned VTSize = 4;
 154   unsigned i = 0;
 155   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
 156   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
 157   SDValue TFOps[6];
 158   SDValue Loads[6];
 159   uint64_t SrcOff = 0, DstOff = 0;
 160
 161   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
 162   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
 163   // pressure on the general purpose registers. However this seems harder to map
 164   // onto the register allocator's view of the world.
 165
 166   // The number of MEMCPY pseudo-instructions to emit. We use up to
 167   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
 168   // later on. This is a lower bound on the number of MEMCPY operations we must
 169   // emit.
 170   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
 171
 172   // Code size optimisation: do not inline memcpy if expansion results in
 173   // more instructions than the libary call.
 174   if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
 175     return SDValue();
 176   }
 177
 178   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
 179
 180   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
 181     // Evenly distribute registers among MEMCPY operations to reduce register
 182     // pressure.
 183     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
 184     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
 185
 186     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
 187                       DAG.getConstant(NumRegs, dl, MVT::i32));
 188     Src = Dst.getValue(1);
 189     Chain = Dst.getValue(2);
 190
 191     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
 192     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
 193
 194     EmittedNumMemOps = NextEmittedNumMemOps;
 195   }
 196
 197   if (BytesLeft == 0)
 198     return Chain;
 199
 200   // Issue loads / stores for the trailing (1 - 3) bytes.
 201   auto getRemainingValueType = [](unsigned BytesLeft) {
 202     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
 203   };
 204   auto getRemainingSize = [](unsigned BytesLeft) {
 205     return (BytesLeft >= 2) ? 2 : 1;
 206   };
 207
 208   unsigned BytesLeftSave = BytesLeft;
 209   i = 0;
 210   while (BytesLeft) {
 211     VT = getRemainingValueType(BytesLeft);
 212     VTSize = getRemainingSize(BytesLeft);
 213     Loads[i] = DAG.getLoad(VT, dl, Chain,
 214                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
 215                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
 216                            SrcPtrInfo.getWithOffset(SrcOff));
 217     TFOps[i] = Loads[i].getValue(1);
 218     ++i;
 219     SrcOff += VTSize;
 220     BytesLeft -= VTSize;
 221   }
 222   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 223                       makeArrayRef(TFOps, i));
 224
 225   i = 0;
 226   BytesLeft = BytesLeftSave;
 227   while (BytesLeft) {
 228     VT = getRemainingValueType(BytesLeft);
 229     VTSize = getRemainingSize(BytesLeft);
 230     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
 231                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
 232                                         DAG.getConstant(DstOff, dl, MVT::i32)),
 233                             DstPtrInfo.getWithOffset(DstOff));
 234     ++i;
 235     DstOff += VTSize;
 236     BytesLeft -= VTSize;
 237   }
 238   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 239                      makeArrayRef(TFOps, i));
 240 }
 241
 242 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
 243     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 244     SDValue Size, unsigned Align, bool isVolatile,
 245     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
 246   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 247                                 RTLIB::MEMMOVE);
 248 }
 249
 250 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
 251     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
 252     SDValue Size, unsigned Align, bool isVolatile,
 253     MachinePointerInfo DstPtrInfo) const {
 254   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 255                                 RTLIB::MEMSET);
 256 }