contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PPCISelLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCISelLowering.h"
  15 #include "MCTargetDesc/PPCPredicates.h"
  16 #include "PPC.h"
  17 #include "PPCCCState.h"
  18 #include "PPCCallingConv.h"
  19 #include "PPCFrameLowering.h"
  20 #include "PPCInstrInfo.h"
  21 #include "PPCMachineFunctionInfo.h"
  22 #include "PPCPerfectShuffle.h"
  23 #include "PPCRegisterInfo.h"
  24 #include "PPCSubtarget.h"
  25 #include "PPCTargetMachine.h"
  26 #include "llvm/ADT/APFloat.h"
  27 #include "llvm/ADT/APInt.h"
  28 #include "llvm/ADT/ArrayRef.h"
  29 #include "llvm/ADT/DenseMap.h"
  30 #include "llvm/ADT/None.h"
  31 #include "llvm/ADT/STLExtras.h"
  32 #include "llvm/ADT/SmallPtrSet.h"
  33 #include "llvm/ADT/SmallSet.h"
  34 #include "llvm/ADT/SmallVector.h"
  35 #include "llvm/ADT/Statistic.h"
  36 #include "llvm/ADT/StringRef.h"
  37 #include "llvm/ADT/StringSwitch.h"
  38 #include "llvm/CodeGen/CallingConvLower.h"
  39 #include "llvm/CodeGen/ISDOpcodes.h"
  40 #include "llvm/CodeGen/MachineBasicBlock.h"
  41 #include "llvm/CodeGen/MachineFrameInfo.h"
  42 #include "llvm/CodeGen/MachineFunction.h"
  43 #include "llvm/CodeGen/MachineInstr.h"
  44 #include "llvm/CodeGen/MachineInstrBuilder.h"
  45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  46 #include "llvm/CodeGen/MachineLoopInfo.h"
  47 #include "llvm/CodeGen/MachineMemOperand.h"
  48 #include "llvm/CodeGen/MachineOperand.h"
  49 #include "llvm/CodeGen/MachineRegisterInfo.h"
  50 #include "llvm/CodeGen/MachineValueType.h"
  51 #include "llvm/CodeGen/RuntimeLibcalls.h"
  52 #include "llvm/CodeGen/SelectionDAG.h"
  53 #include "llvm/CodeGen/SelectionDAGNodes.h"
  54 #include "llvm/CodeGen/ValueTypes.h"
  55 #include "llvm/IR/CallSite.h"
  56 #include "llvm/IR/CallingConv.h"
  57 #include "llvm/IR/Constant.h"
  58 #include "llvm/IR/Constants.h"
  59 #include "llvm/IR/DataLayout.h"
  60 #include "llvm/IR/DebugLoc.h"
  61 #include "llvm/IR/DerivedTypes.h"
  62 #include "llvm/IR/Function.h"
  63 #include "llvm/IR/GlobalValue.h"
  64 #include "llvm/IR/IRBuilder.h"
  65 #include "llvm/IR/Instructions.h"
  66 #include "llvm/IR/Intrinsics.h"
  67 #include "llvm/IR/Module.h"
  68 #include "llvm/IR/Type.h"
  69 #include "llvm/IR/Use.h"
  70 #include "llvm/IR/Value.h"
  71 #include "llvm/MC/MCExpr.h"
  72 #include "llvm/MC/MCRegisterInfo.h"
  73 #include "llvm/Support/AtomicOrdering.h"
  74 #include "llvm/Support/BranchProbability.h"
  75 #include "llvm/Support/Casting.h"
  76 #include "llvm/Support/CodeGen.h"
  77 #include "llvm/Support/CommandLine.h"
  78 #include "llvm/Support/Compiler.h"
  79 #include "llvm/Support/Debug.h"
  80 #include "llvm/Support/ErrorHandling.h"
  81 #include "llvm/Support/Format.h"
  82 #include "llvm/Support/KnownBits.h"
  83 #include "llvm/Support/MathExtras.h"
  84 #include "llvm/Support/raw_ostream.h"
  85 #include "llvm/Target/TargetInstrInfo.h"
  86 #include "llvm/Target/TargetLowering.h"
  87 #include "llvm/Target/TargetMachine.h"
  88 #include "llvm/Target/TargetOptions.h"
  89 #include "llvm/Target/TargetRegisterInfo.h"
  90 #include <algorithm>
  91 #include <cassert>
  92 #include <cstdint>
  93 #include <iterator>
  94 #include <list>
  95 #include <utility>
  96 #include <vector>
  97
  98 using namespace llvm;
  99
 100 #define DEBUG_TYPE "ppc-lowering"
 101
 102 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 103 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 104
 105 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 106 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 107
 108 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
 109 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
 110
 111 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 112 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 113
 114 STATISTIC(NumTailCalls, "Number of tail calls");
 115 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 116
 117 // FIXME: Remove this once the bug has been fixed!
 118 extern cl::opt<bool> ANDIGlueBug;
 119
 120 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 121                                      const PPCSubtarget &STI)
 122     : TargetLowering(TM), Subtarget(STI) {
 123   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 124   setUseUnderscoreSetJmp(true);
 125   setUseUnderscoreLongJmp(true);
 126
 127   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
 128   // arguments are at least 4/8 bytes aligned.
 129   bool isPPC64 = Subtarget.isPPC64();
 130   setMinStackArgumentAlignment(isPPC64 ? 8:4);
 131
 132   // Set up the register classes.
 133   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
 134   if (!useSoftFloat()) {
 135     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
 136     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
 137   }
 138
 139   // Match BITREVERSE to customized fast code sequence in the td file.
 140   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
 141   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 142
 143   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
 144   for (MVT VT : MVT::integer_valuetypes()) {
 145     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 146     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
 147   }
 148
 149   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 150
 151   // PowerPC has pre-inc load and store's.
 152   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
 153   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
 154   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
 155   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
 156   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
 157   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
 158   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
 159   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
 160   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
 161   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
 162   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
 163   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
 164   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
 165   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
 166
 167   if (Subtarget.useCRBits()) {
 168     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 169
 170     if (isPPC64 || Subtarget.hasFPCVT()) {
 171       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
 172       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
 173                          isPPC64 ? MVT::i64 : MVT::i32);
 174       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
 175       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
 176                         isPPC64 ? MVT::i64 : MVT::i32);
 177     } else {
 178       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
 179       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
 180     }
 181
 182     // PowerPC does not support direct load/store of condition registers.
 183     setOperationAction(ISD::LOAD, MVT::i1, Custom);
 184     setOperationAction(ISD::STORE, MVT::i1, Custom);
 185
 186     // FIXME: Remove this once the ANDI glue bug is fixed:
 187     if (ANDIGlueBug)
 188       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
 189
 190     for (MVT VT : MVT::integer_valuetypes()) {
 191       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 192       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 193       setTruncStoreAction(VT, MVT::i1, Expand);
 194     }
 195
 196     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
 197   }
 198
 199   // This is used in the ppcf128->int sequence.  Note it has different semantics
 200   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
 201   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 202
 203   // We do not currently implement these libm ops for PowerPC.
 204   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
 205   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
 206   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
 207   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
 208   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
 209   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 210
 211   // PowerPC has no SREM/UREM instructions unless we are on P9
 212   // On P9 we may use a hardware instruction to compute the remainder.
 213   // The instructions are not legalized directly because in the cases where the
 214   // result of both the remainder and the division is required it is more
 215   // efficient to compute the remainder from the result of the division rather
 216   // than use the remainder instruction.
 217   if (Subtarget.isISA3_0()) {
 218     setOperationAction(ISD::SREM, MVT::i32, Custom);
 219     setOperationAction(ISD::UREM, MVT::i32, Custom);
 220     setOperationAction(ISD::SREM, MVT::i64, Custom);
 221     setOperationAction(ISD::UREM, MVT::i64, Custom);
 222   } else {
 223     setOperationAction(ISD::SREM, MVT::i32, Expand);
 224     setOperationAction(ISD::UREM, MVT::i32, Expand);
 225     setOperationAction(ISD::SREM, MVT::i64, Expand);
 226     setOperationAction(ISD::UREM, MVT::i64, Expand);
 227   }
 228
 229   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 230   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 231   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 232   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 233   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 234   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 235   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 236   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 237   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 238
 239   // We don't support sin/cos/sqrt/fmod/pow
 240   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 241   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 242   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 243   setOperationAction(ISD::FREM , MVT::f64, Expand);
 244   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 245   setOperationAction(ISD::FMA  , MVT::f64, Legal);
 246   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 248   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 249   setOperationAction(ISD::FREM , MVT::f32, Expand);
 250   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 251   setOperationAction(ISD::FMA  , MVT::f32, Legal);
 252
 253   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 254
 255   // If we're enabling GP optimizations, use hardware square root
 256   if (!Subtarget.hasFSQRT() &&
 257       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
 258         Subtarget.hasFRE()))
 259     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 260
 261   if (!Subtarget.hasFSQRT() &&
 262       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
 263         Subtarget.hasFRES()))
 264     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 265
 266   if (Subtarget.hasFCPSGN()) {
 267     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
 268     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
 269   } else {
 270     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 271     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 272   }
 273
 274   if (Subtarget.hasFPRND()) {
 275     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
 276     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
 277     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
 278     setOperationAction(ISD::FROUND, MVT::f64, Legal);
 279
 280     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 281     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 282     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 283     setOperationAction(ISD::FROUND, MVT::f32, Legal);
 284   }
 285
 286   // PowerPC does not have BSWAP
 287   // CTPOP or CTTZ were introduced in P8/P9 respectivelly
 288   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
 289   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
 290   if (Subtarget.isISA3_0()) {
 291     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
 292     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
 293   } else {
 294     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 295     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 296   }
 297
 298   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
 299     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
 300     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
 301   } else {
 302     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 303     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 304   }
 305
 306   // PowerPC does not have ROTR
 307   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 308   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 309
 310   if (!Subtarget.useCRBits()) {
 311     // PowerPC does not have Select
 312     setOperationAction(ISD::SELECT, MVT::i32, Expand);
 313     setOperationAction(ISD::SELECT, MVT::i64, Expand);
 314     setOperationAction(ISD::SELECT, MVT::f32, Expand);
 315     setOperationAction(ISD::SELECT, MVT::f64, Expand);
 316   }
 317
 318   // PowerPC wants to turn select_cc of FP into fsel when possible.
 319   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 320   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 321
 322   // PowerPC wants to optimize integer setcc a bit
 323   if (!Subtarget.useCRBits())
 324     setOperationAction(ISD::SETCC, MVT::i32, Custom);
 325
 326   // PowerPC does not have BRCOND which requires SetCC
 327   if (!Subtarget.useCRBits())
 328     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 329
 330   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 331
 332   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 333   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 334
 335   // PowerPC does not have [U|S]INT_TO_FP
 336   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 337   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 338
 339   if (Subtarget.hasDirectMove() && isPPC64) {
 340     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
 341     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
 342     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
 343     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
 344   } else {
 345     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
 346     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 347     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
 348     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 349   }
 350
 351   // We cannot sextinreg(i1).  Expand to shifts.
 352   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 353
 354   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
 355   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 356   // support continuation, user-level threading, and etc.. As a result, no
 357   // other SjLj exception interfaces are implemented and please don't build
 358   // your own exception handling based on them.
 359   // LLVM/Clang supports zero-cost DWARF exception handling.
 360   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 361   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 362
 363   // We want to legalize GlobalAddress and ConstantPool nodes into the
 364   // appropriate instructions to materialize the address.
 365   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 366   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 367   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
 368   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 369   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 370   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 371   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 372   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
 373   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 374   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 375
 376   // TRAP is legal.
 377   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 378
 379   // TRAMPOLINE is custom lowered.
 380   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
 381   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
 382
 383   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 384   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 385
 386   if (Subtarget.isSVR4ABI()) {
 387     if (isPPC64) {
 388       // VAARG always uses double-word chunks, so promote anything smaller.
 389       setOperationAction(ISD::VAARG, MVT::i1, Promote);
 390       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
 391       setOperationAction(ISD::VAARG, MVT::i8, Promote);
 392       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
 393       setOperationAction(ISD::VAARG, MVT::i16, Promote);
 394       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
 395       setOperationAction(ISD::VAARG, MVT::i32, Promote);
 396       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
 397       setOperationAction(ISD::VAARG, MVT::Other, Expand);
 398     } else {
 399       // VAARG is custom lowered with the 32-bit SVR4 ABI.
 400       setOperationAction(ISD::VAARG, MVT::Other, Custom);
 401       setOperationAction(ISD::VAARG, MVT::i64, Custom);
 402     }
 403   } else
 404     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 405
 406   if (Subtarget.isSVR4ABI() && !isPPC64)
 407     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
 408     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
 409   else
 410     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 411
 412   // Use the default implementation.
 413   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 414   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 415   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 416   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 417   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 418   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
 419   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
 420   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
 421   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 422
 423   // We want to custom lower some of our intrinsics.
 424   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 425
 426   // To handle counter-based loop conditions.
 427   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 428
 429   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
 430   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
 431   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
 432   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 433
 434   // Comparisons that require checking two conditions.
 435   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 436   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 437   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 438   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 439   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 440   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 441   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 442   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 443   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 444   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 445   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 446   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 447
 448   if (Subtarget.has64BitSupport()) {
 449     // They also have instructions for converting between i64 and fp.
 450     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 451     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 452     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 453     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 454     // This is just the low 32 bits of a (signed) fp->i64 conversion.
 455     // We cannot do this with Promote because i64 is not a legal type.
 456     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 457
 458     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
 459       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 460   } else {
 461     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 462     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 463   }
 464
 465   // With the instructions enabled under FPCVT, we can do everything.
 466   if (Subtarget.hasFPCVT()) {
 467     if (Subtarget.has64BitSupport()) {
 468       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 469       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 470       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 471       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 472     }
 473
 474     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 475     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 476     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 477     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 478   }
 479
 480   if (Subtarget.use64BitRegs()) {
 481     // 64-bit PowerPC implementations can support i64 types directly
 482     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
 483     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 484     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 485     // 64-bit PowerPC wants to expand i128 shifts itself.
 486     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 487     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 488     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 489   } else {
 490     // 32-bit PowerPC wants to expand i64 shifts itself.
 491     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 492     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 493     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 494   }
 495
 496   if (Subtarget.hasAltivec()) {
 497     // First set operation action for all vector types to expand. Then we
 498     // will selectively turn on ones that can be effectively codegen'd.
 499     for (MVT VT : MVT::vector_valuetypes()) {
 500       // add/sub are legal for all supported vector VT's.
 501       setOperationAction(ISD::ADD, VT, Legal);
 502       setOperationAction(ISD::SUB, VT, Legal);
 503
 504       // Vector instructions introduced in P8
 505       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
 506         setOperationAction(ISD::CTPOP, VT, Legal);
 507         setOperationAction(ISD::CTLZ, VT, Legal);
 508       }
 509       else {
 510         setOperationAction(ISD::CTPOP, VT, Expand);
 511         setOperationAction(ISD::CTLZ, VT, Expand);
 512       }
 513
 514       // Vector instructions introduced in P9
 515       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
 516         setOperationAction(ISD::CTTZ, VT, Legal);
 517       else
 518         setOperationAction(ISD::CTTZ, VT, Expand);
 519
 520       // We promote all shuffles to v16i8.
 521       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 522       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 523
 524       // We promote all non-typed operations to v4i32.
 525       setOperationAction(ISD::AND   , VT, Promote);
 526       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 527       setOperationAction(ISD::OR    , VT, Promote);
 528       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 529       setOperationAction(ISD::XOR   , VT, Promote);
 530       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 531       setOperationAction(ISD::LOAD  , VT, Promote);
 532       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 533       setOperationAction(ISD::SELECT, VT, Promote);
 534       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 535       setOperationAction(ISD::SELECT_CC, VT, Promote);
 536       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
 537       setOperationAction(ISD::STORE, VT, Promote);
 538       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 539
 540       // No other operations are legal.
 541       setOperationAction(ISD::MUL , VT, Expand);
 542       setOperationAction(ISD::SDIV, VT, Expand);
 543       setOperationAction(ISD::SREM, VT, Expand);
 544       setOperationAction(ISD::UDIV, VT, Expand);
 545       setOperationAction(ISD::UREM, VT, Expand);
 546       setOperationAction(ISD::FDIV, VT, Expand);
 547       setOperationAction(ISD::FREM, VT, Expand);
 548       setOperationAction(ISD::FNEG, VT, Expand);
 549       setOperationAction(ISD::FSQRT, VT, Expand);
 550       setOperationAction(ISD::FLOG, VT, Expand);
 551       setOperationAction(ISD::FLOG10, VT, Expand);
 552       setOperationAction(ISD::FLOG2, VT, Expand);
 553       setOperationAction(ISD::FEXP, VT, Expand);
 554       setOperationAction(ISD::FEXP2, VT, Expand);
 555       setOperationAction(ISD::FSIN, VT, Expand);
 556       setOperationAction(ISD::FCOS, VT, Expand);
 557       setOperationAction(ISD::FABS, VT, Expand);
 558       setOperationAction(ISD::FFLOOR, VT, Expand);
 559       setOperationAction(ISD::FCEIL,  VT, Expand);
 560       setOperationAction(ISD::FTRUNC, VT, Expand);
 561       setOperationAction(ISD::FRINT,  VT, Expand);
 562       setOperationAction(ISD::FNEARBYINT, VT, Expand);
 563       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 564       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 565       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 566       setOperationAction(ISD::MULHU, VT, Expand);
 567       setOperationAction(ISD::MULHS, VT, Expand);
 568       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 569       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 570       setOperationAction(ISD::UDIVREM, VT, Expand);
 571       setOperationAction(ISD::SDIVREM, VT, Expand);
 572       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 573       setOperationAction(ISD::FPOW, VT, Expand);
 574       setOperationAction(ISD::BSWAP, VT, Expand);
 575       setOperationAction(ISD::VSELECT, VT, Expand);
 576       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 577       setOperationAction(ISD::ROTL, VT, Expand);
 578       setOperationAction(ISD::ROTR, VT, Expand);
 579
 580       for (MVT InnerVT : MVT::vector_valuetypes()) {
 581         setTruncStoreAction(VT, InnerVT, Expand);
 582         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
 583         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
 584         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
 585       }
 586     }
 587
 588     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 589     // with merges, splats, etc.
 590     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 591
 592     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 593     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 594     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 595     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 596     setOperationAction(ISD::SELECT, MVT::v4i32,
 597                        Subtarget.useCRBits() ? Legal : Expand);
 598     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 599     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
 600     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
 601     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
 602     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
 603     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 604     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
 605     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 606     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 607
 608     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
 609     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
 610     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
 611     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
 612
 613     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 614     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
 615
 616     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
 617       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 618       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 619     }
 620
 621     if (Subtarget.hasP8Altivec())
 622       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
 623     else
 624       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 625
 626     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 627     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 628
 629     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 630     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 631
 632     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 633     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 634     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 635     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 636
 637     // Altivec does not contain unordered floating-point compare instructions
 638     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
 639     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
 640     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
 641     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 642
 643     if (Subtarget.hasVSX()) {
 644       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
 645       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 646       if (Subtarget.hasP8Vector()) {
 647         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 648         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
 649       }
 650       if (Subtarget.hasDirectMove() && isPPC64) {
 651         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
 652         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
 653         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
 654         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
 655         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
 656         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
 657         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
 658         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
 659       }
 660       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 661
 662       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
 663       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
 664       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
 665       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 666       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
 667
 668       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 669
 670       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
 671       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
 672
 673       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
 674       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
 675
 676       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
 677       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
 678       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
 679       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 680       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
 681
 682       // Share the Altivec comparison restrictions.
 683       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
 684       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
 685       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
 686       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 687
 688       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
 689       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
 690
 691       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
 692
 693       if (Subtarget.hasP8Vector())
 694         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
 695
 696       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
 697
 698       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
 699       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
 700       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
 701
 702       if (Subtarget.hasP8Altivec()) {
 703         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
 704         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
 705         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
 706
 707         // 128 bit shifts can be accomplished via 3 instructions for SHL and
 708         // SRL, but not for SRA because of the instructions available:
 709         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
 710         // doing
 711         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
 712         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
 713         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 714
 715         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
 716       }
 717       else {
 718         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
 719         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
 720         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
 721
 722         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
 723
 724         // VSX v2i64 only supports non-arithmetic operations.
 725         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 726         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
 727       }
 728
 729       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
 730       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
 731       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
 732       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
 733
 734       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
 735
 736       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
 737       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
 738       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
 739       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
 740
 741       // Vector operation legalization checks the result type of
 742       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
 743       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 744       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
 745       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
 746       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
 747
 748       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
 749       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
 750       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
 751       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
 752
 753       if (Subtarget.hasDirectMove())
 754         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
 755       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
 756
 757       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
 758     }
 759
 760     if (Subtarget.hasP8Altivec()) {
 761       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
 762       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
 763     }
 764
 765     if (Subtarget.hasP9Vector()) {
 766       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
 767       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
 768
 769       // 128 bit shifts can be accomplished via 3 instructions for SHL and
 770       // SRL, but not for SRA because of the instructions available:
 771       // VS{RL} and VS{RL}O.
 772       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
 773       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
 774       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 775     }
 776   }
 777
 778   if (Subtarget.hasQPX()) {
 779     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
 780     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
 781     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
 782     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
 783
 784     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
 785     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
 786
 787     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
 788     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
 789
 790     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
 791     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
 792
 793     if (!Subtarget.useCRBits())
 794       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
 795     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
 796
 797     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
 798     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
 799     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
 800     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
 801     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
 802     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
 803     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
 804
 805     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
 806     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
 807
 808     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
 809     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
 810     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
 811
 812     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
 813     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
 814     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
 815     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
 816     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
 817     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
 818     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
 819     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
 820     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
 821     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
 822
 823     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
 824     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
 825
 826     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
 827     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
 828
 829     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
 830
 831     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
 832     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
 833     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
 834     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
 835
 836     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
 837     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
 838
 839     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
 840     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
 841
 842     if (!Subtarget.useCRBits())
 843       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
 844     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 845
 846     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
 847     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
 848     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
 849     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
 850     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
 851     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 852     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 853
 854     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
 855     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
 856
 857     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
 858     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
 859     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
 860     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
 861     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
 862     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
 863     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
 864     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
 865     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
 866     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
 867
 868     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
 869     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
 870
 871     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
 872     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
 873
 874     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
 875
 876     setOperationAction(ISD::AND , MVT::v4i1, Legal);
 877     setOperationAction(ISD::OR , MVT::v4i1, Legal);
 878     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
 879
 880     if (!Subtarget.useCRBits())
 881       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
 882     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
 883
 884     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
 885     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
 886
 887     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
 888     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
 889     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
 890     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
 891     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
 892     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
 893     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
 894
 895     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
 896     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
 897
 898     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
 899
 900     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
 901     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
 902     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
 903     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
 904
 905     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 906     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
 907     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 908     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 909
 910     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
 911     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
 912
 913     // These need to set FE_INEXACT, and so cannot be vectorized here.
 914     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
 915     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
 916
 917     if (TM.Options.UnsafeFPMath) {
 918       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
 919       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
 920
 921       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 922       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 923     } else {
 924       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
 925       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
 926
 927       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
 928       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
 929     }
 930   }
 931
 932   if (Subtarget.has64BitSupport())
 933     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 934
 935   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
 936
 937   if (!isPPC64) {
 938     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
 939     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 940   }
 941
 942   setBooleanContents(ZeroOrOneBooleanContent);
 943
 944   if (Subtarget.hasAltivec()) {
 945     // Altivec instructions set fields to all zeros or all ones.
 946     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 947   }
 948
 949   if (!isPPC64) {
 950     // These libcalls are not available in 32-bit.
 951     setLibcallName(RTLIB::SHL_I128, nullptr);
 952     setLibcallName(RTLIB::SRL_I128, nullptr);
 953     setLibcallName(RTLIB::SRA_I128, nullptr);
 954   }
 955
 956   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 957
 958   // We have target-specific dag combine patterns for the following nodes:
 959   setTargetDAGCombine(ISD::SHL);
 960   setTargetDAGCombine(ISD::SRA);
 961   setTargetDAGCombine(ISD::SRL);
 962   setTargetDAGCombine(ISD::SINT_TO_FP);
 963   setTargetDAGCombine(ISD::BUILD_VECTOR);
 964   if (Subtarget.hasFPCVT())
 965     setTargetDAGCombine(ISD::UINT_TO_FP);
 966   setTargetDAGCombine(ISD::LOAD);
 967   setTargetDAGCombine(ISD::STORE);
 968   setTargetDAGCombine(ISD::BR_CC);
 969   if (Subtarget.useCRBits())
 970     setTargetDAGCombine(ISD::BRCOND);
 971   setTargetDAGCombine(ISD::BSWAP);
 972   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 973   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
 974   setTargetDAGCombine(ISD::INTRINSIC_VOID);
 975
 976   setTargetDAGCombine(ISD::SIGN_EXTEND);
 977   setTargetDAGCombine(ISD::ZERO_EXTEND);
 978   setTargetDAGCombine(ISD::ANY_EXTEND);
 979
 980   if (Subtarget.useCRBits()) {
 981     setTargetDAGCombine(ISD::TRUNCATE);
 982     setTargetDAGCombine(ISD::SETCC);
 983     setTargetDAGCombine(ISD::SELECT_CC);
 984   }
 985
 986   // Use reciprocal estimates.
 987   if (TM.Options.UnsafeFPMath) {
 988     setTargetDAGCombine(ISD::FDIV);
 989     setTargetDAGCombine(ISD::FSQRT);
 990   }
 991
 992   // Darwin long double math library functions have $LDBL128 appended.
 993   if (Subtarget.isDarwin()) {
 994     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
 995     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
 996     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
 997     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
 998     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
 999     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1000     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1001     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1002     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1003     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1004   }
1005
1006   // With 32 condition bits, we don't need to sink (and duplicate) compares
1007   // aggressively in CodeGenPrep.
1008   if (Subtarget.useCRBits()) {
1009     setHasMultipleConditionRegisters();
1010     setJumpIsExpensive();
1011   }
1012
1013   setMinFunctionAlignment(2);
1014   if (Subtarget.isDarwin())
1015     setPrefFunctionAlignment(4);
1016
1017   switch (Subtarget.getDarwinDirective()) {
1018   default: break;
1019   case PPC::DIR_970:
1020   case PPC::DIR_A2:
1021   case PPC::DIR_E500mc:
1022   case PPC::DIR_E5500:
1023   case PPC::DIR_PWR4:
1024   case PPC::DIR_PWR5:
1025   case PPC::DIR_PWR5X:
1026   case PPC::DIR_PWR6:
1027   case PPC::DIR_PWR6X:
1028   case PPC::DIR_PWR7:
1029   case PPC::DIR_PWR8:
1030   case PPC::DIR_PWR9:
1031     setPrefFunctionAlignment(4);
1032     setPrefLoopAlignment(4);
1033     break;
1034   }
1035
1036   if (Subtarget.enableMachineScheduler())
1037     setSchedulingPreference(Sched::Source);
1038   else
1039     setSchedulingPreference(Sched::Hybrid);
1040
1041   computeRegisterProperties(STI.getRegisterInfo());
1042
1043   // The Freescale cores do better with aggressive inlining of memcpy and
1044   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1045   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1046       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1047     MaxStoresPerMemset = 32;
1048     MaxStoresPerMemsetOptSize = 16;
1049     MaxStoresPerMemcpy = 32;
1050     MaxStoresPerMemcpyOptSize = 8;
1051     MaxStoresPerMemmove = 32;
1052     MaxStoresPerMemmoveOptSize = 8;
1053   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1054     // The A2 also benefits from (very) aggressive inlining of memcpy and
1055     // friends. The overhead of a the function call, even when warm, can be
1056     // over one hundred cycles.
1057     MaxStoresPerMemset = 128;
1058     MaxStoresPerMemcpy = 128;
1059     MaxStoresPerMemmove = 128;
1060     MaxLoadsPerMemcmp = 128;
1061   } else {
1062     MaxLoadsPerMemcmp = 8;
1063     MaxLoadsPerMemcmpOptSize = 4;
1064   }
1065 }
1066
1067 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1068 /// the desired ByVal argument alignment.
1069 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1070                              unsigned MaxMaxAlign) {
1071   if (MaxAlign == MaxMaxAlign)
1072     return;
1073   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1074     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1075       MaxAlign = 32;
1076     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1077       MaxAlign = 16;
1078   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1079     unsigned EltAlign = 0;
1080     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1081     if (EltAlign > MaxAlign)
1082       MaxAlign = EltAlign;
1083   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1084     for (auto *EltTy : STy->elements()) {
1085       unsigned EltAlign = 0;
1086       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1087       if (EltAlign > MaxAlign)
1088         MaxAlign = EltAlign;
1089       if (MaxAlign == MaxMaxAlign)
1090         break;
1091     }
1092   }
1093 }
1094
1095 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1096 /// function arguments in the caller parameter area.
1097 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1098                                                   const DataLayout &DL) const {
1099   // Darwin passes everything on 4 byte boundary.
1100   if (Subtarget.isDarwin())
1101     return 4;
1102
1103   // 16byte and wider vectors are passed on 16byte boundary.
1104   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1105   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1106   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1107     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1108   return Align;
1109 }
1110
1111 bool PPCTargetLowering::useSoftFloat() const {
1112   return Subtarget.useSoftFloat();
1113 }
1114
1115 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1116   switch ((PPCISD::NodeType)Opcode) {
1117   case PPCISD::FIRST_NUMBER:    break;
1118   case PPCISD::FSEL:            return "PPCISD::FSEL";
1119   case PPCISD::FCFID:           return "PPCISD::FCFID";
1120   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1121   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1122   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1123   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1124   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1125   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1126   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1127   case PPCISD::FRE:             return "PPCISD::FRE";
1128   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1129   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1130   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1131   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1132   case PPCISD::VPERM:           return "PPCISD::VPERM";
1133   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1134   case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
1135   case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
1136   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1137   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1138   case PPCISD::CMPB:            return "PPCISD::CMPB";
1139   case PPCISD::Hi:              return "PPCISD::Hi";
1140   case PPCISD::Lo:              return "PPCISD::Lo";
1141   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1142   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1143   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1144   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1145   case PPCISD::SRL:             return "PPCISD::SRL";
1146   case PPCISD::SRA:             return "PPCISD::SRA";
1147   case PPCISD::SHL:             return "PPCISD::SHL";
1148   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1149   case PPCISD::CALL:            return "PPCISD::CALL";
1150   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1151   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1152   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1153   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1154   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1155   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1156   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1157   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1158   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1159   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1160   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1161   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1162   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1163   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1164   case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1165   case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1166   case PPCISD::VCMP:            return "PPCISD::VCMP";
1167   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1168   case PPCISD::LBRX:            return "PPCISD::LBRX";
1169   case PPCISD::STBRX:           return "PPCISD::STBRX";
1170   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1171   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1172   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1173   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1174   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1175   case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
1176   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1177   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1178   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1179   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1180   case PPCISD::BDZ:             return "PPCISD::BDZ";
1181   case PPCISD::MFFS:            return "PPCISD::MFFS";
1182   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1183   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1184   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1185   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1186   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1187   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1188   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1189   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1190   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1191   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1192   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1193   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1194   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1195   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1196   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1197   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1198   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1199   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1200   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1201   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1202   case PPCISD::SC:              return "PPCISD::SC";
1203   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1204   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1205   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1206   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1207   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1208   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1209   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1210   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1211   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1212   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1213   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1214   }
1215   return nullptr;
1216 }
1217
1218 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1219                                           EVT VT) const {
1220   if (!VT.isVector())
1221     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1222
1223   if (Subtarget.hasQPX())
1224     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1225
1226   return VT.changeVectorElementTypeToInteger();
1227 }
1228
1229 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1230   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1231   return true;
1232 }
1233
1234 //===----------------------------------------------------------------------===//
1235 // Node matching predicates, for use by the tblgen matching code.
1236 //===----------------------------------------------------------------------===//
1237
1238 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1239 static bool isFloatingPointZero(SDValue Op) {
1240   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1241     return CFP->getValueAPF().isZero();
1242   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1243     // Maybe this has already been legalized into the constant pool?
1244     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1245       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1246         return CFP->getValueAPF().isZero();
1247   }
1248   return false;
1249 }
1250
1251 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1252 /// true if Op is undef or if it matches the specified value.
1253 static bool isConstantOrUndef(int Op, int Val) {
1254   return Op < 0 || Op == Val;
1255 }
1256
1257 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1258 /// VPKUHUM instruction.
1259 /// The ShuffleKind distinguishes between big-endian operations with
1260 /// two different inputs (0), either-endian operations with two identical
1261 /// inputs (1), and little-endian operations with two different inputs (2).
1262 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1263 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1264                                SelectionDAG &DAG) {
1265   bool IsLE = DAG.getDataLayout().isLittleEndian();
1266   if (ShuffleKind == 0) {
1267     if (IsLE)
1268       return false;
1269     for (unsigned i = 0; i != 16; ++i)
1270       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1271         return false;
1272   } else if (ShuffleKind == 2) {
1273     if (!IsLE)
1274       return false;
1275     for (unsigned i = 0; i != 16; ++i)
1276       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1277         return false;
1278   } else if (ShuffleKind == 1) {
1279     unsigned j = IsLE ? 0 : 1;
1280     for (unsigned i = 0; i != 8; ++i)
1281       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1282           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1283         return false;
1284   }
1285   return true;
1286 }
1287
1288 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1289 /// VPKUWUM instruction.
1290 /// The ShuffleKind distinguishes between big-endian operations with
1291 /// two different inputs (0), either-endian operations with two identical
1292 /// inputs (1), and little-endian operations with two different inputs (2).
1293 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1294 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1295                                SelectionDAG &DAG) {
1296   bool IsLE = DAG.getDataLayout().isLittleEndian();
1297   if (ShuffleKind == 0) {
1298     if (IsLE)
1299       return false;
1300     for (unsigned i = 0; i != 16; i += 2)
1301       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1302           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1303         return false;
1304   } else if (ShuffleKind == 2) {
1305     if (!IsLE)
1306       return false;
1307     for (unsigned i = 0; i != 16; i += 2)
1308       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1309           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1310         return false;
1311   } else if (ShuffleKind == 1) {
1312     unsigned j = IsLE ? 0 : 2;
1313     for (unsigned i = 0; i != 8; i += 2)
1314       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1315           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1316           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1317           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1318         return false;
1319   }
1320   return true;
1321 }
1322
1323 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1324 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1325 /// current subtarget.
1326 ///
1327 /// The ShuffleKind distinguishes between big-endian operations with
1328 /// two different inputs (0), either-endian operations with two identical
1329 /// inputs (1), and little-endian operations with two different inputs (2).
1330 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1331 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1332                                SelectionDAG &DAG) {
1333   const PPCSubtarget& Subtarget =
1334     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1335   if (!Subtarget.hasP8Vector())
1336     return false;
1337
1338   bool IsLE = DAG.getDataLayout().isLittleEndian();
1339   if (ShuffleKind == 0) {
1340     if (IsLE)
1341       return false;
1342     for (unsigned i = 0; i != 16; i += 4)
1343       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1344           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1345           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1346           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1347         return false;
1348   } else if (ShuffleKind == 2) {
1349     if (!IsLE)
1350       return false;
1351     for (unsigned i = 0; i != 16; i += 4)
1352       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1353           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1354           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1355           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1356         return false;
1357   } else if (ShuffleKind == 1) {
1358     unsigned j = IsLE ? 0 : 4;
1359     for (unsigned i = 0; i != 8; i += 4)
1360       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1361           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1362           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1363           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1364           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1365           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1366           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1367           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1368         return false;
1369   }
1370   return true;
1371 }
1372
1373 /// isVMerge - Common function, used to match vmrg* shuffles.
1374 ///
1375 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1376                      unsigned LHSStart, unsigned RHSStart) {
1377   if (N->getValueType(0) != MVT::v16i8)
1378     return false;
1379   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1380          "Unsupported merge size!");
1381
1382   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1383     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1384       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1385                              LHSStart+j+i*UnitSize) ||
1386           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1387                              RHSStart+j+i*UnitSize))
1388         return false;
1389     }
1390   return true;
1391 }
1392
1393 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1394 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1395 /// The ShuffleKind distinguishes between big-endian merges with two
1396 /// different inputs (0), either-endian merges with two identical inputs (1),
1397 /// and little-endian merges with two different inputs (2).  For the latter,
1398 /// the input operands are swapped (see PPCInstrAltivec.td).
1399 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1400                              unsigned ShuffleKind, SelectionDAG &DAG) {
1401   if (DAG.getDataLayout().isLittleEndian()) {
1402     if (ShuffleKind == 1) // unary
1403       return isVMerge(N, UnitSize, 0, 0);
1404     else if (ShuffleKind == 2) // swapped
1405       return isVMerge(N, UnitSize, 0, 16);
1406     else
1407       return false;
1408   } else {
1409     if (ShuffleKind == 1) // unary
1410       return isVMerge(N, UnitSize, 8, 8);
1411     else if (ShuffleKind == 0) // normal
1412       return isVMerge(N, UnitSize, 8, 24);
1413     else
1414       return false;
1415   }
1416 }
1417
1418 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1419 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1420 /// The ShuffleKind distinguishes between big-endian merges with two
1421 /// different inputs (0), either-endian merges with two identical inputs (1),
1422 /// and little-endian merges with two different inputs (2).  For the latter,
1423 /// the input operands are swapped (see PPCInstrAltivec.td).
1424 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1425                              unsigned ShuffleKind, SelectionDAG &DAG) {
1426   if (DAG.getDataLayout().isLittleEndian()) {
1427     if (ShuffleKind == 1) // unary
1428       return isVMerge(N, UnitSize, 8, 8);
1429     else if (ShuffleKind == 2) // swapped
1430       return isVMerge(N, UnitSize, 8, 24);
1431     else
1432       return false;
1433   } else {
1434     if (ShuffleKind == 1) // unary
1435       return isVMerge(N, UnitSize, 0, 0);
1436     else if (ShuffleKind == 0) // normal
1437       return isVMerge(N, UnitSize, 0, 16);
1438     else
1439       return false;
1440   }
1441 }
1442
1443 /**
1444  * \brief Common function used to match vmrgew and vmrgow shuffles
1445  *
1446  * The indexOffset determines whether to look for even or odd words in
1447  * the shuffle mask. This is based on the of the endianness of the target
1448  * machine.
1449  *   - Little Endian:
1450  *     - Use offset of 0 to check for odd elements
1451  *     - Use offset of 4 to check for even elements
1452  *   - Big Endian:
1453  *     - Use offset of 0 to check for even elements
1454  *     - Use offset of 4 to check for odd elements
1455  * A detailed description of the vector element ordering for little endian and
1456  * big endian can be found at
1457  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1458  * Targeting your applications - what little endian and big endian IBM XL C/C++
1459  * compiler differences mean to you
1460  *
1461  * The mask to the shuffle vector instruction specifies the indices of the
1462  * elements from the two input vectors to place in the result. The elements are
1463  * numbered in array-access order, starting with the first vector. These vectors
1464  * are always of type v16i8, thus each vector will contain 16 elements of size
1465  * 8. More info on the shuffle vector can be found in the
1466  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1467  * Language Reference.
1468  *
1469  * The RHSStartValue indicates whether the same input vectors are used (unary)
1470  * or two different input vectors are used, based on the following:
1471  *   - If the instruction uses the same vector for both inputs, the range of the
1472  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1473  *     be 0.
1474  *   - If the instruction has two different vectors then the range of the
1475  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1476  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1477  *     to 31 specify elements in the second vector).
1478  *
1479  * \param[in] N The shuffle vector SD Node to analyze
1480  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1481  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1482  * vector to the shuffle_vector instruction
1483  * \return true iff this shuffle vector represents an even or odd word merge
1484  */
1485 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1486                      unsigned RHSStartValue) {
1487   if (N->getValueType(0) != MVT::v16i8)
1488     return false;
1489
1490   for (unsigned i = 0; i < 2; ++i)
1491     for (unsigned j = 0; j < 4; ++j)
1492       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1493                              i*RHSStartValue+j+IndexOffset) ||
1494           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1495                              i*RHSStartValue+j+IndexOffset+8))
1496         return false;
1497   return true;
1498 }
1499
1500 /**
1501  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1502  * vmrgow instructions.
1503  *
1504  * \param[in] N The shuffle vector SD Node to analyze
1505  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1506  * \param[in] ShuffleKind Identify the type of merge:
1507  *   - 0 = big-endian merge with two different inputs;
1508  *   - 1 = either-endian merge with two identical inputs;
1509  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1510  *     little-endian merges).
1511  * \param[in] DAG The current SelectionDAG
1512  * \return true iff this shuffle mask
1513  */
1514 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1515                               unsigned ShuffleKind, SelectionDAG &DAG) {
1516   if (DAG.getDataLayout().isLittleEndian()) {
1517     unsigned indexOffset = CheckEven ? 4 : 0;
1518     if (ShuffleKind == 1) // Unary
1519       return isVMerge(N, indexOffset, 0);
1520     else if (ShuffleKind == 2) // swapped
1521       return isVMerge(N, indexOffset, 16);
1522     else
1523       return false;
1524   }
1525   else {
1526     unsigned indexOffset = CheckEven ? 0 : 4;
1527     if (ShuffleKind == 1) // Unary
1528       return isVMerge(N, indexOffset, 0);
1529     else if (ShuffleKind == 0) // Normal
1530       return isVMerge(N, indexOffset, 16);
1531     else
1532       return false;
1533   }
1534   return false;
1535 }
1536
1537 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1538 /// amount, otherwise return -1.
1539 /// The ShuffleKind distinguishes between big-endian operations with two
1540 /// different inputs (0), either-endian operations with two identical inputs
1541 /// (1), and little-endian operations with two different inputs (2).  For the
1542 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1543 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1544                              SelectionDAG &DAG) {
1545   if (N->getValueType(0) != MVT::v16i8)
1546     return -1;
1547
1548   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1549
1550   // Find the first non-undef value in the shuffle mask.
1551   unsigned i;
1552   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1553     /*search*/;
1554
1555   if (i == 16) return -1;  // all undef.
1556
1557   // Otherwise, check to see if the rest of the elements are consecutively
1558   // numbered from this value.
1559   unsigned ShiftAmt = SVOp->getMaskElt(i);
1560   if (ShiftAmt < i) return -1;
1561
1562   ShiftAmt -= i;
1563   bool isLE = DAG.getDataLayout().isLittleEndian();
1564
1565   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1566     // Check the rest of the elements to see if they are consecutive.
1567     for (++i; i != 16; ++i)
1568       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1569         return -1;
1570   } else if (ShuffleKind == 1) {
1571     // Check the rest of the elements to see if they are consecutive.
1572     for (++i; i != 16; ++i)
1573       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1574         return -1;
1575   } else
1576     return -1;
1577
1578   if (isLE)
1579     ShiftAmt = 16 - ShiftAmt;
1580
1581   return ShiftAmt;
1582 }
1583
1584 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1585 /// specifies a splat of a single element that is suitable for input to
1586 /// VSPLTB/VSPLTH/VSPLTW.
1587 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1588   assert(N->getValueType(0) == MVT::v16i8 &&
1589          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1590
1591   // The consecutive indices need to specify an element, not part of two
1592   // different elements.  So abandon ship early if this isn't the case.
1593   if (N->getMaskElt(0) % EltSize != 0)
1594     return false;
1595
1596   // This is a splat operation if each element of the permute is the same, and
1597   // if the value doesn't reference the second vector.
1598   unsigned ElementBase = N->getMaskElt(0);
1599
1600   // FIXME: Handle UNDEF elements too!
1601   if (ElementBase >= 16)
1602     return false;
1603
1604   // Check that the indices are consecutive, in the case of a multi-byte element
1605   // splatted with a v16i8 mask.
1606   for (unsigned i = 1; i != EltSize; ++i)
1607     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1608       return false;
1609
1610   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1611     if (N->getMaskElt(i) < 0) continue;
1612     for (unsigned j = 0; j != EltSize; ++j)
1613       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1614         return false;
1615   }
1616   return true;
1617 }
1618
1619 /// Check that the mask is shuffling N byte elements. Within each N byte
1620 /// element of the mask, the indices could be either in increasing or
1621 /// decreasing order as long as they are consecutive.
1622 /// \param[in] N the shuffle vector SD Node to analyze
1623 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1624 /// Word/DoubleWord/QuadWord).
1625 /// \param[in] StepLen the delta indices number among the N byte element, if
1626 /// the mask is in increasing/decreasing order then it is 1/-1.
1627 /// \return true iff the mask is shuffling N byte elements.
1628 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1629                                    int StepLen) {
1630   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1631          "Unexpected element width.");
1632   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1633
1634   unsigned NumOfElem = 16 / Width;
1635   unsigned MaskVal[16]; //  Width is never greater than 16
1636   for (unsigned i = 0; i < NumOfElem; ++i) {
1637     MaskVal[0] = N->getMaskElt(i * Width);
1638     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1639       return false;
1640     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1641       return false;
1642     }
1643
1644     for (unsigned int j = 1; j < Width; ++j) {
1645       MaskVal[j] = N->getMaskElt(i * Width + j);
1646       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1647         return false;
1648       }
1649     }
1650   }
1651
1652   return true;
1653 }
1654
1655 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1656                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1657   if (!isNByteElemShuffleMask(N, 4, 1))
1658     return false;
1659
1660   // Now we look at mask elements 0,4,8,12
1661   unsigned M0 = N->getMaskElt(0) / 4;
1662   unsigned M1 = N->getMaskElt(4) / 4;
1663   unsigned M2 = N->getMaskElt(8) / 4;
1664   unsigned M3 = N->getMaskElt(12) / 4;
1665   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1666   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1667
1668   // Below, let H and L be arbitrary elements of the shuffle mask
1669   // where H is in the range [4,7] and L is in the range [0,3].
1670   // H, 1, 2, 3 or L, 5, 6, 7
1671   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1672       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1673     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1674     InsertAtByte = IsLE ? 12 : 0;
1675     Swap = M0 < 4;
1676     return true;
1677   }
1678   // 0, H, 2, 3 or 4, L, 6, 7
1679   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1680       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1681     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1682     InsertAtByte = IsLE ? 8 : 4;
1683     Swap = M1 < 4;
1684     return true;
1685   }
1686   // 0, 1, H, 3 or 4, 5, L, 7
1687   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1688       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1689     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1690     InsertAtByte = IsLE ? 4 : 8;
1691     Swap = M2 < 4;
1692     return true;
1693   }
1694   // 0, 1, 2, H or 4, 5, 6, L
1695   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1696       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1697     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1698     InsertAtByte = IsLE ? 0 : 12;
1699     Swap = M3 < 4;
1700     return true;
1701   }
1702
1703   // If both vector operands for the shuffle are the same vector, the mask will
1704   // contain only elements from the first one and the second one will be undef.
1705   if (N->getOperand(1).isUndef()) {
1706     ShiftElts = 0;
1707     Swap = true;
1708     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1709     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1710       InsertAtByte = IsLE ? 12 : 0;
1711       return true;
1712     }
1713     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1714       InsertAtByte = IsLE ? 8 : 4;
1715       return true;
1716     }
1717     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1718       InsertAtByte = IsLE ? 4 : 8;
1719       return true;
1720     }
1721     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1722       InsertAtByte = IsLE ? 0 : 12;
1723       return true;
1724     }
1725   }
1726
1727   return false;
1728 }
1729
1730 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1731                                bool &Swap, bool IsLE) {
1732   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1733   // Ensure each byte index of the word is consecutive.
1734   if (!isNByteElemShuffleMask(N, 4, 1))
1735     return false;
1736
1737   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1738   unsigned M0 = N->getMaskElt(0) / 4;
1739   unsigned M1 = N->getMaskElt(4) / 4;
1740   unsigned M2 = N->getMaskElt(8) / 4;
1741   unsigned M3 = N->getMaskElt(12) / 4;
1742
1743   // If both vector operands for the shuffle are the same vector, the mask will
1744   // contain only elements from the first one and the second one will be undef.
1745   if (N->getOperand(1).isUndef()) {
1746     assert(M0 < 4 && "Indexing into an undef vector?");
1747     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1748       return false;
1749
1750     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1751     Swap = false;
1752     return true;
1753   }
1754
1755   // Ensure each word index of the ShuffleVector Mask is consecutive.
1756   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1757     return false;
1758
1759   if (IsLE) {
1760     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1761       // Input vectors don't need to be swapped if the leading element
1762       // of the result is one of the 3 left elements of the second vector
1763       // (or if there is no shift to be done at all).
1764       Swap = false;
1765       ShiftElts = (8 - M0) % 8;
1766     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1767       // Input vectors need to be swapped if the leading element
1768       // of the result is one of the 3 left elements of the first vector
1769       // (or if we're shifting by 4 - thereby simply swapping the vectors).
1770       Swap = true;
1771       ShiftElts = (4 - M0) % 4;
1772     }
1773
1774     return true;
1775   } else {                                          // BE
1776     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1777       // Input vectors don't need to be swapped if the leading element
1778       // of the result is one of the 4 elements of the first vector.
1779       Swap = false;
1780       ShiftElts = M0;
1781     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1782       // Input vectors need to be swapped if the leading element
1783       // of the result is one of the 4 elements of the right vector.
1784       Swap = true;
1785       ShiftElts = M0 - 4;
1786     }
1787
1788     return true;
1789   }
1790 }
1791
1792 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1793   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1794
1795   if (!isNByteElemShuffleMask(N, Width, -1))
1796     return false;
1797
1798   for (int i = 0; i < 16; i += Width)
1799     if (N->getMaskElt(i) != i + Width - 1)
1800       return false;
1801
1802   return true;
1803 }
1804
1805 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
1806   return isXXBRShuffleMaskHelper(N, 2);
1807 }
1808
1809 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
1810   return isXXBRShuffleMaskHelper(N, 4);
1811 }
1812
1813 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
1814   return isXXBRShuffleMaskHelper(N, 8);
1815 }
1816
1817 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
1818   return isXXBRShuffleMaskHelper(N, 16);
1819 }
1820
1821 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1822 /// if the inputs to the instruction should be swapped and set \p DM to the
1823 /// value for the immediate.
1824 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1825 /// AND element 0 of the result comes from the first input (LE) or second input
1826 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1827 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
1828 /// mask.
1829 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
1830                                bool &Swap, bool IsLE) {
1831   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1832
1833   // Ensure each byte index of the double word is consecutive.
1834   if (!isNByteElemShuffleMask(N, 8, 1))
1835     return false;
1836
1837   unsigned M0 = N->getMaskElt(0) / 8;
1838   unsigned M1 = N->getMaskElt(8) / 8;
1839   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
1840
1841   // If both vector operands for the shuffle are the same vector, the mask will
1842   // contain only elements from the first one and the second one will be undef.
1843   if (N->getOperand(1).isUndef()) {
1844     if ((M0 | M1) < 2) {
1845       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
1846       Swap = false;
1847       return true;
1848     } else
1849       return false;
1850   }
1851
1852   if (IsLE) {
1853     if (M0 > 1 && M1 < 2) {
1854       Swap = false;
1855     } else if (M0 < 2 && M1 > 1) {
1856       M0 = (M0 + 2) % 4;
1857       M1 = (M1 + 2) % 4;
1858       Swap = true;
1859     } else
1860       return false;
1861
1862     // Note: if control flow comes here that means Swap is already set above
1863     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
1864     return true;
1865   } else { // BE
1866     if (M0 < 2 && M1 > 1) {
1867       Swap = false;
1868     } else if (M0 > 1 && M1 < 2) {
1869       M0 = (M0 + 2) % 4;
1870       M1 = (M1 + 2) % 4;
1871       Swap = true;
1872     } else
1873       return false;
1874
1875     // Note: if control flow comes here that means Swap is already set above
1876     DM = (M0 << 1) + (M1 & 1);
1877     return true;
1878   }
1879 }
1880
1881
1882 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1883 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1884 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1885                                 SelectionDAG &DAG) {
1886   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1887   assert(isSplatShuffleMask(SVOp, EltSize));
1888   if (DAG.getDataLayout().isLittleEndian())
1889     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1890   else
1891     return SVOp->getMaskElt(0) / EltSize;
1892 }
1893
1894 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1895 /// by using a vspltis[bhw] instruction of the specified element size, return
1896 /// the constant being splatted.  The ByteSize field indicates the number of
1897 /// bytes of each element [124] -> [bhw].
1898 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1899   SDValue OpVal(nullptr, 0);
1900
1901   // If ByteSize of the splat is bigger than the element size of the
1902   // build_vector, then we have a case where we are checking for a splat where
1903   // multiple elements of the buildvector are folded together into a single
1904   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
1905   unsigned EltSize = 16/N->getNumOperands();
1906   if (EltSize < ByteSize) {
1907     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1908     SDValue UniquedVals[4];
1909     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1910
1911     // See if all of the elements in the buildvector agree across.
1912     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1913       if (N->getOperand(i).isUndef()) continue;
1914       // If the element isn't a constant, bail fully out.
1915       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1916
1917       if (!UniquedVals[i&(Multiple-1)].getNode())
1918         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1919       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1920         return SDValue();  // no match.
1921     }
1922
1923     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1924     // either constant or undef values that are identical for each chunk.  See
1925     // if these chunks can form into a larger vspltis*.
1926
1927     // Check to see if all of the leading entries are either 0 or -1.  If
1928     // neither, then this won't fit into the immediate field.
1929     bool LeadingZero = true;
1930     bool LeadingOnes = true;
1931     for (unsigned i = 0; i != Multiple-1; ++i) {
1932       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1933
1934       LeadingZero &= isNullConstant(UniquedVals[i]);
1935       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
1936     }
1937     // Finally, check the least significant entry.
1938     if (LeadingZero) {
1939       if (!UniquedVals[Multiple-1].getNode())
1940         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
1941       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1942       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
1943         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1944     }
1945     if (LeadingOnes) {
1946       if (!UniquedVals[Multiple-1].getNode())
1947         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1948       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1949       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1950         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1951     }
1952
1953     return SDValue();
1954   }
1955
1956   // Check to see if this buildvec has a single non-undef value in its elements.
1957   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1958     if (N->getOperand(i).isUndef()) continue;
1959     if (!OpVal.getNode())
1960       OpVal = N->getOperand(i);
1961     else if (OpVal != N->getOperand(i))
1962       return SDValue();
1963   }
1964
1965   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1966
1967   unsigned ValSizeInBytes = EltSize;
1968   uint64_t Value = 0;
1969   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1970     Value = CN->getZExtValue();
1971   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1972     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1973     Value = FloatToBits(CN->getValueAPF().convertToFloat());
1974   }
1975
1976   // If the splat value is larger than the element value, then we can never do
1977   // this splat.  The only case that we could fit the replicated bits into our
1978   // immediate field for would be zero, and we prefer to use vxor for it.
1979   if (ValSizeInBytes < ByteSize) return SDValue();
1980
1981   // If the element value is larger than the splat value, check if it consists
1982   // of a repeated bit pattern of size ByteSize.
1983   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1984     return SDValue();
1985
1986   // Properly sign extend the value.
1987   int MaskVal = SignExtend32(Value, ByteSize * 8);
1988
1989   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1990   if (MaskVal == 0) return SDValue();
1991
1992   // Finally, if this value fits in a 5 bit sext field, return it
1993   if (SignExtend32<5>(MaskVal) == MaskVal)
1994     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1995   return SDValue();
1996 }
1997
1998 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1999 /// amount, otherwise return -1.
2000 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2001   EVT VT = N->getValueType(0);
2002   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2003     return -1;
2004
2005   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2006
2007   // Find the first non-undef value in the shuffle mask.
2008   unsigned i;
2009   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2010     /*search*/;
2011
2012   if (i == 4) return -1;  // all undef.
2013
2014   // Otherwise, check to see if the rest of the elements are consecutively
2015   // numbered from this value.
2016   unsigned ShiftAmt = SVOp->getMaskElt(i);
2017   if (ShiftAmt < i) return -1;
2018   ShiftAmt -= i;
2019
2020   // Check the rest of the elements to see if they are consecutive.
2021   for (++i; i != 4; ++i)
2022     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2023       return -1;
2024
2025   return ShiftAmt;
2026 }
2027
2028 //===----------------------------------------------------------------------===//
2029 //  Addressing Mode Selection
2030 //===----------------------------------------------------------------------===//
2031
2032 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2033 /// or 64-bit immediate, and if the value can be accurately represented as a
2034 /// sign extension from a 16-bit value.  If so, this returns true and the
2035 /// immediate.
2036 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2037   if (!isa<ConstantSDNode>(N))
2038     return false;
2039
2040   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2041   if (N->getValueType(0) == MVT::i32)
2042     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2043   else
2044     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2045 }
2046 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2047   return isIntS16Immediate(Op.getNode(), Imm);
2048 }
2049
2050 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2051 /// can be represented as an indexed [r+r] operation.  Returns false if it
2052 /// can be more efficiently represented with [r+imm].
2053 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2054                                             SDValue &Index,
2055                                             SelectionDAG &DAG) const {
2056   int16_t imm = 0;
2057   if (N.getOpcode() == ISD::ADD) {
2058     if (isIntS16Immediate(N.getOperand(1), imm))
2059       return false;    // r+i
2060     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2061       return false;    // r+i
2062
2063     Base = N.getOperand(0);
2064     Index = N.getOperand(1);
2065     return true;
2066   } else if (N.getOpcode() == ISD::OR) {
2067     if (isIntS16Immediate(N.getOperand(1), imm))
2068       return false;    // r+i can fold it if we can.
2069
2070     // If this is an or of disjoint bitfields, we can codegen this as an add
2071     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2072     // disjoint.
2073     KnownBits LHSKnown, RHSKnown;
2074     DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2075
2076     if (LHSKnown.Zero.getBoolValue()) {
2077       DAG.computeKnownBits(N.getOperand(1), RHSKnown);
2078       // If all of the bits are known zero on the LHS or RHS, the add won't
2079       // carry.
2080       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2081         Base = N.getOperand(0);
2082         Index = N.getOperand(1);
2083         return true;
2084       }
2085     }
2086   }
2087
2088   return false;
2089 }
2090
2091 // If we happen to be doing an i64 load or store into a stack slot that has
2092 // less than a 4-byte alignment, then the frame-index elimination may need to
2093 // use an indexed load or store instruction (because the offset may not be a
2094 // multiple of 4). The extra register needed to hold the offset comes from the
2095 // register scavenger, and it is possible that the scavenger will need to use
2096 // an emergency spill slot. As a result, we need to make sure that a spill slot
2097 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2098 // stack slot.
2099 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2100   // FIXME: This does not handle the LWA case.
2101   if (VT != MVT::i64)
2102     return;
2103
2104   // NOTE: We'll exclude negative FIs here, which come from argument
2105   // lowering, because there are no known test cases triggering this problem
2106   // using packed structures (or similar). We can remove this exclusion if
2107   // we find such a test case. The reason why this is so test-case driven is
2108   // because this entire 'fixup' is only to prevent crashes (from the
2109   // register scavenger) on not-really-valid inputs. For example, if we have:
2110   //   %a = alloca i1
2111   //   %b = bitcast i1* %a to i64*
2112   //   store i64* a, i64 b
2113   // then the store should really be marked as 'align 1', but is not. If it
2114   // were marked as 'align 1' then the indexed form would have been
2115   // instruction-selected initially, and the problem this 'fixup' is preventing
2116   // won't happen regardless.
2117   if (FrameIdx < 0)
2118     return;
2119
2120   MachineFunction &MF = DAG.getMachineFunction();
2121   MachineFrameInfo &MFI = MF.getFrameInfo();
2122
2123   unsigned Align = MFI.getObjectAlignment(FrameIdx);
2124   if (Align >= 4)
2125     return;
2126
2127   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2128   FuncInfo->setHasNonRISpills();
2129 }
2130
2131 /// Returns true if the address N can be represented by a base register plus
2132 /// a signed 16-bit displacement [r+imm], and if it is not better
2133 /// represented as reg+reg.  If \p Alignment is non-zero, only accept
2134 /// displacements that are multiples of that value.
2135 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2136                                             SDValue &Base,
2137                                             SelectionDAG &DAG,
2138                                             unsigned Alignment) const {
2139   // FIXME dl should come from parent load or store, not from address
2140   SDLoc dl(N);
2141   // If this can be more profitably realized as r+r, fail.
2142   if (SelectAddressRegReg(N, Disp, Base, DAG))
2143     return false;
2144
2145   if (N.getOpcode() == ISD::ADD) {
2146     int16_t imm = 0;
2147     if (isIntS16Immediate(N.getOperand(1), imm) &&
2148         (!Alignment || (imm % Alignment) == 0)) {
2149       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2150       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2151         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2152         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2153       } else {
2154         Base = N.getOperand(0);
2155       }
2156       return true; // [r+i]
2157     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2158       // Match LOAD (ADD (X, Lo(G))).
2159       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2160              && "Cannot handle constant offsets yet!");
2161       Disp = N.getOperand(1).getOperand(0);  // The global address.
2162       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2163              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2164              Disp.getOpcode() == ISD::TargetConstantPool ||
2165              Disp.getOpcode() == ISD::TargetJumpTable);
2166       Base = N.getOperand(0);
2167       return true;  // [&g+r]
2168     }
2169   } else if (N.getOpcode() == ISD::OR) {
2170     int16_t imm = 0;
2171     if (isIntS16Immediate(N.getOperand(1), imm) &&
2172         (!Alignment || (imm % Alignment) == 0)) {
2173       // If this is an or of disjoint bitfields, we can codegen this as an add
2174       // (for better address arithmetic) if the LHS and RHS of the OR are
2175       // provably disjoint.
2176       KnownBits LHSKnown;
2177       DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2178
2179       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2180         // If all of the bits are known zero on the LHS or RHS, the add won't
2181         // carry.
2182         if (FrameIndexSDNode *FI =
2183               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2184           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2185           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2186         } else {
2187           Base = N.getOperand(0);
2188         }
2189         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2190         return true;
2191       }
2192     }
2193   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2194     // Loading from a constant address.
2195
2196     // If this address fits entirely in a 16-bit sext immediate field, codegen
2197     // this as "d, 0"
2198     int16_t Imm;
2199     if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
2200       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2201       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2202                              CN->getValueType(0));
2203       return true;
2204     }
2205
2206     // Handle 32-bit sext immediates with LIS + addr mode.
2207     if ((CN->getValueType(0) == MVT::i32 ||
2208          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2209         (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
2210       int Addr = (int)CN->getZExtValue();
2211
2212       // Otherwise, break this down into an LIS + disp.
2213       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2214
2215       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2216                                    MVT::i32);
2217       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2218       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2219       return true;
2220     }
2221   }
2222
2223   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2224   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2225     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2226     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2227   } else
2228     Base = N;
2229   return true;      // [r+0]
2230 }
2231
2232 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2233 /// represented as an indexed [r+r] operation.
2234 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2235                                                 SDValue &Index,
2236                                                 SelectionDAG &DAG) const {
2237   // Check to see if we can easily represent this as an [r+r] address.  This
2238   // will fail if it thinks that the address is more profitably represented as
2239   // reg+imm, e.g. where imm = 0.
2240   if (SelectAddressRegReg(N, Base, Index, DAG))
2241     return true;
2242
2243   // If the address is the result of an add, we will utilize the fact that the
2244   // address calculation includes an implicit add.  However, we can reduce
2245   // register pressure if we do not materialize a constant just for use as the
2246   // index register.  We only get rid of the add if it is not an add of a
2247   // value and a 16-bit signed constant and both have a single use.
2248   int16_t imm = 0;
2249   if (N.getOpcode() == ISD::ADD &&
2250       (!isIntS16Immediate(N.getOperand(1), imm) ||
2251        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2252     Base = N.getOperand(0);
2253     Index = N.getOperand(1);
2254     return true;
2255   }
2256
2257   // Otherwise, do it the hard way, using R0 as the base register.
2258   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2259                          N.getValueType());
2260   Index = N;
2261   return true;
2262 }
2263
2264 /// getPreIndexedAddressParts - returns true by value, base pointer and
2265 /// offset pointer and addressing mode by reference if the node's address
2266 /// can be legally represented as pre-indexed load / store address.
2267 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2268                                                   SDValue &Offset,
2269                                                   ISD::MemIndexedMode &AM,
2270                                                   SelectionDAG &DAG) const {
2271   if (DisablePPCPreinc) return false;
2272
2273   bool isLoad = true;
2274   SDValue Ptr;
2275   EVT VT;
2276   unsigned Alignment;
2277   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2278     Ptr = LD->getBasePtr();
2279     VT = LD->getMemoryVT();
2280     Alignment = LD->getAlignment();
2281   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2282     Ptr = ST->getBasePtr();
2283     VT  = ST->getMemoryVT();
2284     Alignment = ST->getAlignment();
2285     isLoad = false;
2286   } else
2287     return false;
2288
2289   // PowerPC doesn't have preinc load/store instructions for vectors (except
2290   // for QPX, which does have preinc r+r forms).
2291   if (VT.isVector()) {
2292     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2293       return false;
2294     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2295       AM = ISD::PRE_INC;
2296       return true;
2297     }
2298   }
2299
2300   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2301     // Common code will reject creating a pre-inc form if the base pointer
2302     // is a frame index, or if N is a store and the base pointer is either
2303     // the same as or a predecessor of the value being stored.  Check for
2304     // those situations here, and try with swapped Base/Offset instead.
2305     bool Swap = false;
2306
2307     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2308       Swap = true;
2309     else if (!isLoad) {
2310       SDValue Val = cast<StoreSDNode>(N)->getValue();
2311       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2312         Swap = true;
2313     }
2314
2315     if (Swap)
2316       std::swap(Base, Offset);
2317
2318     AM = ISD::PRE_INC;
2319     return true;
2320   }
2321
2322   // LDU/STU can only handle immediates that are a multiple of 4.
2323   if (VT != MVT::i64) {
2324     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2325       return false;
2326   } else {
2327     // LDU/STU need an address with at least 4-byte alignment.
2328     if (Alignment < 4)
2329       return false;
2330
2331     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2332       return false;
2333   }
2334
2335   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2336     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2337     // sext i32 to i64 when addr mode is r+i.
2338     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2339         LD->getExtensionType() == ISD::SEXTLOAD &&
2340         isa<ConstantSDNode>(Offset))
2341       return false;
2342   }
2343
2344   AM = ISD::PRE_INC;
2345   return true;
2346 }
2347
2348 //===----------------------------------------------------------------------===//
2349 //  LowerOperation implementation
2350 //===----------------------------------------------------------------------===//
2351
2352 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2353 /// and LoOpFlags to the target MO flags.
2354 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2355                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2356                                const GlobalValue *GV = nullptr) {
2357   HiOpFlags = PPCII::MO_HA;
2358   LoOpFlags = PPCII::MO_LO;
2359
2360   // Don't use the pic base if not in PIC relocation model.
2361   if (IsPIC) {
2362     HiOpFlags |= PPCII::MO_PIC_FLAG;
2363     LoOpFlags |= PPCII::MO_PIC_FLAG;
2364   }
2365
2366   // If this is a reference to a global value that requires a non-lazy-ptr, make
2367   // sure that instruction lowering adds it.
2368   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2369     HiOpFlags |= PPCII::MO_NLP_FLAG;
2370     LoOpFlags |= PPCII::MO_NLP_FLAG;
2371
2372     if (GV->hasHiddenVisibility()) {
2373       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2374       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2375     }
2376   }
2377 }
2378
2379 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2380                              SelectionDAG &DAG) {
2381   SDLoc DL(HiPart);
2382   EVT PtrVT = HiPart.getValueType();
2383   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2384
2385   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2386   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2387
2388   // With PIC, the first instruction is actually "GR+hi(&G)".
2389   if (isPIC)
2390     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2391                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2392
2393   // Generate non-pic code that has direct accesses to the constant pool.
2394   // The address of the global is just (hi(&g)+lo(&g)).
2395   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2396 }
2397
2398 static void setUsesTOCBasePtr(MachineFunction &MF) {
2399   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2400   FuncInfo->setUsesTOCBasePtr();
2401 }
2402
2403 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2404   setUsesTOCBasePtr(DAG.getMachineFunction());
2405 }
2406
2407 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2408                            SDValue GA) {
2409   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2410   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2411                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2412
2413   SDValue Ops[] = { GA, Reg };
2414   return DAG.getMemIntrinsicNode(
2415       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2416       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2417       false, 0);
2418 }
2419
2420 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2421                                              SelectionDAG &DAG) const {
2422   EVT PtrVT = Op.getValueType();
2423   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2424   const Constant *C = CP->getConstVal();
2425
2426   // 64-bit SVR4 ABI code is always position-independent.
2427   // The actual address of the GlobalValue is stored in the TOC.
2428   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2429     setUsesTOCBasePtr(DAG);
2430     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2431     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2432   }
2433
2434   unsigned MOHiFlag, MOLoFlag;
2435   bool IsPIC = isPositionIndependent();
2436   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2437
2438   if (IsPIC && Subtarget.isSVR4ABI()) {
2439     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2440                                            PPCII::MO_PIC_FLAG);
2441     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2442   }
2443
2444   SDValue CPIHi =
2445     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2446   SDValue CPILo =
2447     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2448   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2449 }
2450
2451 // For 64-bit PowerPC, prefer the more compact relative encodings.
2452 // This trades 32 bits per jump table entry for one or two instructions
2453 // on the jump site.
2454 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2455   if (isJumpTableRelative())
2456     return MachineJumpTableInfo::EK_LabelDifference32;
2457
2458   return TargetLowering::getJumpTableEncoding();
2459 }
2460
2461 bool PPCTargetLowering::isJumpTableRelative() const {
2462   if (Subtarget.isPPC64())
2463     return true;
2464   return TargetLowering::isJumpTableRelative();
2465 }
2466
2467 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2468                                                     SelectionDAG &DAG) const {
2469   if (!Subtarget.isPPC64())
2470     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2471
2472   switch (getTargetMachine().getCodeModel()) {
2473   case CodeModel::Default:
2474   case CodeModel::Small:
2475   case CodeModel::Medium:
2476     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2477   default:
2478     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2479                        getPointerTy(DAG.getDataLayout()));
2480   }
2481 }
2482
2483 const MCExpr *
2484 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2485                                                 unsigned JTI,
2486                                                 MCContext &Ctx) const {
2487   if (!Subtarget.isPPC64())
2488     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2489
2490   switch (getTargetMachine().getCodeModel()) {
2491   case CodeModel::Default:
2492   case CodeModel::Small:
2493   case CodeModel::Medium:
2494     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2495   default:
2496     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2497   }
2498 }
2499
2500 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2501   EVT PtrVT = Op.getValueType();
2502   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2503
2504   // 64-bit SVR4 ABI code is always position-independent.
2505   // The actual address of the GlobalValue is stored in the TOC.
2506   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2507     setUsesTOCBasePtr(DAG);
2508     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2509     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2510   }
2511
2512   unsigned MOHiFlag, MOLoFlag;
2513   bool IsPIC = isPositionIndependent();
2514   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2515
2516   if (IsPIC && Subtarget.isSVR4ABI()) {
2517     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2518                                         PPCII::MO_PIC_FLAG);
2519     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2520   }
2521
2522   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2523   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2524   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2525 }
2526
2527 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2528                                              SelectionDAG &DAG) const {
2529   EVT PtrVT = Op.getValueType();
2530   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2531   const BlockAddress *BA = BASDN->getBlockAddress();
2532
2533   // 64-bit SVR4 ABI code is always position-independent.
2534   // The actual BlockAddress is stored in the TOC.
2535   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2536     setUsesTOCBasePtr(DAG);
2537     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2538     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2539   }
2540
2541   unsigned MOHiFlag, MOLoFlag;
2542   bool IsPIC = isPositionIndependent();
2543   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2544   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2545   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2546   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2547 }
2548
2549 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2550                                               SelectionDAG &DAG) const {
2551   // FIXME: TLS addresses currently use medium model code sequences,
2552   // which is the most useful form.  Eventually support for small and
2553   // large models could be added if users need it, at the cost of
2554   // additional complexity.
2555   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2556   if (DAG.getTarget().Options.EmulatedTLS)
2557     return LowerToTLSEmulatedModel(GA, DAG);
2558
2559   SDLoc dl(GA);
2560   const GlobalValue *GV = GA->getGlobal();
2561   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2562   bool is64bit = Subtarget.isPPC64();
2563   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2564   PICLevel::Level picLevel = M->getPICLevel();
2565
2566   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2567
2568   if (Model == TLSModel::LocalExec) {
2569     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2570                                                PPCII::MO_TPREL_HA);
2571     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2572                                                PPCII::MO_TPREL_LO);
2573     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2574                              : DAG.getRegister(PPC::R2, MVT::i32);
2575
2576     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2577     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2578   }
2579
2580   if (Model == TLSModel::InitialExec) {
2581     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2582     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2583                                                 PPCII::MO_TLS);
2584     SDValue GOTPtr;
2585     if (is64bit) {
2586       setUsesTOCBasePtr(DAG);
2587       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2588       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2589                            PtrVT, GOTReg, TGA);
2590     } else
2591       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2592     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2593                                    PtrVT, TGA, GOTPtr);
2594     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2595   }
2596
2597   if (Model == TLSModel::GeneralDynamic) {
2598     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2599     SDValue GOTPtr;
2600     if (is64bit) {
2601       setUsesTOCBasePtr(DAG);
2602       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2603       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2604                                    GOTReg, TGA);
2605     } else {
2606       if (picLevel == PICLevel::SmallPIC)
2607         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2608       else
2609         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2610     }
2611     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2612                        GOTPtr, TGA, TGA);
2613   }
2614
2615   if (Model == TLSModel::LocalDynamic) {
2616     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2617     SDValue GOTPtr;
2618     if (is64bit) {
2619       setUsesTOCBasePtr(DAG);
2620       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2621       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2622                            GOTReg, TGA);
2623     } else {
2624       if (picLevel == PICLevel::SmallPIC)
2625         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2626       else
2627         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2628     }
2629     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2630                                   PtrVT, GOTPtr, TGA, TGA);
2631     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2632                                       PtrVT, TLSAddr, TGA);
2633     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2634   }
2635
2636   llvm_unreachable("Unknown TLS model!");
2637 }
2638
2639 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2640                                               SelectionDAG &DAG) const {
2641   EVT PtrVT = Op.getValueType();
2642   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2643   SDLoc DL(GSDN);
2644   const GlobalValue *GV = GSDN->getGlobal();
2645
2646   // 64-bit SVR4 ABI code is always position-independent.
2647   // The actual address of the GlobalValue is stored in the TOC.
2648   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2649     setUsesTOCBasePtr(DAG);
2650     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2651     return getTOCEntry(DAG, DL, true, GA);
2652   }
2653
2654   unsigned MOHiFlag, MOLoFlag;
2655   bool IsPIC = isPositionIndependent();
2656   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2657
2658   if (IsPIC && Subtarget.isSVR4ABI()) {
2659     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2660                                             GSDN->getOffset(),
2661                                             PPCII::MO_PIC_FLAG);
2662     return getTOCEntry(DAG, DL, false, GA);
2663   }
2664
2665   SDValue GAHi =
2666     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2667   SDValue GALo =
2668     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2669
2670   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2671
2672   // If the global reference is actually to a non-lazy-pointer, we have to do an
2673   // extra load to get the address of the global.
2674   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2675     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2676   return Ptr;
2677 }
2678
2679 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2680   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2681   SDLoc dl(Op);
2682
2683   if (Op.getValueType() == MVT::v2i64) {
2684     // When the operands themselves are v2i64 values, we need to do something
2685     // special because VSX has no underlying comparison operations for these.
2686     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2687       // Equality can be handled by casting to the legal type for Altivec
2688       // comparisons, everything else needs to be expanded.
2689       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2690         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2691                  DAG.getSetCC(dl, MVT::v4i32,
2692                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2693                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2694                    CC));
2695       }
2696
2697       return SDValue();
2698     }
2699
2700     // We handle most of these in the usual way.
2701     return Op;
2702   }
2703
2704   // If we're comparing for equality to zero, expose the fact that this is
2705   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2706   // fold the new nodes.
2707   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2708     return V;
2709
2710   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2711     // Leave comparisons against 0 and -1 alone for now, since they're usually
2712     // optimized.  FIXME: revisit this when we can custom lower all setcc
2713     // optimizations.
2714     if (C->isAllOnesValue() || C->isNullValue())
2715       return SDValue();
2716   }
2717
2718   // If we have an integer seteq/setne, turn it into a compare against zero
2719   // by xor'ing the rhs with the lhs, which is faster than setting a
2720   // condition register, reading it back out, and masking the correct bit.  The
2721   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2722   // the result to other bit-twiddling opportunities.
2723   EVT LHSVT = Op.getOperand(0).getValueType();
2724   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2725     EVT VT = Op.getValueType();
2726     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2727                                 Op.getOperand(1));
2728     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2729   }
2730   return SDValue();
2731 }
2732
2733 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2734   SDNode *Node = Op.getNode();
2735   EVT VT = Node->getValueType(0);
2736   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2737   SDValue InChain = Node->getOperand(0);
2738   SDValue VAListPtr = Node->getOperand(1);
2739   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2740   SDLoc dl(Node);
2741
2742   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2743
2744   // gpr_index
2745   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2746                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
2747   InChain = GprIndex.getValue(1);
2748
2749   if (VT == MVT::i64) {
2750     // Check if GprIndex is even
2751     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2752                                  DAG.getConstant(1, dl, MVT::i32));
2753     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2754                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2755     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2756                                           DAG.getConstant(1, dl, MVT::i32));
2757     // Align GprIndex to be even if it isn't
2758     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2759                            GprIndex);
2760   }
2761
2762   // fpr index is 1 byte after gpr
2763   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2764                                DAG.getConstant(1, dl, MVT::i32));
2765
2766   // fpr
2767   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2768                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
2769   InChain = FprIndex.getValue(1);
2770
2771   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2772                                        DAG.getConstant(8, dl, MVT::i32));
2773
2774   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2775                                         DAG.getConstant(4, dl, MVT::i32));
2776
2777   // areas
2778   SDValue OverflowArea =
2779       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2780   InChain = OverflowArea.getValue(1);
2781
2782   SDValue RegSaveArea =
2783       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2784   InChain = RegSaveArea.getValue(1);
2785
2786   // select overflow_area if index > 8
2787   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2788                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2789
2790   // adjustment constant gpr_index * 4/8
2791   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2792                                     VT.isInteger() ? GprIndex : FprIndex,
2793                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2794                                                     MVT::i32));
2795
2796   // OurReg = RegSaveArea + RegConstant
2797   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2798                                RegConstant);
2799
2800   // Floating types are 32 bytes into RegSaveArea
2801   if (VT.isFloatingPoint())
2802     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2803                          DAG.getConstant(32, dl, MVT::i32));
2804
2805   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2806   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2807                                    VT.isInteger() ? GprIndex : FprIndex,
2808                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2809                                                    MVT::i32));
2810
2811   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2812                               VT.isInteger() ? VAListPtr : FprPtr,
2813                               MachinePointerInfo(SV), MVT::i8);
2814
2815   // determine if we should load from reg_save_area or overflow_area
2816   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2817
2818   // increase overflow_area by 4/8 if gpr/fpr > 8
2819   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2820                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
2821                                           dl, MVT::i32));
2822
2823   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2824                              OverflowAreaPlusN);
2825
2826   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2827                               MachinePointerInfo(), MVT::i32);
2828
2829   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2830 }
2831
2832 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2833   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2834
2835   // We have to copy the entire va_list struct:
2836   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
2837   return DAG.getMemcpy(Op.getOperand(0), Op,
2838                        Op.getOperand(1), Op.getOperand(2),
2839                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2840                        false, MachinePointerInfo(), MachinePointerInfo());
2841 }
2842
2843 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2844                                                   SelectionDAG &DAG) const {
2845   return Op.getOperand(0);
2846 }
2847
2848 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2849                                                 SelectionDAG &DAG) const {
2850   SDValue Chain = Op.getOperand(0);
2851   SDValue Trmp = Op.getOperand(1); // trampoline
2852   SDValue FPtr = Op.getOperand(2); // nested function
2853   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2854   SDLoc dl(Op);
2855
2856   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2857   bool isPPC64 = (PtrVT == MVT::i64);
2858   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2859
2860   TargetLowering::ArgListTy Args;
2861   TargetLowering::ArgListEntry Entry;
2862
2863   Entry.Ty = IntPtrTy;
2864   Entry.Node = Trmp; Args.push_back(Entry);
2865
2866   // TrampSize == (isPPC64 ? 48 : 40);
2867   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2868                                isPPC64 ? MVT::i64 : MVT::i32);
2869   Args.push_back(Entry);
2870
2871   Entry.Node = FPtr; Args.push_back(Entry);
2872   Entry.Node = Nest; Args.push_back(Entry);
2873
2874   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2875   TargetLowering::CallLoweringInfo CLI(DAG);
2876   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2877       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2878       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
2879
2880   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2881   return CallResult.second;
2882 }
2883
2884 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2885   MachineFunction &MF = DAG.getMachineFunction();
2886   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2887   EVT PtrVT = getPointerTy(MF.getDataLayout());
2888
2889   SDLoc dl(Op);
2890
2891   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2892     // vastart just stores the address of the VarArgsFrameIndex slot into the
2893     // memory location argument.
2894     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2895     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2896     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2897                         MachinePointerInfo(SV));
2898   }
2899
2900   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2901   // We suppose the given va_list is already allocated.
2902   //
2903   // typedef struct {
2904   //  char gpr;     /* index into the array of 8 GPRs
2905   //                 * stored in the register save area
2906   //                 * gpr=0 corresponds to r3,
2907   //                 * gpr=1 to r4, etc.
2908   //                 */
2909   //  char fpr;     /* index into the array of 8 FPRs
2910   //                 * stored in the register save area
2911   //                 * fpr=0 corresponds to f1,
2912   //                 * fpr=1 to f2, etc.
2913   //                 */
2914   //  char *overflow_arg_area;
2915   //                /* location on stack that holds
2916   //                 * the next overflow argument
2917   //                 */
2918   //  char *reg_save_area;
2919   //               /* where r3:r10 and f1:f8 (if saved)
2920   //                * are stored
2921   //                */
2922   // } va_list[1];
2923
2924   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2925   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2926   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2927                                             PtrVT);
2928   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2929                                  PtrVT);
2930
2931   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2932   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2933
2934   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2935   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2936
2937   uint64_t FPROffset = 1;
2938   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2939
2940   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2941
2942   // Store first byte : number of int regs
2943   SDValue firstStore =
2944       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2945                         MachinePointerInfo(SV), MVT::i8);
2946   uint64_t nextOffset = FPROffset;
2947   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2948                                   ConstFPROffset);
2949
2950   // Store second byte : number of float regs
2951   SDValue secondStore =
2952       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2953                         MachinePointerInfo(SV, nextOffset), MVT::i8);
2954   nextOffset += StackOffset;
2955   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2956
2957   // Store second word : arguments given on stack
2958   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2959                                     MachinePointerInfo(SV, nextOffset));
2960   nextOffset += FrameOffset;
2961   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2962
2963   // Store third word : arguments given in registers
2964   return DAG.getStore(thirdStore, dl, FR, nextPtr,
2965                       MachinePointerInfo(SV, nextOffset));
2966 }
2967
2968 #include "PPCGenCallingConv.inc"
2969
2970 // Function whose sole purpose is to kill compiler warnings
2971 // stemming from unused functions included from PPCGenCallingConv.inc.
2972 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2973   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2974 }
2975
2976 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2977                                       CCValAssign::LocInfo &LocInfo,
2978                                       ISD::ArgFlagsTy &ArgFlags,
2979                                       CCState &State) {
2980   return true;
2981 }
2982
2983 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2984                                              MVT &LocVT,
2985                                              CCValAssign::LocInfo &LocInfo,
2986                                              ISD::ArgFlagsTy &ArgFlags,
2987                                              CCState &State) {
2988   static const MCPhysReg ArgRegs[] = {
2989     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2990     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2991   };
2992   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2993
2994   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2995
2996   // Skip one register if the first unallocated register has an even register
2997   // number and there are still argument registers available which have not been
2998   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2999   // need to skip a register if RegNum is odd.
3000   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
3001     State.AllocateReg(ArgRegs[RegNum]);
3002   }
3003
3004   // Always return false here, as this function only makes sure that the first
3005   // unallocated register has an odd register number and does not actually
3006   // allocate a register for the current argument.
3007   return false;
3008 }
3009
3010 bool
3011 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3012                                                   MVT &LocVT,
3013                                                   CCValAssign::LocInfo &LocInfo,
3014                                                   ISD::ArgFlagsTy &ArgFlags,
3015                                                   CCState &State) {
3016   static const MCPhysReg ArgRegs[] = {
3017     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3018     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3019   };
3020   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3021
3022   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3023   int RegsLeft = NumArgRegs - RegNum;
3024
3025   // Skip if there is not enough registers left for long double type (4 gpr regs
3026   // in soft float mode) and put long double argument on the stack.
3027   if (RegNum != NumArgRegs && RegsLeft < 4) {
3028     for (int i = 0; i < RegsLeft; i++) {
3029       State.AllocateReg(ArgRegs[RegNum + i]);
3030     }
3031   }
3032
3033   return false;
3034 }
3035
3036 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3037                                                MVT &LocVT,
3038                                                CCValAssign::LocInfo &LocInfo,
3039                                                ISD::ArgFlagsTy &ArgFlags,
3040                                                CCState &State) {
3041   static const MCPhysReg ArgRegs[] = {
3042     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3043     PPC::F8
3044   };
3045
3046   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3047
3048   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3049
3050   // If there is only one Floating-point register left we need to put both f64
3051   // values of a split ppc_fp128 value on the stack.
3052   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3053     State.AllocateReg(ArgRegs[RegNum]);
3054   }
3055
3056   // Always return false here, as this function only makes sure that the two f64
3057   // values a ppc_fp128 value is split into are both passed in registers or both
3058   // passed on the stack and does not actually allocate a register for the
3059   // current argument.
3060   return false;
3061 }
3062
3063 /// FPR - The set of FP registers that should be allocated for arguments,
3064 /// on Darwin.
3065 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3066                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3067                                 PPC::F11, PPC::F12, PPC::F13};
3068
3069 /// QFPR - The set of QPX registers that should be allocated for arguments.
3070 static const MCPhysReg QFPR[] = {
3071     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3072     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3073
3074 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3075 /// the stack.
3076 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3077                                        unsigned PtrByteSize) {
3078   unsigned ArgSize = ArgVT.getStoreSize();
3079   if (Flags.isByVal())
3080     ArgSize = Flags.getByValSize();
3081
3082   // Round up to multiples of the pointer size, except for array members,
3083   // which are always packed.
3084   if (!Flags.isInConsecutiveRegs())
3085     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3086
3087   return ArgSize;
3088 }
3089
3090 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3091 /// on the stack.
3092 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3093                                             ISD::ArgFlagsTy Flags,
3094                                             unsigned PtrByteSize) {
3095   unsigned Align = PtrByteSize;
3096
3097   // Altivec parameters are padded to a 16 byte boundary.
3098   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3099       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3100       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3101       ArgVT == MVT::v1i128)
3102     Align = 16;
3103   // QPX vector types stored in double-precision are padded to a 32 byte
3104   // boundary.
3105   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3106     Align = 32;
3107
3108   // ByVal parameters are aligned as requested.
3109   if (Flags.isByVal()) {
3110     unsigned BVAlign = Flags.getByValAlign();
3111     if (BVAlign > PtrByteSize) {
3112       if (BVAlign % PtrByteSize != 0)
3113           llvm_unreachable(
3114             "ByVal alignment is not a multiple of the pointer size");
3115
3116       Align = BVAlign;
3117     }
3118   }
3119
3120   // Array members are always packed to their original alignment.
3121   if (Flags.isInConsecutiveRegs()) {
3122     // If the array member was split into multiple registers, the first
3123     // needs to be aligned to the size of the full type.  (Except for
3124     // ppcf128, which is only aligned as its f64 components.)
3125     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3126       Align = OrigVT.getStoreSize();
3127     else
3128       Align = ArgVT.getStoreSize();
3129   }
3130
3131   return Align;
3132 }
3133
3134 /// CalculateStackSlotUsed - Return whether this argument will use its
3135 /// stack slot (instead of being passed in registers).  ArgOffset,
3136 /// AvailableFPRs, and AvailableVRs must hold the current argument
3137 /// position, and will be updated to account for this argument.
3138 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3139                                    ISD::ArgFlagsTy Flags,
3140                                    unsigned PtrByteSize,
3141                                    unsigned LinkageSize,
3142                                    unsigned ParamAreaSize,
3143                                    unsigned &ArgOffset,
3144                                    unsigned &AvailableFPRs,
3145                                    unsigned &AvailableVRs, bool HasQPX) {
3146   bool UseMemory = false;
3147
3148   // Respect alignment of argument on the stack.
3149   unsigned Align =
3150     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3151   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3152   // If there's no space left in the argument save area, we must
3153   // use memory (this check also catches zero-sized arguments).
3154   if (ArgOffset >= LinkageSize + ParamAreaSize)
3155     UseMemory = true;
3156
3157   // Allocate argument on the stack.
3158   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3159   if (Flags.isInConsecutiveRegsLast())
3160     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3161   // If we overran the argument save area, we must use memory
3162   // (this check catches arguments passed partially in memory)
3163   if (ArgOffset > LinkageSize + ParamAreaSize)
3164     UseMemory = true;
3165
3166   // However, if the argument is actually passed in an FPR or a VR,
3167   // we don't use memory after all.
3168   if (!Flags.isByVal()) {
3169     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3170         // QPX registers overlap with the scalar FP registers.
3171         (HasQPX && (ArgVT == MVT::v4f32 ||
3172                     ArgVT == MVT::v4f64 ||
3173                     ArgVT == MVT::v4i1)))
3174       if (AvailableFPRs > 0) {
3175         --AvailableFPRs;
3176         return false;
3177       }
3178     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3179         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3180         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3181         ArgVT == MVT::v1i128)
3182       if (AvailableVRs > 0) {
3183         --AvailableVRs;
3184         return false;
3185       }
3186   }
3187
3188   return UseMemory;
3189 }
3190
3191 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3192 /// ensure minimum alignment required for target.
3193 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3194                                      unsigned NumBytes) {
3195   unsigned TargetAlign = Lowering->getStackAlignment();
3196   unsigned AlignMask = TargetAlign - 1;
3197   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3198   return NumBytes;
3199 }
3200
3201 SDValue PPCTargetLowering::LowerFormalArguments(
3202     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3203     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3204     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3205   if (Subtarget.isSVR4ABI()) {
3206     if (Subtarget.isPPC64())
3207       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3208                                          dl, DAG, InVals);
3209     else
3210       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3211                                          dl, DAG, InVals);
3212   } else {
3213     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3214                                        dl, DAG, InVals);
3215   }
3216 }
3217
3218 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3219     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3220     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3221     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3222
3223   // 32-bit SVR4 ABI Stack Frame Layout:
3224   //              +-----------------------------------+
3225   //        +-->  |            Back chain             |
3226   //        |     +-----------------------------------+
3227   //        |     | Floating-point register save area |
3228   //        |     +-----------------------------------+
3229   //        |     |    General register save area     |
3230   //        |     +-----------------------------------+
3231   //        |     |          CR save word             |
3232   //        |     +-----------------------------------+
3233   //        |     |         VRSAVE save word          |
3234   //        |     +-----------------------------------+
3235   //        |     |         Alignment padding         |
3236   //        |     +-----------------------------------+
3237   //        |     |     Vector register save area     |
3238   //        |     +-----------------------------------+
3239   //        |     |       Local variable space        |
3240   //        |     +-----------------------------------+
3241   //        |     |        Parameter list area        |
3242   //        |     +-----------------------------------+
3243   //        |     |           LR save word            |
3244   //        |     +-----------------------------------+
3245   // SP-->  +---  |            Back chain             |
3246   //              +-----------------------------------+
3247   //
3248   // Specifications:
3249   //   System V Application Binary Interface PowerPC Processor Supplement
3250   //   AltiVec Technology Programming Interface Manual
3251
3252   MachineFunction &MF = DAG.getMachineFunction();
3253   MachineFrameInfo &MFI = MF.getFrameInfo();
3254   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3255
3256   EVT PtrVT = getPointerTy(MF.getDataLayout());
3257   // Potential tail calls could cause overwriting of argument stack slots.
3258   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3259                        (CallConv == CallingConv::Fast));
3260   unsigned PtrByteSize = 4;
3261
3262   // Assign locations to all of the incoming arguments.
3263   SmallVector<CCValAssign, 16> ArgLocs;
3264   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3265                  *DAG.getContext());
3266
3267   // Reserve space for the linkage area on the stack.
3268   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3269   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3270   if (useSoftFloat())
3271     CCInfo.PreAnalyzeFormalArguments(Ins);
3272
3273   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3274   CCInfo.clearWasPPCF128();
3275
3276   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3277     CCValAssign &VA = ArgLocs[i];
3278
3279     // Arguments stored in registers.
3280     if (VA.isRegLoc()) {
3281       const TargetRegisterClass *RC;
3282       EVT ValVT = VA.getValVT();
3283
3284       switch (ValVT.getSimpleVT().SimpleTy) {
3285         default:
3286           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3287         case MVT::i1:
3288         case MVT::i32:
3289           RC = &PPC::GPRCRegClass;
3290           break;
3291         case MVT::f32:
3292           if (Subtarget.hasP8Vector())
3293             RC = &PPC::VSSRCRegClass;
3294           else
3295             RC = &PPC::F4RCRegClass;
3296           break;
3297         case MVT::f64:
3298           if (Subtarget.hasVSX())
3299             RC = &PPC::VSFRCRegClass;
3300           else
3301             RC = &PPC::F8RCRegClass;
3302           break;
3303         case MVT::v16i8:
3304         case MVT::v8i16:
3305         case MVT::v4i32:
3306           RC = &PPC::VRRCRegClass;
3307           break;
3308         case MVT::v4f32:
3309           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3310           break;
3311         case MVT::v2f64:
3312         case MVT::v2i64:
3313           RC = &PPC::VRRCRegClass;
3314           break;
3315         case MVT::v4f64:
3316           RC = &PPC::QFRCRegClass;
3317           break;
3318         case MVT::v4i1:
3319           RC = &PPC::QBRCRegClass;
3320           break;
3321       }
3322
3323       // Transform the arguments stored in physical registers into virtual ones.
3324       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3325       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3326                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
3327
3328       if (ValVT == MVT::i1)
3329         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3330
3331       InVals.push_back(ArgValue);
3332     } else {
3333       // Argument stored in memory.
3334       assert(VA.isMemLoc());
3335
3336       unsigned ArgSize = VA.getLocVT().getStoreSize();
3337       int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3338                                      isImmutable);
3339
3340       // Create load nodes to retrieve arguments from the stack.
3341       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3342       InVals.push_back(
3343           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3344     }
3345   }
3346
3347   // Assign locations to all of the incoming aggregate by value arguments.
3348   // Aggregates passed by value are stored in the local variable space of the
3349   // caller's stack frame, right above the parameter list area.
3350   SmallVector<CCValAssign, 16> ByValArgLocs;
3351   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3352                       ByValArgLocs, *DAG.getContext());
3353
3354   // Reserve stack space for the allocations in CCInfo.
3355   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3356
3357   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3358
3359   // Area that is at least reserved in the caller of this function.
3360   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3361   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3362
3363   // Set the size that is at least reserved in caller of this function.  Tail
3364   // call optimized function's reserved stack space needs to be aligned so that
3365   // taking the difference between two stack areas will result in an aligned
3366   // stack.
3367   MinReservedArea =
3368       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3369   FuncInfo->setMinReservedArea(MinReservedArea);
3370
3371   SmallVector<SDValue, 8> MemOps;
3372
3373   // If the function takes variable number of arguments, make a frame index for
3374   // the start of the first vararg value... for expansion of llvm.va_start.
3375   if (isVarArg) {
3376     static const MCPhysReg GPArgRegs[] = {
3377       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3378       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3379     };
3380     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3381
3382     static const MCPhysReg FPArgRegs[] = {
3383       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3384       PPC::F8
3385     };
3386     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3387
3388     if (useSoftFloat())
3389        NumFPArgRegs = 0;
3390
3391     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3392     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3393
3394     // Make room for NumGPArgRegs and NumFPArgRegs.
3395     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3396                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3397
3398     FuncInfo->setVarArgsStackOffset(
3399       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3400                             CCInfo.getNextStackOffset(), true));
3401
3402     FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3403     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3404
3405     // The fixed integer arguments of a variadic function are stored to the
3406     // VarArgsFrameIndex on the stack so that they may be loaded by
3407     // dereferencing the result of va_next.
3408     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3409       // Get an existing live-in vreg, or add a new one.
3410       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3411       if (!VReg)
3412         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3413
3414       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3415       SDValue Store =
3416           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3417       MemOps.push_back(Store);
3418       // Increment the address by four for the next argument to store
3419       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3420       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3421     }
3422
3423     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3424     // is set.
3425     // The double arguments are stored to the VarArgsFrameIndex
3426     // on the stack.
3427     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3428       // Get an existing live-in vreg, or add a new one.
3429       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3430       if (!VReg)
3431         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3432
3433       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3434       SDValue Store =
3435           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3436       MemOps.push_back(Store);
3437       // Increment the address by eight for the next argument to store
3438       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3439                                          PtrVT);
3440       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3441     }
3442   }
3443
3444   if (!MemOps.empty())
3445     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3446
3447   return Chain;
3448 }
3449
3450 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3451 // value to MVT::i64 and then truncate to the correct register size.
3452 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3453                                              EVT ObjectVT, SelectionDAG &DAG,
3454                                              SDValue ArgVal,
3455                                              const SDLoc &dl) const {
3456   if (Flags.isSExt())
3457     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3458                          DAG.getValueType(ObjectVT));
3459   else if (Flags.isZExt())
3460     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3461                          DAG.getValueType(ObjectVT));
3462
3463   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3464 }
3465
3466 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3467     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3468     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3469     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3470   // TODO: add description of PPC stack frame format, or at least some docs.
3471   //
3472   bool isELFv2ABI = Subtarget.isELFv2ABI();
3473   bool isLittleEndian = Subtarget.isLittleEndian();
3474   MachineFunction &MF = DAG.getMachineFunction();
3475   MachineFrameInfo &MFI = MF.getFrameInfo();
3476   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3477
3478   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3479          "fastcc not supported on varargs functions");
3480
3481   EVT PtrVT = getPointerTy(MF.getDataLayout());
3482   // Potential tail calls could cause overwriting of argument stack slots.
3483   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3484                        (CallConv == CallingConv::Fast));
3485   unsigned PtrByteSize = 8;
3486   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3487
3488   static const MCPhysReg GPR[] = {
3489     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3490     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3491   };
3492   static const MCPhysReg VR[] = {
3493     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3494     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3495   };
3496
3497   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3498   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3499   const unsigned Num_VR_Regs  = array_lengthof(VR);
3500   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3501
3502   // Do a first pass over the arguments to determine whether the ABI
3503   // guarantees that our caller has allocated the parameter save area
3504   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3505   // in the ELFv2 ABI, it is true if this is a vararg function or if
3506   // any parameter is located in a stack slot.
3507
3508   bool HasParameterArea = !isELFv2ABI || isVarArg;
3509   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3510   unsigned NumBytes = LinkageSize;
3511   unsigned AvailableFPRs = Num_FPR_Regs;
3512   unsigned AvailableVRs = Num_VR_Regs;
3513   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3514     if (Ins[i].Flags.isNest())
3515       continue;
3516
3517     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3518                                PtrByteSize, LinkageSize, ParamAreaSize,
3519                                NumBytes, AvailableFPRs, AvailableVRs,
3520                                Subtarget.hasQPX()))
3521       HasParameterArea = true;
3522   }
3523
3524   // Add DAG nodes to load the arguments or copy them out of registers.  On
3525   // entry to a function on PPC, the arguments start after the linkage area,
3526   // although the first ones are often in registers.
3527
3528   unsigned ArgOffset = LinkageSize;
3529   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3530   unsigned &QFPR_idx = FPR_idx;
3531   SmallVector<SDValue, 8> MemOps;
3532   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3533   unsigned CurArgIdx = 0;
3534   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3535     SDValue ArgVal;
3536     bool needsLoad = false;
3537     EVT ObjectVT = Ins[ArgNo].VT;
3538     EVT OrigVT = Ins[ArgNo].ArgVT;
3539     unsigned ObjSize = ObjectVT.getStoreSize();
3540     unsigned ArgSize = ObjSize;
3541     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3542     if (Ins[ArgNo].isOrigArg()) {
3543       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3544       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3545     }
3546     // We re-align the argument offset for each argument, except when using the
3547     // fast calling convention, when we need to make sure we do that only when
3548     // we'll actually use a stack slot.
3549     unsigned CurArgOffset, Align;
3550     auto ComputeArgOffset = [&]() {
3551       /* Respect alignment of argument on the stack.  */
3552       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3553       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3554       CurArgOffset = ArgOffset;
3555     };
3556
3557     if (CallConv != CallingConv::Fast) {
3558       ComputeArgOffset();
3559
3560       /* Compute GPR index associated with argument offset.  */
3561       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3562       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3563     }
3564
3565     // FIXME the codegen can be much improved in some cases.
3566     // We do not have to keep everything in memory.
3567     if (Flags.isByVal()) {
3568       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3569
3570       if (CallConv == CallingConv::Fast)
3571         ComputeArgOffset();
3572
3573       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3574       ObjSize = Flags.getByValSize();
3575       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3576       // Empty aggregate parameters do not take up registers.  Examples:
3577       //   struct { } a;
3578       //   union  { } b;
3579       //   int c[0];
3580       // etc.  However, we have to provide a place-holder in InVals, so
3581       // pretend we have an 8-byte item at the current address for that
3582       // purpose.
3583       if (!ObjSize) {
3584         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3585         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3586         InVals.push_back(FIN);
3587         continue;
3588       }
3589
3590       // Create a stack object covering all stack doublewords occupied
3591       // by the argument.  If the argument is (fully or partially) on
3592       // the stack, or if the argument is fully in registers but the
3593       // caller has allocated the parameter save anyway, we can refer
3594       // directly to the caller's stack frame.  Otherwise, create a
3595       // local copy in our own frame.
3596       int FI;
3597       if (HasParameterArea ||
3598           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3599         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3600       else
3601         FI = MFI.CreateStackObject(ArgSize, Align, false);
3602       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3603
3604       // Handle aggregates smaller than 8 bytes.
3605       if (ObjSize < PtrByteSize) {
3606         // The value of the object is its address, which differs from the
3607         // address of the enclosing doubleword on big-endian systems.
3608         SDValue Arg = FIN;
3609         if (!isLittleEndian) {
3610           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3611           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3612         }
3613         InVals.push_back(Arg);
3614
3615         if (GPR_idx != Num_GPR_Regs) {
3616           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3617           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3618           SDValue Store;
3619
3620           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3621             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3622                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3623             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3624                                       MachinePointerInfo(&*FuncArg), ObjType);
3625           } else {
3626             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3627             // store the whole register as-is to the parameter save area
3628             // slot.
3629             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3630                                  MachinePointerInfo(&*FuncArg));
3631           }
3632
3633           MemOps.push_back(Store);
3634         }
3635         // Whether we copied from a register or not, advance the offset
3636         // into the parameter save area by a full doubleword.
3637         ArgOffset += PtrByteSize;
3638         continue;
3639       }
3640
3641       // The value of the object is its address, which is the address of
3642       // its first stack doubleword.
3643       InVals.push_back(FIN);
3644
3645       // Store whatever pieces of the object are in registers to memory.
3646       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3647         if (GPR_idx == Num_GPR_Regs)
3648           break;
3649
3650         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3651         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3652         SDValue Addr = FIN;
3653         if (j) {
3654           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3655           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3656         }
3657         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3658                                      MachinePointerInfo(&*FuncArg, j));
3659         MemOps.push_back(Store);
3660         ++GPR_idx;
3661       }
3662       ArgOffset += ArgSize;
3663       continue;
3664     }
3665
3666     switch (ObjectVT.getSimpleVT().SimpleTy) {
3667     default: llvm_unreachable("Unhandled argument type!");
3668     case MVT::i1:
3669     case MVT::i32:
3670     case MVT::i64:
3671       if (Flags.isNest()) {
3672         // The 'nest' parameter, if any, is passed in R11.
3673         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3674         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3675
3676         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3677           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3678
3679         break;
3680       }
3681
3682       // These can be scalar arguments or elements of an integer array type
3683       // passed directly.  Clang may use those instead of "byval" aggregate
3684       // types to avoid forcing arguments to memory unnecessarily.
3685       if (GPR_idx != Num_GPR_Regs) {
3686         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3687         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3688
3689         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3690           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3691           // value to MVT::i64 and then truncate to the correct register size.
3692           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3693       } else {
3694         if (CallConv == CallingConv::Fast)
3695           ComputeArgOffset();
3696
3697         needsLoad = true;
3698         ArgSize = PtrByteSize;
3699       }
3700       if (CallConv != CallingConv::Fast || needsLoad)
3701         ArgOffset += 8;
3702       break;
3703
3704     case MVT::f32:
3705     case MVT::f64:
3706       // These can be scalar arguments or elements of a float array type
3707       // passed directly.  The latter are used to implement ELFv2 homogenous
3708       // float aggregates.
3709       if (FPR_idx != Num_FPR_Regs) {
3710         unsigned VReg;
3711
3712         if (ObjectVT == MVT::f32)
3713           VReg = MF.addLiveIn(FPR[FPR_idx],
3714                               Subtarget.hasP8Vector()
3715                                   ? &PPC::VSSRCRegClass
3716                                   : &PPC::F4RCRegClass);
3717         else
3718           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3719                                                 ? &PPC::VSFRCRegClass
3720                                                 : &PPC::F8RCRegClass);
3721
3722         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3723         ++FPR_idx;
3724       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3725         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3726         // once we support fp <-> gpr moves.
3727
3728         // This can only ever happen in the presence of f32 array types,
3729         // since otherwise we never run out of FPRs before running out
3730         // of GPRs.
3731         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3732         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3733
3734         if (ObjectVT == MVT::f32) {
3735           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3736             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3737                                  DAG.getConstant(32, dl, MVT::i32));
3738           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3739         }
3740
3741         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3742       } else {
3743         if (CallConv == CallingConv::Fast)
3744           ComputeArgOffset();
3745
3746         needsLoad = true;
3747       }
3748
3749       // When passing an array of floats, the array occupies consecutive
3750       // space in the argument area; only round up to the next doubleword
3751       // at the end of the array.  Otherwise, each float takes 8 bytes.
3752       if (CallConv != CallingConv::Fast || needsLoad) {
3753         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3754         ArgOffset += ArgSize;
3755         if (Flags.isInConsecutiveRegsLast())
3756           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3757       }
3758       break;
3759     case MVT::v4f32:
3760     case MVT::v4i32:
3761     case MVT::v8i16:
3762     case MVT::v16i8:
3763     case MVT::v2f64:
3764     case MVT::v2i64:
3765     case MVT::v1i128:
3766       if (!Subtarget.hasQPX()) {
3767       // These can be scalar arguments or elements of a vector array type
3768       // passed directly.  The latter are used to implement ELFv2 homogenous
3769       // vector aggregates.
3770       if (VR_idx != Num_VR_Regs) {
3771         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3772         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3773         ++VR_idx;
3774       } else {
3775         if (CallConv == CallingConv::Fast)
3776           ComputeArgOffset();
3777
3778         needsLoad = true;
3779       }
3780       if (CallConv != CallingConv::Fast || needsLoad)
3781         ArgOffset += 16;
3782       break;
3783       } // not QPX
3784
3785       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3786              "Invalid QPX parameter type");
3787       /* fall through */
3788
3789     case MVT::v4f64:
3790     case MVT::v4i1:
3791       // QPX vectors are treated like their scalar floating-point subregisters
3792       // (except that they're larger).
3793       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3794       if (QFPR_idx != Num_QFPR_Regs) {
3795         const TargetRegisterClass *RC;
3796         switch (ObjectVT.getSimpleVT().SimpleTy) {
3797         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3798         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3799         default:         RC = &PPC::QBRCRegClass; break;
3800         }
3801
3802         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3803         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3804         ++QFPR_idx;
3805       } else {
3806         if (CallConv == CallingConv::Fast)
3807           ComputeArgOffset();
3808         needsLoad = true;
3809       }
3810       if (CallConv != CallingConv::Fast || needsLoad)
3811         ArgOffset += Sz;
3812       break;
3813     }
3814
3815     // We need to load the argument to a virtual register if we determined
3816     // above that we ran out of physical registers of the appropriate type.
3817     if (needsLoad) {
3818       if (ObjSize < ArgSize && !isLittleEndian)
3819         CurArgOffset += ArgSize - ObjSize;
3820       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3821       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3822       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3823     }
3824
3825     InVals.push_back(ArgVal);
3826   }
3827
3828   // Area that is at least reserved in the caller of this function.
3829   unsigned MinReservedArea;
3830   if (HasParameterArea)
3831     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3832   else
3833     MinReservedArea = LinkageSize;
3834
3835   // Set the size that is at least reserved in caller of this function.  Tail
3836   // call optimized functions' reserved stack space needs to be aligned so that
3837   // taking the difference between two stack areas will result in an aligned
3838   // stack.
3839   MinReservedArea =
3840       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3841   FuncInfo->setMinReservedArea(MinReservedArea);
3842
3843   // If the function takes variable number of arguments, make a frame index for
3844   // the start of the first vararg value... for expansion of llvm.va_start.
3845   if (isVarArg) {
3846     int Depth = ArgOffset;
3847
3848     FuncInfo->setVarArgsFrameIndex(
3849       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3850     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3851
3852     // If this function is vararg, store any remaining integer argument regs
3853     // to their spots on the stack so that they may be loaded by dereferencing
3854     // the result of va_next.
3855     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3856          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3857       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3858       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3859       SDValue Store =
3860           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3861       MemOps.push_back(Store);
3862       // Increment the address by four for the next argument to store
3863       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3864       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3865     }
3866   }
3867
3868   if (!MemOps.empty())
3869     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3870
3871   return Chain;
3872 }
3873
3874 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
3875     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3876     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3877     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3878   // TODO: add description of PPC stack frame format, or at least some docs.
3879   //
3880   MachineFunction &MF = DAG.getMachineFunction();
3881   MachineFrameInfo &MFI = MF.getFrameInfo();
3882   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3883
3884   EVT PtrVT = getPointerTy(MF.getDataLayout());
3885   bool isPPC64 = PtrVT == MVT::i64;
3886   // Potential tail calls could cause overwriting of argument stack slots.
3887   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3888                        (CallConv == CallingConv::Fast));
3889   unsigned PtrByteSize = isPPC64 ? 8 : 4;
3890   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3891   unsigned ArgOffset = LinkageSize;
3892   // Area that is at least reserved in caller of this function.
3893   unsigned MinReservedArea = ArgOffset;
3894
3895   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
3896     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3897     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3898   };
3899   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
3900     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3901     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3902   };
3903   static const MCPhysReg VR[] = {
3904     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3905     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3906   };
3907
3908   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3909   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3910   const unsigned Num_VR_Regs  = array_lengthof( VR);
3911
3912   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3913
3914   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3915
3916   // In 32-bit non-varargs functions, the stack space for vectors is after the
3917   // stack space for non-vectors.  We do not use this space unless we have
3918   // too many vectors to fit in registers, something that only occurs in
3919   // constructed examples:), but we have to walk the arglist to figure
3920   // that out...for the pathological case, compute VecArgOffset as the
3921   // start of the vector parameter area.  Computing VecArgOffset is the
3922   // entire point of the following loop.
3923   unsigned VecArgOffset = ArgOffset;
3924   if (!isVarArg && !isPPC64) {
3925     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3926          ++ArgNo) {
3927       EVT ObjectVT = Ins[ArgNo].VT;
3928       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3929
3930       if (Flags.isByVal()) {
3931         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
3932         unsigned ObjSize = Flags.getByValSize();
3933         unsigned ArgSize =
3934                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3935         VecArgOffset += ArgSize;
3936         continue;
3937       }
3938
3939       switch(ObjectVT.getSimpleVT().SimpleTy) {
3940       default: llvm_unreachable("Unhandled argument type!");
3941       case MVT::i1:
3942       case MVT::i32:
3943       case MVT::f32:
3944         VecArgOffset += 4;
3945         break;
3946       case MVT::i64:  // PPC64
3947       case MVT::f64:
3948         // FIXME: We are guaranteed to be !isPPC64 at this point.
3949         // Does MVT::i64 apply?
3950         VecArgOffset += 8;
3951         break;
3952       case MVT::v4f32:
3953       case MVT::v4i32:
3954       case MVT::v8i16:
3955       case MVT::v16i8:
3956         // Nothing to do, we're only looking at Nonvector args here.
3957         break;
3958       }
3959     }
3960   }
3961   // We've found where the vector parameter area in memory is.  Skip the
3962   // first 12 parameters; these don't use that memory.
3963   VecArgOffset = ((VecArgOffset+15)/16)*16;
3964   VecArgOffset += 12*16;
3965
3966   // Add DAG nodes to load the arguments or copy them out of registers.  On
3967   // entry to a function on PPC, the arguments start after the linkage area,
3968   // although the first ones are often in registers.
3969
3970   SmallVector<SDValue, 8> MemOps;
3971   unsigned nAltivecParamsAtEnd = 0;
3972   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3973   unsigned CurArgIdx = 0;
3974   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3975     SDValue ArgVal;
3976     bool needsLoad = false;
3977     EVT ObjectVT = Ins[ArgNo].VT;
3978     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3979     unsigned ArgSize = ObjSize;
3980     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3981     if (Ins[ArgNo].isOrigArg()) {
3982       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3983       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3984     }
3985     unsigned CurArgOffset = ArgOffset;
3986
3987     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
3988     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3989         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3990       if (isVarArg || isPPC64) {
3991         MinReservedArea = ((MinReservedArea+15)/16)*16;
3992         MinReservedArea += CalculateStackSlotSize(ObjectVT,
3993                                                   Flags,
3994                                                   PtrByteSize);
3995       } else  nAltivecParamsAtEnd++;
3996     } else
3997       // Calculate min reserved area.
3998       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3999                                                 Flags,
4000                                                 PtrByteSize);
4001
4002     // FIXME the codegen can be much improved in some cases.
4003     // We do not have to keep everything in memory.
4004     if (Flags.isByVal()) {
4005       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4006
4007       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4008       ObjSize = Flags.getByValSize();
4009       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4010       // Objects of size 1 and 2 are right justified, everything else is
4011       // left justified.  This means the memory address is adjusted forwards.
4012       if (ObjSize==1 || ObjSize==2) {
4013         CurArgOffset = CurArgOffset + (4 - ObjSize);
4014       }
4015       // The value of the object is its address.
4016       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4017       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4018       InVals.push_back(FIN);
4019       if (ObjSize==1 || ObjSize==2) {
4020         if (GPR_idx != Num_GPR_Regs) {
4021           unsigned VReg;
4022           if (isPPC64)
4023             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4024           else
4025             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4026           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4027           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4028           SDValue Store =
4029               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4030                                 MachinePointerInfo(&*FuncArg), ObjType);
4031           MemOps.push_back(Store);
4032           ++GPR_idx;
4033         }
4034
4035         ArgOffset += PtrByteSize;
4036
4037         continue;
4038       }
4039       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4040         // Store whatever pieces of the object are in registers
4041         // to memory.  ArgOffset will be the address of the beginning
4042         // of the object.
4043         if (GPR_idx != Num_GPR_Regs) {
4044           unsigned VReg;
4045           if (isPPC64)
4046             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4047           else
4048             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4049           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4050           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4051           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4052           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4053                                        MachinePointerInfo(&*FuncArg, j));
4054           MemOps.push_back(Store);
4055           ++GPR_idx;
4056           ArgOffset += PtrByteSize;
4057         } else {
4058           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4059           break;
4060         }
4061       }
4062       continue;
4063     }
4064
4065     switch (ObjectVT.getSimpleVT().SimpleTy) {
4066     default: llvm_unreachable("Unhandled argument type!");
4067     case MVT::i1:
4068     case MVT::i32:
4069       if (!isPPC64) {
4070         if (GPR_idx != Num_GPR_Regs) {
4071           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4072           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4073
4074           if (ObjectVT == MVT::i1)
4075             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4076
4077           ++GPR_idx;
4078         } else {
4079           needsLoad = true;
4080           ArgSize = PtrByteSize;
4081         }
4082         // All int arguments reserve stack space in the Darwin ABI.
4083         ArgOffset += PtrByteSize;
4084         break;
4085       }
4086       LLVM_FALLTHROUGH;
4087     case MVT::i64:  // PPC64
4088       if (GPR_idx != Num_GPR_Regs) {
4089         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4090         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4091
4092         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4093           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4094           // value to MVT::i64 and then truncate to the correct register size.
4095           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4096
4097         ++GPR_idx;
4098       } else {
4099         needsLoad = true;
4100         ArgSize = PtrByteSize;
4101       }
4102       // All int arguments reserve stack space in the Darwin ABI.
4103       ArgOffset += 8;
4104       break;
4105
4106     case MVT::f32:
4107     case MVT::f64:
4108       // Every 4 bytes of argument space consumes one of the GPRs available for
4109       // argument passing.
4110       if (GPR_idx != Num_GPR_Regs) {
4111         ++GPR_idx;
4112         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4113           ++GPR_idx;
4114       }
4115       if (FPR_idx != Num_FPR_Regs) {
4116         unsigned VReg;
4117
4118         if (ObjectVT == MVT::f32)
4119           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4120         else
4121           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4122
4123         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4124         ++FPR_idx;
4125       } else {
4126         needsLoad = true;
4127       }
4128
4129       // All FP arguments reserve stack space in the Darwin ABI.
4130       ArgOffset += isPPC64 ? 8 : ObjSize;
4131       break;
4132     case MVT::v4f32:
4133     case MVT::v4i32:
4134     case MVT::v8i16:
4135     case MVT::v16i8:
4136       // Note that vector arguments in registers don't reserve stack space,
4137       // except in varargs functions.
4138       if (VR_idx != Num_VR_Regs) {
4139         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4140         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4141         if (isVarArg) {
4142           while ((ArgOffset % 16) != 0) {
4143             ArgOffset += PtrByteSize;
4144             if (GPR_idx != Num_GPR_Regs)
4145               GPR_idx++;
4146           }
4147           ArgOffset += 16;
4148           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4149         }
4150         ++VR_idx;
4151       } else {
4152         if (!isVarArg && !isPPC64) {
4153           // Vectors go after all the nonvectors.
4154           CurArgOffset = VecArgOffset;
4155           VecArgOffset += 16;
4156         } else {
4157           // Vectors are aligned.
4158           ArgOffset = ((ArgOffset+15)/16)*16;
4159           CurArgOffset = ArgOffset;
4160           ArgOffset += 16;
4161         }
4162         needsLoad = true;
4163       }
4164       break;
4165     }
4166
4167     // We need to load the argument to a virtual register if we determined above
4168     // that we ran out of physical registers of the appropriate type.
4169     if (needsLoad) {
4170       int FI = MFI.CreateFixedObject(ObjSize,
4171                                      CurArgOffset + (ArgSize - ObjSize),
4172                                      isImmutable);
4173       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4174       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4175     }
4176
4177     InVals.push_back(ArgVal);
4178   }
4179
4180   // Allow for Altivec parameters at the end, if needed.
4181   if (nAltivecParamsAtEnd) {
4182     MinReservedArea = ((MinReservedArea+15)/16)*16;
4183     MinReservedArea += 16*nAltivecParamsAtEnd;
4184   }
4185
4186   // Area that is at least reserved in the caller of this function.
4187   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4188
4189   // Set the size that is at least reserved in caller of this function.  Tail
4190   // call optimized functions' reserved stack space needs to be aligned so that
4191   // taking the difference between two stack areas will result in an aligned
4192   // stack.
4193   MinReservedArea =
4194       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4195   FuncInfo->setMinReservedArea(MinReservedArea);
4196
4197   // If the function takes variable number of arguments, make a frame index for
4198   // the start of the first vararg value... for expansion of llvm.va_start.
4199   if (isVarArg) {
4200     int Depth = ArgOffset;
4201
4202     FuncInfo->setVarArgsFrameIndex(
4203       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4204                             Depth, true));
4205     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4206
4207     // If this function is vararg, store any remaining integer argument regs
4208     // to their spots on the stack so that they may be loaded by dereferencing
4209     // the result of va_next.
4210     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4211       unsigned VReg;
4212
4213       if (isPPC64)
4214         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4215       else
4216         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4217
4218       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4219       SDValue Store =
4220           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4221       MemOps.push_back(Store);
4222       // Increment the address by four for the next argument to store
4223       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4224       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4225     }
4226   }
4227
4228   if (!MemOps.empty())
4229     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4230
4231   return Chain;
4232 }
4233
4234 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4235 /// adjusted to accommodate the arguments for the tailcall.
4236 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4237                                    unsigned ParamSize) {
4238
4239   if (!isTailCall) return 0;
4240
4241   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4242   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4243   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4244   // Remember only if the new adjustement is bigger.
4245   if (SPDiff < FI->getTailCallSPDelta())
4246     FI->setTailCallSPDelta(SPDiff);
4247
4248   return SPDiff;
4249 }
4250
4251 static bool isFunctionGlobalAddress(SDValue Callee);
4252
4253 static bool
4254 resideInSameSection(const Function *Caller, SDValue Callee,
4255                     const TargetMachine &TM) {
4256   // If !G, Callee can be an external symbol.
4257   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4258   if (!G)
4259     return false;
4260
4261   const GlobalValue *GV = G->getGlobal();
4262   if (!GV->isStrongDefinitionForLinker())
4263     return false;
4264
4265   // Any explicitly-specified sections and section prefixes must also match.
4266   // Also, if we're using -ffunction-sections, then each function is always in
4267   // a different section (the same is true for COMDAT functions).
4268   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4269       GV->getSection() != Caller->getSection())
4270     return false;
4271   if (const auto *F = dyn_cast<Function>(GV)) {
4272     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4273       return false;
4274   }
4275
4276   // If the callee might be interposed, then we can't assume the ultimate call
4277   // target will be in the same section. Even in cases where we can assume that
4278   // interposition won't happen, in any case where the linker might insert a
4279   // stub to allow for interposition, we must generate code as though
4280   // interposition might occur. To understand why this matters, consider a
4281   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4282   // in the same section, but a is in a different module (i.e. has a different
4283   // TOC base pointer). If the linker allows for interposition between b and c,
4284   // then it will generate a stub for the call edge between b and c which will
4285   // save the TOC pointer into the designated stack slot allocated by b. If we
4286   // return true here, and therefore allow a tail call between b and c, that
4287   // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4288   // pointer into the stack slot allocated by a (where the a -> b stub saved
4289   // a's TOC base pointer). If we're not considering a tail call, but rather,
4290   // whether a nop is needed after the call instruction in b, because the linker
4291   // will insert a stub, it might complain about a missing nop if we omit it
4292   // (although many don't complain in this case).
4293   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4294     return false;
4295
4296   return true;
4297 }
4298
4299 static bool
4300 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4301                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4302   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4303
4304   const unsigned PtrByteSize = 8;
4305   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4306
4307   static const MCPhysReg GPR[] = {
4308     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4309     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4310   };
4311   static const MCPhysReg VR[] = {
4312     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4313     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4314   };
4315
4316   const unsigned NumGPRs = array_lengthof(GPR);
4317   const unsigned NumFPRs = 13;
4318   const unsigned NumVRs = array_lengthof(VR);
4319   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4320
4321   unsigned NumBytes = LinkageSize;
4322   unsigned AvailableFPRs = NumFPRs;
4323   unsigned AvailableVRs = NumVRs;
4324
4325   for (const ISD::OutputArg& Param : Outs) {
4326     if (Param.Flags.isNest()) continue;
4327
4328     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4329                                PtrByteSize, LinkageSize, ParamAreaSize,
4330                                NumBytes, AvailableFPRs, AvailableVRs,
4331                                Subtarget.hasQPX()))
4332       return true;
4333   }
4334   return false;
4335 }
4336
4337 static bool
4338 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4339   if (CS->arg_size() != CallerFn->arg_size())
4340     return false;
4341
4342   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4343   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4344   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4345
4346   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4347     const Value* CalleeArg = *CalleeArgIter;
4348     const Value* CallerArg = &(*CallerArgIter);
4349     if (CalleeArg == CallerArg)
4350       continue;
4351
4352     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4353     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4354     //      }
4355     // 1st argument of callee is undef and has the same type as caller.
4356     if (CalleeArg->getType() == CallerArg->getType() &&
4357         isa<UndefValue>(CalleeArg))
4358       continue;
4359
4360     return false;
4361   }
4362
4363   return true;
4364 }
4365
4366 bool
4367 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4368                                     SDValue Callee,
4369                                     CallingConv::ID CalleeCC,
4370                                     ImmutableCallSite *CS,
4371                                     bool isVarArg,
4372                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4373                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4374                                     SelectionDAG& DAG) const {
4375   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4376
4377   if (DisableSCO && !TailCallOpt) return false;
4378
4379   // Variadic argument functions are not supported.
4380   if (isVarArg) return false;
4381
4382   MachineFunction &MF = DAG.getMachineFunction();
4383   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4384
4385   // Tail or Sibling call optimization (TCO/SCO) needs callee and caller has
4386   // the same calling convention
4387   if (CallerCC != CalleeCC) return false;
4388
4389   // SCO support C calling convention
4390   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4391     return false;
4392
4393   // Caller contains any byval parameter is not supported.
4394   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4395     return false;
4396
4397   // Callee contains any byval parameter is not supported, too.
4398   // Note: This is a quick work around, because in some cases, e.g.
4399   // caller's stack size > callee's stack size, we are still able to apply
4400   // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4401   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4402     return false;
4403
4404   // No TCO/SCO on indirect call because Caller have to restore its TOC
4405   if (!isFunctionGlobalAddress(Callee) &&
4406       !isa<ExternalSymbolSDNode>(Callee))
4407     return false;
4408
4409   // Check if Callee resides in the same section, because for now, PPC64 SVR4
4410   // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
4411   // section.
4412   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4413   if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
4414     return false;
4415
4416   // TCO allows altering callee ABI, so we don't have to check further.
4417   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4418     return true;
4419
4420   if (DisableSCO) return false;
4421
4422   // If callee use the same argument list that caller is using, then we can
4423   // apply SCO on this case. If it is not, then we need to check if callee needs
4424   // stack for passing arguments.
4425   if (!hasSameArgumentList(MF.getFunction(), CS) &&
4426       needStackSlotPassParameters(Subtarget, Outs)) {
4427     return false;
4428   }
4429
4430   return true;
4431 }
4432
4433 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4434 /// for tail call optimization. Targets which want to do tail call
4435 /// optimization should implement this function.
4436 bool
4437 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4438                                                      CallingConv::ID CalleeCC,
4439                                                      bool isVarArg,
4440                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4441                                                      SelectionDAG& DAG) const {
4442   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4443     return false;
4444
4445   // Variable argument functions are not supported.
4446   if (isVarArg)
4447     return false;
4448
4449   MachineFunction &MF = DAG.getMachineFunction();
4450   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4451   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4452     // Functions containing by val parameters are not supported.
4453     for (unsigned i = 0; i != Ins.size(); i++) {
4454        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4455        if (Flags.isByVal()) return false;
4456     }
4457
4458     // Non-PIC/GOT tail calls are supported.
4459     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4460       return true;
4461
4462     // At the moment we can only do local tail calls (in same module, hidden
4463     // or protected) if we are generating PIC.
4464     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4465       return G->getGlobal()->hasHiddenVisibility()
4466           || G->getGlobal()->hasProtectedVisibility();
4467   }
4468
4469   return false;
4470 }
4471
4472 /// isCallCompatibleAddress - Return the immediate to use if the specified
4473 /// 32-bit value is representable in the immediate field of a BxA instruction.
4474 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4475   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4476   if (!C) return nullptr;
4477
4478   int Addr = C->getZExtValue();
4479   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4480       SignExtend32<26>(Addr) != Addr)
4481     return nullptr;  // Top 6 bits have to be sext of immediate.
4482
4483   return DAG
4484       .getConstant(
4485           (int)C->getZExtValue() >> 2, SDLoc(Op),
4486           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4487       .getNode();
4488 }
4489
4490 namespace {
4491
4492 struct TailCallArgumentInfo {
4493   SDValue Arg;
4494   SDValue FrameIdxOp;
4495   int FrameIdx = 0;
4496
4497   TailCallArgumentInfo() = default;
4498 };
4499
4500 } // end anonymous namespace
4501
4502 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4503 static void StoreTailCallArgumentsToStackSlot(
4504     SelectionDAG &DAG, SDValue Chain,
4505     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4506     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4507   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4508     SDValue Arg = TailCallArgs[i].Arg;
4509     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4510     int FI = TailCallArgs[i].FrameIdx;
4511     // Store relative to framepointer.
4512     MemOpChains.push_back(DAG.getStore(
4513         Chain, dl, Arg, FIN,
4514         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4515   }
4516 }
4517
4518 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4519 /// the appropriate stack slot for the tail call optimized function call.
4520 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4521                                              SDValue OldRetAddr, SDValue OldFP,
4522                                              int SPDiff, const SDLoc &dl) {
4523   if (SPDiff) {
4524     // Calculate the new stack slot for the return address.
4525     MachineFunction &MF = DAG.getMachineFunction();
4526     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4527     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4528     bool isPPC64 = Subtarget.isPPC64();
4529     int SlotSize = isPPC64 ? 8 : 4;
4530     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4531     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4532                                                          NewRetAddrLoc, true);
4533     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4534     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4535     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4536                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4537
4538     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4539     // slot as the FP is never overwritten.
4540     if (Subtarget.isDarwinABI()) {
4541       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4542       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4543                                                          true);
4544       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4545       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4546                            MachinePointerInfo::getFixedStack(
4547                                DAG.getMachineFunction(), NewFPIdx));
4548     }
4549   }
4550   return Chain;
4551 }
4552
4553 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4554 /// the position of the argument.
4555 static void
4556 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4557                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4558                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4559   int Offset = ArgOffset + SPDiff;
4560   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4561   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4562   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4563   SDValue FIN = DAG.getFrameIndex(FI, VT);
4564   TailCallArgumentInfo Info;
4565   Info.Arg = Arg;
4566   Info.FrameIdxOp = FIN;
4567   Info.FrameIdx = FI;
4568   TailCallArguments.push_back(Info);
4569 }
4570
4571 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4572 /// stack slot. Returns the chain as result and the loaded frame pointers in
4573 /// LROpOut/FPOpout. Used when tail calling.
4574 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4575     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4576     SDValue &FPOpOut, const SDLoc &dl) const {
4577   if (SPDiff) {
4578     // Load the LR and FP stack slot for later adjusting.
4579     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4580     LROpOut = getReturnAddrFrameIndex(DAG);
4581     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4582     Chain = SDValue(LROpOut.getNode(), 1);
4583
4584     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4585     // slot as the FP is never overwritten.
4586     if (Subtarget.isDarwinABI()) {
4587       FPOpOut = getFramePointerFrameIndex(DAG);
4588       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4589       Chain = SDValue(FPOpOut.getNode(), 1);
4590     }
4591   }
4592   return Chain;
4593 }
4594
4595 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4596 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4597 /// specified by the specific parameter attribute. The copy will be passed as
4598 /// a byval function parameter.
4599 /// Sometimes what we are copying is the end of a larger object, the part that
4600 /// does not fit in registers.
4601 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4602                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4603                                          SelectionDAG &DAG, const SDLoc &dl) {
4604   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4605   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4606                        false, false, false, MachinePointerInfo(),
4607                        MachinePointerInfo());
4608 }
4609
4610 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4611 /// tail calls.
4612 static void LowerMemOpCallTo(
4613     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4614     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4615     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4616     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4617   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4618   if (!isTailCall) {
4619     if (isVector) {
4620       SDValue StackPtr;
4621       if (isPPC64)
4622         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4623       else
4624         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4625       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4626                            DAG.getConstant(ArgOffset, dl, PtrVT));
4627     }
4628     MemOpChains.push_back(
4629         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4630     // Calculate and remember argument location.
4631   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4632                                   TailCallArguments);
4633 }
4634
4635 static void
4636 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4637                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4638                 SDValue FPOp,
4639                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4640   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4641   // might overwrite each other in case of tail call optimization.
4642   SmallVector<SDValue, 8> MemOpChains2;
4643   // Do not flag preceding copytoreg stuff together with the following stuff.
4644   InFlag = SDValue();
4645   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4646                                     MemOpChains2, dl);
4647   if (!MemOpChains2.empty())
4648     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4649
4650   // Store the return address to the appropriate stack slot.
4651   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4652
4653   // Emit callseq_end just before tailcall node.
4654   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4655                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4656   InFlag = Chain.getValue(1);
4657 }
4658
4659 // Is this global address that of a function that can be called by name? (as
4660 // opposed to something that must hold a descriptor for an indirect call).
4661 static bool isFunctionGlobalAddress(SDValue Callee) {
4662   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4663     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4664         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4665       return false;
4666
4667     return G->getGlobal()->getValueType()->isFunctionTy();
4668   }
4669
4670   return false;
4671 }
4672
4673 static unsigned
4674 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4675             SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4676             bool isPatchPoint, bool hasNest,
4677             SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4678             SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4679             ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4680   bool isPPC64 = Subtarget.isPPC64();
4681   bool isSVR4ABI = Subtarget.isSVR4ABI();
4682   bool isELFv2ABI = Subtarget.isELFv2ABI();
4683
4684   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4685   NodeTys.push_back(MVT::Other);   // Returns a chain
4686   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4687
4688   unsigned CallOpc = PPCISD::CALL;
4689
4690   bool needIndirectCall = true;
4691   if (!isSVR4ABI || !isPPC64)
4692     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4693       // If this is an absolute destination address, use the munged value.
4694       Callee = SDValue(Dest, 0);
4695       needIndirectCall = false;
4696     }
4697
4698   // PC-relative references to external symbols should go through $stub, unless
4699   // we're building with the leopard linker or later, which automatically
4700   // synthesizes these stubs.
4701   const TargetMachine &TM = DAG.getTarget();
4702   const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4703   const GlobalValue *GV = nullptr;
4704   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4705     GV = G->getGlobal();
4706   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4707   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4708
4709   if (isFunctionGlobalAddress(Callee)) {
4710     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4711     // A call to a TLS address is actually an indirect call to a
4712     // thread-specific pointer.
4713     unsigned OpFlags = 0;
4714     if (UsePlt)
4715       OpFlags = PPCII::MO_PLT;
4716
4717     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4718     // every direct call is) turn it into a TargetGlobalAddress /
4719     // TargetExternalSymbol node so that legalize doesn't hack it.
4720     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4721                                         Callee.getValueType(), 0, OpFlags);
4722     needIndirectCall = false;
4723   }
4724
4725   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4726     unsigned char OpFlags = 0;
4727
4728     if (UsePlt)
4729       OpFlags = PPCII::MO_PLT;
4730
4731     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4732                                          OpFlags);
4733     needIndirectCall = false;
4734   }
4735
4736   if (isPatchPoint) {
4737     // We'll form an invalid direct call when lowering a patchpoint; the full
4738     // sequence for an indirect call is complicated, and many of the
4739     // instructions introduced might have side effects (and, thus, can't be
4740     // removed later). The call itself will be removed as soon as the
4741     // argument/return lowering is complete, so the fact that it has the wrong
4742     // kind of operands should not really matter.
4743     needIndirectCall = false;
4744   }
4745
4746   if (needIndirectCall) {
4747     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4748     // to do the call, we can't use PPCISD::CALL.
4749     SDValue MTCTROps[] = {Chain, Callee, InFlag};
4750
4751     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4752       // Function pointers in the 64-bit SVR4 ABI do not point to the function
4753       // entry point, but to the function descriptor (the function entry point
4754       // address is part of the function descriptor though).
4755       // The function descriptor is a three doubleword structure with the
4756       // following fields: function entry point, TOC base address and
4757       // environment pointer.
4758       // Thus for a call through a function pointer, the following actions need
4759       // to be performed:
4760       //   1. Save the TOC of the caller in the TOC save area of its stack
4761       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4762       //   2. Load the address of the function entry point from the function
4763       //      descriptor.
4764       //   3. Load the TOC of the callee from the function descriptor into r2.
4765       //   4. Load the environment pointer from the function descriptor into
4766       //      r11.
4767       //   5. Branch to the function entry point address.
4768       //   6. On return of the callee, the TOC of the caller needs to be
4769       //      restored (this is done in FinishCall()).
4770       //
4771       // The loads are scheduled at the beginning of the call sequence, and the
4772       // register copies are flagged together to ensure that no other
4773       // operations can be scheduled in between. E.g. without flagging the
4774       // copies together, a TOC access in the caller could be scheduled between
4775       // the assignment of the callee TOC and the branch to the callee, which
4776       // results in the TOC access going through the TOC of the callee instead
4777       // of going through the TOC of the caller, which leads to incorrect code.
4778
4779       // Load the address of the function entry point from the function
4780       // descriptor.
4781       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4782       if (LDChain.getValueType() == MVT::Glue)
4783         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4784
4785       auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4786                           ? (MachineMemOperand::MODereferenceable |
4787                              MachineMemOperand::MOInvariant)
4788                           : MachineMemOperand::MONone;
4789
4790       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4791       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4792                                         /* Alignment = */ 8, MMOFlags);
4793
4794       // Load environment pointer into r11.
4795       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4796       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4797       SDValue LoadEnvPtr =
4798           DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4799                       /* Alignment = */ 8, MMOFlags);
4800
4801       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4802       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4803       SDValue TOCPtr =
4804           DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4805                       /* Alignment = */ 8, MMOFlags);
4806
4807       setUsesTOCBasePtr(DAG);
4808       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4809                                         InFlag);
4810       Chain = TOCVal.getValue(0);
4811       InFlag = TOCVal.getValue(1);
4812
4813       // If the function call has an explicit 'nest' parameter, it takes the
4814       // place of the environment pointer.
4815       if (!hasNest) {
4816         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4817                                           InFlag);
4818
4819         Chain = EnvVal.getValue(0);
4820         InFlag = EnvVal.getValue(1);
4821       }
4822
4823       MTCTROps[0] = Chain;
4824       MTCTROps[1] = LoadFuncPtr;
4825       MTCTROps[2] = InFlag;
4826     }
4827
4828     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4829                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4830     InFlag = Chain.getValue(1);
4831
4832     NodeTys.clear();
4833     NodeTys.push_back(MVT::Other);
4834     NodeTys.push_back(MVT::Glue);
4835     Ops.push_back(Chain);
4836     CallOpc = PPCISD::BCTRL;
4837     Callee.setNode(nullptr);
4838     // Add use of X11 (holding environment pointer)
4839     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4840       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4841     // Add CTR register as callee so a bctr can be emitted later.
4842     if (isTailCall)
4843       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4844   }
4845
4846   // If this is a direct call, pass the chain and the callee.
4847   if (Callee.getNode()) {
4848     Ops.push_back(Chain);
4849     Ops.push_back(Callee);
4850   }
4851   // If this is a tail call add stack pointer delta.
4852   if (isTailCall)
4853     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4854
4855   // Add argument registers to the end of the list so that they are known live
4856   // into the call.
4857   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4858     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4859                                   RegsToPass[i].second.getValueType()));
4860
4861   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4862   // into the call.
4863   if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4864     setUsesTOCBasePtr(DAG);
4865     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4866   }
4867
4868   return CallOpc;
4869 }
4870
4871 SDValue PPCTargetLowering::LowerCallResult(
4872     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4873     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4874     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4875   SmallVector<CCValAssign, 16> RVLocs;
4876   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4877                     *DAG.getContext());
4878   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4879
4880   // Copy all of the result registers out of their specified physreg.
4881   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4882     CCValAssign &VA = RVLocs[i];
4883     assert(VA.isRegLoc() && "Can only return in registers!");
4884
4885     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4886                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4887     Chain = Val.getValue(1);
4888     InFlag = Val.getValue(2);
4889
4890     switch (VA.getLocInfo()) {
4891     default: llvm_unreachable("Unknown loc info!");
4892     case CCValAssign::Full: break;
4893     case CCValAssign::AExt:
4894       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4895       break;
4896     case CCValAssign::ZExt:
4897       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4898                         DAG.getValueType(VA.getValVT()));
4899       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4900       break;
4901     case CCValAssign::SExt:
4902       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4903                         DAG.getValueType(VA.getValVT()));
4904       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4905       break;
4906     }
4907
4908     InVals.push_back(Val);
4909   }
4910
4911   return Chain;
4912 }
4913
4914 SDValue PPCTargetLowering::FinishCall(
4915     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4916     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4917     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4918     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4919     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4920     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4921   std::vector<EVT> NodeTys;
4922   SmallVector<SDValue, 8> Ops;
4923   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4924                                  SPDiff, isTailCall, isPatchPoint, hasNest,
4925                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
4926
4927   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4928   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4929     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4930
4931   // When performing tail call optimization the callee pops its arguments off
4932   // the stack. Account for this here so these bytes can be pushed back on in
4933   // PPCFrameLowering::eliminateCallFramePseudoInstr.
4934   int BytesCalleePops =
4935     (CallConv == CallingConv::Fast &&
4936      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4937
4938   // Add a register mask operand representing the call-preserved registers.
4939   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4940   const uint32_t *Mask =
4941       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4942   assert(Mask && "Missing call preserved mask for calling convention");
4943   Ops.push_back(DAG.getRegisterMask(Mask));
4944
4945   if (InFlag.getNode())
4946     Ops.push_back(InFlag);
4947
4948   // Emit tail call.
4949   if (isTailCall) {
4950     assert(((Callee.getOpcode() == ISD::Register &&
4951              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4952             Callee.getOpcode() == ISD::TargetExternalSymbol ||
4953             Callee.getOpcode() == ISD::TargetGlobalAddress ||
4954             isa<ConstantSDNode>(Callee)) &&
4955     "Expecting an global address, external symbol, absolute value or register");
4956
4957     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4958     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4959   }
4960
4961   // Add a NOP immediately after the branch instruction when using the 64-bit
4962   // SVR4 ABI. At link time, if caller and callee are in a different module and
4963   // thus have a different TOC, the call will be replaced with a call to a stub
4964   // function which saves the current TOC, loads the TOC of the callee and
4965   // branches to the callee. The NOP will be replaced with a load instruction
4966   // which restores the TOC of the caller from the TOC save slot of the current
4967   // stack frame. If caller and callee belong to the same module (and have the
4968   // same TOC), the NOP will remain unchanged.
4969
4970   MachineFunction &MF = DAG.getMachineFunction();
4971   if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
4972       !isPatchPoint) {
4973     if (CallOpc == PPCISD::BCTRL) {
4974       // This is a call through a function pointer.
4975       // Restore the caller TOC from the save area into R2.
4976       // See PrepareCall() for more information about calls through function
4977       // pointers in the 64-bit SVR4 ABI.
4978       // We are using a target-specific load with r2 hard coded, because the
4979       // result of a target-independent load would never go directly into r2,
4980       // since r2 is a reserved register (which prevents the register allocator
4981       // from allocating it), resulting in an additional register being
4982       // allocated and an unnecessary move instruction being generated.
4983       CallOpc = PPCISD::BCTRL_LOAD_TOC;
4984
4985       EVT PtrVT = getPointerTy(DAG.getDataLayout());
4986       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4987       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4988       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4989       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4990
4991       // The address needs to go after the chain input but before the flag (or
4992       // any other variadic arguments).
4993       Ops.insert(std::next(Ops.begin()), AddTOC);
4994     } else if (CallOpc == PPCISD::CALL &&
4995       !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
4996       // Otherwise insert NOP for non-local calls.
4997       CallOpc = PPCISD::CALL_NOP;
4998     }
4999   }
5000
5001   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5002   InFlag = Chain.getValue(1);
5003
5004   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5005                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5006                              InFlag, dl);
5007   if (!Ins.empty())
5008     InFlag = Chain.getValue(1);
5009
5010   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5011                          Ins, dl, DAG, InVals);
5012 }
5013
5014 SDValue
5015 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5016                              SmallVectorImpl<SDValue> &InVals) const {
5017   SelectionDAG &DAG                     = CLI.DAG;
5018   SDLoc &dl                             = CLI.DL;
5019   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5020   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5021   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5022   SDValue Chain                         = CLI.Chain;
5023   SDValue Callee                        = CLI.Callee;
5024   bool &isTailCall                      = CLI.IsTailCall;
5025   CallingConv::ID CallConv              = CLI.CallConv;
5026   bool isVarArg                         = CLI.IsVarArg;
5027   bool isPatchPoint                     = CLI.IsPatchPoint;
5028   ImmutableCallSite *CS                 = CLI.CS;
5029
5030   if (isTailCall) {
5031     if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
5032       isTailCall = false;
5033     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5034       isTailCall =
5035         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5036                                                  isVarArg, Outs, Ins, DAG);
5037     else
5038       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5039                                                      Ins, DAG);
5040     if (isTailCall) {
5041       ++NumTailCalls;
5042       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5043         ++NumSiblingCalls;
5044
5045       assert(isa<GlobalAddressSDNode>(Callee) &&
5046              "Callee should be an llvm::Function object.");
5047       DEBUG(
5048         const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5049         const unsigned Width = 80 - strlen("TCO caller: ")
5050                                   - strlen(", callee linkage: 0, 0");
5051         dbgs() << "TCO caller: "
5052                << left_justify(DAG.getMachineFunction().getName(), Width)
5053                << ", callee linkage: "
5054                << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
5055       );
5056     }
5057   }
5058
5059   if (!isTailCall && CS && CS->isMustTailCall())
5060     report_fatal_error("failed to perform tail call elimination on a call "
5061                        "site marked musttail");
5062
5063   // When long calls (i.e. indirect calls) are always used, calls are always
5064   // made via function pointer. If we have a function name, first translate it
5065   // into a pointer.
5066   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5067       !isTailCall)
5068     Callee = LowerGlobalAddress(Callee, DAG);
5069
5070   if (Subtarget.isSVR4ABI()) {
5071     if (Subtarget.isPPC64())
5072       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5073                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5074                               dl, DAG, InVals, CS);
5075     else
5076       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5077                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5078                               dl, DAG, InVals, CS);
5079   }
5080
5081   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5082                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
5083                           dl, DAG, InVals, CS);
5084 }
5085
5086 SDValue PPCTargetLowering::LowerCall_32SVR4(
5087     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5088     bool isTailCall, bool isPatchPoint,
5089     const SmallVectorImpl<ISD::OutputArg> &Outs,
5090     const SmallVectorImpl<SDValue> &OutVals,
5091     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5092     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5093     ImmutableCallSite *CS) const {
5094   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5095   // of the 32-bit SVR4 ABI stack frame layout.
5096
5097   assert((CallConv == CallingConv::C ||
5098           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5099
5100   unsigned PtrByteSize = 4;
5101
5102   MachineFunction &MF = DAG.getMachineFunction();
5103
5104   // Mark this function as potentially containing a function that contains a
5105   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5106   // and restoring the callers stack pointer in this functions epilog. This is
5107   // done because by tail calling the called function might overwrite the value
5108   // in this function's (MF) stack pointer stack slot 0(SP).
5109   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5110       CallConv == CallingConv::Fast)
5111     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5112
5113   // Count how many bytes are to be pushed on the stack, including the linkage
5114   // area, parameter list area and the part of the local variable space which
5115   // contains copies of aggregates which are passed by value.
5116
5117   // Assign locations to all of the outgoing arguments.
5118   SmallVector<CCValAssign, 16> ArgLocs;
5119   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5120
5121   // Reserve space for the linkage area on the stack.
5122   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5123                        PtrByteSize);
5124   if (useSoftFloat())
5125     CCInfo.PreAnalyzeCallOperands(Outs);
5126
5127   if (isVarArg) {
5128     // Handle fixed and variable vector arguments differently.
5129     // Fixed vector arguments go into registers as long as registers are
5130     // available. Variable vector arguments always go into memory.
5131     unsigned NumArgs = Outs.size();
5132
5133     for (unsigned i = 0; i != NumArgs; ++i) {
5134       MVT ArgVT = Outs[i].VT;
5135       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5136       bool Result;
5137
5138       if (Outs[i].IsFixed) {
5139         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5140                                CCInfo);
5141       } else {
5142         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5143                                       ArgFlags, CCInfo);
5144       }
5145
5146       if (Result) {
5147 #ifndef NDEBUG
5148         errs() << "Call operand #" << i << " has unhandled type "
5149              << EVT(ArgVT).getEVTString() << "\n";
5150 #endif
5151         llvm_unreachable(nullptr);
5152       }
5153     }
5154   } else {
5155     // All arguments are treated the same.
5156     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5157   }
5158   CCInfo.clearWasPPCF128();
5159
5160   // Assign locations to all of the outgoing aggregate by value arguments.
5161   SmallVector<CCValAssign, 16> ByValArgLocs;
5162   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5163
5164   // Reserve stack space for the allocations in CCInfo.
5165   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5166
5167   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5168
5169   // Size of the linkage area, parameter list area and the part of the local
5170   // space variable where copies of aggregates which are passed by value are
5171   // stored.
5172   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5173
5174   // Calculate by how many bytes the stack has to be adjusted in case of tail
5175   // call optimization.
5176   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5177
5178   // Adjust the stack pointer for the new arguments...
5179   // These operations are automatically eliminated by the prolog/epilog pass
5180   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5181   SDValue CallSeqStart = Chain;
5182
5183   // Load the return address and frame pointer so it can be moved somewhere else
5184   // later.
5185   SDValue LROp, FPOp;
5186   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5187
5188   // Set up a copy of the stack pointer for use loading and storing any
5189   // arguments that may not fit in the registers available for argument
5190   // passing.
5191   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5192
5193   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5194   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5195   SmallVector<SDValue, 8> MemOpChains;
5196
5197   bool seenFloatArg = false;
5198   // Walk the register/memloc assignments, inserting copies/loads.
5199   for (unsigned i = 0, j = 0, e = ArgLocs.size();
5200        i != e;
5201        ++i) {
5202     CCValAssign &VA = ArgLocs[i];
5203     SDValue Arg = OutVals[i];
5204     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5205
5206     if (Flags.isByVal()) {
5207       // Argument is an aggregate which is passed by value, thus we need to
5208       // create a copy of it in the local variable space of the current stack
5209       // frame (which is the stack frame of the caller) and pass the address of
5210       // this copy to the callee.
5211       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5212       CCValAssign &ByValVA = ByValArgLocs[j++];
5213       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5214
5215       // Memory reserved in the local variable space of the callers stack frame.
5216       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5217
5218       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5219       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5220                            StackPtr, PtrOff);
5221
5222       // Create a copy of the argument in the local area of the current
5223       // stack frame.
5224       SDValue MemcpyCall =
5225         CreateCopyOfByValArgument(Arg, PtrOff,
5226                                   CallSeqStart.getNode()->getOperand(0),
5227                                   Flags, DAG, dl);
5228
5229       // This must go outside the CALLSEQ_START..END.
5230       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5231                                                      SDLoc(MemcpyCall));
5232       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5233                              NewCallSeqStart.getNode());
5234       Chain = CallSeqStart = NewCallSeqStart;
5235
5236       // Pass the address of the aggregate copy on the stack either in a
5237       // physical register or in the parameter list area of the current stack
5238       // frame to the callee.
5239       Arg = PtrOff;
5240     }
5241
5242     if (VA.isRegLoc()) {
5243       if (Arg.getValueType() == MVT::i1)
5244         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
5245
5246       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5247       // Put argument in a physical register.
5248       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5249     } else {
5250       // Put argument in the parameter list area of the current stack frame.
5251       assert(VA.isMemLoc());
5252       unsigned LocMemOffset = VA.getLocMemOffset();
5253
5254       if (!isTailCall) {
5255         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5256         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5257                              StackPtr, PtrOff);
5258
5259         MemOpChains.push_back(
5260             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5261       } else {
5262         // Calculate and remember argument location.
5263         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5264                                  TailCallArguments);
5265       }
5266     }
5267   }
5268
5269   if (!MemOpChains.empty())
5270     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5271
5272   // Build a sequence of copy-to-reg nodes chained together with token chain
5273   // and flag operands which copy the outgoing args into the appropriate regs.
5274   SDValue InFlag;
5275   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5276     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5277                              RegsToPass[i].second, InFlag);
5278     InFlag = Chain.getValue(1);
5279   }
5280
5281   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5282   // registers.
5283   if (isVarArg) {
5284     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5285     SDValue Ops[] = { Chain, InFlag };
5286
5287     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5288                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5289
5290     InFlag = Chain.getValue(1);
5291   }
5292
5293   if (isTailCall)
5294     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5295                     TailCallArguments);
5296
5297   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5298                     /* unused except on PPC64 ELFv1 */ false, DAG,
5299                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5300                     NumBytes, Ins, InVals, CS);
5301 }
5302
5303 // Copy an argument into memory, being careful to do this outside the
5304 // call sequence for the call to which the argument belongs.
5305 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5306     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5307     SelectionDAG &DAG, const SDLoc &dl) const {
5308   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5309                         CallSeqStart.getNode()->getOperand(0),
5310                         Flags, DAG, dl);
5311   // The MEMCPY must go outside the CALLSEQ_START..END.
5312   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5313   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5314                                                  SDLoc(MemcpyCall));
5315   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5316                          NewCallSeqStart.getNode());
5317   return NewCallSeqStart;
5318 }
5319
5320 SDValue PPCTargetLowering::LowerCall_64SVR4(
5321     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5322     bool isTailCall, bool isPatchPoint,
5323     const SmallVectorImpl<ISD::OutputArg> &Outs,
5324     const SmallVectorImpl<SDValue> &OutVals,
5325     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5326     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5327     ImmutableCallSite *CS) const {
5328   bool isELFv2ABI = Subtarget.isELFv2ABI();
5329   bool isLittleEndian = Subtarget.isLittleEndian();
5330   unsigned NumOps = Outs.size();
5331   bool hasNest = false;
5332   bool IsSibCall = false;
5333
5334   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5335   unsigned PtrByteSize = 8;
5336
5337   MachineFunction &MF = DAG.getMachineFunction();
5338
5339   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5340     IsSibCall = true;
5341
5342   // Mark this function as potentially containing a function that contains a
5343   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5344   // and restoring the callers stack pointer in this functions epilog. This is
5345   // done because by tail calling the called function might overwrite the value
5346   // in this function's (MF) stack pointer stack slot 0(SP).
5347   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5348       CallConv == CallingConv::Fast)
5349     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5350
5351   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5352          "fastcc not supported on varargs functions");
5353
5354   // Count how many bytes are to be pushed on the stack, including the linkage
5355   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5356   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5357   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5358   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5359   unsigned NumBytes = LinkageSize;
5360   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5361   unsigned &QFPR_idx = FPR_idx;
5362
5363   static const MCPhysReg GPR[] = {
5364     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5365     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5366   };
5367   static const MCPhysReg VR[] = {
5368     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5369     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5370   };
5371
5372   const unsigned NumGPRs = array_lengthof(GPR);
5373   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5374   const unsigned NumVRs  = array_lengthof(VR);
5375   const unsigned NumQFPRs = NumFPRs;
5376
5377   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5378   // can be passed to the callee in registers.
5379   // For the fast calling convention, there is another check below.
5380   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5381   bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5382   if (!HasParameterArea) {
5383     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5384     unsigned AvailableFPRs = NumFPRs;
5385     unsigned AvailableVRs = NumVRs;
5386     unsigned NumBytesTmp = NumBytes;
5387     for (unsigned i = 0; i != NumOps; ++i) {
5388       if (Outs[i].Flags.isNest()) continue;
5389       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5390                                 PtrByteSize, LinkageSize, ParamAreaSize,
5391                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
5392                                 Subtarget.hasQPX()))
5393         HasParameterArea = true;
5394     }
5395   }
5396
5397   // When using the fast calling convention, we don't provide backing for
5398   // arguments that will be in registers.
5399   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5400
5401   // Add up all the space actually used.
5402   for (unsigned i = 0; i != NumOps; ++i) {
5403     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5404     EVT ArgVT = Outs[i].VT;
5405     EVT OrigVT = Outs[i].ArgVT;
5406
5407     if (Flags.isNest())
5408       continue;
5409
5410     if (CallConv == CallingConv::Fast) {
5411       if (Flags.isByVal())
5412         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5413       else
5414         switch (ArgVT.getSimpleVT().SimpleTy) {
5415         default: llvm_unreachable("Unexpected ValueType for argument!");
5416         case MVT::i1:
5417         case MVT::i32:
5418         case MVT::i64:
5419           if (++NumGPRsUsed <= NumGPRs)
5420             continue;
5421           break;
5422         case MVT::v4i32:
5423         case MVT::v8i16:
5424         case MVT::v16i8:
5425         case MVT::v2f64:
5426         case MVT::v2i64:
5427         case MVT::v1i128:
5428           if (++NumVRsUsed <= NumVRs)
5429             continue;
5430           break;
5431         case MVT::v4f32:
5432           // When using QPX, this is handled like a FP register, otherwise, it
5433           // is an Altivec register.
5434           if (Subtarget.hasQPX()) {
5435             if (++NumFPRsUsed <= NumFPRs)
5436               continue;
5437           } else {
5438             if (++NumVRsUsed <= NumVRs)
5439               continue;
5440           }
5441           break;
5442         case MVT::f32:
5443         case MVT::f64:
5444         case MVT::v4f64: // QPX
5445         case MVT::v4i1:  // QPX
5446           if (++NumFPRsUsed <= NumFPRs)
5447             continue;
5448           break;
5449         }
5450     }
5451
5452     /* Respect alignment of argument on the stack.  */
5453     unsigned Align =
5454       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5455     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5456
5457     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5458     if (Flags.isInConsecutiveRegsLast())
5459       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5460   }
5461
5462   unsigned NumBytesActuallyUsed = NumBytes;
5463
5464   // In the old ELFv1 ABI,
5465   // the prolog code of the callee may store up to 8 GPR argument registers to
5466   // the stack, allowing va_start to index over them in memory if its varargs.
5467   // Because we cannot tell if this is needed on the caller side, we have to
5468   // conservatively assume that it is needed.  As such, make sure we have at
5469   // least enough stack space for the caller to store the 8 GPRs.
5470   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5471   // really requires memory operands, e.g. a vararg function.
5472   if (HasParameterArea)
5473     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5474   else
5475     NumBytes = LinkageSize;
5476
5477   // Tail call needs the stack to be aligned.
5478   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5479       CallConv == CallingConv::Fast)
5480     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5481
5482   int SPDiff = 0;
5483
5484   // Calculate by how many bytes the stack has to be adjusted in case of tail
5485   // call optimization.
5486   if (!IsSibCall)
5487     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5488
5489   // To protect arguments on the stack from being clobbered in a tail call,
5490   // force all the loads to happen before doing any other lowering.
5491   if (isTailCall)
5492     Chain = DAG.getStackArgumentTokenFactor(Chain);
5493
5494   // Adjust the stack pointer for the new arguments...
5495   // These operations are automatically eliminated by the prolog/epilog pass
5496   if (!IsSibCall)
5497     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5498   SDValue CallSeqStart = Chain;
5499
5500   // Load the return address and frame pointer so it can be move somewhere else
5501   // later.
5502   SDValue LROp, FPOp;
5503   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5504
5505   // Set up a copy of the stack pointer for use loading and storing any
5506   // arguments that may not fit in the registers available for argument
5507   // passing.
5508   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5509
5510   // Figure out which arguments are going to go in registers, and which in
5511   // memory.  Also, if this is a vararg function, floating point operations
5512   // must be stored to our stack, and loaded into integer regs as well, if
5513   // any integer regs are available for argument passing.
5514   unsigned ArgOffset = LinkageSize;
5515
5516   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5517   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5518
5519   SmallVector<SDValue, 8> MemOpChains;
5520   for (unsigned i = 0; i != NumOps; ++i) {
5521     SDValue Arg = OutVals[i];
5522     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5523     EVT ArgVT = Outs[i].VT;
5524     EVT OrigVT = Outs[i].ArgVT;
5525
5526     // PtrOff will be used to store the current argument to the stack if a
5527     // register cannot be found for it.
5528     SDValue PtrOff;
5529
5530     // We re-align the argument offset for each argument, except when using the
5531     // fast calling convention, when we need to make sure we do that only when
5532     // we'll actually use a stack slot.
5533     auto ComputePtrOff = [&]() {
5534       /* Respect alignment of argument on the stack.  */
5535       unsigned Align =
5536         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5537       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5538
5539       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5540
5541       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5542     };
5543
5544     if (CallConv != CallingConv::Fast) {
5545       ComputePtrOff();
5546
5547       /* Compute GPR index associated with argument offset.  */
5548       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5549       GPR_idx = std::min(GPR_idx, NumGPRs);
5550     }
5551
5552     // Promote integers to 64-bit values.
5553     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5554       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5555       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5556       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5557     }
5558
5559     // FIXME memcpy is used way more than necessary.  Correctness first.
5560     // Note: "by value" is code for passing a structure by value, not
5561     // basic types.
5562     if (Flags.isByVal()) {
5563       // Note: Size includes alignment padding, so
5564       //   struct x { short a; char b; }
5565       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5566       // These are the proper values we need for right-justifying the
5567       // aggregate in a parameter register.
5568       unsigned Size = Flags.getByValSize();
5569
5570       // An empty aggregate parameter takes up no storage and no
5571       // registers.
5572       if (Size == 0)
5573         continue;
5574
5575       if (CallConv == CallingConv::Fast)
5576         ComputePtrOff();
5577
5578       // All aggregates smaller than 8 bytes must be passed right-justified.
5579       if (Size==1 || Size==2 || Size==4) {
5580         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5581         if (GPR_idx != NumGPRs) {
5582           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5583                                         MachinePointerInfo(), VT);
5584           MemOpChains.push_back(Load.getValue(1));
5585           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5586
5587           ArgOffset += PtrByteSize;
5588           continue;
5589         }
5590       }
5591
5592       if (GPR_idx == NumGPRs && Size < 8) {
5593         SDValue AddPtr = PtrOff;
5594         if (!isLittleEndian) {
5595           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5596                                           PtrOff.getValueType());
5597           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5598         }
5599         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5600                                                           CallSeqStart,
5601                                                           Flags, DAG, dl);
5602         ArgOffset += PtrByteSize;
5603         continue;
5604       }
5605       // Copy entire object into memory.  There are cases where gcc-generated
5606       // code assumes it is there, even if it could be put entirely into
5607       // registers.  (This is not what the doc says.)
5608
5609       // FIXME: The above statement is likely due to a misunderstanding of the
5610       // documents.  All arguments must be copied into the parameter area BY
5611       // THE CALLEE in the event that the callee takes the address of any
5612       // formal argument.  That has not yet been implemented.  However, it is
5613       // reasonable to use the stack area as a staging area for the register
5614       // load.
5615
5616       // Skip this for small aggregates, as we will use the same slot for a
5617       // right-justified copy, below.
5618       if (Size >= 8)
5619         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5620                                                           CallSeqStart,
5621                                                           Flags, DAG, dl);
5622
5623       // When a register is available, pass a small aggregate right-justified.
5624       if (Size < 8 && GPR_idx != NumGPRs) {
5625         // The easiest way to get this right-justified in a register
5626         // is to copy the structure into the rightmost portion of a
5627         // local variable slot, then load the whole slot into the
5628         // register.
5629         // FIXME: The memcpy seems to produce pretty awful code for
5630         // small aggregates, particularly for packed ones.
5631         // FIXME: It would be preferable to use the slot in the
5632         // parameter save area instead of a new local variable.
5633         SDValue AddPtr = PtrOff;
5634         if (!isLittleEndian) {
5635           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5636           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5637         }
5638         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5639                                                           CallSeqStart,
5640                                                           Flags, DAG, dl);
5641
5642         // Load the slot into the register.
5643         SDValue Load =
5644             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5645         MemOpChains.push_back(Load.getValue(1));
5646         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5647
5648         // Done with this argument.
5649         ArgOffset += PtrByteSize;
5650         continue;
5651       }
5652
5653       // For aggregates larger than PtrByteSize, copy the pieces of the
5654       // object that fit into registers from the parameter save area.
5655       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5656         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5657         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5658         if (GPR_idx != NumGPRs) {
5659           SDValue Load =
5660               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5661           MemOpChains.push_back(Load.getValue(1));
5662           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5663           ArgOffset += PtrByteSize;
5664         } else {
5665           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5666           break;
5667         }
5668       }
5669       continue;
5670     }
5671
5672     switch (Arg.getSimpleValueType().SimpleTy) {
5673     default: llvm_unreachable("Unexpected ValueType for argument!");
5674     case MVT::i1:
5675     case MVT::i32:
5676     case MVT::i64:
5677       if (Flags.isNest()) {
5678         // The 'nest' parameter, if any, is passed in R11.
5679         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5680         hasNest = true;
5681         break;
5682       }
5683
5684       // These can be scalar arguments or elements of an integer array type
5685       // passed directly.  Clang may use those instead of "byval" aggregate
5686       // types to avoid forcing arguments to memory unnecessarily.
5687       if (GPR_idx != NumGPRs) {
5688         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5689       } else {
5690         if (CallConv == CallingConv::Fast)
5691           ComputePtrOff();
5692
5693         assert(HasParameterArea &&
5694                "Parameter area must exist to pass an argument in memory.");
5695         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5696                          true, isTailCall, false, MemOpChains,
5697                          TailCallArguments, dl);
5698         if (CallConv == CallingConv::Fast)
5699           ArgOffset += PtrByteSize;
5700       }
5701       if (CallConv != CallingConv::Fast)
5702         ArgOffset += PtrByteSize;
5703       break;
5704     case MVT::f32:
5705     case MVT::f64: {
5706       // These can be scalar arguments or elements of a float array type
5707       // passed directly.  The latter are used to implement ELFv2 homogenous
5708       // float aggregates.
5709
5710       // Named arguments go into FPRs first, and once they overflow, the
5711       // remaining arguments go into GPRs and then the parameter save area.
5712       // Unnamed arguments for vararg functions always go to GPRs and
5713       // then the parameter save area.  For now, put all arguments to vararg
5714       // routines always in both locations (FPR *and* GPR or stack slot).
5715       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5716       bool NeededLoad = false;
5717
5718       // First load the argument into the next available FPR.
5719       if (FPR_idx != NumFPRs)
5720         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5721
5722       // Next, load the argument into GPR or stack slot if needed.
5723       if (!NeedGPROrStack)
5724         ;
5725       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5726         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5727         // once we support fp <-> gpr moves.
5728
5729         // In the non-vararg case, this can only ever happen in the
5730         // presence of f32 array types, since otherwise we never run
5731         // out of FPRs before running out of GPRs.
5732         SDValue ArgVal;
5733
5734         // Double values are always passed in a single GPR.
5735         if (Arg.getValueType() != MVT::f32) {
5736           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5737
5738         // Non-array float values are extended and passed in a GPR.
5739         } else if (!Flags.isInConsecutiveRegs()) {
5740           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5741           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5742
5743         // If we have an array of floats, we collect every odd element
5744         // together with its predecessor into one GPR.
5745         } else if (ArgOffset % PtrByteSize != 0) {
5746           SDValue Lo, Hi;
5747           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5748           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5749           if (!isLittleEndian)
5750             std::swap(Lo, Hi);
5751           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5752
5753         // The final element, if even, goes into the first half of a GPR.
5754         } else if (Flags.isInConsecutiveRegsLast()) {
5755           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5756           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5757           if (!isLittleEndian)
5758             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5759                                  DAG.getConstant(32, dl, MVT::i32));
5760
5761         // Non-final even elements are skipped; they will be handled
5762         // together the with subsequent argument on the next go-around.
5763         } else
5764           ArgVal = SDValue();
5765
5766         if (ArgVal.getNode())
5767           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5768       } else {
5769         if (CallConv == CallingConv::Fast)
5770           ComputePtrOff();
5771
5772         // Single-precision floating-point values are mapped to the
5773         // second (rightmost) word of the stack doubleword.
5774         if (Arg.getValueType() == MVT::f32 &&
5775             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5776           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5777           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5778         }
5779
5780         assert(HasParameterArea &&
5781                "Parameter area must exist to pass an argument in memory.");
5782         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5783                          true, isTailCall, false, MemOpChains,
5784                          TailCallArguments, dl);
5785
5786         NeededLoad = true;
5787       }
5788       // When passing an array of floats, the array occupies consecutive
5789       // space in the argument area; only round up to the next doubleword
5790       // at the end of the array.  Otherwise, each float takes 8 bytes.
5791       if (CallConv != CallingConv::Fast || NeededLoad) {
5792         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5793                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5794         if (Flags.isInConsecutiveRegsLast())
5795           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5796       }
5797       break;
5798     }
5799     case MVT::v4f32:
5800     case MVT::v4i32:
5801     case MVT::v8i16:
5802     case MVT::v16i8:
5803     case MVT::v2f64:
5804     case MVT::v2i64:
5805     case MVT::v1i128:
5806       if (!Subtarget.hasQPX()) {
5807       // These can be scalar arguments or elements of a vector array type
5808       // passed directly.  The latter are used to implement ELFv2 homogenous
5809       // vector aggregates.
5810
5811       // For a varargs call, named arguments go into VRs or on the stack as
5812       // usual; unnamed arguments always go to the stack or the corresponding
5813       // GPRs when within range.  For now, we always put the value in both
5814       // locations (or even all three).
5815       if (isVarArg) {
5816         assert(HasParameterArea &&
5817                "Parameter area must exist if we have a varargs call.");
5818         // We could elide this store in the case where the object fits
5819         // entirely in R registers.  Maybe later.
5820         SDValue Store =
5821             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5822         MemOpChains.push_back(Store);
5823         if (VR_idx != NumVRs) {
5824           SDValue Load =
5825               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5826           MemOpChains.push_back(Load.getValue(1));
5827           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5828         }
5829         ArgOffset += 16;
5830         for (unsigned i=0; i<16; i+=PtrByteSize) {
5831           if (GPR_idx == NumGPRs)
5832             break;
5833           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5834                                    DAG.getConstant(i, dl, PtrVT));
5835           SDValue Load =
5836               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5837           MemOpChains.push_back(Load.getValue(1));
5838           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5839         }
5840         break;
5841       }
5842
5843       // Non-varargs Altivec params go into VRs or on the stack.
5844       if (VR_idx != NumVRs) {
5845         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5846       } else {
5847         if (CallConv == CallingConv::Fast)
5848           ComputePtrOff();
5849
5850         assert(HasParameterArea &&
5851                "Parameter area must exist to pass an argument in memory.");
5852         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5853                          true, isTailCall, true, MemOpChains,
5854                          TailCallArguments, dl);
5855         if (CallConv == CallingConv::Fast)
5856           ArgOffset += 16;
5857       }
5858
5859       if (CallConv != CallingConv::Fast)
5860         ArgOffset += 16;
5861       break;
5862       } // not QPX
5863
5864       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5865              "Invalid QPX parameter type");
5866
5867       /* fall through */
5868     case MVT::v4f64:
5869     case MVT::v4i1: {
5870       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5871       if (isVarArg) {
5872         assert(HasParameterArea &&
5873                "Parameter area must exist if we have a varargs call.");
5874         // We could elide this store in the case where the object fits
5875         // entirely in R registers.  Maybe later.
5876         SDValue Store =
5877             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5878         MemOpChains.push_back(Store);
5879         if (QFPR_idx != NumQFPRs) {
5880           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5881                                      PtrOff, MachinePointerInfo());
5882           MemOpChains.push_back(Load.getValue(1));
5883           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5884         }
5885         ArgOffset += (IsF32 ? 16 : 32);
5886         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5887           if (GPR_idx == NumGPRs)
5888             break;
5889           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5890                                    DAG.getConstant(i, dl, PtrVT));
5891           SDValue Load =
5892               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5893           MemOpChains.push_back(Load.getValue(1));
5894           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5895         }
5896         break;
5897       }
5898
5899       // Non-varargs QPX params go into registers or on the stack.
5900       if (QFPR_idx != NumQFPRs) {
5901         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5902       } else {
5903         if (CallConv == CallingConv::Fast)
5904           ComputePtrOff();
5905
5906         assert(HasParameterArea &&
5907                "Parameter area must exist to pass an argument in memory.");
5908         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5909                          true, isTailCall, true, MemOpChains,
5910                          TailCallArguments, dl);
5911         if (CallConv == CallingConv::Fast)
5912           ArgOffset += (IsF32 ? 16 : 32);
5913       }
5914
5915       if (CallConv != CallingConv::Fast)
5916         ArgOffset += (IsF32 ? 16 : 32);
5917       break;
5918       }
5919     }
5920   }
5921
5922   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
5923          "mismatch in size of parameter area");
5924   (void)NumBytesActuallyUsed;
5925
5926   if (!MemOpChains.empty())
5927     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5928
5929   // Check if this is an indirect call (MTCTR/BCTRL).
5930   // See PrepareCall() for more information about calls through function
5931   // pointers in the 64-bit SVR4 ABI.
5932   if (!isTailCall && !isPatchPoint &&
5933       !isFunctionGlobalAddress(Callee) &&
5934       !isa<ExternalSymbolSDNode>(Callee)) {
5935     // Load r2 into a virtual register and store it to the TOC save area.
5936     setUsesTOCBasePtr(DAG);
5937     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5938     // TOC save area offset.
5939     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5940     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5941     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5942     Chain = DAG.getStore(
5943         Val.getValue(1), dl, Val, AddPtr,
5944         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5945     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5946     // This does not mean the MTCTR instruction must use R12; it's easier
5947     // to model this as an extra parameter, so do that.
5948     if (isELFv2ABI && !isPatchPoint)
5949       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5950   }
5951
5952   // Build a sequence of copy-to-reg nodes chained together with token chain
5953   // and flag operands which copy the outgoing args into the appropriate regs.
5954   SDValue InFlag;
5955   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5956     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5957                              RegsToPass[i].second, InFlag);
5958     InFlag = Chain.getValue(1);
5959   }
5960
5961   if (isTailCall && !IsSibCall)
5962     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5963                     TailCallArguments);
5964
5965   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5966                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5967                     SPDiff, NumBytes, Ins, InVals, CS);
5968 }
5969
5970 SDValue PPCTargetLowering::LowerCall_Darwin(
5971     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5972     bool isTailCall, bool isPatchPoint,
5973     const SmallVectorImpl<ISD::OutputArg> &Outs,
5974     const SmallVectorImpl<SDValue> &OutVals,
5975     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5976     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5977     ImmutableCallSite *CS) const {
5978   unsigned NumOps = Outs.size();
5979
5980   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5981   bool isPPC64 = PtrVT == MVT::i64;
5982   unsigned PtrByteSize = isPPC64 ? 8 : 4;
5983
5984   MachineFunction &MF = DAG.getMachineFunction();
5985
5986   // Mark this function as potentially containing a function that contains a
5987   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5988   // and restoring the callers stack pointer in this functions epilog. This is
5989   // done because by tail calling the called function might overwrite the value
5990   // in this function's (MF) stack pointer stack slot 0(SP).
5991   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5992       CallConv == CallingConv::Fast)
5993     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5994
5995   // Count how many bytes are to be pushed on the stack, including the linkage
5996   // area, and parameter passing area.  We start with 24/48 bytes, which is
5997   // prereserved space for [SP][CR][LR][3 x unused].
5998   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5999   unsigned NumBytes = LinkageSize;
6000
6001   // Add up all the space actually used.
6002   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6003   // they all go in registers, but we must reserve stack space for them for
6004   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6005   // assigned stack space in order, with padding so Altivec parameters are
6006   // 16-byte aligned.
6007   unsigned nAltivecParamsAtEnd = 0;
6008   for (unsigned i = 0; i != NumOps; ++i) {
6009     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6010     EVT ArgVT = Outs[i].VT;
6011     // Varargs Altivec parameters are padded to a 16 byte boundary.
6012     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6013         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6014         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6015       if (!isVarArg && !isPPC64) {
6016         // Non-varargs Altivec parameters go after all the non-Altivec
6017         // parameters; handle those later so we know how much padding we need.
6018         nAltivecParamsAtEnd++;
6019         continue;
6020       }
6021       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6022       NumBytes = ((NumBytes+15)/16)*16;
6023     }
6024     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6025   }
6026
6027   // Allow for Altivec parameters at the end, if needed.
6028   if (nAltivecParamsAtEnd) {
6029     NumBytes = ((NumBytes+15)/16)*16;
6030     NumBytes += 16*nAltivecParamsAtEnd;
6031   }
6032
6033   // The prolog code of the callee may store up to 8 GPR argument registers to
6034   // the stack, allowing va_start to index over them in memory if its varargs.
6035   // Because we cannot tell if this is needed on the caller side, we have to
6036   // conservatively assume that it is needed.  As such, make sure we have at
6037   // least enough stack space for the caller to store the 8 GPRs.
6038   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6039
6040   // Tail call needs the stack to be aligned.
6041   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6042       CallConv == CallingConv::Fast)
6043     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6044
6045   // Calculate by how many bytes the stack has to be adjusted in case of tail
6046   // call optimization.
6047   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6048
6049   // To protect arguments on the stack from being clobbered in a tail call,
6050   // force all the loads to happen before doing any other lowering.
6051   if (isTailCall)
6052     Chain = DAG.getStackArgumentTokenFactor(Chain);
6053
6054   // Adjust the stack pointer for the new arguments...
6055   // These operations are automatically eliminated by the prolog/epilog pass
6056   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6057   SDValue CallSeqStart = Chain;
6058
6059   // Load the return address and frame pointer so it can be move somewhere else
6060   // later.
6061   SDValue LROp, FPOp;
6062   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6063
6064   // Set up a copy of the stack pointer for use loading and storing any
6065   // arguments that may not fit in the registers available for argument
6066   // passing.
6067   SDValue StackPtr;
6068   if (isPPC64)
6069     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6070   else
6071     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6072
6073   // Figure out which arguments are going to go in registers, and which in
6074   // memory.  Also, if this is a vararg function, floating point operations
6075   // must be stored to our stack, and loaded into integer regs as well, if
6076   // any integer regs are available for argument passing.
6077   unsigned ArgOffset = LinkageSize;
6078   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6079
6080   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6081     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6082     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6083   };
6084   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6085     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6086     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6087   };
6088   static const MCPhysReg VR[] = {
6089     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6090     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6091   };
6092   const unsigned NumGPRs = array_lengthof(GPR_32);
6093   const unsigned NumFPRs = 13;
6094   const unsigned NumVRs  = array_lengthof(VR);
6095
6096   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6097
6098   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6099   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6100
6101   SmallVector<SDValue, 8> MemOpChains;
6102   for (unsigned i = 0; i != NumOps; ++i) {
6103     SDValue Arg = OutVals[i];
6104     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6105
6106     // PtrOff will be used to store the current argument to the stack if a
6107     // register cannot be found for it.
6108     SDValue PtrOff;
6109
6110     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6111
6112     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6113
6114     // On PPC64, promote integers to 64-bit values.
6115     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6116       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6117       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6118       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6119     }
6120
6121     // FIXME memcpy is used way more than necessary.  Correctness first.
6122     // Note: "by value" is code for passing a structure by value, not
6123     // basic types.
6124     if (Flags.isByVal()) {
6125       unsigned Size = Flags.getByValSize();
6126       // Very small objects are passed right-justified.  Everything else is
6127       // passed left-justified.
6128       if (Size==1 || Size==2) {
6129         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6130         if (GPR_idx != NumGPRs) {
6131           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6132                                         MachinePointerInfo(), VT);
6133           MemOpChains.push_back(Load.getValue(1));
6134           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6135
6136           ArgOffset += PtrByteSize;
6137         } else {
6138           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6139                                           PtrOff.getValueType());
6140           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6141           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6142                                                             CallSeqStart,
6143                                                             Flags, DAG, dl);
6144           ArgOffset += PtrByteSize;
6145         }
6146         continue;
6147       }
6148       // Copy entire object into memory.  There are cases where gcc-generated
6149       // code assumes it is there, even if it could be put entirely into
6150       // registers.  (This is not what the doc says.)
6151       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6152                                                         CallSeqStart,
6153                                                         Flags, DAG, dl);
6154
6155       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6156       // copy the pieces of the object that fit into registers from the
6157       // parameter save area.
6158       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6159         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6160         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6161         if (GPR_idx != NumGPRs) {
6162           SDValue Load =
6163               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6164           MemOpChains.push_back(Load.getValue(1));
6165           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6166           ArgOffset += PtrByteSize;
6167         } else {
6168           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6169           break;
6170         }
6171       }
6172       continue;
6173     }
6174
6175     switch (Arg.getSimpleValueType().SimpleTy) {
6176     default: llvm_unreachable("Unexpected ValueType for argument!");
6177     case MVT::i1:
6178     case MVT::i32:
6179     case MVT::i64:
6180       if (GPR_idx != NumGPRs) {
6181         if (Arg.getValueType() == MVT::i1)
6182           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6183
6184         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6185       } else {
6186         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6187                          isPPC64, isTailCall, false, MemOpChains,
6188                          TailCallArguments, dl);
6189       }
6190       ArgOffset += PtrByteSize;
6191       break;
6192     case MVT::f32:
6193     case MVT::f64:
6194       if (FPR_idx != NumFPRs) {
6195         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6196
6197         if (isVarArg) {
6198           SDValue Store =
6199               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6200           MemOpChains.push_back(Store);
6201
6202           // Float varargs are always shadowed in available integer registers
6203           if (GPR_idx != NumGPRs) {
6204             SDValue Load =
6205                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6206             MemOpChains.push_back(Load.getValue(1));
6207             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6208           }
6209           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6210             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6211             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6212             SDValue Load =
6213                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6214             MemOpChains.push_back(Load.getValue(1));
6215             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6216           }
6217         } else {
6218           // If we have any FPRs remaining, we may also have GPRs remaining.
6219           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6220           // GPRs.
6221           if (GPR_idx != NumGPRs)
6222             ++GPR_idx;
6223           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6224               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6225             ++GPR_idx;
6226         }
6227       } else
6228         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6229                          isPPC64, isTailCall, false, MemOpChains,
6230                          TailCallArguments, dl);
6231       if (isPPC64)
6232         ArgOffset += 8;
6233       else
6234         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6235       break;
6236     case MVT::v4f32:
6237     case MVT::v4i32:
6238     case MVT::v8i16:
6239     case MVT::v16i8:
6240       if (isVarArg) {
6241         // These go aligned on the stack, or in the corresponding R registers
6242         // when within range.  The Darwin PPC ABI doc claims they also go in
6243         // V registers; in fact gcc does this only for arguments that are
6244         // prototyped, not for those that match the ...  We do it for all
6245         // arguments, seems to work.
6246         while (ArgOffset % 16 !=0) {
6247           ArgOffset += PtrByteSize;
6248           if (GPR_idx != NumGPRs)
6249             GPR_idx++;
6250         }
6251         // We could elide this store in the case where the object fits
6252         // entirely in R registers.  Maybe later.
6253         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6254                              DAG.getConstant(ArgOffset, dl, PtrVT));
6255         SDValue Store =
6256             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6257         MemOpChains.push_back(Store);
6258         if (VR_idx != NumVRs) {
6259           SDValue Load =
6260               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6261           MemOpChains.push_back(Load.getValue(1));
6262           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6263         }
6264         ArgOffset += 16;
6265         for (unsigned i=0; i<16; i+=PtrByteSize) {
6266           if (GPR_idx == NumGPRs)
6267             break;
6268           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6269                                    DAG.getConstant(i, dl, PtrVT));
6270           SDValue Load =
6271               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6272           MemOpChains.push_back(Load.getValue(1));
6273           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6274         }
6275         break;
6276       }
6277
6278       // Non-varargs Altivec params generally go in registers, but have
6279       // stack space allocated at the end.
6280       if (VR_idx != NumVRs) {
6281         // Doesn't have GPR space allocated.
6282         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6283       } else if (nAltivecParamsAtEnd==0) {
6284         // We are emitting Altivec params in order.
6285         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6286                          isPPC64, isTailCall, true, MemOpChains,
6287                          TailCallArguments, dl);
6288         ArgOffset += 16;
6289       }
6290       break;
6291     }
6292   }
6293   // If all Altivec parameters fit in registers, as they usually do,
6294   // they get stack space following the non-Altivec parameters.  We
6295   // don't track this here because nobody below needs it.
6296   // If there are more Altivec parameters than fit in registers emit
6297   // the stores here.
6298   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6299     unsigned j = 0;
6300     // Offset is aligned; skip 1st 12 params which go in V registers.
6301     ArgOffset = ((ArgOffset+15)/16)*16;
6302     ArgOffset += 12*16;
6303     for (unsigned i = 0; i != NumOps; ++i) {
6304       SDValue Arg = OutVals[i];
6305       EVT ArgType = Outs[i].VT;
6306       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6307           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6308         if (++j > NumVRs) {
6309           SDValue PtrOff;
6310           // We are emitting Altivec params in order.
6311           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6312                            isPPC64, isTailCall, true, MemOpChains,
6313                            TailCallArguments, dl);
6314           ArgOffset += 16;
6315         }
6316       }
6317     }
6318   }
6319
6320   if (!MemOpChains.empty())
6321     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6322
6323   // On Darwin, R12 must contain the address of an indirect callee.  This does
6324   // not mean the MTCTR instruction must use R12; it's easier to model this as
6325   // an extra parameter, so do that.
6326   if (!isTailCall &&
6327       !isFunctionGlobalAddress(Callee) &&
6328       !isa<ExternalSymbolSDNode>(Callee) &&
6329       !isBLACompatibleAddress(Callee, DAG))
6330     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6331                                                    PPC::R12), Callee));
6332
6333   // Build a sequence of copy-to-reg nodes chained together with token chain
6334   // and flag operands which copy the outgoing args into the appropriate regs.
6335   SDValue InFlag;
6336   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6337     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6338                              RegsToPass[i].second, InFlag);
6339     InFlag = Chain.getValue(1);
6340   }
6341
6342   if (isTailCall)
6343     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6344                     TailCallArguments);
6345
6346   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6347                     /* unused except on PPC64 ELFv1 */ false, DAG,
6348                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6349                     NumBytes, Ins, InVals, CS);
6350 }
6351
6352 bool
6353 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6354                                   MachineFunction &MF, bool isVarArg,
6355                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6356                                   LLVMContext &Context) const {
6357   SmallVector<CCValAssign, 16> RVLocs;
6358   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6359   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6360 }
6361
6362 SDValue
6363 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6364                                bool isVarArg,
6365                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6366                                const SmallVectorImpl<SDValue> &OutVals,
6367                                const SDLoc &dl, SelectionDAG &DAG) const {
6368   SmallVector<CCValAssign, 16> RVLocs;
6369   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6370                  *DAG.getContext());
6371   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6372
6373   SDValue Flag;
6374   SmallVector<SDValue, 4> RetOps(1, Chain);
6375
6376   // Copy the result values into the output registers.
6377   for (unsigned i = 0; i != RVLocs.size(); ++i) {
6378     CCValAssign &VA = RVLocs[i];
6379     assert(VA.isRegLoc() && "Can only return in registers!");
6380
6381     SDValue Arg = OutVals[i];
6382
6383     switch (VA.getLocInfo()) {
6384     default: llvm_unreachable("Unknown loc info!");
6385     case CCValAssign::Full: break;
6386     case CCValAssign::AExt:
6387       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6388       break;
6389     case CCValAssign::ZExt:
6390       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6391       break;
6392     case CCValAssign::SExt:
6393       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6394       break;
6395     }
6396
6397     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6398     Flag = Chain.getValue(1);
6399     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6400   }
6401
6402   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6403   const MCPhysReg *I =
6404     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6405   if (I) {
6406     for (; *I; ++I) {
6407
6408       if (PPC::G8RCRegClass.contains(*I))
6409         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6410       else if (PPC::F8RCRegClass.contains(*I))
6411         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6412       else if (PPC::CRRCRegClass.contains(*I))
6413         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6414       else if (PPC::VRRCRegClass.contains(*I))
6415         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6416       else
6417         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6418     }
6419   }
6420
6421   RetOps[0] = Chain;  // Update chain.
6422
6423   // Add the flag if we have it.
6424   if (Flag.getNode())
6425     RetOps.push_back(Flag);
6426
6427   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6428 }
6429
6430 SDValue
6431 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6432                                                 SelectionDAG &DAG) const {
6433   SDLoc dl(Op);
6434
6435   // Get the correct type for integers.
6436   EVT IntVT = Op.getValueType();
6437
6438   // Get the inputs.
6439   SDValue Chain = Op.getOperand(0);
6440   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6441   // Build a DYNAREAOFFSET node.
6442   SDValue Ops[2] = {Chain, FPSIdx};
6443   SDVTList VTs = DAG.getVTList(IntVT);
6444   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6445 }
6446
6447 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6448                                              SelectionDAG &DAG) const {
6449   // When we pop the dynamic allocation we need to restore the SP link.
6450   SDLoc dl(Op);
6451
6452   // Get the correct type for pointers.
6453   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6454
6455   // Construct the stack pointer operand.
6456   bool isPPC64 = Subtarget.isPPC64();
6457   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6458   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6459
6460   // Get the operands for the STACKRESTORE.
6461   SDValue Chain = Op.getOperand(0);
6462   SDValue SaveSP = Op.getOperand(1);
6463
6464   // Load the old link SP.
6465   SDValue LoadLinkSP =
6466       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6467
6468   // Restore the stack pointer.
6469   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6470
6471   // Store the old link SP.
6472   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6473 }
6474
6475 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6476   MachineFunction &MF = DAG.getMachineFunction();
6477   bool isPPC64 = Subtarget.isPPC64();
6478   EVT PtrVT = getPointerTy(MF.getDataLayout());
6479
6480   // Get current frame pointer save index.  The users of this index will be
6481   // primarily DYNALLOC instructions.
6482   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6483   int RASI = FI->getReturnAddrSaveIndex();
6484
6485   // If the frame pointer save index hasn't been defined yet.
6486   if (!RASI) {
6487     // Find out what the fix offset of the frame pointer save area.
6488     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6489     // Allocate the frame index for frame pointer save area.
6490     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6491     // Save the result.
6492     FI->setReturnAddrSaveIndex(RASI);
6493   }
6494   return DAG.getFrameIndex(RASI, PtrVT);
6495 }
6496
6497 SDValue
6498 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6499   MachineFunction &MF = DAG.getMachineFunction();
6500   bool isPPC64 = Subtarget.isPPC64();
6501   EVT PtrVT = getPointerTy(MF.getDataLayout());
6502
6503   // Get current frame pointer save index.  The users of this index will be
6504   // primarily DYNALLOC instructions.
6505   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6506   int FPSI = FI->getFramePointerSaveIndex();
6507
6508   // If the frame pointer save index hasn't been defined yet.
6509   if (!FPSI) {
6510     // Find out what the fix offset of the frame pointer save area.
6511     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6512     // Allocate the frame index for frame pointer save area.
6513     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6514     // Save the result.
6515     FI->setFramePointerSaveIndex(FPSI);
6516   }
6517   return DAG.getFrameIndex(FPSI, PtrVT);
6518 }
6519
6520 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6521                                                    SelectionDAG &DAG) const {
6522   // Get the inputs.
6523   SDValue Chain = Op.getOperand(0);
6524   SDValue Size  = Op.getOperand(1);
6525   SDLoc dl(Op);
6526
6527   // Get the correct type for pointers.
6528   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6529   // Negate the size.
6530   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6531                                 DAG.getConstant(0, dl, PtrVT), Size);
6532   // Construct a node for the frame pointer save index.
6533   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6534   // Build a DYNALLOC node.
6535   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6536   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6537   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6538 }
6539
6540 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6541                                                      SelectionDAG &DAG) const {
6542   MachineFunction &MF = DAG.getMachineFunction();
6543
6544   bool isPPC64 = Subtarget.isPPC64();
6545   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6546
6547   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6548   return DAG.getFrameIndex(FI, PtrVT);
6549 }
6550
6551 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6552                                                SelectionDAG &DAG) const {
6553   SDLoc DL(Op);
6554   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6555                      DAG.getVTList(MVT::i32, MVT::Other),
6556                      Op.getOperand(0), Op.getOperand(1));
6557 }
6558
6559 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6560                                                 SelectionDAG &DAG) const {
6561   SDLoc DL(Op);
6562   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6563                      Op.getOperand(0), Op.getOperand(1));
6564 }
6565
6566 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6567   if (Op.getValueType().isVector())
6568     return LowerVectorLoad(Op, DAG);
6569
6570   assert(Op.getValueType() == MVT::i1 &&
6571          "Custom lowering only for i1 loads");
6572
6573   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6574
6575   SDLoc dl(Op);
6576   LoadSDNode *LD = cast<LoadSDNode>(Op);
6577
6578   SDValue Chain = LD->getChain();
6579   SDValue BasePtr = LD->getBasePtr();
6580   MachineMemOperand *MMO = LD->getMemOperand();
6581
6582   SDValue NewLD =
6583       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6584                      BasePtr, MVT::i8, MMO);
6585   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6586
6587   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6588   return DAG.getMergeValues(Ops, dl);
6589 }
6590
6591 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6592   if (Op.getOperand(1).getValueType().isVector())
6593     return LowerVectorStore(Op, DAG);
6594
6595   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6596          "Custom lowering only for i1 stores");
6597
6598   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6599
6600   SDLoc dl(Op);
6601   StoreSDNode *ST = cast<StoreSDNode>(Op);
6602
6603   SDValue Chain = ST->getChain();
6604   SDValue BasePtr = ST->getBasePtr();
6605   SDValue Value = ST->getValue();
6606   MachineMemOperand *MMO = ST->getMemOperand();
6607
6608   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6609                       Value);
6610   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6611 }
6612
6613 // FIXME: Remove this once the ANDI glue bug is fixed:
6614 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6615   assert(Op.getValueType() == MVT::i1 &&
6616          "Custom lowering only for i1 results");
6617
6618   SDLoc DL(Op);
6619   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6620                      Op.getOperand(0));
6621 }
6622
6623 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
6624 /// possible.
6625 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
6626   // Not FP? Not a fsel.
6627   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6628       !Op.getOperand(2).getValueType().isFloatingPoint())
6629     return Op;
6630
6631   // We might be able to do better than this under some circumstances, but in
6632   // general, fsel-based lowering of select is a finite-math-only optimization.
6633   // For more information, see section F.3 of the 2.06 ISA specification.
6634   if (!DAG.getTarget().Options.NoInfsFPMath ||
6635       !DAG.getTarget().Options.NoNaNsFPMath)
6636     return Op;
6637   // TODO: Propagate flags from the select rather than global settings.
6638   SDNodeFlags Flags;
6639   Flags.setNoInfs(true);
6640   Flags.setNoNaNs(true);
6641
6642   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6643
6644   EVT ResVT = Op.getValueType();
6645   EVT CmpVT = Op.getOperand(0).getValueType();
6646   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6647   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
6648   SDLoc dl(Op);
6649
6650   // If the RHS of the comparison is a 0.0, we don't need to do the
6651   // subtraction at all.
6652   SDValue Sel1;
6653   if (isFloatingPointZero(RHS))
6654     switch (CC) {
6655     default: break;       // SETUO etc aren't handled by fsel.
6656     case ISD::SETNE:
6657       std::swap(TV, FV);
6658       LLVM_FALLTHROUGH;
6659     case ISD::SETEQ:
6660       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6661         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6662       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6663       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6664         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6665       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6666                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6667     case ISD::SETULT:
6668     case ISD::SETLT:
6669       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6670       LLVM_FALLTHROUGH;
6671     case ISD::SETOGE:
6672     case ISD::SETGE:
6673       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6674         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6675       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6676     case ISD::SETUGT:
6677     case ISD::SETGT:
6678       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6679       LLVM_FALLTHROUGH;
6680     case ISD::SETOLE:
6681     case ISD::SETLE:
6682       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6683         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6684       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6685                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6686     }
6687
6688   SDValue Cmp;
6689   switch (CC) {
6690   default: break;       // SETUO etc aren't handled by fsel.
6691   case ISD::SETNE:
6692     std::swap(TV, FV);
6693     LLVM_FALLTHROUGH;
6694   case ISD::SETEQ:
6695     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6696     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6697       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6698     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6699     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6700       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6701     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6702                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6703   case ISD::SETULT:
6704   case ISD::SETLT:
6705     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6706     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6707       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6708     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6709   case ISD::SETOGE:
6710   case ISD::SETGE:
6711     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6712     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6713       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6714     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6715   case ISD::SETUGT:
6716   case ISD::SETGT:
6717     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6718     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6719       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6720     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6721   case ISD::SETOLE:
6722   case ISD::SETLE:
6723     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6724     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6725       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6726     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6727   }
6728   return Op;
6729 }
6730
6731 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6732                                                SelectionDAG &DAG,
6733                                                const SDLoc &dl) const {
6734   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6735   SDValue Src = Op.getOperand(0);
6736   if (Src.getValueType() == MVT::f32)
6737     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6738
6739   SDValue Tmp;
6740   switch (Op.getSimpleValueType().SimpleTy) {
6741   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6742   case MVT::i32:
6743     Tmp = DAG.getNode(
6744         Op.getOpcode() == ISD::FP_TO_SINT
6745             ? PPCISD::FCTIWZ
6746             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6747         dl, MVT::f64, Src);
6748     break;
6749   case MVT::i64:
6750     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6751            "i64 FP_TO_UINT is supported only with FPCVT");
6752     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6753                                                         PPCISD::FCTIDUZ,
6754                       dl, MVT::f64, Src);
6755     break;
6756   }
6757
6758   // Convert the FP value to an int value through memory.
6759   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6760     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6761   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6762   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6763   MachinePointerInfo MPI =
6764       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6765
6766   // Emit a store to the stack slot.
6767   SDValue Chain;
6768   if (i32Stack) {
6769     MachineFunction &MF = DAG.getMachineFunction();
6770     MachineMemOperand *MMO =
6771       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6772     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6773     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6774               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6775   } else
6776     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
6777
6778   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
6779   // add in a bias on big endian.
6780   if (Op.getValueType() == MVT::i32 && !i32Stack) {
6781     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6782                         DAG.getConstant(4, dl, FIPtr.getValueType()));
6783     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6784   }
6785
6786   RLI.Chain = Chain;
6787   RLI.Ptr = FIPtr;
6788   RLI.MPI = MPI;
6789 }
6790
6791 /// \brief Custom lowers floating point to integer conversions to use
6792 /// the direct move instructions available in ISA 2.07 to avoid the
6793 /// need for load/store combinations.
6794 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6795                                                     SelectionDAG &DAG,
6796                                                     const SDLoc &dl) const {
6797   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6798   SDValue Src = Op.getOperand(0);
6799
6800   if (Src.getValueType() == MVT::f32)
6801     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6802
6803   SDValue Tmp;
6804   switch (Op.getSimpleValueType().SimpleTy) {
6805   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6806   case MVT::i32:
6807     Tmp = DAG.getNode(
6808         Op.getOpcode() == ISD::FP_TO_SINT
6809             ? PPCISD::FCTIWZ
6810             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6811         dl, MVT::f64, Src);
6812     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6813     break;
6814   case MVT::i64:
6815     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6816            "i64 FP_TO_UINT is supported only with FPCVT");
6817     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6818                                                         PPCISD::FCTIDUZ,
6819                       dl, MVT::f64, Src);
6820     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6821     break;
6822   }
6823   return Tmp;
6824 }
6825
6826 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6827                                           const SDLoc &dl) const {
6828   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6829     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6830
6831   ReuseLoadInfo RLI;
6832   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6833
6834   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6835                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6836 }
6837
6838 // We're trying to insert a regular store, S, and then a load, L. If the
6839 // incoming value, O, is a load, we might just be able to have our load use the
6840 // address used by O. However, we don't know if anything else will store to
6841 // that address before we can load from it. To prevent this situation, we need
6842 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6843 // the same chain operand as O, we create a token factor from the chain results
6844 // of O and L, and we replace all uses of O's chain result with that token
6845 // factor (see spliceIntoChain below for this last part).
6846 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6847                                             ReuseLoadInfo &RLI,
6848                                             SelectionDAG &DAG,
6849                                             ISD::LoadExtType ET) const {
6850   SDLoc dl(Op);
6851   if (ET == ISD::NON_EXTLOAD &&
6852       (Op.getOpcode() == ISD::FP_TO_UINT ||
6853        Op.getOpcode() == ISD::FP_TO_SINT) &&
6854       isOperationLegalOrCustom(Op.getOpcode(),
6855                                Op.getOperand(0).getValueType())) {
6856
6857     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6858     return true;
6859   }
6860
6861   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6862   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6863       LD->isNonTemporal())
6864     return false;
6865   if (LD->getMemoryVT() != MemVT)
6866     return false;
6867
6868   RLI.Ptr = LD->getBasePtr();
6869   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6870     assert(LD->getAddressingMode() == ISD::PRE_INC &&
6871            "Non-pre-inc AM on PPC?");
6872     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6873                           LD->getOffset());
6874   }
6875
6876   RLI.Chain = LD->getChain();
6877   RLI.MPI = LD->getPointerInfo();
6878   RLI.IsDereferenceable = LD->isDereferenceable();
6879   RLI.IsInvariant = LD->isInvariant();
6880   RLI.Alignment = LD->getAlignment();
6881   RLI.AAInfo = LD->getAAInfo();
6882   RLI.Ranges = LD->getRanges();
6883
6884   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6885   return true;
6886 }
6887
6888 // Given the head of the old chain, ResChain, insert a token factor containing
6889 // it and NewResChain, and make users of ResChain now be users of that token
6890 // factor.
6891 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
6892 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6893                                         SDValue NewResChain,
6894                                         SelectionDAG &DAG) const {
6895   if (!ResChain)
6896     return;
6897
6898   SDLoc dl(NewResChain);
6899
6900   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6901                            NewResChain, DAG.getUNDEF(MVT::Other));
6902   assert(TF.getNode() != NewResChain.getNode() &&
6903          "A new TF really is required here");
6904
6905   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6906   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6907 }
6908
6909 /// \brief Analyze profitability of direct move
6910 /// prefer float load to int load plus direct move
6911 /// when there is no integer use of int load
6912 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
6913   SDNode *Origin = Op.getOperand(0).getNode();
6914   if (Origin->getOpcode() != ISD::LOAD)
6915     return true;
6916
6917   // If there is no LXSIBZX/LXSIHZX, like Power8,
6918   // prefer direct move if the memory size is 1 or 2 bytes.
6919   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
6920   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6921     return true;
6922
6923   for (SDNode::use_iterator UI = Origin->use_begin(),
6924                             UE = Origin->use_end();
6925        UI != UE; ++UI) {
6926
6927     // Only look at the users of the loaded value.
6928     if (UI.getUse().get().getResNo() != 0)
6929       continue;
6930
6931     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6932         UI->getOpcode() != ISD::UINT_TO_FP)
6933       return true;
6934   }
6935
6936   return false;
6937 }
6938
6939 /// \brief Custom lowers integer to floating point conversions to use
6940 /// the direct move instructions available in ISA 2.07 to avoid the
6941 /// need for load/store combinations.
6942 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6943                                                     SelectionDAG &DAG,
6944                                                     const SDLoc &dl) const {
6945   assert((Op.getValueType() == MVT::f32 ||
6946           Op.getValueType() == MVT::f64) &&
6947          "Invalid floating point type as target of conversion");
6948   assert(Subtarget.hasFPCVT() &&
6949          "Int to FP conversions with direct moves require FPCVT");
6950   SDValue FP;
6951   SDValue Src = Op.getOperand(0);
6952   bool SinglePrec = Op.getValueType() == MVT::f32;
6953   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6954   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6955   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6956                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6957
6958   if (WordInt) {
6959     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6960                      dl, MVT::f64, Src);
6961     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6962   }
6963   else {
6964     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6965     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6966   }
6967
6968   return FP;
6969 }
6970
6971 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6972                                           SelectionDAG &DAG) const {
6973   SDLoc dl(Op);
6974
6975   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6976     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6977       return SDValue();
6978
6979     SDValue Value = Op.getOperand(0);
6980     // The values are now known to be -1 (false) or 1 (true). To convert this
6981     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6982     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
6983     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6984
6985     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6986
6987     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6988
6989     if (Op.getValueType() != MVT::v4f64)
6990       Value = DAG.getNode(ISD::FP_ROUND, dl,
6991                           Op.getValueType(), Value,
6992                           DAG.getIntPtrConstant(1, dl));
6993     return Value;
6994   }
6995
6996   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6997   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6998     return SDValue();
6999
7000   if (Op.getOperand(0).getValueType() == MVT::i1)
7001     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7002                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
7003                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
7004
7005   // If we have direct moves, we can do all the conversion, skip the store/load
7006   // however, without FPCVT we can't do most conversions.
7007   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7008       Subtarget.isPPC64() && Subtarget.hasFPCVT())
7009     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7010
7011   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7012          "UINT_TO_FP is supported only with FPCVT");
7013
7014   // If we have FCFIDS, then use it when converting to single-precision.
7015   // Otherwise, convert to double-precision and then round.
7016   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7017                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7018                                                             : PPCISD::FCFIDS)
7019                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7020                                                             : PPCISD::FCFID);
7021   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7022                   ? MVT::f32
7023                   : MVT::f64;
7024
7025   if (Op.getOperand(0).getValueType() == MVT::i64) {
7026     SDValue SINT = Op.getOperand(0);
7027     // When converting to single-precision, we actually need to convert
7028     // to double-precision first and then round to single-precision.
7029     // To avoid double-rounding effects during that operation, we have
7030     // to prepare the input operand.  Bits that might be truncated when
7031     // converting to double-precision are replaced by a bit that won't
7032     // be lost at this stage, but is below the single-precision rounding
7033     // position.
7034     //
7035     // However, if -enable-unsafe-fp-math is in effect, accept double
7036     // rounding to avoid the extra overhead.
7037     if (Op.getValueType() == MVT::f32 &&
7038         !Subtarget.hasFPCVT() &&
7039         !DAG.getTarget().Options.UnsafeFPMath) {
7040
7041       // Twiddle input to make sure the low 11 bits are zero.  (If this
7042       // is the case, we are guaranteed the value will fit into the 53 bit
7043       // mantissa of an IEEE double-precision value without rounding.)
7044       // If any of those low 11 bits were not zero originally, make sure
7045       // bit 12 (value 2048) is set instead, so that the final rounding
7046       // to single-precision gets the correct result.
7047       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7048                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
7049       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7050                           Round, DAG.getConstant(2047, dl, MVT::i64));
7051       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7052       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7053                           Round, DAG.getConstant(-2048, dl, MVT::i64));
7054
7055       // However, we cannot use that value unconditionally: if the magnitude
7056       // of the input value is small, the bit-twiddling we did above might
7057       // end up visibly changing the output.  Fortunately, in that case, we
7058       // don't need to twiddle bits since the original input will convert
7059       // exactly to double-precision floating-point already.  Therefore,
7060       // construct a conditional to use the original value if the top 11
7061       // bits are all sign-bit copies, and use the rounded value computed
7062       // above otherwise.
7063       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7064                                  SINT, DAG.getConstant(53, dl, MVT::i32));
7065       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7066                          Cond, DAG.getConstant(1, dl, MVT::i64));
7067       Cond = DAG.getSetCC(dl, MVT::i32,
7068                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7069
7070       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7071     }
7072
7073     ReuseLoadInfo RLI;
7074     SDValue Bits;
7075
7076     MachineFunction &MF = DAG.getMachineFunction();
7077     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7078       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7079                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7080       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7081     } else if (Subtarget.hasLFIWAX() &&
7082                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7083       MachineMemOperand *MMO =
7084         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7085                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7086       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7087       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7088                                      DAG.getVTList(MVT::f64, MVT::Other),
7089                                      Ops, MVT::i32, MMO);
7090       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7091     } else if (Subtarget.hasFPCVT() &&
7092                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7093       MachineMemOperand *MMO =
7094         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7095                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7096       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7097       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7098                                      DAG.getVTList(MVT::f64, MVT::Other),
7099                                      Ops, MVT::i32, MMO);
7100       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7101     } else if (((Subtarget.hasLFIWAX() &&
7102                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7103                 (Subtarget.hasFPCVT() &&
7104                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7105                SINT.getOperand(0).getValueType() == MVT::i32) {
7106       MachineFrameInfo &MFI = MF.getFrameInfo();
7107       EVT PtrVT = getPointerTy(DAG.getDataLayout());
7108
7109       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7110       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7111
7112       SDValue Store =
7113           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7114                        MachinePointerInfo::getFixedStack(
7115                            DAG.getMachineFunction(), FrameIdx));
7116
7117       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7118              "Expected an i32 store");
7119
7120       RLI.Ptr = FIdx;
7121       RLI.Chain = Store;
7122       RLI.MPI =
7123           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7124       RLI.Alignment = 4;
7125
7126       MachineMemOperand *MMO =
7127         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7128                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7129       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7130       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7131                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
7132                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
7133                                      Ops, MVT::i32, MMO);
7134     } else
7135       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7136
7137     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7138
7139     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7140       FP = DAG.getNode(ISD::FP_ROUND, dl,
7141                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7142     return FP;
7143   }
7144
7145   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7146          "Unhandled INT_TO_FP type in custom expander!");
7147   // Since we only generate this in 64-bit mode, we can take advantage of
7148   // 64-bit registers.  In particular, sign extend the input value into the
7149   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7150   // then lfd it and fcfid it.
7151   MachineFunction &MF = DAG.getMachineFunction();
7152   MachineFrameInfo &MFI = MF.getFrameInfo();
7153   EVT PtrVT = getPointerTy(MF.getDataLayout());
7154
7155   SDValue Ld;
7156   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7157     ReuseLoadInfo RLI;
7158     bool ReusingLoad;
7159     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7160                                             DAG))) {
7161       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7162       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7163
7164       SDValue Store =
7165           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7166                        MachinePointerInfo::getFixedStack(
7167                            DAG.getMachineFunction(), FrameIdx));
7168
7169       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7170              "Expected an i32 store");
7171
7172       RLI.Ptr = FIdx;
7173       RLI.Chain = Store;
7174       RLI.MPI =
7175           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7176       RLI.Alignment = 4;
7177     }
7178
7179     MachineMemOperand *MMO =
7180       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7181                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7182     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7183     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7184                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
7185                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
7186                                  Ops, MVT::i32, MMO);
7187     if (ReusingLoad)
7188       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7189   } else {
7190     assert(Subtarget.isPPC64() &&
7191            "i32->FP without LFIWAX supported only on PPC64");
7192
7193     int FrameIdx = MFI.CreateStackObject(8, 8, false);
7194     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7195
7196     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7197                                 Op.getOperand(0));
7198
7199     // STD the extended value into the stack slot.
7200     SDValue Store = DAG.getStore(
7201         DAG.getEntryNode(), dl, Ext64, FIdx,
7202         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7203
7204     // Load the value as a double.
7205     Ld = DAG.getLoad(
7206         MVT::f64, dl, Store, FIdx,
7207         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7208   }
7209
7210   // FCFID it and return it.
7211   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7212   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7213     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7214                      DAG.getIntPtrConstant(0, dl));
7215   return FP;
7216 }
7217
7218 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7219                                             SelectionDAG &DAG) const {
7220   SDLoc dl(Op);
7221   /*
7222    The rounding mode is in bits 30:31 of FPSR, and has the following
7223    settings:
7224      00 Round to nearest
7225      01 Round to 0
7226      10 Round to +inf
7227      11 Round to -inf
7228
7229   FLT_ROUNDS, on the other hand, expects the following:
7230     -1 Undefined
7231      0 Round to 0
7232      1 Round to nearest
7233      2 Round to +inf
7234      3 Round to -inf
7235
7236   To perform the conversion, we do:
7237     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7238   */
7239
7240   MachineFunction &MF = DAG.getMachineFunction();
7241   EVT VT = Op.getValueType();
7242   EVT PtrVT = getPointerTy(MF.getDataLayout());
7243
7244   // Save FP Control Word to register
7245   EVT NodeTys[] = {
7246     MVT::f64,    // return register
7247     MVT::Glue    // unused in this context
7248   };
7249   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7250
7251   // Save FP register to stack slot
7252   int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7253   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7254   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7255                                MachinePointerInfo());
7256
7257   // Load FP Control Word from low 32 bits of stack slot.
7258   SDValue Four = DAG.getConstant(4, dl, PtrVT);
7259   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7260   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7261
7262   // Transform as necessary
7263   SDValue CWD1 =
7264     DAG.getNode(ISD::AND, dl, MVT::i32,
7265                 CWD, DAG.getConstant(3, dl, MVT::i32));
7266   SDValue CWD2 =
7267     DAG.getNode(ISD::SRL, dl, MVT::i32,
7268                 DAG.getNode(ISD::AND, dl, MVT::i32,
7269                             DAG.getNode(ISD::XOR, dl, MVT::i32,
7270                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
7271                             DAG.getConstant(3, dl, MVT::i32)),
7272                 DAG.getConstant(1, dl, MVT::i32));
7273
7274   SDValue RetVal =
7275     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7276
7277   return DAG.getNode((VT.getSizeInBits() < 16 ?
7278                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7279 }
7280
7281 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7282   EVT VT = Op.getValueType();
7283   unsigned BitWidth = VT.getSizeInBits();
7284   SDLoc dl(Op);
7285   assert(Op.getNumOperands() == 3 &&
7286          VT == Op.getOperand(1).getValueType() &&
7287          "Unexpected SHL!");
7288
7289   // Expand into a bunch of logical ops.  Note that these ops
7290   // depend on the PPC behavior for oversized shift amounts.
7291   SDValue Lo = Op.getOperand(0);
7292   SDValue Hi = Op.getOperand(1);
7293   SDValue Amt = Op.getOperand(2);
7294   EVT AmtVT = Amt.getValueType();
7295
7296   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7297                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7298   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7299   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7300   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7301   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7302                              DAG.getConstant(-BitWidth, dl, AmtVT));
7303   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7304   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7305   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7306   SDValue OutOps[] = { OutLo, OutHi };
7307   return DAG.getMergeValues(OutOps, dl);
7308 }
7309
7310 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7311   EVT VT = Op.getValueType();
7312   SDLoc dl(Op);
7313   unsigned BitWidth = VT.getSizeInBits();
7314   assert(Op.getNumOperands() == 3 &&
7315          VT == Op.getOperand(1).getValueType() &&
7316          "Unexpected SRL!");
7317
7318   // Expand into a bunch of logical ops.  Note that these ops
7319   // depend on the PPC behavior for oversized shift amounts.
7320   SDValue Lo = Op.getOperand(0);
7321   SDValue Hi = Op.getOperand(1);
7322   SDValue Amt = Op.getOperand(2);
7323   EVT AmtVT = Amt.getValueType();
7324
7325   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7326                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7327   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7328   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7329   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7330   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7331                              DAG.getConstant(-BitWidth, dl, AmtVT));
7332   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7333   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7334   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7335   SDValue OutOps[] = { OutLo, OutHi };
7336   return DAG.getMergeValues(OutOps, dl);
7337 }
7338
7339 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7340   SDLoc dl(Op);
7341   EVT VT = Op.getValueType();
7342   unsigned BitWidth = VT.getSizeInBits();
7343   assert(Op.getNumOperands() == 3 &&
7344          VT == Op.getOperand(1).getValueType() &&
7345          "Unexpected SRA!");
7346
7347   // Expand into a bunch of logical ops, followed by a select_cc.
7348   SDValue Lo = Op.getOperand(0);
7349   SDValue Hi = Op.getOperand(1);
7350   SDValue Amt = Op.getOperand(2);
7351   EVT AmtVT = Amt.getValueType();
7352
7353   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7354                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7355   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7356   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7357   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7358   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7359                              DAG.getConstant(-BitWidth, dl, AmtVT));
7360   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7361   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7362   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7363                                   Tmp4, Tmp6, ISD::SETLE);
7364   SDValue OutOps[] = { OutLo, OutHi };
7365   return DAG.getMergeValues(OutOps, dl);
7366 }
7367
7368 //===----------------------------------------------------------------------===//
7369 // Vector related lowering.
7370 //
7371
7372 /// BuildSplatI - Build a canonical splati of Val with an element size of
7373 /// SplatSize.  Cast the result to VT.
7374 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7375                            SelectionDAG &DAG, const SDLoc &dl) {
7376   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7377
7378   static const MVT VTys[] = { // canonical VT to use for each size.
7379     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7380   };
7381
7382   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7383
7384   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7385   if (Val == -1)
7386     SplatSize = 1;
7387
7388   EVT CanonicalVT = VTys[SplatSize-1];
7389
7390   // Build a canonical splat for this value.
7391   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7392 }
7393
7394 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7395 /// specified intrinsic ID.
7396 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7397                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7398   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7399   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7400                      DAG.getConstant(IID, dl, MVT::i32), Op);
7401 }
7402
7403 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7404 /// specified intrinsic ID.
7405 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7406                                 SelectionDAG &DAG, const SDLoc &dl,
7407                                 EVT DestVT = MVT::Other) {
7408   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7409   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7410                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7411 }
7412
7413 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7414 /// specified intrinsic ID.
7415 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7416                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7417                                 EVT DestVT = MVT::Other) {
7418   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7419   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7420                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7421 }
7422
7423 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7424 /// amount.  The result has the specified value type.
7425 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7426                            SelectionDAG &DAG, const SDLoc &dl) {
7427   // Force LHS/RHS to be the right type.
7428   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7429   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7430
7431   int Ops[16];
7432   for (unsigned i = 0; i != 16; ++i)
7433     Ops[i] = i + Amt;
7434   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7435   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7436 }
7437
7438 /// Do we have an efficient pattern in a .td file for this node?
7439 ///
7440 /// \param V - pointer to the BuildVectorSDNode being matched
7441 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7442 ///
7443 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7444 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7445 /// the opposite is true (expansion is beneficial) are:
7446 /// - The node builds a vector out of integers that are not 32 or 64-bits
7447 /// - The node builds a vector out of constants
7448 /// - The node is a "load-and-splat"
7449 /// In all other cases, we will choose to keep the BUILD_VECTOR.
7450 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7451                                             bool HasDirectMove) {
7452   EVT VecVT = V->getValueType(0);
7453   bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
7454     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7455   if (!RightType)
7456     return false;
7457
7458   bool IsSplat = true;
7459   bool IsLoad = false;
7460   SDValue Op0 = V->getOperand(0);
7461
7462   // This function is called in a block that confirms the node is not a constant
7463   // splat. So a constant BUILD_VECTOR here means the vector is built out of
7464   // different constants.
7465   if (V->isConstant())
7466     return false;
7467   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7468     if (V->getOperand(i).isUndef())
7469       return false;
7470     // We want to expand nodes that represent load-and-splat even if the
7471     // loaded value is a floating point truncation or conversion to int.
7472     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7473         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7474          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7475         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7476          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7477         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7478          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7479       IsLoad = true;
7480     // If the operands are different or the input is not a load and has more
7481     // uses than just this BV node, then it isn't a splat.
7482     if (V->getOperand(i) != Op0 ||
7483         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7484       IsSplat = false;
7485   }
7486   return !(IsSplat && IsLoad);
7487 }
7488
7489 // If this is a case we can't handle, return null and let the default
7490 // expansion code take care of it.  If we CAN select this case, and if it
7491 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7492 // this case more efficiently than a constant pool load, lower it to the
7493 // sequence of ops that should be used.
7494 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7495                                              SelectionDAG &DAG) const {
7496   SDLoc dl(Op);
7497   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7498   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7499
7500   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7501     // We first build an i32 vector, load it into a QPX register,
7502     // then convert it to a floating-point vector and compare it
7503     // to a zero vector to get the boolean result.
7504     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7505     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7506     MachinePointerInfo PtrInfo =
7507         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7508     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7509     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7510
7511     assert(BVN->getNumOperands() == 4 &&
7512       "BUILD_VECTOR for v4i1 does not have 4 operands");
7513
7514     bool IsConst = true;
7515     for (unsigned i = 0; i < 4; ++i) {
7516       if (BVN->getOperand(i).isUndef()) continue;
7517       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7518         IsConst = false;
7519         break;
7520       }
7521     }
7522
7523     if (IsConst) {
7524       Constant *One =
7525         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7526       Constant *NegOne =
7527         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7528
7529       Constant *CV[4];
7530       for (unsigned i = 0; i < 4; ++i) {
7531         if (BVN->getOperand(i).isUndef())
7532           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7533         else if (isNullConstant(BVN->getOperand(i)))
7534           CV[i] = NegOne;
7535         else
7536           CV[i] = One;
7537       }
7538
7539       Constant *CP = ConstantVector::get(CV);
7540       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7541                                           16 /* alignment */);
7542
7543       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7544       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7545       return DAG.getMemIntrinsicNode(
7546           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7547           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7548     }
7549
7550     SmallVector<SDValue, 4> Stores;
7551     for (unsigned i = 0; i < 4; ++i) {
7552       if (BVN->getOperand(i).isUndef()) continue;
7553
7554       unsigned Offset = 4*i;
7555       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7556       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7557
7558       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7559       if (StoreSize > 4) {
7560         Stores.push_back(
7561             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7562                               PtrInfo.getWithOffset(Offset), MVT::i32));
7563       } else {
7564         SDValue StoreValue = BVN->getOperand(i);
7565         if (StoreSize < 4)
7566           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7567
7568         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7569                                       PtrInfo.getWithOffset(Offset)));
7570       }
7571     }
7572
7573     SDValue StoreChain;
7574     if (!Stores.empty())
7575       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7576     else
7577       StoreChain = DAG.getEntryNode();
7578
7579     // Now load from v4i32 into the QPX register; this will extend it to
7580     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7581     // is typed as v4f64 because the QPX register integer states are not
7582     // explicitly represented.
7583
7584     SDValue Ops[] = {StoreChain,
7585                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7586                      FIdx};
7587     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7588
7589     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7590       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7591     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7592       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7593       LoadedVect);
7594
7595     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7596
7597     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7598   }
7599
7600   // All other QPX vectors are handled by generic code.
7601   if (Subtarget.hasQPX())
7602     return SDValue();
7603
7604   // Check if this is a splat of a constant value.
7605   APInt APSplatBits, APSplatUndef;
7606   unsigned SplatBitSize;
7607   bool HasAnyUndefs;
7608   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7609                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7610       SplatBitSize > 32) {
7611     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
7612     // lowered to VSX instructions under certain conditions.
7613     // Without VSX, there is no pattern more efficient than expanding the node.
7614     if (Subtarget.hasVSX() &&
7615         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
7616       return Op;
7617     return SDValue();
7618   }
7619
7620   unsigned SplatBits = APSplatBits.getZExtValue();
7621   unsigned SplatUndef = APSplatUndef.getZExtValue();
7622   unsigned SplatSize = SplatBitSize / 8;
7623
7624   // First, handle single instruction cases.
7625
7626   // All zeros?
7627   if (SplatBits == 0) {
7628     // Canonicalize all zero vectors to be v4i32.
7629     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7630       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7631       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7632     }
7633     return Op;
7634   }
7635
7636   // We have XXSPLTIB for constant splats one byte wide
7637   if (Subtarget.hasP9Vector() && SplatSize == 1) {
7638     // This is a splat of 1-byte elements with some elements potentially undef.
7639     // Rather than trying to match undef in the SDAG patterns, ensure that all
7640     // elements are the same constant.
7641     if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7642       SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
7643                                                        dl, MVT::i32));
7644       SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
7645       if (Op.getValueType() != MVT::v16i8)
7646         return DAG.getBitcast(Op.getValueType(), NewBV);
7647       return NewBV;
7648     }
7649     return Op;
7650   }
7651
7652   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7653   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7654                     (32-SplatBitSize));
7655   if (SextVal >= -16 && SextVal <= 15)
7656     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7657
7658   // Two instruction sequences.
7659
7660   // If this value is in the range [-32,30] and is even, use:
7661   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7662   // If this value is in the range [17,31] and is odd, use:
7663   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7664   // If this value is in the range [-31,-17] and is odd, use:
7665   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7666   // Note the last two are three-instruction sequences.
7667   if (SextVal >= -32 && SextVal <= 31) {
7668     // To avoid having these optimizations undone by constant folding,
7669     // we convert to a pseudo that will be expanded later into one of
7670     // the above forms.
7671     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7672     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7673               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7674     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7675     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7676     if (VT == Op.getValueType())
7677       return RetVal;
7678     else
7679       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7680   }
7681
7682   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7683   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7684   // for fneg/fabs.
7685   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7686     // Make -1 and vspltisw -1:
7687     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7688
7689     // Make the VSLW intrinsic, computing 0x8000_0000.
7690     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7691                                    OnesV, DAG, dl);
7692
7693     // xor by OnesV to invert it.
7694     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7695     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7696   }
7697
7698   // Check to see if this is a wide variety of vsplti*, binop self cases.
7699   static const signed char SplatCsts[] = {
7700     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7701     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7702   };
7703
7704   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7705     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7706     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
7707     int i = SplatCsts[idx];
7708
7709     // Figure out what shift amount will be used by altivec if shifted by i in
7710     // this splat size.
7711     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7712
7713     // vsplti + shl self.
7714     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7715       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7716       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7717         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7718         Intrinsic::ppc_altivec_vslw
7719       };
7720       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7721       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7722     }
7723
7724     // vsplti + srl self.
7725     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7726       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7727       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7728         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7729         Intrinsic::ppc_altivec_vsrw
7730       };
7731       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7732       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7733     }
7734
7735     // vsplti + sra self.
7736     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7737       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7738       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7739         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7740         Intrinsic::ppc_altivec_vsraw
7741       };
7742       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7743       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7744     }
7745
7746     // vsplti + rol self.
7747     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7748                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7749       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7750       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7751         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7752         Intrinsic::ppc_altivec_vrlw
7753       };
7754       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7755       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7756     }
7757
7758     // t = vsplti c, result = vsldoi t, t, 1
7759     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7760       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7761       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7762       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7763     }
7764     // t = vsplti c, result = vsldoi t, t, 2
7765     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7766       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7767       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7768       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7769     }
7770     // t = vsplti c, result = vsldoi t, t, 3
7771     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7772       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7773       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7774       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7775     }
7776   }
7777
7778   return SDValue();
7779 }
7780
7781 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7782 /// the specified operations to build the shuffle.
7783 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7784                                       SDValue RHS, SelectionDAG &DAG,
7785                                       const SDLoc &dl) {
7786   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7787   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7788   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
7789
7790   enum {
7791     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7792     OP_VMRGHW,
7793     OP_VMRGLW,
7794     OP_VSPLTISW0,
7795     OP_VSPLTISW1,
7796     OP_VSPLTISW2,
7797     OP_VSPLTISW3,
7798     OP_VSLDOI4,
7799     OP_VSLDOI8,
7800     OP_VSLDOI12
7801   };
7802
7803   if (OpNum == OP_COPY) {
7804     if (LHSID == (1*9+2)*9+3) return LHS;
7805     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7806     return RHS;
7807   }
7808
7809   SDValue OpLHS, OpRHS;
7810   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7811   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7812
7813   int ShufIdxs[16];
7814   switch (OpNum) {
7815   default: llvm_unreachable("Unknown i32 permute!");
7816   case OP_VMRGHW:
7817     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7818     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7819     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7820     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7821     break;
7822   case OP_VMRGLW:
7823     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7824     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7825     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7826     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7827     break;
7828   case OP_VSPLTISW0:
7829     for (unsigned i = 0; i != 16; ++i)
7830       ShufIdxs[i] = (i&3)+0;
7831     break;
7832   case OP_VSPLTISW1:
7833     for (unsigned i = 0; i != 16; ++i)
7834       ShufIdxs[i] = (i&3)+4;
7835     break;
7836   case OP_VSPLTISW2:
7837     for (unsigned i = 0; i != 16; ++i)
7838       ShufIdxs[i] = (i&3)+8;
7839     break;
7840   case OP_VSPLTISW3:
7841     for (unsigned i = 0; i != 16; ++i)
7842       ShufIdxs[i] = (i&3)+12;
7843     break;
7844   case OP_VSLDOI4:
7845     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7846   case OP_VSLDOI8:
7847     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7848   case OP_VSLDOI12:
7849     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7850   }
7851   EVT VT = OpLHS.getValueType();
7852   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7853   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7854   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7855   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7856 }
7857
7858 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7859 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7860 /// return the code it can be lowered into.  Worst case, it can always be
7861 /// lowered into a vperm.
7862 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7863                                                SelectionDAG &DAG) const {
7864   SDLoc dl(Op);
7865   SDValue V1 = Op.getOperand(0);
7866   SDValue V2 = Op.getOperand(1);
7867   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7868   EVT VT = Op.getValueType();
7869   bool isLittleEndian = Subtarget.isLittleEndian();
7870
7871   unsigned ShiftElts, InsertAtByte;
7872   bool Swap;
7873   if (Subtarget.hasP9Vector() &&
7874       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7875                            isLittleEndian)) {
7876     if (Swap)
7877       std::swap(V1, V2);
7878     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7879     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7880     if (ShiftElts) {
7881       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7882                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
7883       SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7884                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7885       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7886     }
7887     SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7888                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
7889     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7890   }
7891
7892
7893   if (Subtarget.hasVSX() &&
7894       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7895     if (Swap)
7896       std::swap(V1, V2);
7897     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7898     SDValue Conv2 =
7899         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
7900
7901     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
7902                               DAG.getConstant(ShiftElts, dl, MVT::i32));
7903     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
7904   }
7905
7906   if (Subtarget.hasVSX() &&
7907     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7908     if (Swap)
7909       std::swap(V1, V2);
7910     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
7911     SDValue Conv2 =
7912         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
7913
7914     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
7915                               DAG.getConstant(ShiftElts, dl, MVT::i32));
7916     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
7917   }
7918
7919   if (Subtarget.hasP9Vector()) {
7920      if (PPC::isXXBRHShuffleMask(SVOp)) {
7921       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
7922       SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
7923       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
7924     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
7925       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7926       SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
7927       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
7928     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
7929       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
7930       SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
7931       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
7932     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
7933       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
7934       SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
7935       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
7936     }
7937   }
7938
7939   if (Subtarget.hasVSX()) {
7940     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7941       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7942
7943       // If the source for the shuffle is a scalar_to_vector that came from a
7944       // 32-bit load, it will have used LXVWSX so we don't need to splat again.
7945       if (Subtarget.hasP9Vector() &&
7946           ((isLittleEndian && SplatIdx == 3) ||
7947            (!isLittleEndian && SplatIdx == 0))) {
7948         SDValue Src = V1.getOperand(0);
7949         if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
7950             Src.getOperand(0).getOpcode() == ISD::LOAD &&
7951             Src.getOperand(0).hasOneUse())
7952           return V1;
7953       }
7954       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7955       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7956                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
7957       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7958     }
7959
7960     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7961     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7962       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7963       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7964       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7965     }
7966   }
7967
7968   if (Subtarget.hasQPX()) {
7969     if (VT.getVectorNumElements() != 4)
7970       return SDValue();
7971
7972     if (V2.isUndef()) V2 = V1;
7973
7974     int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7975     if (AlignIdx != -1) {
7976       return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7977                          DAG.getConstant(AlignIdx, dl, MVT::i32));
7978     } else if (SVOp->isSplat()) {
7979       int SplatIdx = SVOp->getSplatIndex();
7980       if (SplatIdx >= 4) {
7981         std::swap(V1, V2);
7982         SplatIdx -= 4;
7983       }
7984
7985       return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7986                          DAG.getConstant(SplatIdx, dl, MVT::i32));
7987     }
7988
7989     // Lower this into a qvgpci/qvfperm pair.
7990
7991     // Compute the qvgpci literal
7992     unsigned idx = 0;
7993     for (unsigned i = 0; i < 4; ++i) {
7994       int m = SVOp->getMaskElt(i);
7995       unsigned mm = m >= 0 ? (unsigned) m : i;
7996       idx |= mm << (3-i)*3;
7997     }
7998
7999     SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8000                              DAG.getConstant(idx, dl, MVT::i32));
8001     return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8002   }
8003
8004   // Cases that are handled by instructions that take permute immediates
8005   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8006   // selected by the instruction selector.
8007   if (V2.isUndef()) {
8008     if (PPC::isSplatShuffleMask(SVOp, 1) ||
8009         PPC::isSplatShuffleMask(SVOp, 2) ||
8010         PPC::isSplatShuffleMask(SVOp, 4) ||
8011         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8012         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8013         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8014         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8015         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8016         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8017         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8018         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8019         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8020         (Subtarget.hasP8Altivec() && (
8021          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8022          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8023          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8024       return Op;
8025     }
8026   }
8027
8028   // Altivec has a variety of "shuffle immediates" that take two vector inputs
8029   // and produce a fixed permutation.  If any of these match, do not lower to
8030   // VPERM.
8031   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8032   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8033       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8034       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8035       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8036       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8037       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8038       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8039       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8040       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8041       (Subtarget.hasP8Altivec() && (
8042        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8043        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8044        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8045     return Op;
8046
8047   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
8048   // perfect shuffle table to emit an optimal matching sequence.
8049   ArrayRef<int> PermMask = SVOp->getMask();
8050
8051   unsigned PFIndexes[4];
8052   bool isFourElementShuffle = true;
8053   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8054     unsigned EltNo = 8;   // Start out undef.
8055     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
8056       if (PermMask[i*4+j] < 0)
8057         continue;   // Undef, ignore it.
8058
8059       unsigned ByteSource = PermMask[i*4+j];
8060       if ((ByteSource & 3) != j) {
8061         isFourElementShuffle = false;
8062         break;
8063       }
8064
8065       if (EltNo == 8) {
8066         EltNo = ByteSource/4;
8067       } else if (EltNo != ByteSource/4) {
8068         isFourElementShuffle = false;
8069         break;
8070       }
8071     }
8072     PFIndexes[i] = EltNo;
8073   }
8074
8075   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8076   // perfect shuffle vector to determine if it is cost effective to do this as
8077   // discrete instructions, or whether we should use a vperm.
8078   // For now, we skip this for little endian until such time as we have a
8079   // little-endian perfect shuffle table.
8080   if (isFourElementShuffle && !isLittleEndian) {
8081     // Compute the index in the perfect shuffle table.
8082     unsigned PFTableIndex =
8083       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8084
8085     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8086     unsigned Cost  = (PFEntry >> 30);
8087
8088     // Determining when to avoid vperm is tricky.  Many things affect the cost
8089     // of vperm, particularly how many times the perm mask needs to be computed.
8090     // For example, if the perm mask can be hoisted out of a loop or is already
8091     // used (perhaps because there are multiple permutes with the same shuffle
8092     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
8093     // the loop requires an extra register.
8094     //
8095     // As a compromise, we only emit discrete instructions if the shuffle can be
8096     // generated in 3 or fewer operations.  When we have loop information
8097     // available, if this block is within a loop, we should avoid using vperm
8098     // for 3-operation perms and use a constant pool load instead.
8099     if (Cost < 3)
8100       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8101   }
8102
8103   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8104   // vector that will get spilled to the constant pool.
8105   if (V2.isUndef()) V2 = V1;
8106
8107   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8108   // that it is in input element units, not in bytes.  Convert now.
8109
8110   // For little endian, the order of the input vectors is reversed, and
8111   // the permutation mask is complemented with respect to 31.  This is
8112   // necessary to produce proper semantics with the big-endian-biased vperm
8113   // instruction.
8114   EVT EltVT = V1.getValueType().getVectorElementType();
8115   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
8116
8117   SmallVector<SDValue, 16> ResultMask;
8118   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
8119     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
8120
8121     for (unsigned j = 0; j != BytesPerElement; ++j)
8122       if (isLittleEndian)
8123         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
8124                                              dl, MVT::i32));
8125       else
8126         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
8127                                              MVT::i32));
8128   }
8129
8130   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
8131   if (isLittleEndian)
8132     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8133                        V2, V1, VPermMask);
8134   else
8135     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8136                        V1, V2, VPermMask);
8137 }
8138
8139 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
8140 /// vector comparison.  If it is, return true and fill in Opc/isDot with
8141 /// information about the intrinsic.
8142 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
8143                                  bool &isDot, const PPCSubtarget &Subtarget) {
8144   unsigned IntrinsicID =
8145       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
8146   CompareOpc = -1;
8147   isDot = false;
8148   switch (IntrinsicID) {
8149   default:
8150     return false;
8151   // Comparison predicates.
8152   case Intrinsic::ppc_altivec_vcmpbfp_p:
8153     CompareOpc = 966;
8154     isDot = true;
8155     break;
8156   case Intrinsic::ppc_altivec_vcmpeqfp_p:
8157     CompareOpc = 198;
8158     isDot = true;
8159     break;
8160   case Intrinsic::ppc_altivec_vcmpequb_p:
8161     CompareOpc = 6;
8162     isDot = true;
8163     break;
8164   case Intrinsic::ppc_altivec_vcmpequh_p:
8165     CompareOpc = 70;
8166     isDot = true;
8167     break;
8168   case Intrinsic::ppc_altivec_vcmpequw_p:
8169     CompareOpc = 134;
8170     isDot = true;
8171     break;
8172   case Intrinsic::ppc_altivec_vcmpequd_p:
8173     if (Subtarget.hasP8Altivec()) {
8174       CompareOpc = 199;
8175       isDot = true;
8176     } else
8177       return false;
8178     break;
8179   case Intrinsic::ppc_altivec_vcmpneb_p:
8180   case Intrinsic::ppc_altivec_vcmpneh_p:
8181   case Intrinsic::ppc_altivec_vcmpnew_p:
8182   case Intrinsic::ppc_altivec_vcmpnezb_p:
8183   case Intrinsic::ppc_altivec_vcmpnezh_p:
8184   case Intrinsic::ppc_altivec_vcmpnezw_p:
8185     if (Subtarget.hasP9Altivec()) {
8186       switch (IntrinsicID) {
8187       default:
8188         llvm_unreachable("Unknown comparison intrinsic.");
8189       case Intrinsic::ppc_altivec_vcmpneb_p:
8190         CompareOpc = 7;
8191         break;
8192       case Intrinsic::ppc_altivec_vcmpneh_p:
8193         CompareOpc = 71;
8194         break;
8195       case Intrinsic::ppc_altivec_vcmpnew_p:
8196         CompareOpc = 135;
8197         break;
8198       case Intrinsic::ppc_altivec_vcmpnezb_p:
8199         CompareOpc = 263;
8200         break;
8201       case Intrinsic::ppc_altivec_vcmpnezh_p:
8202         CompareOpc = 327;
8203         break;
8204       case Intrinsic::ppc_altivec_vcmpnezw_p:
8205         CompareOpc = 391;
8206         break;
8207       }
8208       isDot = true;
8209     } else
8210       return false;
8211     break;
8212   case Intrinsic::ppc_altivec_vcmpgefp_p:
8213     CompareOpc = 454;
8214     isDot = true;
8215     break;
8216   case Intrinsic::ppc_altivec_vcmpgtfp_p:
8217     CompareOpc = 710;
8218     isDot = true;
8219     break;
8220   case Intrinsic::ppc_altivec_vcmpgtsb_p:
8221     CompareOpc = 774;
8222     isDot = true;
8223     break;
8224   case Intrinsic::ppc_altivec_vcmpgtsh_p:
8225     CompareOpc = 838;
8226     isDot = true;
8227     break;
8228   case Intrinsic::ppc_altivec_vcmpgtsw_p:
8229     CompareOpc = 902;
8230     isDot = true;
8231     break;
8232   case Intrinsic::ppc_altivec_vcmpgtsd_p:
8233     if (Subtarget.hasP8Altivec()) {
8234       CompareOpc = 967;
8235       isDot = true;
8236     } else
8237       return false;
8238     break;
8239   case Intrinsic::ppc_altivec_vcmpgtub_p:
8240     CompareOpc = 518;
8241     isDot = true;
8242     break;
8243   case Intrinsic::ppc_altivec_vcmpgtuh_p:
8244     CompareOpc = 582;
8245     isDot = true;
8246     break;
8247   case Intrinsic::ppc_altivec_vcmpgtuw_p:
8248     CompareOpc = 646;
8249     isDot = true;
8250     break;
8251   case Intrinsic::ppc_altivec_vcmpgtud_p:
8252     if (Subtarget.hasP8Altivec()) {
8253       CompareOpc = 711;
8254       isDot = true;
8255     } else
8256       return false;
8257     break;
8258
8259   // VSX predicate comparisons use the same infrastructure
8260   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8261   case Intrinsic::ppc_vsx_xvcmpgedp_p:
8262   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8263   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8264   case Intrinsic::ppc_vsx_xvcmpgesp_p:
8265   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8266     if (Subtarget.hasVSX()) {
8267       switch (IntrinsicID) {
8268       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8269         CompareOpc = 99;
8270         break;
8271       case Intrinsic::ppc_vsx_xvcmpgedp_p:
8272         CompareOpc = 115;
8273         break;
8274       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8275         CompareOpc = 107;
8276         break;
8277       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8278         CompareOpc = 67;
8279         break;
8280       case Intrinsic::ppc_vsx_xvcmpgesp_p:
8281         CompareOpc = 83;
8282         break;
8283       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8284         CompareOpc = 75;
8285         break;
8286       }
8287       isDot = true;
8288     } else
8289       return false;
8290     break;
8291
8292   // Normal Comparisons.
8293   case Intrinsic::ppc_altivec_vcmpbfp:
8294     CompareOpc = 966;
8295     break;
8296   case Intrinsic::ppc_altivec_vcmpeqfp:
8297     CompareOpc = 198;
8298     break;
8299   case Intrinsic::ppc_altivec_vcmpequb:
8300     CompareOpc = 6;
8301     break;
8302   case Intrinsic::ppc_altivec_vcmpequh:
8303     CompareOpc = 70;
8304     break;
8305   case Intrinsic::ppc_altivec_vcmpequw:
8306     CompareOpc = 134;
8307     break;
8308   case Intrinsic::ppc_altivec_vcmpequd:
8309     if (Subtarget.hasP8Altivec())
8310       CompareOpc = 199;
8311     else
8312       return false;
8313     break;
8314   case Intrinsic::ppc_altivec_vcmpneb:
8315   case Intrinsic::ppc_altivec_vcmpneh:
8316   case Intrinsic::ppc_altivec_vcmpnew:
8317   case Intrinsic::ppc_altivec_vcmpnezb:
8318   case Intrinsic::ppc_altivec_vcmpnezh:
8319   case Intrinsic::ppc_altivec_vcmpnezw:
8320     if (Subtarget.hasP9Altivec())
8321       switch (IntrinsicID) {
8322       default:
8323         llvm_unreachable("Unknown comparison intrinsic.");
8324       case Intrinsic::ppc_altivec_vcmpneb:
8325         CompareOpc = 7;
8326         break;
8327       case Intrinsic::ppc_altivec_vcmpneh:
8328         CompareOpc = 71;
8329         break;
8330       case Intrinsic::ppc_altivec_vcmpnew:
8331         CompareOpc = 135;
8332         break;
8333       case Intrinsic::ppc_altivec_vcmpnezb:
8334         CompareOpc = 263;
8335         break;
8336       case Intrinsic::ppc_altivec_vcmpnezh:
8337         CompareOpc = 327;
8338         break;
8339       case Intrinsic::ppc_altivec_vcmpnezw:
8340         CompareOpc = 391;
8341         break;
8342       }
8343     else
8344       return false;
8345     break;
8346   case Intrinsic::ppc_altivec_vcmpgefp:
8347     CompareOpc = 454;
8348     break;
8349   case Intrinsic::ppc_altivec_vcmpgtfp:
8350     CompareOpc = 710;
8351     break;
8352   case Intrinsic::ppc_altivec_vcmpgtsb:
8353     CompareOpc = 774;
8354     break;
8355   case Intrinsic::ppc_altivec_vcmpgtsh:
8356     CompareOpc = 838;
8357     break;
8358   case Intrinsic::ppc_altivec_vcmpgtsw:
8359     CompareOpc = 902;
8360     break;
8361   case Intrinsic::ppc_altivec_vcmpgtsd:
8362     if (Subtarget.hasP8Altivec())
8363       CompareOpc = 967;
8364     else
8365       return false;
8366     break;
8367   case Intrinsic::ppc_altivec_vcmpgtub:
8368     CompareOpc = 518;
8369     break;
8370   case Intrinsic::ppc_altivec_vcmpgtuh:
8371     CompareOpc = 582;
8372     break;
8373   case Intrinsic::ppc_altivec_vcmpgtuw:
8374     CompareOpc = 646;
8375     break;
8376   case Intrinsic::ppc_altivec_vcmpgtud:
8377     if (Subtarget.hasP8Altivec())
8378       CompareOpc = 711;
8379     else
8380       return false;
8381     break;
8382   }
8383   return true;
8384 }
8385
8386 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
8387 /// lower, do it, otherwise return null.
8388 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8389                                                    SelectionDAG &DAG) const {
8390   unsigned IntrinsicID =
8391     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8392
8393   if (IntrinsicID == Intrinsic::thread_pointer) {
8394     // Reads the thread pointer register, used for __builtin_thread_pointer.
8395     if (Subtarget.isPPC64())
8396       return DAG.getRegister(PPC::X13, MVT::i64);
8397     return DAG.getRegister(PPC::R2, MVT::i32);
8398   }
8399
8400   // If this is a lowered altivec predicate compare, CompareOpc is set to the
8401   // opcode number of the comparison.
8402   SDLoc dl(Op);
8403   int CompareOpc;
8404   bool isDot;
8405   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
8406     return SDValue();    // Don't custom lower most intrinsics.
8407
8408   // If this is a non-dot comparison, make the VCMP node and we are done.
8409   if (!isDot) {
8410     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
8411                               Op.getOperand(1), Op.getOperand(2),
8412                               DAG.getConstant(CompareOpc, dl, MVT::i32));
8413     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
8414   }
8415
8416   // Create the PPCISD altivec 'dot' comparison node.
8417   SDValue Ops[] = {
8418     Op.getOperand(2),  // LHS
8419     Op.getOperand(3),  // RHS
8420     DAG.getConstant(CompareOpc, dl, MVT::i32)
8421   };
8422   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
8423   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8424
8425   // Now that we have the comparison, emit a copy from the CR to a GPR.
8426   // This is flagged to the above dot comparison.
8427   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
8428                                 DAG.getRegister(PPC::CR6, MVT::i32),
8429                                 CompNode.getValue(1));
8430
8431   // Unpack the result based on how the target uses it.
8432   unsigned BitNo;   // Bit # of CR6.
8433   bool InvertBit;   // Invert result?
8434   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
8435   default:  // Can't happen, don't crash on invalid number though.
8436   case 0:   // Return the value of the EQ bit of CR6.
8437     BitNo = 0; InvertBit = false;
8438     break;
8439   case 1:   // Return the inverted value of the EQ bit of CR6.
8440     BitNo = 0; InvertBit = true;
8441     break;
8442   case 2:   // Return the value of the LT bit of CR6.
8443     BitNo = 2; InvertBit = false;
8444     break;
8445   case 3:   // Return the inverted value of the LT bit of CR6.
8446     BitNo = 2; InvertBit = true;
8447     break;
8448   }
8449
8450   // Shift the bit into the low position.
8451   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8452                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8453   // Isolate the bit.
8454   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8455                       DAG.getConstant(1, dl, MVT::i32));
8456
8457   // If we are supposed to, toggle the bit.
8458   if (InvertBit)
8459     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8460                         DAG.getConstant(1, dl, MVT::i32));
8461   return Flags;
8462 }
8463
8464 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8465                                                SelectionDAG &DAG) const {
8466   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
8467   // the beginning of the argument list.
8468   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
8469   SDLoc DL(Op);
8470   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
8471   case Intrinsic::ppc_cfence: {
8472     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
8473     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
8474     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
8475                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
8476                                                   Op.getOperand(ArgStart + 1)),
8477                                       Op.getOperand(0)),
8478                    0);
8479   }
8480   default:
8481     break;
8482   }
8483   return SDValue();
8484 }
8485
8486 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
8487   // Check for a DIV with the same operands as this REM.
8488   for (auto UI : Op.getOperand(1)->uses()) {
8489     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
8490         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
8491       if (UI->getOperand(0) == Op.getOperand(0) &&
8492           UI->getOperand(1) == Op.getOperand(1))
8493         return SDValue();
8494   }
8495   return Op;
8496 }
8497
8498 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8499                                                   SelectionDAG &DAG) const {
8500   SDLoc dl(Op);
8501   // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
8502   // instructions), but for smaller types, we need to first extend up to v2i32
8503   // before doing going farther.
8504   if (Op.getValueType() == MVT::v2i64) {
8505     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8506     if (ExtVT != MVT::v2i32) {
8507       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8508       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8509                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8510                                         ExtVT.getVectorElementType(), 4)));
8511       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8512       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8513                        DAG.getValueType(MVT::v2i32));
8514     }
8515
8516     return Op;
8517   }
8518
8519   return SDValue();
8520 }
8521
8522 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8523                                                  SelectionDAG &DAG) const {
8524   SDLoc dl(Op);
8525   // Create a stack slot that is 16-byte aligned.
8526   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8527   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8528   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8529   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8530
8531   // Store the input value into Value#0 of the stack slot.
8532   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8533                                MachinePointerInfo());
8534   // Load it out.
8535   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
8536 }
8537
8538 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8539                                                   SelectionDAG &DAG) const {
8540   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
8541          "Should only be called for ISD::INSERT_VECTOR_ELT");
8542   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
8543   // We have legal lowering for constant indices but not for variable ones.
8544   if (C)
8545     return Op;
8546   return SDValue();
8547 }
8548
8549 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
8550                                                    SelectionDAG &DAG) const {
8551   SDLoc dl(Op);
8552   SDNode *N = Op.getNode();
8553
8554   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
8555          "Unknown extract_vector_elt type");
8556
8557   SDValue Value = N->getOperand(0);
8558
8559   // The first part of this is like the store lowering except that we don't
8560   // need to track the chain.
8561
8562   // The values are now known to be -1 (false) or 1 (true). To convert this
8563   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8564   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8565   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8566
8567   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8568   // understand how to form the extending load.
8569   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8570
8571   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8572
8573   // Now convert to an integer and store.
8574   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8575     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8576     Value);
8577
8578   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8579   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8580   MachinePointerInfo PtrInfo =
8581       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8582   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8583   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8584
8585   SDValue StoreChain = DAG.getEntryNode();
8586   SDValue Ops[] = {StoreChain,
8587                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8588                    Value, FIdx};
8589   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8590
8591   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8592     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8593
8594   // Extract the value requested.
8595   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8596   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8597   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8598
8599   SDValue IntVal =
8600       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
8601
8602   if (!Subtarget.useCRBits())
8603     return IntVal;
8604
8605   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
8606 }
8607
8608 /// Lowering for QPX v4i1 loads
8609 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
8610                                            SelectionDAG &DAG) const {
8611   SDLoc dl(Op);
8612   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
8613   SDValue LoadChain = LN->getChain();
8614   SDValue BasePtr = LN->getBasePtr();
8615
8616   if (Op.getValueType() == MVT::v4f64 ||
8617       Op.getValueType() == MVT::v4f32) {
8618     EVT MemVT = LN->getMemoryVT();
8619     unsigned Alignment = LN->getAlignment();
8620
8621     // If this load is properly aligned, then it is legal.
8622     if (Alignment >= MemVT.getStoreSize())
8623       return Op;
8624
8625     EVT ScalarVT = Op.getValueType().getScalarType(),
8626         ScalarMemVT = MemVT.getScalarType();
8627     unsigned Stride = ScalarMemVT.getStoreSize();
8628
8629     SDValue Vals[4], LoadChains[4];
8630     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8631       SDValue Load;
8632       if (ScalarVT != ScalarMemVT)
8633         Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
8634                               BasePtr,
8635                               LN->getPointerInfo().getWithOffset(Idx * Stride),
8636                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8637                               LN->getMemOperand()->getFlags(), LN->getAAInfo());
8638       else
8639         Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
8640                            LN->getPointerInfo().getWithOffset(Idx * Stride),
8641                            MinAlign(Alignment, Idx * Stride),
8642                            LN->getMemOperand()->getFlags(), LN->getAAInfo());
8643
8644       if (Idx == 0 && LN->isIndexed()) {
8645         assert(LN->getAddressingMode() == ISD::PRE_INC &&
8646                "Unknown addressing mode on vector load");
8647         Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
8648                                   LN->getAddressingMode());
8649       }
8650
8651       Vals[Idx] = Load;
8652       LoadChains[Idx] = Load.getValue(1);
8653
8654       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8655                             DAG.getConstant(Stride, dl,
8656                                             BasePtr.getValueType()));
8657     }
8658
8659     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8660     SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
8661
8662     if (LN->isIndexed()) {
8663       SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
8664       return DAG.getMergeValues(RetOps, dl);
8665     }
8666
8667     SDValue RetOps[] = { Value, TF };
8668     return DAG.getMergeValues(RetOps, dl);
8669   }
8670
8671   assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
8672   assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
8673
8674   // To lower v4i1 from a byte array, we load the byte elements of the
8675   // vector and then reuse the BUILD_VECTOR logic.
8676
8677   SDValue VectElmts[4], VectElmtChains[4];
8678   for (unsigned i = 0; i < 4; ++i) {
8679     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8680     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8681
8682     VectElmts[i] = DAG.getExtLoad(
8683         ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
8684         LN->getPointerInfo().getWithOffset(i), MVT::i8,
8685         /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
8686     VectElmtChains[i] = VectElmts[i].getValue(1);
8687   }
8688
8689   LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
8690   SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
8691
8692   SDValue RVals[] = { Value, LoadChain };
8693   return DAG.getMergeValues(RVals, dl);
8694 }
8695
8696 /// Lowering for QPX v4i1 stores
8697 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
8698                                             SelectionDAG &DAG) const {
8699   SDLoc dl(Op);
8700   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
8701   SDValue StoreChain = SN->getChain();
8702   SDValue BasePtr = SN->getBasePtr();
8703   SDValue Value = SN->getValue();
8704
8705   if (Value.getValueType() == MVT::v4f64 ||
8706       Value.getValueType() == MVT::v4f32) {
8707     EVT MemVT = SN->getMemoryVT();
8708     unsigned Alignment = SN->getAlignment();
8709
8710     // If this store is properly aligned, then it is legal.
8711     if (Alignment >= MemVT.getStoreSize())
8712       return Op;
8713
8714     EVT ScalarVT = Value.getValueType().getScalarType(),
8715         ScalarMemVT = MemVT.getScalarType();
8716     unsigned Stride = ScalarMemVT.getStoreSize();
8717
8718     SDValue Stores[4];
8719     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8720       SDValue Ex = DAG.getNode(
8721           ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
8722           DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
8723       SDValue Store;
8724       if (ScalarVT != ScalarMemVT)
8725         Store =
8726             DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
8727                               SN->getPointerInfo().getWithOffset(Idx * Stride),
8728                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8729                               SN->getMemOperand()->getFlags(), SN->getAAInfo());
8730       else
8731         Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
8732                              SN->getPointerInfo().getWithOffset(Idx * Stride),
8733                              MinAlign(Alignment, Idx * Stride),
8734                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
8735
8736       if (Idx == 0 && SN->isIndexed()) {
8737         assert(SN->getAddressingMode() == ISD::PRE_INC &&
8738                "Unknown addressing mode on vector store");
8739         Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
8740                                     SN->getAddressingMode());
8741       }
8742
8743       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8744                             DAG.getConstant(Stride, dl,
8745                                             BasePtr.getValueType()));
8746       Stores[Idx] = Store;
8747     }
8748
8749     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8750
8751     if (SN->isIndexed()) {
8752       SDValue RetOps[] = { TF, Stores[0].getValue(1) };
8753       return DAG.getMergeValues(RetOps, dl);
8754     }
8755
8756     return TF;
8757   }
8758
8759   assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
8760   assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
8761
8762   // The values are now known to be -1 (false) or 1 (true). To convert this
8763   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8764   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8765   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8766
8767   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8768   // understand how to form the extending load.
8769   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8770
8771   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8772
8773   // Now convert to an integer and store.
8774   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8775     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8776     Value);
8777
8778   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8779   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8780   MachinePointerInfo PtrInfo =
8781       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8782   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8783   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8784
8785   SDValue Ops[] = {StoreChain,
8786                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8787                    Value, FIdx};
8788   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8789
8790   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8791     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8792
8793   // Move data into the byte array.
8794   SDValue Loads[4], LoadChains[4];
8795   for (unsigned i = 0; i < 4; ++i) {
8796     unsigned Offset = 4*i;
8797     SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8798     Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8799
8800     Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
8801                            PtrInfo.getWithOffset(Offset));
8802     LoadChains[i] = Loads[i].getValue(1);
8803   }
8804
8805   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8806
8807   SDValue Stores[4];
8808   for (unsigned i = 0; i < 4; ++i) {
8809     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8810     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8811
8812     Stores[i] = DAG.getTruncStore(
8813         StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
8814         MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
8815         SN->getAAInfo());
8816   }
8817
8818   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8819
8820   return StoreChain;
8821 }
8822
8823 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
8824   SDLoc dl(Op);
8825   if (Op.getValueType() == MVT::v4i32) {
8826     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8827
8828     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
8829     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
8830
8831     SDValue RHSSwap =   // = vrlw RHS, 16
8832       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
8833
8834     // Shrinkify inputs to v8i16.
8835     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
8836     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
8837     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
8838
8839     // Low parts multiplied together, generating 32-bit results (we ignore the
8840     // top parts).
8841     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
8842                                         LHS, RHS, DAG, dl, MVT::v4i32);
8843
8844     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
8845                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
8846     // Shift the high parts up 16 bits.
8847     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
8848                               Neg16, DAG, dl);
8849     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
8850   } else if (Op.getValueType() == MVT::v8i16) {
8851     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8852
8853     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
8854
8855     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
8856                             LHS, RHS, Zero, DAG, dl);
8857   } else if (Op.getValueType() == MVT::v16i8) {
8858     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8859     bool isLittleEndian = Subtarget.isLittleEndian();
8860
8861     // Multiply the even 8-bit parts, producing 16-bit sums.
8862     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
8863                                            LHS, RHS, DAG, dl, MVT::v8i16);
8864     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
8865
8866     // Multiply the odd 8-bit parts, producing 16-bit sums.
8867     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
8868                                           LHS, RHS, DAG, dl, MVT::v8i16);
8869     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
8870
8871     // Merge the results together.  Because vmuleub and vmuloub are
8872     // instructions with a big-endian bias, we must reverse the
8873     // element numbering and reverse the meaning of "odd" and "even"
8874     // when generating little endian code.
8875     int Ops[16];
8876     for (unsigned i = 0; i != 8; ++i) {
8877       if (isLittleEndian) {
8878         Ops[i*2  ] = 2*i;
8879         Ops[i*2+1] = 2*i+16;
8880       } else {
8881         Ops[i*2  ] = 2*i+1;
8882         Ops[i*2+1] = 2*i+1+16;
8883       }
8884     }
8885     if (isLittleEndian)
8886       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
8887     else
8888       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
8889   } else {
8890     llvm_unreachable("Unknown mul to lower!");
8891   }
8892 }
8893
8894 /// LowerOperation - Provide custom lowering hooks for some operations.
8895 ///
8896 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8897   switch (Op.getOpcode()) {
8898   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8899   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
8900   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
8901   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
8902   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
8903   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
8904   case ISD::SETCC:              return LowerSETCC(Op, DAG);
8905   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
8906   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
8907   case ISD::VASTART:
8908     return LowerVASTART(Op, DAG);
8909
8910   case ISD::VAARG:
8911     return LowerVAARG(Op, DAG);
8912
8913   case ISD::VACOPY:
8914     return LowerVACOPY(Op, DAG);
8915
8916   case ISD::STACKRESTORE:
8917     return LowerSTACKRESTORE(Op, DAG);
8918
8919   case ISD::DYNAMIC_STACKALLOC:
8920     return LowerDYNAMIC_STACKALLOC(Op, DAG);
8921
8922   case ISD::GET_DYNAMIC_AREA_OFFSET:
8923     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8924
8925   case ISD::EH_DWARF_CFA:
8926     return LowerEH_DWARF_CFA(Op, DAG);
8927
8928   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
8929   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
8930
8931   case ISD::LOAD:               return LowerLOAD(Op, DAG);
8932   case ISD::STORE:              return LowerSTORE(Op, DAG);
8933   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
8934   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
8935   case ISD::FP_TO_UINT:
8936   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
8937                                                       SDLoc(Op));
8938   case ISD::UINT_TO_FP:
8939   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
8940   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
8941
8942   // Lower 64-bit shifts.
8943   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
8944   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
8945   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
8946
8947   // Vector-related lowering.
8948   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
8949   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
8950   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8951   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
8952   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
8953   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8954   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
8955   case ISD::MUL:                return LowerMUL(Op, DAG);
8956
8957   // For counter-based loop handling.
8958   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
8959
8960   // Frame & Return address.
8961   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
8962   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
8963
8964   case ISD::INTRINSIC_VOID:
8965     return LowerINTRINSIC_VOID(Op, DAG);
8966   case ISD::SREM:
8967   case ISD::UREM:
8968     return LowerREM(Op, DAG);
8969   }
8970 }
8971
8972 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
8973                                            SmallVectorImpl<SDValue>&Results,
8974                                            SelectionDAG &DAG) const {
8975   SDLoc dl(N);
8976   switch (N->getOpcode()) {
8977   default:
8978     llvm_unreachable("Do not know how to custom type legalize this operation!");
8979   case ISD::READCYCLECOUNTER: {
8980     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
8981     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
8982
8983     Results.push_back(RTB);
8984     Results.push_back(RTB.getValue(1));
8985     Results.push_back(RTB.getValue(2));
8986     break;
8987   }
8988   case ISD::INTRINSIC_W_CHAIN: {
8989     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
8990         Intrinsic::ppc_is_decremented_ctr_nonzero)
8991       break;
8992
8993     assert(N->getValueType(0) == MVT::i1 &&
8994            "Unexpected result type for CTR decrement intrinsic");
8995     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
8996                                  N->getValueType(0));
8997     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
8998     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
8999                                  N->getOperand(1));
9000
9001     Results.push_back(NewInt);
9002     Results.push_back(NewInt.getValue(1));
9003     break;
9004   }
9005   case ISD::VAARG: {
9006     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
9007       return;
9008
9009     EVT VT = N->getValueType(0);
9010
9011     if (VT == MVT::i64) {
9012       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
9013
9014       Results.push_back(NewNode);
9015       Results.push_back(NewNode.getValue(1));
9016     }
9017     return;
9018   }
9019   case ISD::FP_ROUND_INREG: {
9020     assert(N->getValueType(0) == MVT::ppcf128);
9021     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
9022     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
9023                              MVT::f64, N->getOperand(0),
9024                              DAG.getIntPtrConstant(0, dl));
9025     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
9026                              MVT::f64, N->getOperand(0),
9027                              DAG.getIntPtrConstant(1, dl));
9028
9029     // Add the two halves of the long double in round-to-zero mode.
9030     SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
9031
9032     // We know the low half is about to be thrown away, so just use something
9033     // convenient.
9034     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
9035                                 FPreg, FPreg));
9036     return;
9037   }
9038   case ISD::FP_TO_SINT:
9039   case ISD::FP_TO_UINT:
9040     // LowerFP_TO_INT() can only handle f32 and f64.
9041     if (N->getOperand(0).getValueType() == MVT::ppcf128)
9042       return;
9043     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
9044     return;
9045   }
9046 }
9047
9048 //===----------------------------------------------------------------------===//
9049 //  Other Lowering Code
9050 //===----------------------------------------------------------------------===//
9051
9052 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
9053   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9054   Function *Func = Intrinsic::getDeclaration(M, Id);
9055   return Builder.CreateCall(Func, {});
9056 }
9057
9058 // The mappings for emitLeading/TrailingFence is taken from
9059 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
9060 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
9061                                                  Instruction *Inst,
9062                                                  AtomicOrdering Ord) const {
9063   if (Ord == AtomicOrdering::SequentiallyConsistent)
9064     return callIntrinsic(Builder, Intrinsic::ppc_sync);
9065   if (isReleaseOrStronger(Ord))
9066     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9067   return nullptr;
9068 }
9069
9070 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
9071                                                   Instruction *Inst,
9072                                                   AtomicOrdering Ord) const {
9073   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9074     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
9075     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
9076     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
9077     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9078       return Builder.CreateCall(
9079           Intrinsic::getDeclaration(
9080               Builder.GetInsertBlock()->getParent()->getParent(),
9081               Intrinsic::ppc_cfence, {Inst->getType()}),
9082           {Inst});
9083     // FIXME: Can use isync for rmw operation.
9084     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9085   }
9086   return nullptr;
9087 }
9088
9089 MachineBasicBlock *
9090 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
9091                                     unsigned AtomicSize,
9092                                     unsigned BinOpcode,
9093                                     unsigned CmpOpcode,
9094                                     unsigned CmpPred) const {
9095   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9096   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9097
9098   auto LoadMnemonic = PPC::LDARX;
9099   auto StoreMnemonic = PPC::STDCX;
9100   switch (AtomicSize) {
9101   default:
9102     llvm_unreachable("Unexpected size of atomic entity");
9103   case 1:
9104     LoadMnemonic = PPC::LBARX;
9105     StoreMnemonic = PPC::STBCX;
9106     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9107     break;
9108   case 2:
9109     LoadMnemonic = PPC::LHARX;
9110     StoreMnemonic = PPC::STHCX;
9111     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9112     break;
9113   case 4:
9114     LoadMnemonic = PPC::LWARX;
9115     StoreMnemonic = PPC::STWCX;
9116     break;
9117   case 8:
9118     LoadMnemonic = PPC::LDARX;
9119     StoreMnemonic = PPC::STDCX;
9120     break;
9121   }
9122
9123   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9124   MachineFunction *F = BB->getParent();
9125   MachineFunction::iterator It = ++BB->getIterator();
9126
9127   unsigned dest = MI.getOperand(0).getReg();
9128   unsigned ptrA = MI.getOperand(1).getReg();
9129   unsigned ptrB = MI.getOperand(2).getReg();
9130   unsigned incr = MI.getOperand(3).getReg();
9131   DebugLoc dl = MI.getDebugLoc();
9132
9133   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9134   MachineBasicBlock *loop2MBB =
9135     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9136   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9137   F->insert(It, loopMBB);
9138   if (CmpOpcode)
9139     F->insert(It, loop2MBB);
9140   F->insert(It, exitMBB);
9141   exitMBB->splice(exitMBB->begin(), BB,
9142                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
9143   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9144
9145   MachineRegisterInfo &RegInfo = F->getRegInfo();
9146   unsigned TmpReg = (!BinOpcode) ? incr :
9147     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
9148                                            : &PPC::GPRCRegClass);
9149
9150   //  thisMBB:
9151   //   ...
9152   //   fallthrough --> loopMBB
9153   BB->addSuccessor(loopMBB);
9154
9155   //  loopMBB:
9156   //   l[wd]arx dest, ptr
9157   //   add r0, dest, incr
9158   //   st[wd]cx. r0, ptr
9159   //   bne- loopMBB
9160   //   fallthrough --> exitMBB
9161
9162   // For max/min...
9163   //  loopMBB:
9164   //   l[wd]arx dest, ptr
9165   //   cmpl?[wd] incr, dest
9166   //   bgt exitMBB
9167   //  loop2MBB:
9168   //   st[wd]cx. dest, ptr
9169   //   bne- loopMBB
9170   //   fallthrough --> exitMBB
9171
9172   BB = loopMBB;
9173   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9174     .addReg(ptrA).addReg(ptrB);
9175   if (BinOpcode)
9176     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
9177   if (CmpOpcode) {
9178     // Signed comparisons of byte or halfword values must be sign-extended.
9179     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
9180       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9181       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
9182               ExtReg).addReg(dest);
9183       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9184         .addReg(incr).addReg(ExtReg);
9185     } else
9186       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9187         .addReg(incr).addReg(dest);
9188
9189     BuildMI(BB, dl, TII->get(PPC::BCC))
9190       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9191     BB->addSuccessor(loop2MBB);
9192     BB->addSuccessor(exitMBB);
9193     BB = loop2MBB;
9194   }
9195   BuildMI(BB, dl, TII->get(StoreMnemonic))
9196     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
9197   BuildMI(BB, dl, TII->get(PPC::BCC))
9198     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9199   BB->addSuccessor(loopMBB);
9200   BB->addSuccessor(exitMBB);
9201
9202   //  exitMBB:
9203   //   ...
9204   BB = exitMBB;
9205   return BB;
9206 }
9207
9208 MachineBasicBlock *
9209 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
9210                                             MachineBasicBlock *BB,
9211                                             bool is8bit, // operation
9212                                             unsigned BinOpcode,
9213                                             unsigned CmpOpcode,
9214                                             unsigned CmpPred) const {
9215   // If we support part-word atomic mnemonics, just use them
9216   if (Subtarget.hasPartwordAtomics())
9217     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
9218                             CmpOpcode, CmpPred);
9219
9220   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9221   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9222   // In 64 bit mode we have to use 64 bits for addresses, even though the
9223   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
9224   // registers without caring whether they're 32 or 64, but here we're
9225   // doing actual arithmetic on the addresses.
9226   bool is64bit = Subtarget.isPPC64();
9227   bool isLittleEndian = Subtarget.isLittleEndian();
9228   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9229
9230   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9231   MachineFunction *F = BB->getParent();
9232   MachineFunction::iterator It = ++BB->getIterator();
9233
9234   unsigned dest = MI.getOperand(0).getReg();
9235   unsigned ptrA = MI.getOperand(1).getReg();
9236   unsigned ptrB = MI.getOperand(2).getReg();
9237   unsigned incr = MI.getOperand(3).getReg();
9238   DebugLoc dl = MI.getDebugLoc();
9239
9240   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9241   MachineBasicBlock *loop2MBB =
9242     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9243   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9244   F->insert(It, loopMBB);
9245   if (CmpOpcode)
9246     F->insert(It, loop2MBB);
9247   F->insert(It, exitMBB);
9248   exitMBB->splice(exitMBB->begin(), BB,
9249                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
9250   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9251
9252   MachineRegisterInfo &RegInfo = F->getRegInfo();
9253   const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9254                                           : &PPC::GPRCRegClass;
9255   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9256   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9257   unsigned ShiftReg =
9258     isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9259   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
9260   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9261   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9262   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9263   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9264   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
9265   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9266   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9267   unsigned Ptr1Reg;
9268   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
9269
9270   //  thisMBB:
9271   //   ...
9272   //   fallthrough --> loopMBB
9273   BB->addSuccessor(loopMBB);
9274
9275   // The 4-byte load must be aligned, while a char or short may be
9276   // anywhere in the word.  Hence all this nasty bookkeeping code.
9277   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9278   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9279   //   xori shift, shift1, 24 [16]
9280   //   rlwinm ptr, ptr1, 0, 0, 29
9281   //   slw incr2, incr, shift
9282   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9283   //   slw mask, mask2, shift
9284   //  loopMBB:
9285   //   lwarx tmpDest, ptr
9286   //   add tmp, tmpDest, incr2
9287   //   andc tmp2, tmpDest, mask
9288   //   and tmp3, tmp, mask
9289   //   or tmp4, tmp3, tmp2
9290   //   stwcx. tmp4, ptr
9291   //   bne- loopMBB
9292   //   fallthrough --> exitMBB
9293   //   srw dest, tmpDest, shift
9294   if (ptrA != ZeroReg) {
9295     Ptr1Reg = RegInfo.createVirtualRegister(RC);
9296     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9297       .addReg(ptrA).addReg(ptrB);
9298   } else {
9299     Ptr1Reg = ptrB;
9300   }
9301   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9302       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9303   if (!isLittleEndian)
9304     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9305         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9306   if (is64bit)
9307     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9308       .addReg(Ptr1Reg).addImm(0).addImm(61);
9309   else
9310     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9311       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9312   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
9313       .addReg(incr).addReg(ShiftReg);
9314   if (is8bit)
9315     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9316   else {
9317     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9318     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
9319   }
9320   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9321       .addReg(Mask2Reg).addReg(ShiftReg);
9322
9323   BB = loopMBB;
9324   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9325     .addReg(ZeroReg).addReg(PtrReg);
9326   if (BinOpcode)
9327     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
9328       .addReg(Incr2Reg).addReg(TmpDestReg);
9329   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
9330     .addReg(TmpDestReg).addReg(MaskReg);
9331   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
9332     .addReg(TmpReg).addReg(MaskReg);
9333   if (CmpOpcode) {
9334     // For unsigned comparisons, we can directly compare the shifted values.
9335     // For signed comparisons we shift and sign extend.
9336     unsigned SReg = RegInfo.createVirtualRegister(RC);
9337     BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
9338       .addReg(TmpDestReg).addReg(MaskReg);
9339     unsigned ValueReg = SReg;
9340     unsigned CmpReg = Incr2Reg;
9341     if (CmpOpcode == PPC::CMPW) {
9342       ValueReg = RegInfo.createVirtualRegister(RC);
9343       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
9344         .addReg(SReg).addReg(ShiftReg);
9345       unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
9346       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
9347         .addReg(ValueReg);
9348       ValueReg = ValueSReg;
9349       CmpReg = incr;
9350     }
9351     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9352       .addReg(CmpReg).addReg(ValueReg);
9353     BuildMI(BB, dl, TII->get(PPC::BCC))
9354       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9355     BB->addSuccessor(loop2MBB);
9356     BB->addSuccessor(exitMBB);
9357     BB = loop2MBB;
9358   }
9359   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
9360     .addReg(Tmp3Reg).addReg(Tmp2Reg);
9361   BuildMI(BB, dl, TII->get(PPC::STWCX))
9362     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
9363   BuildMI(BB, dl, TII->get(PPC::BCC))
9364     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9365   BB->addSuccessor(loopMBB);
9366   BB->addSuccessor(exitMBB);
9367
9368   //  exitMBB:
9369   //   ...
9370   BB = exitMBB;
9371   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
9372     .addReg(ShiftReg);
9373   return BB;
9374 }
9375
9376 llvm::MachineBasicBlock *
9377 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
9378                                     MachineBasicBlock *MBB) const {
9379   DebugLoc DL = MI.getDebugLoc();
9380   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9381   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
9382
9383   MachineFunction *MF = MBB->getParent();
9384   MachineRegisterInfo &MRI = MF->getRegInfo();
9385
9386   const BasicBlock *BB = MBB->getBasicBlock();
9387   MachineFunction::iterator I = ++MBB->getIterator();
9388
9389   // Memory Reference
9390   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9391   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9392
9393   unsigned DstReg = MI.getOperand(0).getReg();
9394   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
9395   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
9396   unsigned mainDstReg = MRI.createVirtualRegister(RC);
9397   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
9398
9399   MVT PVT = getPointerTy(MF->getDataLayout());
9400   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9401          "Invalid Pointer Size!");
9402   // For v = setjmp(buf), we generate
9403   //
9404   // thisMBB:
9405   //  SjLjSetup mainMBB
9406   //  bl mainMBB
9407   //  v_restore = 1
9408   //  b sinkMBB
9409   //
9410   // mainMBB:
9411   //  buf[LabelOffset] = LR
9412   //  v_main = 0
9413   //
9414   // sinkMBB:
9415   //  v = phi(main, restore)
9416   //
9417
9418   MachineBasicBlock *thisMBB = MBB;
9419   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
9420   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
9421   MF->insert(I, mainMBB);
9422   MF->insert(I, sinkMBB);
9423
9424   MachineInstrBuilder MIB;
9425
9426   // Transfer the remainder of BB and its successor edges to sinkMBB.
9427   sinkMBB->splice(sinkMBB->begin(), MBB,
9428                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9429   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
9430
9431   // Note that the structure of the jmp_buf used here is not compatible
9432   // with that used by libc, and is not designed to be. Specifically, it
9433   // stores only those 'reserved' registers that LLVM does not otherwise
9434   // understand how to spill. Also, by convention, by the time this
9435   // intrinsic is called, Clang has already stored the frame address in the
9436   // first slot of the buffer and stack address in the third. Following the
9437   // X86 target code, we'll store the jump address in the second slot. We also
9438   // need to save the TOC pointer (R2) to handle jumps between shared
9439   // libraries, and that will be stored in the fourth slot. The thread
9440   // identifier (R13) is not affected.
9441
9442   // thisMBB:
9443   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9444   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9445   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9446
9447   // Prepare IP either in reg.
9448   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
9449   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
9450   unsigned BufReg = MI.getOperand(1).getReg();
9451
9452   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
9453     setUsesTOCBasePtr(*MBB->getParent());
9454     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
9455             .addReg(PPC::X2)
9456             .addImm(TOCOffset)
9457             .addReg(BufReg);
9458     MIB.setMemRefs(MMOBegin, MMOEnd);
9459   }
9460
9461   // Naked functions never have a base pointer, and so we use r1. For all
9462   // other functions, this decision must be delayed until during PEI.
9463   unsigned BaseReg;
9464   if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
9465     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
9466   else
9467     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
9468
9469   MIB = BuildMI(*thisMBB, MI, DL,
9470                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
9471             .addReg(BaseReg)
9472             .addImm(BPOffset)
9473             .addReg(BufReg);
9474   MIB.setMemRefs(MMOBegin, MMOEnd);
9475
9476   // Setup
9477   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
9478   MIB.addRegMask(TRI->getNoPreservedMask());
9479
9480   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
9481
9482   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
9483           .addMBB(mainMBB);
9484   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
9485
9486   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
9487   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
9488
9489   // mainMBB:
9490   //  mainDstReg = 0
9491   MIB =
9492       BuildMI(mainMBB, DL,
9493               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9494
9495   // Store IP
9496   if (Subtarget.isPPC64()) {
9497     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
9498             .addReg(LabelReg)
9499             .addImm(LabelOffset)
9500             .addReg(BufReg);
9501   } else {
9502     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
9503             .addReg(LabelReg)
9504             .addImm(LabelOffset)
9505             .addReg(BufReg);
9506   }
9507
9508   MIB.setMemRefs(MMOBegin, MMOEnd);
9509
9510   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
9511   mainMBB->addSuccessor(sinkMBB);
9512
9513   // sinkMBB:
9514   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
9515           TII->get(PPC::PHI), DstReg)
9516     .addReg(mainDstReg).addMBB(mainMBB)
9517     .addReg(restoreDstReg).addMBB(thisMBB);
9518
9519   MI.eraseFromParent();
9520   return sinkMBB;
9521 }
9522
9523 MachineBasicBlock *
9524 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
9525                                      MachineBasicBlock *MBB) const {
9526   DebugLoc DL = MI.getDebugLoc();
9527   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9528
9529   MachineFunction *MF = MBB->getParent();
9530   MachineRegisterInfo &MRI = MF->getRegInfo();
9531
9532   // Memory Reference
9533   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9534   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9535
9536   MVT PVT = getPointerTy(MF->getDataLayout());
9537   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9538          "Invalid Pointer Size!");
9539
9540   const TargetRegisterClass *RC =
9541     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9542   unsigned Tmp = MRI.createVirtualRegister(RC);
9543   // Since FP is only updated here but NOT referenced, it's treated as GPR.
9544   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9545   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9546   unsigned BP =
9547       (PVT == MVT::i64)
9548           ? PPC::X30
9549           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9550                                                               : PPC::R30);
9551
9552   MachineInstrBuilder MIB;
9553
9554   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9555   const int64_t SPOffset    = 2 * PVT.getStoreSize();
9556   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9557   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9558
9559   unsigned BufReg = MI.getOperand(0).getReg();
9560
9561   // Reload FP (the jumped-to function may not have had a
9562   // frame pointer, and if so, then its r31 will be restored
9563   // as necessary).
9564   if (PVT == MVT::i64) {
9565     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
9566             .addImm(0)
9567             .addReg(BufReg);
9568   } else {
9569     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
9570             .addImm(0)
9571             .addReg(BufReg);
9572   }
9573   MIB.setMemRefs(MMOBegin, MMOEnd);
9574
9575   // Reload IP
9576   if (PVT == MVT::i64) {
9577     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
9578             .addImm(LabelOffset)
9579             .addReg(BufReg);
9580   } else {
9581     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
9582             .addImm(LabelOffset)
9583             .addReg(BufReg);
9584   }
9585   MIB.setMemRefs(MMOBegin, MMOEnd);
9586
9587   // Reload SP
9588   if (PVT == MVT::i64) {
9589     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
9590             .addImm(SPOffset)
9591             .addReg(BufReg);
9592   } else {
9593     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9594             .addImm(SPOffset)
9595             .addReg(BufReg);
9596   }
9597   MIB.setMemRefs(MMOBegin, MMOEnd);
9598
9599   // Reload BP
9600   if (PVT == MVT::i64) {
9601     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9602             .addImm(BPOffset)
9603             .addReg(BufReg);
9604   } else {
9605     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9606             .addImm(BPOffset)
9607             .addReg(BufReg);
9608   }
9609   MIB.setMemRefs(MMOBegin, MMOEnd);
9610
9611   // Reload TOC
9612   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9613     setUsesTOCBasePtr(*MBB->getParent());
9614     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9615             .addImm(TOCOffset)
9616             .addReg(BufReg);
9617
9618     MIB.setMemRefs(MMOBegin, MMOEnd);
9619   }
9620
9621   // Jump
9622   BuildMI(*MBB, MI, DL,
9623           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9624   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9625
9626   MI.eraseFromParent();
9627   return MBB;
9628 }
9629
9630 MachineBasicBlock *
9631 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9632                                                MachineBasicBlock *BB) const {
9633   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9634       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9635     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9636         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9637       // Call lowering should have added an r2 operand to indicate a dependence
9638       // on the TOC base pointer value. It can't however, because there is no
9639       // way to mark the dependence as implicit there, and so the stackmap code
9640       // will confuse it with a regular operand. Instead, add the dependence
9641       // here.
9642       setUsesTOCBasePtr(*BB->getParent());
9643       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9644     }
9645
9646     return emitPatchPoint(MI, BB);
9647   }
9648
9649   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9650       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9651     return emitEHSjLjSetJmp(MI, BB);
9652   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9653              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9654     return emitEHSjLjLongJmp(MI, BB);
9655   }
9656
9657   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9658
9659   // To "insert" these instructions we actually have to insert their
9660   // control-flow patterns.
9661   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9662   MachineFunction::iterator It = ++BB->getIterator();
9663
9664   MachineFunction *F = BB->getParent();
9665
9666   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9667        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9668        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9669     SmallVector<MachineOperand, 2> Cond;
9670     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9671         MI.getOpcode() == PPC::SELECT_CC_I8)
9672       Cond.push_back(MI.getOperand(4));
9673     else
9674       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9675     Cond.push_back(MI.getOperand(1));
9676
9677     DebugLoc dl = MI.getDebugLoc();
9678     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9679                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9680   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9681              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9682              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9683              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9684              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9685              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9686              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9687              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9688              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9689              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9690              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9691              MI.getOpcode() == PPC::SELECT_I4 ||
9692              MI.getOpcode() == PPC::SELECT_I8 ||
9693              MI.getOpcode() == PPC::SELECT_F4 ||
9694              MI.getOpcode() == PPC::SELECT_F8 ||
9695              MI.getOpcode() == PPC::SELECT_QFRC ||
9696              MI.getOpcode() == PPC::SELECT_QSRC ||
9697              MI.getOpcode() == PPC::SELECT_QBRC ||
9698              MI.getOpcode() == PPC::SELECT_VRRC ||
9699              MI.getOpcode() == PPC::SELECT_VSFRC ||
9700              MI.getOpcode() == PPC::SELECT_VSSRC ||
9701              MI.getOpcode() == PPC::SELECT_VSRC) {
9702     // The incoming instruction knows the destination vreg to set, the
9703     // condition code register to branch on, the true/false values to
9704     // select between, and a branch opcode to use.
9705
9706     //  thisMBB:
9707     //  ...
9708     //   TrueVal = ...
9709     //   cmpTY ccX, r1, r2
9710     //   bCC copy1MBB
9711     //   fallthrough --> copy0MBB
9712     MachineBasicBlock *thisMBB = BB;
9713     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9714     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9715     DebugLoc dl = MI.getDebugLoc();
9716     F->insert(It, copy0MBB);
9717     F->insert(It, sinkMBB);
9718
9719     // Transfer the remainder of BB and its successor edges to sinkMBB.
9720     sinkMBB->splice(sinkMBB->begin(), BB,
9721                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9722     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9723
9724     // Next, add the true and fallthrough blocks as its successors.
9725     BB->addSuccessor(copy0MBB);
9726     BB->addSuccessor(sinkMBB);
9727
9728     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9729         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9730         MI.getOpcode() == PPC::SELECT_QFRC ||
9731         MI.getOpcode() == PPC::SELECT_QSRC ||
9732         MI.getOpcode() == PPC::SELECT_QBRC ||
9733         MI.getOpcode() == PPC::SELECT_VRRC ||
9734         MI.getOpcode() == PPC::SELECT_VSFRC ||
9735         MI.getOpcode() == PPC::SELECT_VSSRC ||
9736         MI.getOpcode() == PPC::SELECT_VSRC) {
9737       BuildMI(BB, dl, TII->get(PPC::BC))
9738           .addReg(MI.getOperand(1).getReg())
9739           .addMBB(sinkMBB);
9740     } else {
9741       unsigned SelectPred = MI.getOperand(4).getImm();
9742       BuildMI(BB, dl, TII->get(PPC::BCC))
9743           .addImm(SelectPred)
9744           .addReg(MI.getOperand(1).getReg())
9745           .addMBB(sinkMBB);
9746     }
9747
9748     //  copy0MBB:
9749     //   %FalseValue = ...
9750     //   # fallthrough to sinkMBB
9751     BB = copy0MBB;
9752
9753     // Update machine-CFG edges
9754     BB->addSuccessor(sinkMBB);
9755
9756     //  sinkMBB:
9757     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9758     //  ...
9759     BB = sinkMBB;
9760     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9761         .addReg(MI.getOperand(3).getReg())
9762         .addMBB(copy0MBB)
9763         .addReg(MI.getOperand(2).getReg())
9764         .addMBB(thisMBB);
9765   } else if (MI.getOpcode() == PPC::ReadTB) {
9766     // To read the 64-bit time-base register on a 32-bit target, we read the
9767     // two halves. Should the counter have wrapped while it was being read, we
9768     // need to try again.
9769     // ...
9770     // readLoop:
9771     // mfspr Rx,TBU # load from TBU
9772     // mfspr Ry,TB  # load from TB
9773     // mfspr Rz,TBU # load from TBU
9774     // cmpw crX,Rx,Rz # check if 'old'='new'
9775     // bne readLoop   # branch if they're not equal
9776     // ...
9777
9778     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9779     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9780     DebugLoc dl = MI.getDebugLoc();
9781     F->insert(It, readMBB);
9782     F->insert(It, sinkMBB);
9783
9784     // Transfer the remainder of BB and its successor edges to sinkMBB.
9785     sinkMBB->splice(sinkMBB->begin(), BB,
9786                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9787     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9788
9789     BB->addSuccessor(readMBB);
9790     BB = readMBB;
9791
9792     MachineRegisterInfo &RegInfo = F->getRegInfo();
9793     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9794     unsigned LoReg = MI.getOperand(0).getReg();
9795     unsigned HiReg = MI.getOperand(1).getReg();
9796
9797     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9798     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9799     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9800
9801     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9802
9803     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9804       .addReg(HiReg).addReg(ReadAgainReg);
9805     BuildMI(BB, dl, TII->get(PPC::BCC))
9806       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9807
9808     BB->addSuccessor(readMBB);
9809     BB->addSuccessor(sinkMBB);
9810   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9811     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9812   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9813     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9814   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9815     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9816   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9817     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9818
9819   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9820     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9821   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9822     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9823   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9824     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9825   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9826     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9827
9828   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9829     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9830   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9831     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9832   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9833     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9834   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9835     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9836
9837   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9838     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9839   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9840     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9841   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9842     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9843   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9844     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9845
9846   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9847     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9848   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9849     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9850   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9851     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9852   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9853     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9854
9855   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9856     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9857   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9858     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9859   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9860     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9861   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9862     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9863
9864   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9865     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9866   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9867     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9868   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9869     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9870   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9871     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9872
9873   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9874     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9875   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9876     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9877   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9878     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9879   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9880     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9881
9882   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9883     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9884   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9885     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9886   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9887     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9888   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9889     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9890
9891   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9892     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9893   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9894     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9895   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9896     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9897   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9898     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9899
9900   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9901     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9902   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9903     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9904   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9905     BB = EmitAtomicBinary(MI, BB, 4, 0);
9906   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9907     BB = EmitAtomicBinary(MI, BB, 8, 0);
9908   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9909            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9910            (Subtarget.hasPartwordAtomics() &&
9911             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9912            (Subtarget.hasPartwordAtomics() &&
9913             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9914     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9915
9916     auto LoadMnemonic = PPC::LDARX;
9917     auto StoreMnemonic = PPC::STDCX;
9918     switch (MI.getOpcode()) {
9919     default:
9920       llvm_unreachable("Compare and swap of unknown size");
9921     case PPC::ATOMIC_CMP_SWAP_I8:
9922       LoadMnemonic = PPC::LBARX;
9923       StoreMnemonic = PPC::STBCX;
9924       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9925       break;
9926     case PPC::ATOMIC_CMP_SWAP_I16:
9927       LoadMnemonic = PPC::LHARX;
9928       StoreMnemonic = PPC::STHCX;
9929       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9930       break;
9931     case PPC::ATOMIC_CMP_SWAP_I32:
9932       LoadMnemonic = PPC::LWARX;
9933       StoreMnemonic = PPC::STWCX;
9934       break;
9935     case PPC::ATOMIC_CMP_SWAP_I64:
9936       LoadMnemonic = PPC::LDARX;
9937       StoreMnemonic = PPC::STDCX;
9938       break;
9939     }
9940     unsigned dest = MI.getOperand(0).getReg();
9941     unsigned ptrA = MI.getOperand(1).getReg();
9942     unsigned ptrB = MI.getOperand(2).getReg();
9943     unsigned oldval = MI.getOperand(3).getReg();
9944     unsigned newval = MI.getOperand(4).getReg();
9945     DebugLoc dl = MI.getDebugLoc();
9946
9947     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9948     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9949     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9950     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9951     F->insert(It, loop1MBB);
9952     F->insert(It, loop2MBB);
9953     F->insert(It, midMBB);
9954     F->insert(It, exitMBB);
9955     exitMBB->splice(exitMBB->begin(), BB,
9956                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9957     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9958
9959     //  thisMBB:
9960     //   ...
9961     //   fallthrough --> loopMBB
9962     BB->addSuccessor(loop1MBB);
9963
9964     // loop1MBB:
9965     //   l[bhwd]arx dest, ptr
9966     //   cmp[wd] dest, oldval
9967     //   bne- midMBB
9968     // loop2MBB:
9969     //   st[bhwd]cx. newval, ptr
9970     //   bne- loopMBB
9971     //   b exitBB
9972     // midMBB:
9973     //   st[bhwd]cx. dest, ptr
9974     // exitBB:
9975     BB = loop1MBB;
9976     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9977       .addReg(ptrA).addReg(ptrB);
9978     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9979       .addReg(oldval).addReg(dest);
9980     BuildMI(BB, dl, TII->get(PPC::BCC))
9981       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9982     BB->addSuccessor(loop2MBB);
9983     BB->addSuccessor(midMBB);
9984
9985     BB = loop2MBB;
9986     BuildMI(BB, dl, TII->get(StoreMnemonic))
9987       .addReg(newval).addReg(ptrA).addReg(ptrB);
9988     BuildMI(BB, dl, TII->get(PPC::BCC))
9989       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9990     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9991     BB->addSuccessor(loop1MBB);
9992     BB->addSuccessor(exitMBB);
9993
9994     BB = midMBB;
9995     BuildMI(BB, dl, TII->get(StoreMnemonic))
9996       .addReg(dest).addReg(ptrA).addReg(ptrB);
9997     BB->addSuccessor(exitMBB);
9998
9999     //  exitMBB:
10000     //   ...
10001     BB = exitMBB;
10002   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
10003              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
10004     // We must use 64-bit registers for addresses when targeting 64-bit,
10005     // since we're actually doing arithmetic on them.  Other registers
10006     // can be 32-bit.
10007     bool is64bit = Subtarget.isPPC64();
10008     bool isLittleEndian = Subtarget.isLittleEndian();
10009     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
10010
10011     unsigned dest = MI.getOperand(0).getReg();
10012     unsigned ptrA = MI.getOperand(1).getReg();
10013     unsigned ptrB = MI.getOperand(2).getReg();
10014     unsigned oldval = MI.getOperand(3).getReg();
10015     unsigned newval = MI.getOperand(4).getReg();
10016     DebugLoc dl = MI.getDebugLoc();
10017
10018     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10019     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10020     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10021     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10022     F->insert(It, loop1MBB);
10023     F->insert(It, loop2MBB);
10024     F->insert(It, midMBB);
10025     F->insert(It, exitMBB);
10026     exitMBB->splice(exitMBB->begin(), BB,
10027                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10028     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10029
10030     MachineRegisterInfo &RegInfo = F->getRegInfo();
10031     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
10032                                             : &PPC::GPRCRegClass;
10033     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10034     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
10035     unsigned ShiftReg =
10036       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10037     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
10038     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
10039     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
10040     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
10041     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
10042     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
10043     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
10044     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
10045     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
10046     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
10047     unsigned Ptr1Reg;
10048     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
10049     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10050     //  thisMBB:
10051     //   ...
10052     //   fallthrough --> loopMBB
10053     BB->addSuccessor(loop1MBB);
10054
10055     // The 4-byte load must be aligned, while a char or short may be
10056     // anywhere in the word.  Hence all this nasty bookkeeping code.
10057     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10058     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10059     //   xori shift, shift1, 24 [16]
10060     //   rlwinm ptr, ptr1, 0, 0, 29
10061     //   slw newval2, newval, shift
10062     //   slw oldval2, oldval,shift
10063     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10064     //   slw mask, mask2, shift
10065     //   and newval3, newval2, mask
10066     //   and oldval3, oldval2, mask
10067     // loop1MBB:
10068     //   lwarx tmpDest, ptr
10069     //   and tmp, tmpDest, mask
10070     //   cmpw tmp, oldval3
10071     //   bne- midMBB
10072     // loop2MBB:
10073     //   andc tmp2, tmpDest, mask
10074     //   or tmp4, tmp2, newval3
10075     //   stwcx. tmp4, ptr
10076     //   bne- loop1MBB
10077     //   b exitBB
10078     // midMBB:
10079     //   stwcx. tmpDest, ptr
10080     // exitBB:
10081     //   srw dest, tmpDest, shift
10082     if (ptrA != ZeroReg) {
10083       Ptr1Reg = RegInfo.createVirtualRegister(RC);
10084       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10085         .addReg(ptrA).addReg(ptrB);
10086     } else {
10087       Ptr1Reg = ptrB;
10088     }
10089     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
10090         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10091     if (!isLittleEndian)
10092       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10093           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10094     if (is64bit)
10095       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10096         .addReg(Ptr1Reg).addImm(0).addImm(61);
10097     else
10098       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10099         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
10100     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10101         .addReg(newval).addReg(ShiftReg);
10102     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10103         .addReg(oldval).addReg(ShiftReg);
10104     if (is8bit)
10105       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10106     else {
10107       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10108       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10109         .addReg(Mask3Reg).addImm(65535);
10110     }
10111     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10112         .addReg(Mask2Reg).addReg(ShiftReg);
10113     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10114         .addReg(NewVal2Reg).addReg(MaskReg);
10115     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10116         .addReg(OldVal2Reg).addReg(MaskReg);
10117
10118     BB = loop1MBB;
10119     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10120         .addReg(ZeroReg).addReg(PtrReg);
10121     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
10122         .addReg(TmpDestReg).addReg(MaskReg);
10123     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10124         .addReg(TmpReg).addReg(OldVal3Reg);
10125     BuildMI(BB, dl, TII->get(PPC::BCC))
10126         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10127     BB->addSuccessor(loop2MBB);
10128     BB->addSuccessor(midMBB);
10129
10130     BB = loop2MBB;
10131     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
10132         .addReg(TmpDestReg).addReg(MaskReg);
10133     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
10134         .addReg(Tmp2Reg).addReg(NewVal3Reg);
10135     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
10136         .addReg(ZeroReg).addReg(PtrReg);
10137     BuildMI(BB, dl, TII->get(PPC::BCC))
10138       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10139     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10140     BB->addSuccessor(loop1MBB);
10141     BB->addSuccessor(exitMBB);
10142
10143     BB = midMBB;
10144     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
10145       .addReg(ZeroReg).addReg(PtrReg);
10146     BB->addSuccessor(exitMBB);
10147
10148     //  exitMBB:
10149     //   ...
10150     BB = exitMBB;
10151     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
10152       .addReg(ShiftReg);
10153   } else if (MI.getOpcode() == PPC::FADDrtz) {
10154     // This pseudo performs an FADD with rounding mode temporarily forced
10155     // to round-to-zero.  We emit this via custom inserter since the FPSCR
10156     // is not modeled at the SelectionDAG level.
10157     unsigned Dest = MI.getOperand(0).getReg();
10158     unsigned Src1 = MI.getOperand(1).getReg();
10159     unsigned Src2 = MI.getOperand(2).getReg();
10160     DebugLoc dl = MI.getDebugLoc();
10161
10162     MachineRegisterInfo &RegInfo = F->getRegInfo();
10163     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10164
10165     // Save FPSCR value.
10166     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10167
10168     // Set rounding mode to round-to-zero.
10169     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10170     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10171
10172     // Perform addition.
10173     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10174
10175     // Restore FPSCR value.
10176     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10177   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10178              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10179              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10180              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10181     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10182                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10183                           ? PPC::ANDIo8
10184                           : PPC::ANDIo;
10185     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10186                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10187
10188     MachineRegisterInfo &RegInfo = F->getRegInfo();
10189     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
10190                                                   &PPC::GPRCRegClass :
10191                                                   &PPC::G8RCRegClass);
10192
10193     DebugLoc dl = MI.getDebugLoc();
10194     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10195         .addReg(MI.getOperand(1).getReg())
10196         .addImm(1);
10197     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10198             MI.getOperand(0).getReg())
10199         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10200   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10201     DebugLoc Dl = MI.getDebugLoc();
10202     MachineRegisterInfo &RegInfo = F->getRegInfo();
10203     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10204     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10205     return BB;
10206   } else {
10207     llvm_unreachable("Unexpected instr type to insert");
10208   }
10209
10210   MI.eraseFromParent(); // The pseudo instruction is gone now.
10211   return BB;
10212 }
10213
10214 //===----------------------------------------------------------------------===//
10215 // Target Optimization Hooks
10216 //===----------------------------------------------------------------------===//
10217
10218 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10219   // For the estimates, convergence is quadratic, so we essentially double the
10220   // number of digits correct after every iteration. For both FRE and FRSQRTE,
10221   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10222   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
10223   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10224   if (VT.getScalarType() == MVT::f64)
10225     RefinementSteps++;
10226   return RefinementSteps;
10227 }
10228
10229 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10230                                            int Enabled, int &RefinementSteps,
10231                                            bool &UseOneConstNR,
10232                                            bool Reciprocal) const {
10233   EVT VT = Operand.getValueType();
10234   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10235       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10236       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10237       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10238       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10239       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10240     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10241       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10242
10243     UseOneConstNR = true;
10244     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
10245   }
10246   return SDValue();
10247 }
10248
10249 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
10250                                             int Enabled,
10251                                             int &RefinementSteps) const {
10252   EVT VT = Operand.getValueType();
10253   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10254       (VT == MVT::f64 && Subtarget.hasFRE()) ||
10255       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10256       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10257       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10258       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10259     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10260       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10261     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
10262   }
10263   return SDValue();
10264 }
10265
10266 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
10267   // Note: This functionality is used only when unsafe-fp-math is enabled, and
10268   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
10269   // enabled for division), this functionality is redundant with the default
10270   // combiner logic (once the division -> reciprocal/multiply transformation
10271   // has taken place). As a result, this matters more for older cores than for
10272   // newer ones.
10273
10274   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10275   // reciprocal if there are two or more FDIVs (for embedded cores with only
10276   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
10277   switch (Subtarget.getDarwinDirective()) {
10278   default:
10279     return 3;
10280   case PPC::DIR_440:
10281   case PPC::DIR_A2:
10282   case PPC::DIR_E500mc:
10283   case PPC::DIR_E5500:
10284     return 2;
10285   }
10286 }
10287
10288 // isConsecutiveLSLoc needs to work even if all adds have not yet been
10289 // collapsed, and so we need to look through chains of them.
10290 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
10291                                      int64_t& Offset, SelectionDAG &DAG) {
10292   if (DAG.isBaseWithConstantOffset(Loc)) {
10293     Base = Loc.getOperand(0);
10294     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
10295
10296     // The base might itself be a base plus an offset, and if so, accumulate
10297     // that as well.
10298     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
10299   }
10300 }
10301
10302 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
10303                             unsigned Bytes, int Dist,
10304                             SelectionDAG &DAG) {
10305   if (VT.getSizeInBits() / 8 != Bytes)
10306     return false;
10307
10308   SDValue BaseLoc = Base->getBasePtr();
10309   if (Loc.getOpcode() == ISD::FrameIndex) {
10310     if (BaseLoc.getOpcode() != ISD::FrameIndex)
10311       return false;
10312     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10313     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
10314     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
10315     int FS  = MFI.getObjectSize(FI);
10316     int BFS = MFI.getObjectSize(BFI);
10317     if (FS != BFS || FS != (int)Bytes) return false;
10318     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
10319   }
10320
10321   SDValue Base1 = Loc, Base2 = BaseLoc;
10322   int64_t Offset1 = 0, Offset2 = 0;
10323   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
10324   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
10325   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
10326     return true;
10327
10328   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10329   const GlobalValue *GV1 = nullptr;
10330   const GlobalValue *GV2 = nullptr;
10331   Offset1 = 0;
10332   Offset2 = 0;
10333   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
10334   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
10335   if (isGA1 && isGA2 && GV1 == GV2)
10336     return Offset1 == (Offset2 + Dist*Bytes);
10337   return false;
10338 }
10339
10340 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
10341 // not enforce equality of the chain operands.
10342 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
10343                             unsigned Bytes, int Dist,
10344                             SelectionDAG &DAG) {
10345   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
10346     EVT VT = LS->getMemoryVT();
10347     SDValue Loc = LS->getBasePtr();
10348     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
10349   }
10350
10351   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
10352     EVT VT;
10353     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10354     default: return false;
10355     case Intrinsic::ppc_qpx_qvlfd:
10356     case Intrinsic::ppc_qpx_qvlfda:
10357       VT = MVT::v4f64;
10358       break;
10359     case Intrinsic::ppc_qpx_qvlfs:
10360     case Intrinsic::ppc_qpx_qvlfsa:
10361       VT = MVT::v4f32;
10362       break;
10363     case Intrinsic::ppc_qpx_qvlfcd:
10364     case Intrinsic::ppc_qpx_qvlfcda:
10365       VT = MVT::v2f64;
10366       break;
10367     case Intrinsic::ppc_qpx_qvlfcs:
10368     case Intrinsic::ppc_qpx_qvlfcsa:
10369       VT = MVT::v2f32;
10370       break;
10371     case Intrinsic::ppc_qpx_qvlfiwa:
10372     case Intrinsic::ppc_qpx_qvlfiwz:
10373     case Intrinsic::ppc_altivec_lvx:
10374     case Intrinsic::ppc_altivec_lvxl:
10375     case Intrinsic::ppc_vsx_lxvw4x:
10376     case Intrinsic::ppc_vsx_lxvw4x_be:
10377       VT = MVT::v4i32;
10378       break;
10379     case Intrinsic::ppc_vsx_lxvd2x:
10380     case Intrinsic::ppc_vsx_lxvd2x_be:
10381       VT = MVT::v2f64;
10382       break;
10383     case Intrinsic::ppc_altivec_lvebx:
10384       VT = MVT::i8;
10385       break;
10386     case Intrinsic::ppc_altivec_lvehx:
10387       VT = MVT::i16;
10388       break;
10389     case Intrinsic::ppc_altivec_lvewx:
10390       VT = MVT::i32;
10391       break;
10392     }
10393
10394     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
10395   }
10396
10397   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
10398     EVT VT;
10399     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10400     default: return false;
10401     case Intrinsic::ppc_qpx_qvstfd:
10402     case Intrinsic::ppc_qpx_qvstfda:
10403       VT = MVT::v4f64;
10404       break;
10405     case Intrinsic::ppc_qpx_qvstfs:
10406     case Intrinsic::ppc_qpx_qvstfsa:
10407       VT = MVT::v4f32;
10408       break;
10409     case Intrinsic::ppc_qpx_qvstfcd:
10410     case Intrinsic::ppc_qpx_qvstfcda:
10411       VT = MVT::v2f64;
10412       break;
10413     case Intrinsic::ppc_qpx_qvstfcs:
10414     case Intrinsic::ppc_qpx_qvstfcsa:
10415       VT = MVT::v2f32;
10416       break;
10417     case Intrinsic::ppc_qpx_qvstfiw:
10418     case Intrinsic::ppc_qpx_qvstfiwa:
10419     case Intrinsic::ppc_altivec_stvx:
10420     case Intrinsic::ppc_altivec_stvxl:
10421     case Intrinsic::ppc_vsx_stxvw4x:
10422       VT = MVT::v4i32;
10423       break;
10424     case Intrinsic::ppc_vsx_stxvd2x:
10425       VT = MVT::v2f64;
10426       break;
10427     case Intrinsic::ppc_vsx_stxvw4x_be:
10428       VT = MVT::v4i32;
10429       break;
10430     case Intrinsic::ppc_vsx_stxvd2x_be:
10431       VT = MVT::v2f64;
10432       break;
10433     case Intrinsic::ppc_altivec_stvebx:
10434       VT = MVT::i8;
10435       break;
10436     case Intrinsic::ppc_altivec_stvehx:
10437       VT = MVT::i16;
10438       break;
10439     case Intrinsic::ppc_altivec_stvewx:
10440       VT = MVT::i32;
10441       break;
10442     }
10443
10444     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
10445   }
10446
10447   return false;
10448 }
10449
10450 // Return true is there is a nearyby consecutive load to the one provided
10451 // (regardless of alignment). We search up and down the chain, looking though
10452 // token factors and other loads (but nothing else). As a result, a true result
10453 // indicates that it is safe to create a new consecutive load adjacent to the
10454 // load provided.
10455 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
10456   SDValue Chain = LD->getChain();
10457   EVT VT = LD->getMemoryVT();
10458
10459   SmallSet<SDNode *, 16> LoadRoots;
10460   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
10461   SmallSet<SDNode *, 16> Visited;
10462
10463   // First, search up the chain, branching to follow all token-factor operands.
10464   // If we find a consecutive load, then we're done, otherwise, record all
10465   // nodes just above the top-level loads and token factors.
10466   while (!Queue.empty()) {
10467     SDNode *ChainNext = Queue.pop_back_val();
10468     if (!Visited.insert(ChainNext).second)
10469       continue;
10470
10471     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
10472       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10473         return true;
10474
10475       if (!Visited.count(ChainLD->getChain().getNode()))
10476         Queue.push_back(ChainLD->getChain().getNode());
10477     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
10478       for (const SDUse &O : ChainNext->ops())
10479         if (!Visited.count(O.getNode()))
10480           Queue.push_back(O.getNode());
10481     } else
10482       LoadRoots.insert(ChainNext);
10483   }
10484
10485   // Second, search down the chain, starting from the top-level nodes recorded
10486   // in the first phase. These top-level nodes are the nodes just above all
10487   // loads and token factors. Starting with their uses, recursively look though
10488   // all loads (just the chain uses) and token factors to find a consecutive
10489   // load.
10490   Visited.clear();
10491   Queue.clear();
10492
10493   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10494        IE = LoadRoots.end(); I != IE; ++I) {
10495     Queue.push_back(*I);
10496
10497     while (!Queue.empty()) {
10498       SDNode *LoadRoot = Queue.pop_back_val();
10499       if (!Visited.insert(LoadRoot).second)
10500         continue;
10501
10502       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10503         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10504           return true;
10505
10506       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10507            UE = LoadRoot->use_end(); UI != UE; ++UI)
10508         if (((isa<MemSDNode>(*UI) &&
10509             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10510             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10511           Queue.push_back(*UI);
10512     }
10513   }
10514
10515   return false;
10516 }
10517
10518 /// This function is called when we have proved that a SETCC node can be replaced
10519 /// by subtraction (and other supporting instructions) so that the result of
10520 /// comparison is kept in a GPR instead of CR. This function is purely for
10521 /// codegen purposes and has some flags to guide the codegen process.
10522 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10523                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10524   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10525
10526   // Zero extend the operands to the largest legal integer. Originally, they
10527   // must be of a strictly smaller size.
10528   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10529                          DAG.getConstant(Size, DL, MVT::i32));
10530   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10531                          DAG.getConstant(Size, DL, MVT::i32));
10532
10533   // Swap if needed. Depends on the condition code.
10534   if (Swap)
10535     std::swap(Op0, Op1);
10536
10537   // Subtract extended integers.
10538   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10539
10540   // Move the sign bit to the least significant position and zero out the rest.
10541   // Now the least significant bit carries the result of original comparison.
10542   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10543                              DAG.getConstant(Size - 1, DL, MVT::i32));
10544   auto Final = Shifted;
10545
10546   // Complement the result if needed. Based on the condition code.
10547   if (Complement)
10548     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10549                         DAG.getConstant(1, DL, MVT::i64));
10550
10551   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10552 }
10553
10554 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10555                                                   DAGCombinerInfo &DCI) const {
10556   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10557
10558   SelectionDAG &DAG = DCI.DAG;
10559   SDLoc DL(N);
10560
10561   // Size of integers being compared has a critical role in the following
10562   // analysis, so we prefer to do this when all types are legal.
10563   if (!DCI.isAfterLegalizeVectorOps())
10564     return SDValue();
10565
10566   // If all users of SETCC extend its value to a legal integer type
10567   // then we replace SETCC with a subtraction
10568   for (SDNode::use_iterator UI = N->use_begin(),
10569        UE = N->use_end(); UI != UE; ++UI) {
10570     if (UI->getOpcode() != ISD::ZERO_EXTEND)
10571       return SDValue();
10572   }
10573
10574   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10575   auto OpSize = N->getOperand(0).getValueSizeInBits();
10576
10577   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10578
10579   if (OpSize < Size) {
10580     switch (CC) {
10581     default: break;
10582     case ISD::SETULT:
10583       return generateEquivalentSub(N, Size, false, false, DL, DAG);
10584     case ISD::SETULE:
10585       return generateEquivalentSub(N, Size, true, true, DL, DAG);
10586     case ISD::SETUGT:
10587       return generateEquivalentSub(N, Size, false, true, DL, DAG);
10588     case ISD::SETUGE:
10589       return generateEquivalentSub(N, Size, true, false, DL, DAG);
10590     }
10591   }
10592
10593   return SDValue();
10594 }
10595
10596 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10597                                                   DAGCombinerInfo &DCI) const {
10598   SelectionDAG &DAG = DCI.DAG;
10599   SDLoc dl(N);
10600
10601   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10602   // If we're tracking CR bits, we need to be careful that we don't have:
10603   //   trunc(binary-ops(zext(x), zext(y)))
10604   // or
10605   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
10606   // such that we're unnecessarily moving things into GPRs when it would be
10607   // better to keep them in CR bits.
10608
10609   // Note that trunc here can be an actual i1 trunc, or can be the effective
10610   // truncation that comes from a setcc or select_cc.
10611   if (N->getOpcode() == ISD::TRUNCATE &&
10612       N->getValueType(0) != MVT::i1)
10613     return SDValue();
10614
10615   if (N->getOperand(0).getValueType() != MVT::i32 &&
10616       N->getOperand(0).getValueType() != MVT::i64)
10617     return SDValue();
10618
10619   if (N->getOpcode() == ISD::SETCC ||
10620       N->getOpcode() == ISD::SELECT_CC) {
10621     // If we're looking at a comparison, then we need to make sure that the
10622     // high bits (all except for the first) don't matter the result.
10623     ISD::CondCode CC =
10624       cast<CondCodeSDNode>(N->getOperand(
10625         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10626     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10627
10628     if (ISD::isSignedIntSetCC(CC)) {
10629       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10630           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10631         return SDValue();
10632     } else if (ISD::isUnsignedIntSetCC(CC)) {
10633       if (!DAG.MaskedValueIsZero(N->getOperand(0),
10634                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10635           !DAG.MaskedValueIsZero(N->getOperand(1),
10636                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
10637         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10638                                              : SDValue());
10639     } else {
10640       // This is neither a signed nor an unsigned comparison, just make sure
10641       // that the high bits are equal.
10642       KnownBits Op1Known, Op2Known;
10643       DAG.computeKnownBits(N->getOperand(0), Op1Known);
10644       DAG.computeKnownBits(N->getOperand(1), Op2Known);
10645
10646       // We don't really care about what is known about the first bit (if
10647       // anything), so clear it in all masks prior to comparing them.
10648       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
10649       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
10650
10651       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
10652         return SDValue();
10653     }
10654   }
10655
10656   // We now know that the higher-order bits are irrelevant, we just need to
10657   // make sure that all of the intermediate operations are bit operations, and
10658   // all inputs are extensions.
10659   if (N->getOperand(0).getOpcode() != ISD::AND &&
10660       N->getOperand(0).getOpcode() != ISD::OR  &&
10661       N->getOperand(0).getOpcode() != ISD::XOR &&
10662       N->getOperand(0).getOpcode() != ISD::SELECT &&
10663       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10664       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10665       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10666       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10667       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10668     return SDValue();
10669
10670   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10671       N->getOperand(1).getOpcode() != ISD::AND &&
10672       N->getOperand(1).getOpcode() != ISD::OR  &&
10673       N->getOperand(1).getOpcode() != ISD::XOR &&
10674       N->getOperand(1).getOpcode() != ISD::SELECT &&
10675       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10676       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10677       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10678       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10679       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10680     return SDValue();
10681
10682   SmallVector<SDValue, 4> Inputs;
10683   SmallVector<SDValue, 8> BinOps, PromOps;
10684   SmallPtrSet<SDNode *, 16> Visited;
10685
10686   for (unsigned i = 0; i < 2; ++i) {
10687     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10688           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10689           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10690           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10691         isa<ConstantSDNode>(N->getOperand(i)))
10692       Inputs.push_back(N->getOperand(i));
10693     else
10694       BinOps.push_back(N->getOperand(i));
10695
10696     if (N->getOpcode() == ISD::TRUNCATE)
10697       break;
10698   }
10699
10700   // Visit all inputs, collect all binary operations (and, or, xor and
10701   // select) that are all fed by extensions.
10702   while (!BinOps.empty()) {
10703     SDValue BinOp = BinOps.back();
10704     BinOps.pop_back();
10705
10706     if (!Visited.insert(BinOp.getNode()).second)
10707       continue;
10708
10709     PromOps.push_back(BinOp);
10710
10711     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10712       // The condition of the select is not promoted.
10713       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10714         continue;
10715       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10716         continue;
10717
10718       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10719             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10720             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10721            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10722           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10723         Inputs.push_back(BinOp.getOperand(i));
10724       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10725                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10726                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10727                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10728                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10729                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10730                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10731                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10732                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10733         BinOps.push_back(BinOp.getOperand(i));
10734       } else {
10735         // We have an input that is not an extension or another binary
10736         // operation; we'll abort this transformation.
10737         return SDValue();
10738       }
10739     }
10740   }
10741
10742   // Make sure that this is a self-contained cluster of operations (which
10743   // is not quite the same thing as saying that everything has only one
10744   // use).
10745   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10746     if (isa<ConstantSDNode>(Inputs[i]))
10747       continue;
10748
10749     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10750                               UE = Inputs[i].getNode()->use_end();
10751          UI != UE; ++UI) {
10752       SDNode *User = *UI;
10753       if (User != N && !Visited.count(User))
10754         return SDValue();
10755
10756       // Make sure that we're not going to promote the non-output-value
10757       // operand(s) or SELECT or SELECT_CC.
10758       // FIXME: Although we could sometimes handle this, and it does occur in
10759       // practice that one of the condition inputs to the select is also one of
10760       // the outputs, we currently can't deal with this.
10761       if (User->getOpcode() == ISD::SELECT) {
10762         if (User->getOperand(0) == Inputs[i])
10763           return SDValue();
10764       } else if (User->getOpcode() == ISD::SELECT_CC) {
10765         if (User->getOperand(0) == Inputs[i] ||
10766             User->getOperand(1) == Inputs[i])
10767           return SDValue();
10768       }
10769     }
10770   }
10771
10772   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10773     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10774                               UE = PromOps[i].getNode()->use_end();
10775          UI != UE; ++UI) {
10776       SDNode *User = *UI;
10777       if (User != N && !Visited.count(User))
10778         return SDValue();
10779
10780       // Make sure that we're not going to promote the non-output-value
10781       // operand(s) or SELECT or SELECT_CC.
10782       // FIXME: Although we could sometimes handle this, and it does occur in
10783       // practice that one of the condition inputs to the select is also one of
10784       // the outputs, we currently can't deal with this.
10785       if (User->getOpcode() == ISD::SELECT) {
10786         if (User->getOperand(0) == PromOps[i])
10787           return SDValue();
10788       } else if (User->getOpcode() == ISD::SELECT_CC) {
10789         if (User->getOperand(0) == PromOps[i] ||
10790             User->getOperand(1) == PromOps[i])
10791           return SDValue();
10792       }
10793     }
10794   }
10795
10796   // Replace all inputs with the extension operand.
10797   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10798     // Constants may have users outside the cluster of to-be-promoted nodes,
10799     // and so we need to replace those as we do the promotions.
10800     if (isa<ConstantSDNode>(Inputs[i]))
10801       continue;
10802     else
10803       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10804   }
10805
10806   std::list<HandleSDNode> PromOpHandles;
10807   for (auto &PromOp : PromOps)
10808     PromOpHandles.emplace_back(PromOp);
10809
10810   // Replace all operations (these are all the same, but have a different
10811   // (i1) return type). DAG.getNode will validate that the types of
10812   // a binary operator match, so go through the list in reverse so that
10813   // we've likely promoted both operands first. Any intermediate truncations or
10814   // extensions disappear.
10815   while (!PromOpHandles.empty()) {
10816     SDValue PromOp = PromOpHandles.back().getValue();
10817     PromOpHandles.pop_back();
10818
10819     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10820         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10821         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10822         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10823       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10824           PromOp.getOperand(0).getValueType() != MVT::i1) {
10825         // The operand is not yet ready (see comment below).
10826         PromOpHandles.emplace_front(PromOp);
10827         continue;
10828       }
10829
10830       SDValue RepValue = PromOp.getOperand(0);
10831       if (isa<ConstantSDNode>(RepValue))
10832         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10833
10834       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10835       continue;
10836     }
10837
10838     unsigned C;
10839     switch (PromOp.getOpcode()) {
10840     default:             C = 0; break;
10841     case ISD::SELECT:    C = 1; break;
10842     case ISD::SELECT_CC: C = 2; break;
10843     }
10844
10845     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10846          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10847         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10848          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10849       // The to-be-promoted operands of this node have not yet been
10850       // promoted (this should be rare because we're going through the
10851       // list backward, but if one of the operands has several users in
10852       // this cluster of to-be-promoted nodes, it is possible).
10853       PromOpHandles.emplace_front(PromOp);
10854       continue;
10855     }
10856
10857     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10858                                 PromOp.getNode()->op_end());
10859
10860     // If there are any constant inputs, make sure they're replaced now.
10861     for (unsigned i = 0; i < 2; ++i)
10862       if (isa<ConstantSDNode>(Ops[C+i]))
10863         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10864
10865     DAG.ReplaceAllUsesOfValueWith(PromOp,
10866       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10867   }
10868
10869   // Now we're left with the initial truncation itself.
10870   if (N->getOpcode() == ISD::TRUNCATE)
10871     return N->getOperand(0);
10872
10873   // Otherwise, this is a comparison. The operands to be compared have just
10874   // changed type (to i1), but everything else is the same.
10875   return SDValue(N, 0);
10876 }
10877
10878 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10879                                                   DAGCombinerInfo &DCI) const {
10880   SelectionDAG &DAG = DCI.DAG;
10881   SDLoc dl(N);
10882
10883   // If we're tracking CR bits, we need to be careful that we don't have:
10884   //   zext(binary-ops(trunc(x), trunc(y)))
10885   // or
10886   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
10887   // such that we're unnecessarily moving things into CR bits that can more
10888   // efficiently stay in GPRs. Note that if we're not certain that the high
10889   // bits are set as required by the final extension, we still may need to do
10890   // some masking to get the proper behavior.
10891
10892   // This same functionality is important on PPC64 when dealing with
10893   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10894   // the return values of functions. Because it is so similar, it is handled
10895   // here as well.
10896
10897   if (N->getValueType(0) != MVT::i32 &&
10898       N->getValueType(0) != MVT::i64)
10899     return SDValue();
10900
10901   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10902         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10903     return SDValue();
10904
10905   if (N->getOperand(0).getOpcode() != ISD::AND &&
10906       N->getOperand(0).getOpcode() != ISD::OR  &&
10907       N->getOperand(0).getOpcode() != ISD::XOR &&
10908       N->getOperand(0).getOpcode() != ISD::SELECT &&
10909       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10910     return SDValue();
10911
10912   SmallVector<SDValue, 4> Inputs;
10913   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10914   SmallPtrSet<SDNode *, 16> Visited;
10915
10916   // Visit all inputs, collect all binary operations (and, or, xor and
10917   // select) that are all fed by truncations.
10918   while (!BinOps.empty()) {
10919     SDValue BinOp = BinOps.back();
10920     BinOps.pop_back();
10921
10922     if (!Visited.insert(BinOp.getNode()).second)
10923       continue;
10924
10925     PromOps.push_back(BinOp);
10926
10927     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10928       // The condition of the select is not promoted.
10929       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10930         continue;
10931       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10932         continue;
10933
10934       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10935           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10936         Inputs.push_back(BinOp.getOperand(i));
10937       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10938                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10939                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10940                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10941                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10942         BinOps.push_back(BinOp.getOperand(i));
10943       } else {
10944         // We have an input that is not a truncation or another binary
10945         // operation; we'll abort this transformation.
10946         return SDValue();
10947       }
10948     }
10949   }
10950
10951   // The operands of a select that must be truncated when the select is
10952   // promoted because the operand is actually part of the to-be-promoted set.
10953   DenseMap<SDNode *, EVT> SelectTruncOp[2];
10954
10955   // Make sure that this is a self-contained cluster of operations (which
10956   // is not quite the same thing as saying that everything has only one
10957   // use).
10958   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10959     if (isa<ConstantSDNode>(Inputs[i]))
10960       continue;
10961
10962     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10963                               UE = Inputs[i].getNode()->use_end();
10964          UI != UE; ++UI) {
10965       SDNode *User = *UI;
10966       if (User != N && !Visited.count(User))
10967         return SDValue();
10968
10969       // If we're going to promote the non-output-value operand(s) or SELECT or
10970       // SELECT_CC, record them for truncation.
10971       if (User->getOpcode() == ISD::SELECT) {
10972         if (User->getOperand(0) == Inputs[i])
10973           SelectTruncOp[0].insert(std::make_pair(User,
10974                                     User->getOperand(0).getValueType()));
10975       } else if (User->getOpcode() == ISD::SELECT_CC) {
10976         if (User->getOperand(0) == Inputs[i])
10977           SelectTruncOp[0].insert(std::make_pair(User,
10978                                     User->getOperand(0).getValueType()));
10979         if (User->getOperand(1) == Inputs[i])
10980           SelectTruncOp[1].insert(std::make_pair(User,
10981                                     User->getOperand(1).getValueType()));
10982       }
10983     }
10984   }
10985
10986   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10987     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10988                               UE = PromOps[i].getNode()->use_end();
10989          UI != UE; ++UI) {
10990       SDNode *User = *UI;
10991       if (User != N && !Visited.count(User))
10992         return SDValue();
10993
10994       // If we're going to promote the non-output-value operand(s) or SELECT or
10995       // SELECT_CC, record them for truncation.
10996       if (User->getOpcode() == ISD::SELECT) {
10997         if (User->getOperand(0) == PromOps[i])
10998           SelectTruncOp[0].insert(std::make_pair(User,
10999                                     User->getOperand(0).getValueType()));
11000       } else if (User->getOpcode() == ISD::SELECT_CC) {
11001         if (User->getOperand(0) == PromOps[i])
11002           SelectTruncOp[0].insert(std::make_pair(User,
11003                                     User->getOperand(0).getValueType()));
11004         if (User->getOperand(1) == PromOps[i])
11005           SelectTruncOp[1].insert(std::make_pair(User,
11006                                     User->getOperand(1).getValueType()));
11007       }
11008     }
11009   }
11010
11011   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
11012   bool ReallyNeedsExt = false;
11013   if (N->getOpcode() != ISD::ANY_EXTEND) {
11014     // If all of the inputs are not already sign/zero extended, then
11015     // we'll still need to do that at the end.
11016     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11017       if (isa<ConstantSDNode>(Inputs[i]))
11018         continue;
11019
11020       unsigned OpBits =
11021         Inputs[i].getOperand(0).getValueSizeInBits();
11022       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
11023
11024       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
11025            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
11026                                   APInt::getHighBitsSet(OpBits,
11027                                                         OpBits-PromBits))) ||
11028           (N->getOpcode() == ISD::SIGN_EXTEND &&
11029            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
11030              (OpBits-(PromBits-1)))) {
11031         ReallyNeedsExt = true;
11032         break;
11033       }
11034     }
11035   }
11036
11037   // Replace all inputs, either with the truncation operand, or a
11038   // truncation or extension to the final output type.
11039   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11040     // Constant inputs need to be replaced with the to-be-promoted nodes that
11041     // use them because they might have users outside of the cluster of
11042     // promoted nodes.
11043     if (isa<ConstantSDNode>(Inputs[i]))
11044       continue;
11045
11046     SDValue InSrc = Inputs[i].getOperand(0);
11047     if (Inputs[i].getValueType() == N->getValueType(0))
11048       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
11049     else if (N->getOpcode() == ISD::SIGN_EXTEND)
11050       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11051         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
11052     else if (N->getOpcode() == ISD::ZERO_EXTEND)
11053       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11054         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
11055     else
11056       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11057         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
11058   }
11059
11060   std::list<HandleSDNode> PromOpHandles;
11061   for (auto &PromOp : PromOps)
11062     PromOpHandles.emplace_back(PromOp);
11063
11064   // Replace all operations (these are all the same, but have a different
11065   // (promoted) return type). DAG.getNode will validate that the types of
11066   // a binary operator match, so go through the list in reverse so that
11067   // we've likely promoted both operands first.
11068   while (!PromOpHandles.empty()) {
11069     SDValue PromOp = PromOpHandles.back().getValue();
11070     PromOpHandles.pop_back();
11071
11072     unsigned C;
11073     switch (PromOp.getOpcode()) {
11074     default:             C = 0; break;
11075     case ISD::SELECT:    C = 1; break;
11076     case ISD::SELECT_CC: C = 2; break;
11077     }
11078
11079     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
11080          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
11081         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
11082          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
11083       // The to-be-promoted operands of this node have not yet been
11084       // promoted (this should be rare because we're going through the
11085       // list backward, but if one of the operands has several users in
11086       // this cluster of to-be-promoted nodes, it is possible).
11087       PromOpHandles.emplace_front(PromOp);
11088       continue;
11089     }
11090
11091     // For SELECT and SELECT_CC nodes, we do a similar check for any
11092     // to-be-promoted comparison inputs.
11093     if (PromOp.getOpcode() == ISD::SELECT ||
11094         PromOp.getOpcode() == ISD::SELECT_CC) {
11095       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
11096            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
11097           (SelectTruncOp[1].count(PromOp.getNode()) &&
11098            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
11099         PromOpHandles.emplace_front(PromOp);
11100         continue;
11101       }
11102     }
11103
11104     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
11105                                 PromOp.getNode()->op_end());
11106
11107     // If this node has constant inputs, then they'll need to be promoted here.
11108     for (unsigned i = 0; i < 2; ++i) {
11109       if (!isa<ConstantSDNode>(Ops[C+i]))
11110         continue;
11111       if (Ops[C+i].getValueType() == N->getValueType(0))
11112         continue;
11113
11114       if (N->getOpcode() == ISD::SIGN_EXTEND)
11115         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11116       else if (N->getOpcode() == ISD::ZERO_EXTEND)
11117         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11118       else
11119         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11120     }
11121
11122     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
11123     // truncate them again to the original value type.
11124     if (PromOp.getOpcode() == ISD::SELECT ||
11125         PromOp.getOpcode() == ISD::SELECT_CC) {
11126       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
11127       if (SI0 != SelectTruncOp[0].end())
11128         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
11129       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
11130       if (SI1 != SelectTruncOp[1].end())
11131         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
11132     }
11133
11134     DAG.ReplaceAllUsesOfValueWith(PromOp,
11135       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
11136   }
11137
11138   // Now we're left with the initial extension itself.
11139   if (!ReallyNeedsExt)
11140     return N->getOperand(0);
11141
11142   // To zero extend, just mask off everything except for the first bit (in the
11143   // i1 case).
11144   if (N->getOpcode() == ISD::ZERO_EXTEND)
11145     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
11146                        DAG.getConstant(APInt::getLowBitsSet(
11147                                          N->getValueSizeInBits(0), PromBits),
11148                                        dl, N->getValueType(0)));
11149
11150   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
11151          "Invalid extension type");
11152   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
11153   SDValue ShiftCst =
11154       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
11155   return DAG.getNode(
11156       ISD::SRA, dl, N->getValueType(0),
11157       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
11158       ShiftCst);
11159 }
11160
11161 /// \brief Reduces the number of fp-to-int conversion when building a vector.
11162 ///
11163 /// If this vector is built out of floating to integer conversions,
11164 /// transform it to a vector built out of floating point values followed by a
11165 /// single floating to integer conversion of the vector.
11166 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
11167 /// becomes (fptosi (build_vector ($A, $B, ...)))
11168 SDValue PPCTargetLowering::
11169 combineElementTruncationToVectorTruncation(SDNode *N,
11170                                            DAGCombinerInfo &DCI) const {
11171   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11172          "Should be called with a BUILD_VECTOR node");
11173
11174   SelectionDAG &DAG = DCI.DAG;
11175   SDLoc dl(N);
11176
11177   SDValue FirstInput = N->getOperand(0);
11178   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
11179          "The input operand must be an fp-to-int conversion.");
11180
11181   // This combine happens after legalization so the fp_to_[su]i nodes are
11182   // already converted to PPCSISD nodes.
11183   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
11184   if (FirstConversion == PPCISD::FCTIDZ ||
11185       FirstConversion == PPCISD::FCTIDUZ ||
11186       FirstConversion == PPCISD::FCTIWZ ||
11187       FirstConversion == PPCISD::FCTIWUZ) {
11188     bool IsSplat = true;
11189     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
11190       FirstConversion == PPCISD::FCTIWUZ;
11191     EVT SrcVT = FirstInput.getOperand(0).getValueType();
11192     SmallVector<SDValue, 4> Ops;
11193     EVT TargetVT = N->getValueType(0);
11194     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11195       if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
11196         return SDValue();
11197       unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
11198       if (NextConversion != FirstConversion)
11199         return SDValue();
11200       if (N->getOperand(i) != FirstInput)
11201         IsSplat = false;
11202     }
11203
11204     // If this is a splat, we leave it as-is since there will be only a single
11205     // fp-to-int conversion followed by a splat of the integer. This is better
11206     // for 32-bit and smaller ints and neutral for 64-bit ints.
11207     if (IsSplat)
11208       return SDValue();
11209
11210     // Now that we know we have the right type of node, get its operands
11211     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11212       SDValue In = N->getOperand(i).getOperand(0);
11213       // For 32-bit values, we need to add an FP_ROUND node.
11214       if (Is32Bit) {
11215         if (In.isUndef())
11216           Ops.push_back(DAG.getUNDEF(SrcVT));
11217         else {
11218           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
11219                                       MVT::f32, In.getOperand(0),
11220                                       DAG.getIntPtrConstant(1, dl));
11221           Ops.push_back(Trunc);
11222         }
11223       } else
11224         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
11225     }
11226
11227     unsigned Opcode;
11228     if (FirstConversion == PPCISD::FCTIDZ ||
11229         FirstConversion == PPCISD::FCTIWZ)
11230       Opcode = ISD::FP_TO_SINT;
11231     else
11232       Opcode = ISD::FP_TO_UINT;
11233
11234     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
11235     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
11236     return DAG.getNode(Opcode, dl, TargetVT, BV);
11237   }
11238   return SDValue();
11239 }
11240
11241 /// \brief Reduce the number of loads when building a vector.
11242 ///
11243 /// Building a vector out of multiple loads can be converted to a load
11244 /// of the vector type if the loads are consecutive. If the loads are
11245 /// consecutive but in descending order, a shuffle is added at the end
11246 /// to reorder the vector.
11247 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
11248   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11249          "Should be called with a BUILD_VECTOR node");
11250
11251   SDLoc dl(N);
11252   bool InputsAreConsecutiveLoads = true;
11253   bool InputsAreReverseConsecutive = true;
11254   unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
11255   SDValue FirstInput = N->getOperand(0);
11256   bool IsRoundOfExtLoad = false;
11257
11258   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
11259       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
11260     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
11261     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
11262   }
11263   // Not a build vector of (possibly fp_rounded) loads.
11264   if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
11265     return SDValue();
11266
11267   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
11268     // If any inputs are fp_round(extload), they all must be.
11269     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
11270       return SDValue();
11271
11272     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
11273       N->getOperand(i);
11274     if (NextInput.getOpcode() != ISD::LOAD)
11275       return SDValue();
11276
11277     SDValue PreviousInput =
11278       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
11279     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
11280     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
11281
11282     // If any inputs are fp_round(extload), they all must be.
11283     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
11284       return SDValue();
11285
11286     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
11287       InputsAreConsecutiveLoads = false;
11288     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
11289       InputsAreReverseConsecutive = false;
11290
11291     // Exit early if the loads are neither consecutive nor reverse consecutive.
11292     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
11293       return SDValue();
11294   }
11295
11296   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
11297          "The loads cannot be both consecutive and reverse consecutive.");
11298
11299   SDValue FirstLoadOp =
11300     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
11301   SDValue LastLoadOp =
11302     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
11303                        N->getOperand(N->getNumOperands()-1);
11304
11305   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
11306   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
11307   if (InputsAreConsecutiveLoads) {
11308     assert(LD1 && "Input needs to be a LoadSDNode.");
11309     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
11310                        LD1->getBasePtr(), LD1->getPointerInfo(),
11311                        LD1->getAlignment());
11312   }
11313   if (InputsAreReverseConsecutive) {
11314     assert(LDL && "Input needs to be a LoadSDNode.");
11315     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
11316                                LDL->getBasePtr(), LDL->getPointerInfo(),
11317                                LDL->getAlignment());
11318     SmallVector<int, 16> Ops;
11319     for (int i = N->getNumOperands() - 1; i >= 0; i--)
11320       Ops.push_back(i);
11321
11322     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
11323                                 DAG.getUNDEF(N->getValueType(0)), Ops);
11324   }
11325   return SDValue();
11326 }
11327
11328 // This function adds the required vector_shuffle needed to get
11329 // the elements of the vector extract in the correct position
11330 // as specified by the CorrectElems encoding.
11331 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
11332                                       SDValue Input, uint64_t Elems,
11333                                       uint64_t CorrectElems) {
11334   SDLoc dl(N);
11335
11336   unsigned NumElems = Input.getValueType().getVectorNumElements();
11337   SmallVector<int, 16> ShuffleMask(NumElems, -1);
11338
11339   // Knowing the element indices being extracted from the original
11340   // vector and the order in which they're being inserted, just put
11341   // them at element indices required for the instruction.
11342   for (unsigned i = 0; i < N->getNumOperands(); i++) {
11343     if (DAG.getDataLayout().isLittleEndian())
11344       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
11345     else
11346       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
11347     CorrectElems = CorrectElems >> 8;
11348     Elems = Elems >> 8;
11349   }
11350
11351   SDValue Shuffle =
11352       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
11353                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
11354
11355   EVT Ty = N->getValueType(0);
11356   SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
11357   return BV;
11358 }
11359
11360 // Look for build vector patterns where input operands come from sign
11361 // extended vector_extract elements of specific indices. If the correct indices
11362 // aren't used, add a vector shuffle to fix up the indices and create a new
11363 // PPCISD:SExtVElems node which selects the vector sign extend instructions
11364 // during instruction selection.
11365 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
11366   // This array encodes the indices that the vector sign extend instructions
11367   // extract from when extending from one type to another for both BE and LE.
11368   // The right nibble of each byte corresponds to the LE incides.
11369   // and the left nibble of each byte corresponds to the BE incides.
11370   // For example: 0x3074B8FC  byte->word
11371   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
11372   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
11373   // For example: 0x000070F8  byte->double word
11374   // For LE: the allowed indices are: 0x0,0x8
11375   // For BE: the allowed indices are: 0x7,0xF
11376   uint64_t TargetElems[] = {
11377       0x3074B8FC, // b->w
11378       0x000070F8, // b->d
11379       0x10325476, // h->w
11380       0x00003074, // h->d
11381       0x00001032, // w->d
11382   };
11383
11384   uint64_t Elems = 0;
11385   int Index;
11386   SDValue Input;
11387
11388   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
11389     if (!Op)
11390       return false;
11391     if (Op.getOpcode() != ISD::SIGN_EXTEND)
11392       return false;
11393
11394     SDValue Extract = Op.getOperand(0);
11395     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11396       return false;
11397
11398     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
11399     if (!ExtOp)
11400       return false;
11401
11402     Index = ExtOp->getZExtValue();
11403     if (Input && Input != Extract.getOperand(0))
11404       return false;
11405
11406     if (!Input)
11407       Input = Extract.getOperand(0);
11408
11409     Elems = Elems << 8;
11410     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
11411     Elems |= Index;
11412
11413     return true;
11414   };
11415
11416   // If the build vector operands aren't sign extended vector extracts,
11417   // of the same input vector, then return.
11418   for (unsigned i = 0; i < N->getNumOperands(); i++) {
11419     if (!isSExtOfVecExtract(N->getOperand(i))) {
11420       return SDValue();
11421     }
11422   }
11423
11424   // If the vector extract indicies are not correct, add the appropriate
11425   // vector_shuffle.
11426   int TgtElemArrayIdx;
11427   int InputSize = Input.getValueType().getScalarSizeInBits();
11428   int OutputSize = N->getValueType(0).getScalarSizeInBits();
11429   if (InputSize + OutputSize == 40)
11430     TgtElemArrayIdx = 0;
11431   else if (InputSize + OutputSize == 72)
11432     TgtElemArrayIdx = 1;
11433   else if (InputSize + OutputSize == 48)
11434     TgtElemArrayIdx = 2;
11435   else if (InputSize + OutputSize == 80)
11436     TgtElemArrayIdx = 3;
11437   else if (InputSize + OutputSize == 96)
11438     TgtElemArrayIdx = 4;
11439   else
11440     return SDValue();
11441
11442   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
11443   CorrectElems = DAG.getDataLayout().isLittleEndian()
11444                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
11445                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
11446   if (Elems != CorrectElems) {
11447     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
11448   }
11449
11450   // Regular lowering will catch cases where a shuffle is not needed.
11451   return SDValue();
11452 }
11453
11454 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
11455                                                  DAGCombinerInfo &DCI) const {
11456   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11457          "Should be called with a BUILD_VECTOR node");
11458
11459   SelectionDAG &DAG = DCI.DAG;
11460   SDLoc dl(N);
11461
11462   if (!Subtarget.hasVSX())
11463     return SDValue();
11464
11465   // The target independent DAG combiner will leave a build_vector of
11466   // float-to-int conversions intact. We can generate MUCH better code for
11467   // a float-to-int conversion of a vector of floats.
11468   SDValue FirstInput = N->getOperand(0);
11469   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11470     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
11471     if (Reduced)
11472       return Reduced;
11473   }
11474
11475   // If we're building a vector out of consecutive loads, just load that
11476   // vector type.
11477   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
11478   if (Reduced)
11479     return Reduced;
11480
11481   // If we're building a vector out of extended elements from another vector
11482   // we have P9 vector integer extend instructions.
11483   if (Subtarget.hasP9Altivec()) {
11484     Reduced = combineBVOfVecSExt(N, DAG);
11485     if (Reduced)
11486       return Reduced;
11487   }
11488
11489
11490   if (N->getValueType(0) != MVT::v2f64)
11491     return SDValue();
11492
11493   // Looking for:
11494   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
11495   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11496       FirstInput.getOpcode() != ISD::UINT_TO_FP)
11497     return SDValue();
11498   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11499       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
11500     return SDValue();
11501   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11502     return SDValue();
11503
11504   SDValue Ext1 = FirstInput.getOperand(0);
11505   SDValue Ext2 = N->getOperand(1).getOperand(0);
11506   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11507      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11508     return SDValue();
11509
11510   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
11511   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
11512   if (!Ext1Op || !Ext2Op)
11513     return SDValue();
11514   if (Ext1.getValueType() != MVT::i32 ||
11515       Ext2.getValueType() != MVT::i32)
11516   if (Ext1.getOperand(0) != Ext2.getOperand(0))
11517     return SDValue();
11518
11519   int FirstElem = Ext1Op->getZExtValue();
11520   int SecondElem = Ext2Op->getZExtValue();
11521   int SubvecIdx;
11522   if (FirstElem == 0 && SecondElem == 1)
11523     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11524   else if (FirstElem == 2 && SecondElem == 3)
11525     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11526   else
11527     return SDValue();
11528
11529   SDValue SrcVec = Ext1.getOperand(0);
11530   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
11531     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11532   return DAG.getNode(NodeType, dl, MVT::v2f64,
11533                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
11534 }
11535
11536 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
11537                                               DAGCombinerInfo &DCI) const {
11538   assert((N->getOpcode() == ISD::SINT_TO_FP ||
11539           N->getOpcode() == ISD::UINT_TO_FP) &&
11540          "Need an int -> FP conversion node here");
11541
11542   if (useSoftFloat() || !Subtarget.has64BitSupport())
11543     return SDValue();
11544
11545   SelectionDAG &DAG = DCI.DAG;
11546   SDLoc dl(N);
11547   SDValue Op(N, 0);
11548
11549   SDValue FirstOperand(Op.getOperand(0));
11550   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
11551     (FirstOperand.getValueType() == MVT::i8 ||
11552      FirstOperand.getValueType() == MVT::i16);
11553   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
11554     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
11555     bool DstDouble = Op.getValueType() == MVT::f64;
11556     unsigned ConvOp = Signed ?
11557       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
11558       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
11559     SDValue WidthConst =
11560       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
11561                             dl, false);
11562     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
11563     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
11564     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
11565                                          DAG.getVTList(MVT::f64, MVT::Other),
11566                                          Ops, MVT::i8, LDN->getMemOperand());
11567
11568     // For signed conversion, we need to sign-extend the value in the VSR
11569     if (Signed) {
11570       SDValue ExtOps[] = { Ld, WidthConst };
11571       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
11572       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
11573     } else
11574       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
11575   }
11576
11577   // Don't handle ppc_fp128 here or i1 conversions.
11578   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
11579     return SDValue();
11580   if (Op.getOperand(0).getValueType() == MVT::i1)
11581     return SDValue();
11582
11583   // For i32 intermediate values, unfortunately, the conversion functions
11584   // leave the upper 32 bits of the value are undefined. Within the set of
11585   // scalar instructions, we have no method for zero- or sign-extending the
11586   // value. Thus, we cannot handle i32 intermediate values here.
11587   if (Op.getOperand(0).getValueType() == MVT::i32)
11588     return SDValue();
11589
11590   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
11591          "UINT_TO_FP is supported only with FPCVT");
11592
11593   // If we have FCFIDS, then use it when converting to single-precision.
11594   // Otherwise, convert to double-precision and then round.
11595   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11596                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
11597                                                             : PPCISD::FCFIDS)
11598                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
11599                                                             : PPCISD::FCFID);
11600   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11601                   ? MVT::f32
11602                   : MVT::f64;
11603
11604   // If we're converting from a float, to an int, and back to a float again,
11605   // then we don't need the store/load pair at all.
11606   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
11607        Subtarget.hasFPCVT()) ||
11608       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
11609     SDValue Src = Op.getOperand(0).getOperand(0);
11610     if (Src.getValueType() == MVT::f32) {
11611       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
11612       DCI.AddToWorklist(Src.getNode());
11613     } else if (Src.getValueType() != MVT::f64) {
11614       // Make sure that we don't pick up a ppc_fp128 source value.
11615       return SDValue();
11616     }
11617
11618     unsigned FCTOp =
11619       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
11620                                                         PPCISD::FCTIDUZ;
11621
11622     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
11623     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
11624
11625     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
11626       FP = DAG.getNode(ISD::FP_ROUND, dl,
11627                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
11628       DCI.AddToWorklist(FP.getNode());
11629     }
11630
11631     return FP;
11632   }
11633
11634   return SDValue();
11635 }
11636
11637 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
11638 // builtins) into loads with swaps.
11639 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
11640                                               DAGCombinerInfo &DCI) const {
11641   SelectionDAG &DAG = DCI.DAG;
11642   SDLoc dl(N);
11643   SDValue Chain;
11644   SDValue Base;
11645   MachineMemOperand *MMO;
11646
11647   switch (N->getOpcode()) {
11648   default:
11649     llvm_unreachable("Unexpected opcode for little endian VSX load");
11650   case ISD::LOAD: {
11651     LoadSDNode *LD = cast<LoadSDNode>(N);
11652     Chain = LD->getChain();
11653     Base = LD->getBasePtr();
11654     MMO = LD->getMemOperand();
11655     // If the MMO suggests this isn't a load of a full vector, leave
11656     // things alone.  For a built-in, we have to make the change for
11657     // correctness, so if there is a size problem that will be a bug.
11658     if (MMO->getSize() < 16)
11659       return SDValue();
11660     break;
11661   }
11662   case ISD::INTRINSIC_W_CHAIN: {
11663     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11664     Chain = Intrin->getChain();
11665     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
11666     // us what we want. Get operand 2 instead.
11667     Base = Intrin->getOperand(2);
11668     MMO = Intrin->getMemOperand();
11669     break;
11670   }
11671   }
11672
11673   MVT VecTy = N->getValueType(0).getSimpleVT();
11674
11675   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
11676   // aligned and the type is a vector with elements up to 4 bytes
11677   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11678       && VecTy.getScalarSizeInBits() <= 32 ) {
11679     return SDValue();
11680   }
11681
11682   SDValue LoadOps[] = { Chain, Base };
11683   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
11684                                          DAG.getVTList(MVT::v2f64, MVT::Other),
11685                                          LoadOps, MVT::v2f64, MMO);
11686
11687   DCI.AddToWorklist(Load.getNode());
11688   Chain = Load.getValue(1);
11689   SDValue Swap = DAG.getNode(
11690       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
11691   DCI.AddToWorklist(Swap.getNode());
11692
11693   // Add a bitcast if the resulting load type doesn't match v2f64.
11694   if (VecTy != MVT::v2f64) {
11695     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
11696     DCI.AddToWorklist(N.getNode());
11697     // Package {bitcast value, swap's chain} to match Load's shape.
11698     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
11699                        N, Swap.getValue(1));
11700   }
11701
11702   return Swap;
11703 }
11704
11705 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
11706 // builtins) into stores with swaps.
11707 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
11708                                                DAGCombinerInfo &DCI) const {
11709   SelectionDAG &DAG = DCI.DAG;
11710   SDLoc dl(N);
11711   SDValue Chain;
11712   SDValue Base;
11713   unsigned SrcOpnd;
11714   MachineMemOperand *MMO;
11715
11716   switch (N->getOpcode()) {
11717   default:
11718     llvm_unreachable("Unexpected opcode for little endian VSX store");
11719   case ISD::STORE: {
11720     StoreSDNode *ST = cast<StoreSDNode>(N);
11721     Chain = ST->getChain();
11722     Base = ST->getBasePtr();
11723     MMO = ST->getMemOperand();
11724     SrcOpnd = 1;
11725     // If the MMO suggests this isn't a store of a full vector, leave
11726     // things alone.  For a built-in, we have to make the change for
11727     // correctness, so if there is a size problem that will be a bug.
11728     if (MMO->getSize() < 16)
11729       return SDValue();
11730     break;
11731   }
11732   case ISD::INTRINSIC_VOID: {
11733     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11734     Chain = Intrin->getChain();
11735     // Intrin->getBasePtr() oddly does not get what we want.
11736     Base = Intrin->getOperand(3);
11737     MMO = Intrin->getMemOperand();
11738     SrcOpnd = 2;
11739     break;
11740   }
11741   }
11742
11743   SDValue Src = N->getOperand(SrcOpnd);
11744   MVT VecTy = Src.getValueType().getSimpleVT();
11745
11746   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
11747   // aligned and the type is a vector with elements up to 4 bytes
11748   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11749       && VecTy.getScalarSizeInBits() <= 32 ) {
11750     return SDValue();
11751   }
11752
11753   // All stores are done as v2f64 and possible bit cast.
11754   if (VecTy != MVT::v2f64) {
11755     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
11756     DCI.AddToWorklist(Src.getNode());
11757   }
11758
11759   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
11760                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
11761   DCI.AddToWorklist(Swap.getNode());
11762   Chain = Swap.getValue(1);
11763   SDValue StoreOps[] = { Chain, Swap, Base };
11764   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
11765                                           DAG.getVTList(MVT::Other),
11766                                           StoreOps, VecTy, MMO);
11767   DCI.AddToWorklist(Store.getNode());
11768   return Store;
11769 }
11770
11771 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
11772                                              DAGCombinerInfo &DCI) const {
11773   SelectionDAG &DAG = DCI.DAG;
11774   SDLoc dl(N);
11775   switch (N->getOpcode()) {
11776   default: break;
11777   case ISD::SHL:
11778     return combineSHL(N, DCI);
11779   case ISD::SRA:
11780     return combineSRA(N, DCI);
11781   case ISD::SRL:
11782     return combineSRL(N, DCI);
11783   case PPCISD::SHL:
11784     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
11785         return N->getOperand(0);
11786     break;
11787   case PPCISD::SRL:
11788     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
11789         return N->getOperand(0);
11790     break;
11791   case PPCISD::SRA:
11792     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11793       if (C->isNullValue() ||   //  0 >>s V -> 0.
11794           C->isAllOnesValue())    // -1 >>s V -> -1.
11795         return N->getOperand(0);
11796     }
11797     break;
11798   case ISD::SIGN_EXTEND:
11799   case ISD::ZERO_EXTEND:
11800   case ISD::ANY_EXTEND:
11801     return DAGCombineExtBoolTrunc(N, DCI);
11802   case ISD::TRUNCATE:
11803   case ISD::SETCC:
11804   case ISD::SELECT_CC:
11805     return DAGCombineTruncBoolExt(N, DCI);
11806   case ISD::SINT_TO_FP:
11807   case ISD::UINT_TO_FP:
11808     return combineFPToIntToFP(N, DCI);
11809   case ISD::STORE: {
11810     EVT Op1VT = N->getOperand(1).getValueType();
11811     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
11812       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11813
11814     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
11815     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11816         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11817         ValidTypeForStoreFltAsInt &&
11818         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11819       SDValue Val = N->getOperand(1).getOperand(0);
11820       if (Val.getValueType() == MVT::f32) {
11821         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
11822         DCI.AddToWorklist(Val.getNode());
11823       }
11824       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
11825       DCI.AddToWorklist(Val.getNode());
11826
11827       if (Op1VT == MVT::i32) {
11828         SDValue Ops[] = {
11829           N->getOperand(0), Val, N->getOperand(2),
11830           DAG.getValueType(N->getOperand(1).getValueType())
11831         };
11832
11833         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
11834                 DAG.getVTList(MVT::Other), Ops,
11835                 cast<StoreSDNode>(N)->getMemoryVT(),
11836                 cast<StoreSDNode>(N)->getMemOperand());
11837       } else {
11838         unsigned WidthInBytes =
11839           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11840         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
11841
11842         SDValue Ops[] = {
11843           N->getOperand(0), Val, N->getOperand(2), WidthConst,
11844           DAG.getValueType(N->getOperand(1).getValueType())
11845         };
11846         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
11847                                       DAG.getVTList(MVT::Other), Ops,
11848                                       cast<StoreSDNode>(N)->getMemoryVT(),
11849                                       cast<StoreSDNode>(N)->getMemOperand());
11850       }
11851
11852       DCI.AddToWorklist(Val.getNode());
11853       return Val;
11854     }
11855
11856     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
11857     if (cast<StoreSDNode>(N)->isUnindexed() &&
11858         N->getOperand(1).getOpcode() == ISD::BSWAP &&
11859         N->getOperand(1).getNode()->hasOneUse() &&
11860         (N->getOperand(1).getValueType() == MVT::i32 ||
11861          N->getOperand(1).getValueType() == MVT::i16 ||
11862          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11863           N->getOperand(1).getValueType() == MVT::i64))) {
11864       SDValue BSwapOp = N->getOperand(1).getOperand(0);
11865       // Do an any-extend to 32-bits if this is a half-word input.
11866       if (BSwapOp.getValueType() == MVT::i16)
11867         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
11868
11869       // If the type of BSWAP operand is wider than stored memory width
11870       // it need to be shifted to the right side before STBRX.
11871       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
11872       if (Op1VT.bitsGT(mVT)) {
11873         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
11874         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
11875                               DAG.getConstant(Shift, dl, MVT::i32));
11876         // Need to truncate if this is a bswap of i64 stored as i32/i16.
11877         if (Op1VT == MVT::i64)
11878           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
11879       }
11880
11881       SDValue Ops[] = {
11882         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
11883       };
11884       return
11885         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
11886                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
11887                                 cast<StoreSDNode>(N)->getMemOperand());
11888     }
11889
11890     // For little endian, VSX stores require generating xxswapd/lxvd2x.
11891     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11892     EVT VT = N->getOperand(1).getValueType();
11893     if (VT.isSimple()) {
11894       MVT StoreVT = VT.getSimpleVT();
11895       if (Subtarget.needsSwapsForVSXMemOps() &&
11896           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11897            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11898         return expandVSXStoreForLE(N, DCI);
11899     }
11900     break;
11901   }
11902   case ISD::LOAD: {
11903     LoadSDNode *LD = cast<LoadSDNode>(N);
11904     EVT VT = LD->getValueType(0);
11905
11906     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11907     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11908     if (VT.isSimple()) {
11909       MVT LoadVT = VT.getSimpleVT();
11910       if (Subtarget.needsSwapsForVSXMemOps() &&
11911           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11912            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11913         return expandVSXLoadForLE(N, DCI);
11914     }
11915
11916     // We sometimes end up with a 64-bit integer load, from which we extract
11917     // two single-precision floating-point numbers. This happens with
11918     // std::complex<float>, and other similar structures, because of the way we
11919     // canonicalize structure copies. However, if we lack direct moves,
11920     // then the final bitcasts from the extracted integer values to the
11921     // floating-point numbers turn into store/load pairs. Even with direct moves,
11922     // just loading the two floating-point numbers is likely better.
11923     auto ReplaceTwoFloatLoad = [&]() {
11924       if (VT != MVT::i64)
11925         return false;
11926
11927       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11928           LD->isVolatile())
11929         return false;
11930
11931       //  We're looking for a sequence like this:
11932       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
11933       //      t16: i64 = srl t13, Constant:i32<32>
11934       //    t17: i32 = truncate t16
11935       //  t18: f32 = bitcast t17
11936       //    t19: i32 = truncate t13
11937       //  t20: f32 = bitcast t19
11938
11939       if (!LD->hasNUsesOfValue(2, 0))
11940         return false;
11941
11942       auto UI = LD->use_begin();
11943       while (UI.getUse().getResNo() != 0) ++UI;
11944       SDNode *Trunc = *UI++;
11945       while (UI.getUse().getResNo() != 0) ++UI;
11946       SDNode *RightShift = *UI;
11947       if (Trunc->getOpcode() != ISD::TRUNCATE)
11948         std::swap(Trunc, RightShift);
11949
11950       if (Trunc->getOpcode() != ISD::TRUNCATE ||
11951           Trunc->getValueType(0) != MVT::i32 ||
11952           !Trunc->hasOneUse())
11953         return false;
11954       if (RightShift->getOpcode() != ISD::SRL ||
11955           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
11956           RightShift->getConstantOperandVal(1) != 32 ||
11957           !RightShift->hasOneUse())
11958         return false;
11959
11960       SDNode *Trunc2 = *RightShift->use_begin();
11961       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
11962           Trunc2->getValueType(0) != MVT::i32 ||
11963           !Trunc2->hasOneUse())
11964         return false;
11965
11966       SDNode *Bitcast = *Trunc->use_begin();
11967       SDNode *Bitcast2 = *Trunc2->use_begin();
11968
11969       if (Bitcast->getOpcode() != ISD::BITCAST ||
11970           Bitcast->getValueType(0) != MVT::f32)
11971         return false;
11972       if (Bitcast2->getOpcode() != ISD::BITCAST ||
11973           Bitcast2->getValueType(0) != MVT::f32)
11974         return false;
11975
11976       if (Subtarget.isLittleEndian())
11977         std::swap(Bitcast, Bitcast2);
11978
11979       // Bitcast has the second float (in memory-layout order) and Bitcast2
11980       // has the first one.
11981
11982       SDValue BasePtr = LD->getBasePtr();
11983       if (LD->isIndexed()) {
11984         assert(LD->getAddressingMode() == ISD::PRE_INC &&
11985                "Non-pre-inc AM on PPC?");
11986         BasePtr =
11987           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11988                       LD->getOffset());
11989       }
11990
11991       auto MMOFlags =
11992           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
11993       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
11994                                       LD->getPointerInfo(), LD->getAlignment(),
11995                                       MMOFlags, LD->getAAInfo());
11996       SDValue AddPtr =
11997         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
11998                     BasePtr, DAG.getIntPtrConstant(4, dl));
11999       SDValue FloatLoad2 = DAG.getLoad(
12000           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
12001           LD->getPointerInfo().getWithOffset(4),
12002           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
12003
12004       if (LD->isIndexed()) {
12005         // Note that DAGCombine should re-form any pre-increment load(s) from
12006         // what is produced here if that makes sense.
12007         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
12008       }
12009
12010       DCI.CombineTo(Bitcast2, FloatLoad);
12011       DCI.CombineTo(Bitcast, FloatLoad2);
12012
12013       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
12014                                     SDValue(FloatLoad2.getNode(), 1));
12015       return true;
12016     };
12017
12018     if (ReplaceTwoFloatLoad())
12019       return SDValue(N, 0);
12020
12021     EVT MemVT = LD->getMemoryVT();
12022     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
12023     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
12024     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
12025     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
12026     if (LD->isUnindexed() && VT.isVector() &&
12027         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
12028           // P8 and later hardware should just use LOAD.
12029           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
12030                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
12031          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
12032           LD->getAlignment() >= ScalarABIAlignment)) &&
12033         LD->getAlignment() < ABIAlignment) {
12034       // This is a type-legal unaligned Altivec or QPX load.
12035       SDValue Chain = LD->getChain();
12036       SDValue Ptr = LD->getBasePtr();
12037       bool isLittleEndian = Subtarget.isLittleEndian();
12038
12039       // This implements the loading of unaligned vectors as described in
12040       // the venerable Apple Velocity Engine overview. Specifically:
12041       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
12042       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
12043       //
12044       // The general idea is to expand a sequence of one or more unaligned
12045       // loads into an alignment-based permutation-control instruction (lvsl
12046       // or lvsr), a series of regular vector loads (which always truncate
12047       // their input address to an aligned address), and a series of
12048       // permutations.  The results of these permutations are the requested
12049       // loaded values.  The trick is that the last "extra" load is not taken
12050       // from the address you might suspect (sizeof(vector) bytes after the
12051       // last requested load), but rather sizeof(vector) - 1 bytes after the
12052       // last requested vector. The point of this is to avoid a page fault if
12053       // the base address happened to be aligned. This works because if the
12054       // base address is aligned, then adding less than a full vector length
12055       // will cause the last vector in the sequence to be (re)loaded.
12056       // Otherwise, the next vector will be fetched as you might suspect was
12057       // necessary.
12058
12059       // We might be able to reuse the permutation generation from
12060       // a different base address offset from this one by an aligned amount.
12061       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
12062       // optimization later.
12063       Intrinsic::ID Intr, IntrLD, IntrPerm;
12064       MVT PermCntlTy, PermTy, LDTy;
12065       if (Subtarget.hasAltivec()) {
12066         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
12067                                  Intrinsic::ppc_altivec_lvsl;
12068         IntrLD = Intrinsic::ppc_altivec_lvx;
12069         IntrPerm = Intrinsic::ppc_altivec_vperm;
12070         PermCntlTy = MVT::v16i8;
12071         PermTy = MVT::v4i32;
12072         LDTy = MVT::v4i32;
12073       } else {
12074         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
12075                                        Intrinsic::ppc_qpx_qvlpcls;
12076         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
12077                                        Intrinsic::ppc_qpx_qvlfs;
12078         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
12079         PermCntlTy = MVT::v4f64;
12080         PermTy = MVT::v4f64;
12081         LDTy = MemVT.getSimpleVT();
12082       }
12083
12084       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
12085
12086       // Create the new MMO for the new base load. It is like the original MMO,
12087       // but represents an area in memory almost twice the vector size centered
12088       // on the original address. If the address is unaligned, we might start
12089       // reading up to (sizeof(vector)-1) bytes below the address of the
12090       // original unaligned load.
12091       MachineFunction &MF = DAG.getMachineFunction();
12092       MachineMemOperand *BaseMMO =
12093         MF.getMachineMemOperand(LD->getMemOperand(),
12094                                 -(long)MemVT.getStoreSize()+1,
12095                                 2*MemVT.getStoreSize()-1);
12096
12097       // Create the new base load.
12098       SDValue LDXIntID =
12099           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
12100       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
12101       SDValue BaseLoad =
12102         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12103                                 DAG.getVTList(PermTy, MVT::Other),
12104                                 BaseLoadOps, LDTy, BaseMMO);
12105
12106       // Note that the value of IncOffset (which is provided to the next
12107       // load's pointer info offset value, and thus used to calculate the
12108       // alignment), and the value of IncValue (which is actually used to
12109       // increment the pointer value) are different! This is because we
12110       // require the next load to appear to be aligned, even though it
12111       // is actually offset from the base pointer by a lesser amount.
12112       int IncOffset = VT.getSizeInBits() / 8;
12113       int IncValue = IncOffset;
12114
12115       // Walk (both up and down) the chain looking for another load at the real
12116       // (aligned) offset (the alignment of the other load does not matter in
12117       // this case). If found, then do not use the offset reduction trick, as
12118       // that will prevent the loads from being later combined (as they would
12119       // otherwise be duplicates).
12120       if (!findConsecutiveLoad(LD, DAG))
12121         --IncValue;
12122
12123       SDValue Increment =
12124           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
12125       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
12126
12127       MachineMemOperand *ExtraMMO =
12128         MF.getMachineMemOperand(LD->getMemOperand(),
12129                                 1, 2*MemVT.getStoreSize()-1);
12130       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
12131       SDValue ExtraLoad =
12132         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12133                                 DAG.getVTList(PermTy, MVT::Other),
12134                                 ExtraLoadOps, LDTy, ExtraMMO);
12135
12136       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
12137         BaseLoad.getValue(1), ExtraLoad.getValue(1));
12138
12139       // Because vperm has a big-endian bias, we must reverse the order
12140       // of the input vectors and complement the permute control vector
12141       // when generating little endian code.  We have already handled the
12142       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
12143       // and ExtraLoad here.
12144       SDValue Perm;
12145       if (isLittleEndian)
12146         Perm = BuildIntrinsicOp(IntrPerm,
12147                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
12148       else
12149         Perm = BuildIntrinsicOp(IntrPerm,
12150                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
12151
12152       if (VT != PermTy)
12153         Perm = Subtarget.hasAltivec() ?
12154                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
12155                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
12156                                DAG.getTargetConstant(1, dl, MVT::i64));
12157                                // second argument is 1 because this rounding
12158                                // is always exact.
12159
12160       // The output of the permutation is our loaded result, the TokenFactor is
12161       // our new chain.
12162       DCI.CombineTo(N, Perm, TF);
12163       return SDValue(N, 0);
12164     }
12165     }
12166     break;
12167     case ISD::INTRINSIC_WO_CHAIN: {
12168       bool isLittleEndian = Subtarget.isLittleEndian();
12169       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12170       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
12171                                            : Intrinsic::ppc_altivec_lvsl);
12172       if ((IID == Intr ||
12173            IID == Intrinsic::ppc_qpx_qvlpcld  ||
12174            IID == Intrinsic::ppc_qpx_qvlpcls) &&
12175         N->getOperand(1)->getOpcode() == ISD::ADD) {
12176         SDValue Add = N->getOperand(1);
12177
12178         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
12179                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
12180
12181         if (DAG.MaskedValueIsZero(Add->getOperand(1),
12182                                   APInt::getAllOnesValue(Bits /* alignment */)
12183                                       .zext(Add.getScalarValueSizeInBits()))) {
12184           SDNode *BasePtr = Add->getOperand(0).getNode();
12185           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12186                                     UE = BasePtr->use_end();
12187                UI != UE; ++UI) {
12188             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12189                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
12190               // We've found another LVSL/LVSR, and this address is an aligned
12191               // multiple of that one. The results will be the same, so use the
12192               // one we've just found instead.
12193
12194               return SDValue(*UI, 0);
12195             }
12196           }
12197         }
12198
12199         if (isa<ConstantSDNode>(Add->getOperand(1))) {
12200           SDNode *BasePtr = Add->getOperand(0).getNode();
12201           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12202                UE = BasePtr->use_end(); UI != UE; ++UI) {
12203             if (UI->getOpcode() == ISD::ADD &&
12204                 isa<ConstantSDNode>(UI->getOperand(1)) &&
12205                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
12206                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
12207                 (1ULL << Bits) == 0) {
12208               SDNode *OtherAdd = *UI;
12209               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
12210                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
12211                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12212                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
12213                   return SDValue(*VI, 0);
12214                 }
12215               }
12216             }
12217           }
12218         }
12219       }
12220     }
12221
12222     break;
12223   case ISD::INTRINSIC_W_CHAIN:
12224     // For little endian, VSX loads require generating lxvd2x/xxswapd.
12225     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
12226     if (Subtarget.needsSwapsForVSXMemOps()) {
12227       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12228       default:
12229         break;
12230       case Intrinsic::ppc_vsx_lxvw4x:
12231       case Intrinsic::ppc_vsx_lxvd2x:
12232         return expandVSXLoadForLE(N, DCI);
12233       }
12234     }
12235     break;
12236   case ISD::INTRINSIC_VOID:
12237     // For little endian, VSX stores require generating xxswapd/stxvd2x.
12238     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
12239     if (Subtarget.needsSwapsForVSXMemOps()) {
12240       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12241       default:
12242         break;
12243       case Intrinsic::ppc_vsx_stxvw4x:
12244       case Intrinsic::ppc_vsx_stxvd2x:
12245         return expandVSXStoreForLE(N, DCI);
12246       }
12247     }
12248     break;
12249   case ISD::BSWAP:
12250     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
12251     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
12252         N->getOperand(0).hasOneUse() &&
12253         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
12254          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
12255           N->getValueType(0) == MVT::i64))) {
12256       SDValue Load = N->getOperand(0);
12257       LoadSDNode *LD = cast<LoadSDNode>(Load);
12258       // Create the byte-swapping load.
12259       SDValue Ops[] = {
12260         LD->getChain(),    // Chain
12261         LD->getBasePtr(),  // Ptr
12262         DAG.getValueType(N->getValueType(0)) // VT
12263       };
12264       SDValue BSLoad =
12265         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
12266                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
12267                                               MVT::i64 : MVT::i32, MVT::Other),
12268                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
12269
12270       // If this is an i16 load, insert the truncate.
12271       SDValue ResVal = BSLoad;
12272       if (N->getValueType(0) == MVT::i16)
12273         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
12274
12275       // First, combine the bswap away.  This makes the value produced by the
12276       // load dead.
12277       DCI.CombineTo(N, ResVal);
12278
12279       // Next, combine the load away, we give it a bogus result value but a real
12280       // chain result.  The result value is dead because the bswap is dead.
12281       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
12282
12283       // Return N so it doesn't get rechecked!
12284       return SDValue(N, 0);
12285     }
12286     break;
12287   case PPCISD::VCMP:
12288     // If a VCMPo node already exists with exactly the same operands as this
12289     // node, use its result instead of this node (VCMPo computes both a CR6 and
12290     // a normal output).
12291     //
12292     if (!N->getOperand(0).hasOneUse() &&
12293         !N->getOperand(1).hasOneUse() &&
12294         !N->getOperand(2).hasOneUse()) {
12295
12296       // Scan all of the users of the LHS, looking for VCMPo's that match.
12297       SDNode *VCMPoNode = nullptr;
12298
12299       SDNode *LHSN = N->getOperand(0).getNode();
12300       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
12301            UI != E; ++UI)
12302         if (UI->getOpcode() == PPCISD::VCMPo &&
12303             UI->getOperand(1) == N->getOperand(1) &&
12304             UI->getOperand(2) == N->getOperand(2) &&
12305             UI->getOperand(0) == N->getOperand(0)) {
12306           VCMPoNode = *UI;
12307           break;
12308         }
12309
12310       // If there is no VCMPo node, or if the flag value has a single use, don't
12311       // transform this.
12312       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
12313         break;
12314
12315       // Look at the (necessarily single) use of the flag value.  If it has a
12316       // chain, this transformation is more complex.  Note that multiple things
12317       // could use the value result, which we should ignore.
12318       SDNode *FlagUser = nullptr;
12319       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
12320            FlagUser == nullptr; ++UI) {
12321         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
12322         SDNode *User = *UI;
12323         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
12324           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
12325             FlagUser = User;
12326             break;
12327           }
12328         }
12329       }
12330
12331       // If the user is a MFOCRF instruction, we know this is safe.
12332       // Otherwise we give up for right now.
12333       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
12334         return SDValue(VCMPoNode, 0);
12335     }
12336     break;
12337   case ISD::BRCOND: {
12338     SDValue Cond = N->getOperand(1);
12339     SDValue Target = N->getOperand(2);
12340
12341     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12342         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
12343           Intrinsic::ppc_is_decremented_ctr_nonzero) {
12344
12345       // We now need to make the intrinsic dead (it cannot be instruction
12346       // selected).
12347       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
12348       assert(Cond.getNode()->hasOneUse() &&
12349              "Counter decrement has more than one use");
12350
12351       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
12352                          N->getOperand(0), Target);
12353     }
12354   }
12355   break;
12356   case ISD::BR_CC: {
12357     // If this is a branch on an altivec predicate comparison, lower this so
12358     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
12359     // lowering is done pre-legalize, because the legalizer lowers the predicate
12360     // compare down to code that is difficult to reassemble.
12361     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
12362     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
12363
12364     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
12365     // value. If so, pass-through the AND to get to the intrinsic.
12366     if (LHS.getOpcode() == ISD::AND &&
12367         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12368         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
12369           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12370         isa<ConstantSDNode>(LHS.getOperand(1)) &&
12371         !isNullConstant(LHS.getOperand(1)))
12372       LHS = LHS.getOperand(0);
12373
12374     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12375         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
12376           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12377         isa<ConstantSDNode>(RHS)) {
12378       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
12379              "Counter decrement comparison is not EQ or NE");
12380
12381       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12382       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
12383                     (CC == ISD::SETNE && !Val);
12384
12385       // We now need to make the intrinsic dead (it cannot be instruction
12386       // selected).
12387       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
12388       assert(LHS.getNode()->hasOneUse() &&
12389              "Counter decrement has more than one use");
12390
12391       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
12392                          N->getOperand(0), N->getOperand(4));
12393     }
12394
12395     int CompareOpc;
12396     bool isDot;
12397
12398     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12399         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
12400         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
12401       assert(isDot && "Can't compare against a vector result!");
12402
12403       // If this is a comparison against something other than 0/1, then we know
12404       // that the condition is never/always true.
12405       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12406       if (Val != 0 && Val != 1) {
12407         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
12408           return N->getOperand(0);
12409         // Always !=, turn it into an unconditional branch.
12410         return DAG.getNode(ISD::BR, dl, MVT::Other,
12411                            N->getOperand(0), N->getOperand(4));
12412       }
12413
12414       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
12415
12416       // Create the PPCISD altivec 'dot' comparison node.
12417       SDValue Ops[] = {
12418         LHS.getOperand(2),  // LHS of compare
12419         LHS.getOperand(3),  // RHS of compare
12420         DAG.getConstant(CompareOpc, dl, MVT::i32)
12421       };
12422       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
12423       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
12424
12425       // Unpack the result based on how the target uses it.
12426       PPC::Predicate CompOpc;
12427       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
12428       default:  // Can't happen, don't crash on invalid number though.
12429       case 0:   // Branch on the value of the EQ bit of CR6.
12430         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
12431         break;
12432       case 1:   // Branch on the inverted value of the EQ bit of CR6.
12433         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
12434         break;
12435       case 2:   // Branch on the value of the LT bit of CR6.
12436         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
12437         break;
12438       case 3:   // Branch on the inverted value of the LT bit of CR6.
12439         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
12440         break;
12441       }
12442
12443       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
12444                          DAG.getConstant(CompOpc, dl, MVT::i32),
12445                          DAG.getRegister(PPC::CR6, MVT::i32),
12446                          N->getOperand(4), CompNode.getValue(1));
12447     }
12448     break;
12449   }
12450   case ISD::BUILD_VECTOR:
12451     return DAGCombineBuildVector(N, DCI);
12452   }
12453
12454   return SDValue();
12455 }
12456
12457 SDValue
12458 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12459                                   SelectionDAG &DAG,
12460                                   std::vector<SDNode *> *Created) const {
12461   // fold (sdiv X, pow2)
12462   EVT VT = N->getValueType(0);
12463   if (VT == MVT::i64 && !Subtarget.isPPC64())
12464     return SDValue();
12465   if ((VT != MVT::i32 && VT != MVT::i64) ||
12466       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12467     return SDValue();
12468
12469   SDLoc DL(N);
12470   SDValue N0 = N->getOperand(0);
12471
12472   bool IsNegPow2 = (-Divisor).isPowerOf2();
12473   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
12474   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
12475
12476   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
12477   if (Created)
12478     Created->push_back(Op.getNode());
12479
12480   if (IsNegPow2) {
12481     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
12482     if (Created)
12483       Created->push_back(Op.getNode());
12484   }
12485
12486   return Op;
12487 }
12488
12489 //===----------------------------------------------------------------------===//
12490 // Inline Assembly Support
12491 //===----------------------------------------------------------------------===//
12492
12493 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
12494                                                       KnownBits &Known,
12495                                                       const APInt &DemandedElts,
12496                                                       const SelectionDAG &DAG,
12497                                                       unsigned Depth) const {
12498   Known.resetAll();
12499   switch (Op.getOpcode()) {
12500   default: break;
12501   case PPCISD::LBRX: {
12502     // lhbrx is known to have the top bits cleared out.
12503     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
12504       Known.Zero = 0xFFFF0000;
12505     break;
12506   }
12507   case ISD::INTRINSIC_WO_CHAIN: {
12508     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
12509     default: break;
12510     case Intrinsic::ppc_altivec_vcmpbfp_p:
12511     case Intrinsic::ppc_altivec_vcmpeqfp_p:
12512     case Intrinsic::ppc_altivec_vcmpequb_p:
12513     case Intrinsic::ppc_altivec_vcmpequh_p:
12514     case Intrinsic::ppc_altivec_vcmpequw_p:
12515     case Intrinsic::ppc_altivec_vcmpequd_p:
12516     case Intrinsic::ppc_altivec_vcmpgefp_p:
12517     case Intrinsic::ppc_altivec_vcmpgtfp_p:
12518     case Intrinsic::ppc_altivec_vcmpgtsb_p:
12519     case Intrinsic::ppc_altivec_vcmpgtsh_p:
12520     case Intrinsic::ppc_altivec_vcmpgtsw_p:
12521     case Intrinsic::ppc_altivec_vcmpgtsd_p:
12522     case Intrinsic::ppc_altivec_vcmpgtub_p:
12523     case Intrinsic::ppc_altivec_vcmpgtuh_p:
12524     case Intrinsic::ppc_altivec_vcmpgtuw_p:
12525     case Intrinsic::ppc_altivec_vcmpgtud_p:
12526       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
12527       break;
12528     }
12529   }
12530   }
12531 }
12532
12533 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
12534   switch (Subtarget.getDarwinDirective()) {
12535   default: break;
12536   case PPC::DIR_970:
12537   case PPC::DIR_PWR4:
12538   case PPC::DIR_PWR5:
12539   case PPC::DIR_PWR5X:
12540   case PPC::DIR_PWR6:
12541   case PPC::DIR_PWR6X:
12542   case PPC::DIR_PWR7:
12543   case PPC::DIR_PWR8:
12544   case PPC::DIR_PWR9: {
12545     if (!ML)
12546       break;
12547
12548     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12549
12550     // For small loops (between 5 and 8 instructions), align to a 32-byte
12551     // boundary so that the entire loop fits in one instruction-cache line.
12552     uint64_t LoopSize = 0;
12553     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
12554       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
12555         LoopSize += TII->getInstSizeInBytes(*J);
12556         if (LoopSize > 32)
12557           break;
12558       }
12559
12560     if (LoopSize > 16 && LoopSize <= 32)
12561       return 5;
12562
12563     break;
12564   }
12565   }
12566
12567   return TargetLowering::getPrefLoopAlignment(ML);
12568 }
12569
12570 /// getConstraintType - Given a constraint, return the type of
12571 /// constraint it is for this target.
12572 PPCTargetLowering::ConstraintType
12573 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
12574   if (Constraint.size() == 1) {
12575     switch (Constraint[0]) {
12576     default: break;
12577     case 'b':
12578     case 'r':
12579     case 'f':
12580     case 'd':
12581     case 'v':
12582     case 'y':
12583       return C_RegisterClass;
12584     case 'Z':
12585       // FIXME: While Z does indicate a memory constraint, it specifically
12586       // indicates an r+r address (used in conjunction with the 'y' modifier
12587       // in the replacement string). Currently, we're forcing the base
12588       // register to be r0 in the asm printer (which is interpreted as zero)
12589       // and forming the complete address in the second register. This is
12590       // suboptimal.
12591       return C_Memory;
12592     }
12593   } else if (Constraint == "wc") { // individual CR bits.
12594     return C_RegisterClass;
12595   } else if (Constraint == "wa" || Constraint == "wd" ||
12596              Constraint == "wf" || Constraint == "ws") {
12597     return C_RegisterClass; // VSX registers.
12598   }
12599   return TargetLowering::getConstraintType(Constraint);
12600 }
12601
12602 /// Examine constraint type and operand type and determine a weight value.
12603 /// This object must already have been set up with the operand type
12604 /// and the current alternative constraint selected.
12605 TargetLowering::ConstraintWeight
12606 PPCTargetLowering::getSingleConstraintMatchWeight(
12607     AsmOperandInfo &info, const char *constraint) const {
12608   ConstraintWeight weight = CW_Invalid;
12609   Value *CallOperandVal = info.CallOperandVal;
12610     // If we don't have a value, we can't do a match,
12611     // but allow it at the lowest weight.
12612   if (!CallOperandVal)
12613     return CW_Default;
12614   Type *type = CallOperandVal->getType();
12615
12616   // Look at the constraint type.
12617   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
12618     return CW_Register; // an individual CR bit.
12619   else if ((StringRef(constraint) == "wa" ||
12620             StringRef(constraint) == "wd" ||
12621             StringRef(constraint) == "wf") &&
12622            type->isVectorTy())
12623     return CW_Register;
12624   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
12625     return CW_Register;
12626
12627   switch (*constraint) {
12628   default:
12629     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
12630     break;
12631   case 'b':
12632     if (type->isIntegerTy())
12633       weight = CW_Register;
12634     break;
12635   case 'f':
12636     if (type->isFloatTy())
12637       weight = CW_Register;
12638     break;
12639   case 'd':
12640     if (type->isDoubleTy())
12641       weight = CW_Register;
12642     break;
12643   case 'v':
12644     if (type->isVectorTy())
12645       weight = CW_Register;
12646     break;
12647   case 'y':
12648     weight = CW_Register;
12649     break;
12650   case 'Z':
12651     weight = CW_Memory;
12652     break;
12653   }
12654   return weight;
12655 }
12656
12657 std::pair<unsigned, const TargetRegisterClass *>
12658 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12659                                                 StringRef Constraint,
12660                                                 MVT VT) const {
12661   if (Constraint.size() == 1) {
12662     // GCC RS6000 Constraint Letters
12663     switch (Constraint[0]) {
12664     case 'b':   // R1-R31
12665       if (VT == MVT::i64 && Subtarget.isPPC64())
12666         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12667       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12668     case 'r':   // R0-R31
12669       if (VT == MVT::i64 && Subtarget.isPPC64())
12670         return std::make_pair(0U, &PPC::G8RCRegClass);
12671       return std::make_pair(0U, &PPC::GPRCRegClass);
12672     // 'd' and 'f' constraints are both defined to be "the floating point
12673     // registers", where one is for 32-bit and the other for 64-bit. We don't
12674     // really care overly much here so just give them all the same reg classes.
12675     case 'd':
12676     case 'f':
12677       if (VT == MVT::f32 || VT == MVT::i32)
12678         return std::make_pair(0U, &PPC::F4RCRegClass);
12679       if (VT == MVT::f64 || VT == MVT::i64)
12680         return std::make_pair(0U, &PPC::F8RCRegClass);
12681       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12682         return std::make_pair(0U, &PPC::QFRCRegClass);
12683       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12684         return std::make_pair(0U, &PPC::QSRCRegClass);
12685       break;
12686     case 'v':
12687       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12688         return std::make_pair(0U, &PPC::QFRCRegClass);
12689       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12690         return std::make_pair(0U, &PPC::QSRCRegClass);
12691       if (Subtarget.hasAltivec())
12692         return std::make_pair(0U, &PPC::VRRCRegClass);
12693     case 'y':   // crrc
12694       return std::make_pair(0U, &PPC::CRRCRegClass);
12695     }
12696   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12697     // An individual CR bit.
12698     return std::make_pair(0U, &PPC::CRBITRCRegClass);
12699   } else if ((Constraint == "wa" || Constraint == "wd" ||
12700              Constraint == "wf") && Subtarget.hasVSX()) {
12701     return std::make_pair(0U, &PPC::VSRCRegClass);
12702   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12703     if (VT == MVT::f32 && Subtarget.hasP8Vector())
12704       return std::make_pair(0U, &PPC::VSSRCRegClass);
12705     else
12706       return std::make_pair(0U, &PPC::VSFRCRegClass);
12707   }
12708
12709   std::pair<unsigned, const TargetRegisterClass *> R =
12710       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12711
12712   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12713   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12714   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12715   // register.
12716   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12717   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12718   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12719       PPC::GPRCRegClass.contains(R.first))
12720     return std::make_pair(TRI->getMatchingSuperReg(R.first,
12721                             PPC::sub_32, &PPC::G8RCRegClass),
12722                           &PPC::G8RCRegClass);
12723
12724   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12725   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12726     R.first = PPC::CR0;
12727     R.second = &PPC::CRRCRegClass;
12728   }
12729
12730   return R;
12731 }
12732
12733 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12734 /// vector.  If it is invalid, don't add anything to Ops.
12735 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12736                                                      std::string &Constraint,
12737                                                      std::vector<SDValue>&Ops,
12738                                                      SelectionDAG &DAG) const {
12739   SDValue Result;
12740
12741   // Only support length 1 constraints.
12742   if (Constraint.length() > 1) return;
12743
12744   char Letter = Constraint[0];
12745   switch (Letter) {
12746   default: break;
12747   case 'I':
12748   case 'J':
12749   case 'K':
12750   case 'L':
12751   case 'M':
12752   case 'N':
12753   case 'O':
12754   case 'P': {
12755     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12756     if (!CST) return; // Must be an immediate to match.
12757     SDLoc dl(Op);
12758     int64_t Value = CST->getSExtValue();
12759     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12760                          // numbers are printed as such.
12761     switch (Letter) {
12762     default: llvm_unreachable("Unknown constraint letter!");
12763     case 'I':  // "I" is a signed 16-bit constant.
12764       if (isInt<16>(Value))
12765         Result = DAG.getTargetConstant(Value, dl, TCVT);
12766       break;
12767     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
12768       if (isShiftedUInt<16, 16>(Value))
12769         Result = DAG.getTargetConstant(Value, dl, TCVT);
12770       break;
12771     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
12772       if (isShiftedInt<16, 16>(Value))
12773         Result = DAG.getTargetConstant(Value, dl, TCVT);
12774       break;
12775     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
12776       if (isUInt<16>(Value))
12777         Result = DAG.getTargetConstant(Value, dl, TCVT);
12778       break;
12779     case 'M':  // "M" is a constant that is greater than 31.
12780       if (Value > 31)
12781         Result = DAG.getTargetConstant(Value, dl, TCVT);
12782       break;
12783     case 'N':  // "N" is a positive constant that is an exact power of two.
12784       if (Value > 0 && isPowerOf2_64(Value))
12785         Result = DAG.getTargetConstant(Value, dl, TCVT);
12786       break;
12787     case 'O':  // "O" is the constant zero.
12788       if (Value == 0)
12789         Result = DAG.getTargetConstant(Value, dl, TCVT);
12790       break;
12791     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
12792       if (isInt<16>(-Value))
12793         Result = DAG.getTargetConstant(Value, dl, TCVT);
12794       break;
12795     }
12796     break;
12797   }
12798   }
12799
12800   if (Result.getNode()) {
12801     Ops.push_back(Result);
12802     return;
12803   }
12804
12805   // Handle standard constraint letters.
12806   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12807 }
12808
12809 // isLegalAddressingMode - Return true if the addressing mode represented
12810 // by AM is legal for this target, for a load/store of the specified type.
12811 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12812                                               const AddrMode &AM, Type *Ty,
12813                                               unsigned AS) const {
12814   // PPC does not allow r+i addressing modes for vectors!
12815   if (Ty->isVectorTy() && AM.BaseOffs != 0)
12816     return false;
12817
12818   // PPC allows a sign-extended 16-bit immediate field.
12819   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12820     return false;
12821
12822   // No global is ever allowed as a base.
12823   if (AM.BaseGV)
12824     return false;
12825
12826   // PPC only support r+r,
12827   switch (AM.Scale) {
12828   case 0:  // "r+i" or just "i", depending on HasBaseReg.
12829     break;
12830   case 1:
12831     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
12832       return false;
12833     // Otherwise we have r+r or r+i.
12834     break;
12835   case 2:
12836     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
12837       return false;
12838     // Allow 2*r as r+r.
12839     break;
12840   default:
12841     // No other scales are supported.
12842     return false;
12843   }
12844
12845   return true;
12846 }
12847
12848 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12849                                            SelectionDAG &DAG) const {
12850   MachineFunction &MF = DAG.getMachineFunction();
12851   MachineFrameInfo &MFI = MF.getFrameInfo();
12852   MFI.setReturnAddressIsTaken(true);
12853
12854   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12855     return SDValue();
12856
12857   SDLoc dl(Op);
12858   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12859
12860   // Make sure the function does not optimize away the store of the RA to
12861   // the stack.
12862   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12863   FuncInfo->setLRStoreRequired();
12864   bool isPPC64 = Subtarget.isPPC64();
12865   auto PtrVT = getPointerTy(MF.getDataLayout());
12866
12867   if (Depth > 0) {
12868     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12869     SDValue Offset =
12870         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12871                         isPPC64 ? MVT::i64 : MVT::i32);
12872     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12873                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12874                        MachinePointerInfo());
12875   }
12876
12877   // Just load the return address off the stack.
12878   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12879   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12880                      MachinePointerInfo());
12881 }
12882
12883 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12884                                           SelectionDAG &DAG) const {
12885   SDLoc dl(Op);
12886   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12887
12888   MachineFunction &MF = DAG.getMachineFunction();
12889   MachineFrameInfo &MFI = MF.getFrameInfo();
12890   MFI.setFrameAddressIsTaken(true);
12891
12892   EVT PtrVT = getPointerTy(MF.getDataLayout());
12893   bool isPPC64 = PtrVT == MVT::i64;
12894
12895   // Naked functions never have a frame pointer, and so we use r1. For all
12896   // other functions, this decision must be delayed until during PEI.
12897   unsigned FrameReg;
12898   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12899     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12900   else
12901     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12902
12903   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12904                                          PtrVT);
12905   while (Depth--)
12906     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12907                             FrameAddr, MachinePointerInfo());
12908   return FrameAddr;
12909 }
12910
12911 // FIXME? Maybe this could be a TableGen attribute on some registers and
12912 // this table could be generated automatically from RegInfo.
12913 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12914                                               SelectionDAG &DAG) const {
12915   bool isPPC64 = Subtarget.isPPC64();
12916   bool isDarwinABI = Subtarget.isDarwinABI();
12917
12918   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12919       (!isPPC64 && VT != MVT::i32))
12920     report_fatal_error("Invalid register global variable type");
12921
12922   bool is64Bit = isPPC64 && VT == MVT::i64;
12923   unsigned Reg = StringSwitch<unsigned>(RegName)
12924                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12925                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12926                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12927                                   (is64Bit ? PPC::X13 : PPC::R13))
12928                    .Default(0);
12929
12930   if (Reg)
12931     return Reg;
12932   report_fatal_error("Invalid register name global variable");
12933 }
12934
12935 bool
12936 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
12937   // The PowerPC target isn't yet aware of offsets.
12938   return false;
12939 }
12940
12941 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12942                                            const CallInst &I,
12943                                            unsigned Intrinsic) const {
12944   switch (Intrinsic) {
12945   case Intrinsic::ppc_qpx_qvlfd:
12946   case Intrinsic::ppc_qpx_qvlfs:
12947   case Intrinsic::ppc_qpx_qvlfcd:
12948   case Intrinsic::ppc_qpx_qvlfcs:
12949   case Intrinsic::ppc_qpx_qvlfiwa:
12950   case Intrinsic::ppc_qpx_qvlfiwz:
12951   case Intrinsic::ppc_altivec_lvx:
12952   case Intrinsic::ppc_altivec_lvxl:
12953   case Intrinsic::ppc_altivec_lvebx:
12954   case Intrinsic::ppc_altivec_lvehx:
12955   case Intrinsic::ppc_altivec_lvewx:
12956   case Intrinsic::ppc_vsx_lxvd2x:
12957   case Intrinsic::ppc_vsx_lxvw4x: {
12958     EVT VT;
12959     switch (Intrinsic) {
12960     case Intrinsic::ppc_altivec_lvebx:
12961       VT = MVT::i8;
12962       break;
12963     case Intrinsic::ppc_altivec_lvehx:
12964       VT = MVT::i16;
12965       break;
12966     case Intrinsic::ppc_altivec_lvewx:
12967       VT = MVT::i32;
12968       break;
12969     case Intrinsic::ppc_vsx_lxvd2x:
12970       VT = MVT::v2f64;
12971       break;
12972     case Intrinsic::ppc_qpx_qvlfd:
12973       VT = MVT::v4f64;
12974       break;
12975     case Intrinsic::ppc_qpx_qvlfs:
12976       VT = MVT::v4f32;
12977       break;
12978     case Intrinsic::ppc_qpx_qvlfcd:
12979       VT = MVT::v2f64;
12980       break;
12981     case Intrinsic::ppc_qpx_qvlfcs:
12982       VT = MVT::v2f32;
12983       break;
12984     default:
12985       VT = MVT::v4i32;
12986       break;
12987     }
12988
12989     Info.opc = ISD::INTRINSIC_W_CHAIN;
12990     Info.memVT = VT;
12991     Info.ptrVal = I.getArgOperand(0);
12992     Info.offset = -VT.getStoreSize()+1;
12993     Info.size = 2*VT.getStoreSize()-1;
12994     Info.align = 1;
12995     Info.vol = false;
12996     Info.readMem = true;
12997     Info.writeMem = false;
12998     return true;
12999   }
13000   case Intrinsic::ppc_qpx_qvlfda:
13001   case Intrinsic::ppc_qpx_qvlfsa:
13002   case Intrinsic::ppc_qpx_qvlfcda:
13003   case Intrinsic::ppc_qpx_qvlfcsa:
13004   case Intrinsic::ppc_qpx_qvlfiwaa:
13005   case Intrinsic::ppc_qpx_qvlfiwza: {
13006     EVT VT;
13007     switch (Intrinsic) {
13008     case Intrinsic::ppc_qpx_qvlfda:
13009       VT = MVT::v4f64;
13010       break;
13011     case Intrinsic::ppc_qpx_qvlfsa:
13012       VT = MVT::v4f32;
13013       break;
13014     case Intrinsic::ppc_qpx_qvlfcda:
13015       VT = MVT::v2f64;
13016       break;
13017     case Intrinsic::ppc_qpx_qvlfcsa:
13018       VT = MVT::v2f32;
13019       break;
13020     default:
13021       VT = MVT::v4i32;
13022       break;
13023     }
13024
13025     Info.opc = ISD::INTRINSIC_W_CHAIN;
13026     Info.memVT = VT;
13027     Info.ptrVal = I.getArgOperand(0);
13028     Info.offset = 0;
13029     Info.size = VT.getStoreSize();
13030     Info.align = 1;
13031     Info.vol = false;
13032     Info.readMem = true;
13033     Info.writeMem = false;
13034     return true;
13035   }
13036   case Intrinsic::ppc_qpx_qvstfd:
13037   case Intrinsic::ppc_qpx_qvstfs:
13038   case Intrinsic::ppc_qpx_qvstfcd:
13039   case Intrinsic::ppc_qpx_qvstfcs:
13040   case Intrinsic::ppc_qpx_qvstfiw:
13041   case Intrinsic::ppc_altivec_stvx:
13042   case Intrinsic::ppc_altivec_stvxl:
13043   case Intrinsic::ppc_altivec_stvebx:
13044   case Intrinsic::ppc_altivec_stvehx:
13045   case Intrinsic::ppc_altivec_stvewx:
13046   case Intrinsic::ppc_vsx_stxvd2x:
13047   case Intrinsic::ppc_vsx_stxvw4x: {
13048     EVT VT;
13049     switch (Intrinsic) {
13050     case Intrinsic::ppc_altivec_stvebx:
13051       VT = MVT::i8;
13052       break;
13053     case Intrinsic::ppc_altivec_stvehx:
13054       VT = MVT::i16;
13055       break;
13056     case Intrinsic::ppc_altivec_stvewx:
13057       VT = MVT::i32;
13058       break;
13059     case Intrinsic::ppc_vsx_stxvd2x:
13060       VT = MVT::v2f64;
13061       break;
13062     case Intrinsic::ppc_qpx_qvstfd:
13063       VT = MVT::v4f64;
13064       break;
13065     case Intrinsic::ppc_qpx_qvstfs:
13066       VT = MVT::v4f32;
13067       break;
13068     case Intrinsic::ppc_qpx_qvstfcd:
13069       VT = MVT::v2f64;
13070       break;
13071     case Intrinsic::ppc_qpx_qvstfcs:
13072       VT = MVT::v2f32;
13073       break;
13074     default:
13075       VT = MVT::v4i32;
13076       break;
13077     }
13078
13079     Info.opc = ISD::INTRINSIC_VOID;
13080     Info.memVT = VT;
13081     Info.ptrVal = I.getArgOperand(1);
13082     Info.offset = -VT.getStoreSize()+1;
13083     Info.size = 2*VT.getStoreSize()-1;
13084     Info.align = 1;
13085     Info.vol = false;
13086     Info.readMem = false;
13087     Info.writeMem = true;
13088     return true;
13089   }
13090   case Intrinsic::ppc_qpx_qvstfda:
13091   case Intrinsic::ppc_qpx_qvstfsa:
13092   case Intrinsic::ppc_qpx_qvstfcda:
13093   case Intrinsic::ppc_qpx_qvstfcsa:
13094   case Intrinsic::ppc_qpx_qvstfiwa: {
13095     EVT VT;
13096     switch (Intrinsic) {
13097     case Intrinsic::ppc_qpx_qvstfda:
13098       VT = MVT::v4f64;
13099       break;
13100     case Intrinsic::ppc_qpx_qvstfsa:
13101       VT = MVT::v4f32;
13102       break;
13103     case Intrinsic::ppc_qpx_qvstfcda:
13104       VT = MVT::v2f64;
13105       break;
13106     case Intrinsic::ppc_qpx_qvstfcsa:
13107       VT = MVT::v2f32;
13108       break;
13109     default:
13110       VT = MVT::v4i32;
13111       break;
13112     }
13113
13114     Info.opc = ISD::INTRINSIC_VOID;
13115     Info.memVT = VT;
13116     Info.ptrVal = I.getArgOperand(1);
13117     Info.offset = 0;
13118     Info.size = VT.getStoreSize();
13119     Info.align = 1;
13120     Info.vol = false;
13121     Info.readMem = false;
13122     Info.writeMem = true;
13123     return true;
13124   }
13125   default:
13126     break;
13127   }
13128
13129   return false;
13130 }
13131
13132 /// getOptimalMemOpType - Returns the target specific optimal type for load
13133 /// and store operations as a result of memset, memcpy, and memmove
13134 /// lowering. If DstAlign is zero that means it's safe to destination
13135 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
13136 /// means there isn't a need to check it against alignment requirement,
13137 /// probably because the source does not need to be loaded. If 'IsMemset' is
13138 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
13139 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
13140 /// source is constant so it does not need to be loaded.
13141 /// It returns EVT::Other if the type should be determined using generic
13142 /// target-independent logic.
13143 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
13144                                            unsigned DstAlign, unsigned SrcAlign,
13145                                            bool IsMemset, bool ZeroMemset,
13146                                            bool MemcpyStrSrc,
13147                                            MachineFunction &MF) const {
13148   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
13149     const Function *F = MF.getFunction();
13150     // When expanding a memset, require at least two QPX instructions to cover
13151     // the cost of loading the value to be stored from the constant pool.
13152     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13153        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13154         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13155       return MVT::v4f64;
13156     }
13157
13158     // We should use Altivec/VSX loads and stores when available. For unaligned
13159     // addresses, unaligned VSX loads are only fast starting with the P8.
13160     if (Subtarget.hasAltivec() && Size >= 16 &&
13161         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13162          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13163       return MVT::v4i32;
13164   }
13165
13166   if (Subtarget.isPPC64()) {
13167     return MVT::i64;
13168   }
13169
13170   return MVT::i32;
13171 }
13172
13173 /// \brief Returns true if it is beneficial to convert a load of a constant
13174 /// to just the constant itself.
13175 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
13176                                                           Type *Ty) const {
13177   assert(Ty->isIntegerTy());
13178
13179   unsigned BitSize = Ty->getPrimitiveSizeInBits();
13180   return !(BitSize == 0 || BitSize > 64);
13181 }
13182
13183 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
13184   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13185     return false;
13186   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
13187   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
13188   return NumBits1 == 64 && NumBits2 == 32;
13189 }
13190
13191 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
13192   if (!VT1.isInteger() || !VT2.isInteger())
13193     return false;
13194   unsigned NumBits1 = VT1.getSizeInBits();
13195   unsigned NumBits2 = VT2.getSizeInBits();
13196   return NumBits1 == 64 && NumBits2 == 32;
13197 }
13198
13199 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13200   // Generally speaking, zexts are not free, but they are free when they can be
13201   // folded with other operations.
13202   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13203     EVT MemVT = LD->getMemoryVT();
13204     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13205          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13206         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
13207          LD->getExtensionType() == ISD::ZEXTLOAD))
13208       return true;
13209   }
13210
13211   // FIXME: Add other cases...
13212   //  - 32-bit shifts with a zext to i64
13213   //  - zext after ctlz, bswap, etc.
13214   //  - zext after and by a constant mask
13215
13216   return TargetLowering::isZExtFree(Val, VT2);
13217 }
13218
13219 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
13220   assert(VT.isFloatingPoint());
13221   return true;
13222 }
13223
13224 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13225   return isInt<16>(Imm) || isUInt<16>(Imm);
13226 }
13227
13228 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13229   return isInt<16>(Imm) || isUInt<16>(Imm);
13230 }
13231
13232 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
13233                                                        unsigned,
13234                                                        unsigned,
13235                                                        bool *Fast) const {
13236   if (DisablePPCUnaligned)
13237     return false;
13238
13239   // PowerPC supports unaligned memory access for simple non-vector types.
13240   // Although accessing unaligned addresses is not as efficient as accessing
13241   // aligned addresses, it is generally more efficient than manual expansion,
13242   // and generally only traps for software emulation when crossing page
13243   // boundaries.
13244
13245   if (!VT.isSimple())
13246     return false;
13247
13248   if (VT.getSimpleVT().isVector()) {
13249     if (Subtarget.hasVSX()) {
13250       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13251           VT != MVT::v4f32 && VT != MVT::v4i32)
13252         return false;
13253     } else {
13254       return false;
13255     }
13256   }
13257
13258   if (VT == MVT::ppcf128)
13259     return false;
13260
13261   if (Fast)
13262     *Fast = true;
13263
13264   return true;
13265 }
13266
13267 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
13268   VT = VT.getScalarType();
13269
13270   if (!VT.isSimple())
13271     return false;
13272
13273   switch (VT.getSimpleVT().SimpleTy) {
13274   case MVT::f32:
13275   case MVT::f64:
13276     return true;
13277   default:
13278     break;
13279   }
13280
13281   return false;
13282 }
13283
13284 const MCPhysReg *
13285 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
13286   // LR is a callee-save register, but we must treat it as clobbered by any call
13287   // site. Hence we include LR in the scratch registers, which are in turn added
13288   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
13289   // to CTR, which is used by any indirect call.
13290   static const MCPhysReg ScratchRegs[] = {
13291     PPC::X12, PPC::LR8, PPC::CTR8, 0
13292   };
13293
13294   return ScratchRegs;
13295 }
13296
13297 unsigned PPCTargetLowering::getExceptionPointerRegister(
13298     const Constant *PersonalityFn) const {
13299   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13300 }
13301
13302 unsigned PPCTargetLowering::getExceptionSelectorRegister(
13303     const Constant *PersonalityFn) const {
13304   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13305 }
13306
13307 bool
13308 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
13309                      EVT VT , unsigned DefinedValues) const {
13310   if (VT == MVT::v2i64)
13311     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
13312
13313   if (Subtarget.hasVSX() || Subtarget.hasQPX())
13314     return true;
13315
13316   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
13317 }
13318
13319 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
13320   if (DisableILPPref || Subtarget.enableMachineScheduler())
13321     return TargetLowering::getSchedulingPreference(N);
13322
13323   return Sched::ILP;
13324 }
13325
13326 // Create a fast isel object.
13327 FastISel *
13328 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
13329                                   const TargetLibraryInfo *LibInfo) const {
13330   return PPC::createFastISel(FuncInfo, LibInfo);
13331 }
13332
13333 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13334   if (Subtarget.isDarwinABI()) return;
13335   if (!Subtarget.isPPC64()) return;
13336
13337   // Update IsSplitCSR in PPCFunctionInfo
13338   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
13339   PFI->setIsSplitCSR(true);
13340 }
13341
13342 void PPCTargetLowering::insertCopiesSplitCSR(
13343   MachineBasicBlock *Entry,
13344   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
13345   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13346   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
13347   if (!IStart)
13348     return;
13349
13350   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13351   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
13352   MachineBasicBlock::iterator MBBI = Entry->begin();
13353   for (const MCPhysReg *I = IStart; *I; ++I) {
13354     const TargetRegisterClass *RC = nullptr;
13355     if (PPC::G8RCRegClass.contains(*I))
13356       RC = &PPC::G8RCRegClass;
13357     else if (PPC::F8RCRegClass.contains(*I))
13358       RC = &PPC::F8RCRegClass;
13359     else if (PPC::CRRCRegClass.contains(*I))
13360       RC = &PPC::CRRCRegClass;
13361     else if (PPC::VRRCRegClass.contains(*I))
13362       RC = &PPC::VRRCRegClass;
13363     else
13364       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
13365
13366     unsigned NewVR = MRI->createVirtualRegister(RC);
13367     // Create copy from CSR to a virtual register.
13368     // FIXME: this currently does not emit CFI pseudo-instructions, it works
13369     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
13370     // nounwind. If we want to generalize this later, we may need to emit
13371     // CFI pseudo-instructions.
13372     assert(Entry->getParent()->getFunction()->hasFnAttribute(
13373              Attribute::NoUnwind) &&
13374            "Function should be nounwind in insertCopiesSplitCSR!");
13375     Entry->addLiveIn(*I);
13376     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
13377       .addReg(*I);
13378
13379     // Insert the copy-back instructions right before the terminator
13380     for (auto *Exit : Exits)
13381       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
13382               TII->get(TargetOpcode::COPY), *I)
13383         .addReg(NewVR);
13384   }
13385 }
13386
13387 // Override to enable LOAD_STACK_GUARD lowering on Linux.
13388 bool PPCTargetLowering::useLoadStackGuardNode() const {
13389   if (!Subtarget.isTargetLinux())
13390     return TargetLowering::useLoadStackGuardNode();
13391   return true;
13392 }
13393
13394 // Override to disable global variable loading on Linux.
13395 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
13396   if (!Subtarget.isTargetLinux())
13397     return TargetLowering::insertSSPDeclarations(M);
13398 }
13399
13400 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
13401   if (!VT.isSimple() || !Subtarget.hasVSX())
13402     return false;
13403
13404   switch(VT.getSimpleVT().SimpleTy) {
13405   default:
13406     // For FP types that are currently not supported by PPC backend, return
13407     // false. Examples: f16, f80.
13408     return false;
13409   case MVT::f32:
13410   case MVT::f64:
13411   case MVT::ppcf128:
13412     return Imm.isPosZero();
13413   }
13414 }
13415
13416 // For vector shift operation op, fold
13417 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
13418 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
13419                                   SelectionDAG &DAG) {
13420   SDValue N0 = N->getOperand(0);
13421   SDValue N1 = N->getOperand(1);
13422   EVT VT = N0.getValueType();
13423   unsigned OpSizeInBits = VT.getScalarSizeInBits();
13424   unsigned Opcode = N->getOpcode();
13425   unsigned TargetOpcode;
13426
13427   switch (Opcode) {
13428   default:
13429     llvm_unreachable("Unexpected shift operation");
13430   case ISD::SHL:
13431     TargetOpcode = PPCISD::SHL;
13432     break;
13433   case ISD::SRL:
13434     TargetOpcode = PPCISD::SRL;
13435     break;
13436   case ISD::SRA:
13437     TargetOpcode = PPCISD::SRA;
13438     break;
13439   }
13440
13441   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13442       N1->getOpcode() == ISD::AND)
13443     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13444       if (Mask->getZExtValue() == OpSizeInBits - 1)
13445         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
13446
13447   return SDValue();
13448 }
13449
13450 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
13451   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13452     return Value;
13453
13454   return SDValue();
13455 }
13456
13457 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
13458   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13459     return Value;
13460
13461   return SDValue();
13462 }
13463
13464 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
13465   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13466     return Value;
13467
13468   return SDValue();
13469 }